Skip to content

Commit d623622

Browse files
authored
Merge branch 'huggingface:main' into add_gb10
2 parents 8bec996 + 2c2de89 commit d623622

File tree

23 files changed: +693 -8 lines changed

packages/inference/src/lib/getProviderHelper.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ import type {
2828
ImageToImageTaskHelper,
2929
ImageToTextTaskHelper,
3030
ImageToVideoTaskHelper,
31+
ImageTextToImageTaskHelper,
32+
ImageTextToVideoTaskHelper,
3133
ObjectDetectionTaskHelper,
3234
QuestionAnsweringTaskHelper,
3335
SentenceSimilarityTaskHelper,
@@ -276,6 +278,14 @@ export function getProviderHelper(
276278
provider: InferenceProviderOrPolicy,
277279
task: "image-to-video"
278280
): ImageToVideoTaskHelper & TaskProviderHelper;
281+
export function getProviderHelper(
282+
provider: InferenceProviderOrPolicy,
283+
task: "image-text-to-image"
284+
): ImageTextToImageTaskHelper & TaskProviderHelper;
285+
export function getProviderHelper(
286+
provider: InferenceProviderOrPolicy,
287+
task: "image-text-to-video"
288+
): ImageTextToVideoTaskHelper & TaskProviderHelper;
279289
export function getProviderHelper(
280290
provider: InferenceProviderOrPolicy,
281291
task: "sentence-similarity"

packages/inference/src/providers/providerHelper.ts

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ import type {
1919
ImageToTextInput,
2020
ImageToTextOutput,
2121
ImageToVideoInput,
22+
ImageTextToImageInput,
23+
ImageTextToVideoInput,
2224
ObjectDetectionInput,
2325
ObjectDetectionOutput,
2426
QuestionAnsweringInput,
@@ -54,6 +56,8 @@ import { toArray } from "../utils/toArray.js";
5456
import type { ImageToImageArgs } from "../tasks/cv/imageToImage.js";
5557
import type { AutomaticSpeechRecognitionArgs } from "../tasks/audio/automaticSpeechRecognition.js";
5658
import type { ImageToVideoArgs } from "../tasks/cv/imageToVideo.js";
59+
import type { ImageTextToImageArgs } from "../tasks/cv/imageTextToImage.js";
60+
import type { ImageTextToVideoArgs } from "../tasks/cv/imageTextToVideo.js";
5761
import type { ImageSegmentationArgs } from "../tasks/cv/imageSegmentation.js";
5862

5963
/**
@@ -159,6 +163,18 @@ export interface ImageToVideoTaskHelper {
159163
preparePayloadAsync(args: ImageToVideoArgs): Promise<RequestArgs>;
160164
}
161165

166+
export interface ImageTextToImageTaskHelper {
167+
getResponse(response: unknown, url?: string, headers?: HeadersInit): Promise<Blob>;
168+
preparePayload(params: BodyParams<ImageTextToImageInput & BaseArgs>): Record<string, unknown>;
169+
preparePayloadAsync(args: ImageTextToImageArgs): Promise<RequestArgs>;
170+
}
171+
172+
export interface ImageTextToVideoTaskHelper {
173+
getResponse(response: unknown, url?: string, headers?: HeadersInit): Promise<Blob>;
174+
preparePayload(params: BodyParams<ImageTextToVideoInput & BaseArgs>): Record<string, unknown>;
175+
preparePayloadAsync(args: ImageTextToVideoArgs): Promise<RequestArgs>;
176+
}
177+
162178
export interface ImageSegmentationTaskHelper {
163179
getResponse(response: unknown, url?: string, headers?: HeadersInit): Promise<ImageSegmentationOutput>;
164180
preparePayload(params: BodyParams<ImageSegmentationInput & BaseArgs>): Record<string, unknown> | BodyInit;

packages/inference/src/snippets/getInferenceSnippets.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,10 @@ const HF_PYTHON_METHODS: Partial<Record<WidgetType, string>> = {
9696
"image-classification": "image_classification",
9797
"image-segmentation": "image_segmentation",
9898
"image-to-image": "image_to_image",
99+
"image-to-video": "image_to_video",
99100
"image-to-text": "image_to_text",
101+
"image-text-to-image": "image_text_to_image",
102+
"image-text-to-video": "image_text_to_video",
100103
"object-detection": "object_detection",
101104
"question-answering": "question_answering",
102105
"sentence-similarity": "sentence_similarity",
@@ -390,7 +393,9 @@ const snippets: Partial<
390393
"fill-mask": snippetGenerator("basic"),
391394
"image-classification": snippetGenerator("basicImage"),
392395
"image-segmentation": snippetGenerator("basicImage"),
396+
"image-text-to-image": snippetGenerator("imageToImage", prepareImageToImageInput),
393397
"image-text-to-text": snippetGenerator("conversational"),
398+
"image-text-to-video": snippetGenerator("imageToVideo", prepareImageToImageInput),
394399
"image-to-image": snippetGenerator("imageToImage", prepareImageToImageInput),
395400
"image-to-text": snippetGenerator("basicImage"),
396401
"image-to-video": snippetGenerator("imageToVideo", prepareImageToImageInput),

packages/inference/src/tasks/cv/imageTextToImage.ts

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import type { ImageTextToImageInput } from "@huggingface/tasks";
2+
import { resolveProvider } from "../../lib/getInferenceProviderMapping.js";
3+
import { getProviderHelper } from "../../lib/getProviderHelper.js";
4+
import type { BaseArgs, Options } from "../../types.js";
5+
import { innerRequest } from "../../utils/request.js";
6+
7+
export type ImageTextToImageArgs = BaseArgs & ImageTextToImageInput;
8+
9+
/**
10+
* This task takes an image and text input and outputs a new generated image.
11+
* Recommended model: black-forest-labs/FLUX.2-dev
12+
*/
13+
export async function imageTextToImage(args: ImageTextToImageArgs, options?: Options): Promise<Blob> {
14+
const provider = await resolveProvider(args.provider, args.model, args.endpointUrl);
15+
const providerHelper = getProviderHelper(provider, "image-text-to-image");
16+
const payload = await providerHelper.preparePayloadAsync(args);
17+
const { data: res, requestContext } = await innerRequest<Blob>(payload, providerHelper, {
18+
...options,
19+
task: "image-text-to-image",
20+
});
21+
return providerHelper.getResponse(res, requestContext.url, requestContext.info.headers as Record<string, string>);
22+
}

packages/inference/src/tasks/cv/imageTextToVideo.ts

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import type { ImageTextToVideoInput } from "@huggingface/tasks";
2+
import { resolveProvider } from "../../lib/getInferenceProviderMapping.js";
3+
import { getProviderHelper } from "../../lib/getProviderHelper.js";
4+
import type { BaseArgs, Options } from "../../types.js";
5+
import { innerRequest } from "../../utils/request.js";
6+
7+
export type ImageTextToVideoArgs = BaseArgs & ImageTextToVideoInput;
8+
9+
/**
10+
* This task takes an image and text input and outputs a generated video.
11+
* Recommended model: Lightricks/LTX-Video
12+
*/
13+
export async function imageTextToVideo(args: ImageTextToVideoArgs, options?: Options): Promise<Blob> {
14+
const provider = await resolveProvider(args.provider, args.model, args.endpointUrl);
15+
const providerHelper = getProviderHelper(provider, "image-text-to-video");
16+
const payload = await providerHelper.preparePayloadAsync(args);
17+
const { data: res, requestContext } = await innerRequest<Blob>(payload, providerHelper, {
18+
...options,
19+
task: "image-text-to-video",
20+
});
21+
return providerHelper.getResponse(res, requestContext.url, requestContext.info.headers as Record<string, string>);
22+
}

packages/inference/src/tasks/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ export * from "./cv/imageSegmentation.js";
1414
export * from "./cv/imageToImage.js";
1515
export * from "./cv/imageToText.js";
1616
export * from "./cv/imageToVideo.js";
17+
export * from "./cv/imageTextToImage.js";
18+
export * from "./cv/imageTextToVideo.js";
1719
export * from "./cv/objectDetection.js";
1820
export * from "./cv/textToImage.js";
1921
export * from "./cv/textToVideo.js";

packages/tasks/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"name": "@huggingface/tasks",
33
"packageManager": "pnpm@10.10.0",
4-
"version": "0.19.65",
4+
"version": "0.19.66",
55
"description": "List of ML tasks for huggingface.co/tasks",
66
"repository": "https://github.com/huggingface/huggingface.js.git",
77
"publishConfig": {

packages/tasks/src/local-apps.ts

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -517,13 +517,6 @@ export const LOCAL_APPS = {
517517
model.tags.includes("coreml") && model.tags.includes("joyfusion") && model.pipeline_tag === "text-to-image",
518518
deeplink: (model) => new URL(`https://joyfusion.app/import_from_hf?repo_id=${model.id}`),
519519
},
520-
invoke: {
521-
prettyLabel: "Invoke",
522-
docsUrl: "https://github.com/invoke-ai/InvokeAI",
523-
mainTask: "text-to-image",
524-
displayOnModelPage: (model) => model.library_name === "diffusers" && model.pipeline_tag === "text-to-image",
525-
deeplink: (model) => new URL(`https://models.invoke.ai/huggingface/${model.id}`),
526-
},
527520
ollama: {
528521
prettyLabel: "Ollama",
529522
docsUrl: "https://ollama.com",

packages/tasks/src/model-libraries-snippets.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,19 @@ output = model.generate(text)
331331
sf.write("simple.mp3", output, 44100)`,
332332
];
333333

334+
export const dia2 = (model: ModelData): string[] => [
335+
`from dia2 import Dia2, GenerationConfig, SamplingConfig
336+
337+
dia = Dia2.from_repo("${model.id}", device="cuda", dtype="bfloat16")
338+
config = GenerationConfig(
339+
cfg_scale=2.0,
340+
audio=SamplingConfig(temperature=0.8, top_k=50),
341+
use_cuda_graph=True,
342+
)
343+
result = dia.generate("[S1] Hello Dia2!", config=config, output_wav="hello.wav", verbose=True)
344+
`,
345+
];
346+
334347
export const describe_anything = (model: ModelData): string[] => [
335348
`# pip install git+https://github.com/NVlabs/describe-anything
336349
from huggingface_hub import snapshot_download

packages/tasks/src/model-libraries.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,13 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
293293
snippets: snippets.dia,
294294
filter: false,
295295
},
296+
dia2: {
297+
prettyLabel: "Dia2",
298+
repoName: "Dia2",
299+
repoUrl: "https://github.com/nari-labs/dia2",
300+
snippets: snippets.dia2,
301+
filter: false,
302+
},
296303
"diff-interpretation-tuning": {
297304
prettyLabel: "Diff Interpretation Tuning",
298305
repoName: "Diff Interpretation Tuning",
@@ -413,6 +420,13 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
413420
filter: true,
414421
countDownloads: `path_extension:"bin"`,
415422
},
423+
fixer: {
424+
prettyLabel: "Fixer",
425+
repoName: "Fixer",
426+
repoUrl: "https://github.com/nv-tlabs/Fixer",
427+
filter: false,
428+
countDownloads: `path:"pretrained/pretrained_fixer.pkl"`,
429+
},
416430
flair: {
417431
prettyLabel: "Flair",
418432
repoName: "Flair",

0 commit comments

Comments
 (0)