Use makeRequestOptions to generate inference snippets #1273

Merged: 38 commits into main from fix-openai-inference-snippets, Mar 19, 2025

Commits (all by Wauplin):
a3d70c0  focus on conversational for now (Mar 12, 2025)
c8aae06  Split makeRequestOptions into sync and async parts (Mar 12, 2025)
ea13266  Make snippets depend on makeRequestOptionsFromResolvedModel (Mar 12, 2025)
8ab7172  revert back to hf_token (Mar 12, 2025)
359a87e  ASR (Mar 12, 2025)
b7ac932  conversational-llm-stream (Mar 12, 2025)
dc76e7b  VLMs + fix in InferenceClient (Mar 12, 2025)
a6e6f25  DQA (Mar 12, 2025)
6ebe167  imageToImage (Mar 12, 2025)
5287f97  tabular (Mar 12, 2025)
bea1f97  text-to-aduio (Mar 12, 2025)
0a1ab31  text-to-image (Mar 12, 2025)
1925c61  text-to-video (Mar 12, 2025)
c489845  basic (Mar 12, 2025)
281ce27  text-classification (Mar 12, 2025)
f1c3367  basic-snippet--token-classification (Mar 12, 2025)
d776a4b  zero shot class (Mar 12, 2025)
bd36835  zero shot image class (Mar 12, 2025)
8c82e7a  textToAduio (Mar 12, 2025)
2cbdcec  remove conversational requests for now (Mar 12, 2025)
8a482e0  fix typing (Mar 12, 2025)
20ec0b0  Merge branch 'main' into fix-openai-inference-snippets (Mar 12, 2025)
ace6c41  fix lock (Mar 12, 2025)
8562523  might fix? (Mar 12, 2025)
52d3245  add Python request conversational (Mar 13, 2025)
c26b0bb  Add conversational snippets for requests (Mar 13, 2025)
7f574bd  better (Mar 13, 2025)
d443069  Pass provider inputs in raw payloads (Mar 13, 2025)
bfc4b4d  Merge branch 'main' into fix-openai-inference-snippets (Mar 13, 2025)
3d904d2  Merge branch 'main' into fix-openai-inference-snippets (Mar 14, 2025)
fc97b6a  Merge branch 'main' into fix-openai-inference-snippets (Mar 17, 2025)
c706553  Update packages/inference/src/lib/makeRequestOptions.ts (Mar 18, 2025)
7f6c7dd  Update packages/inference/src/providers/novita.ts (Mar 18, 2025)
df783f8  fix DQA (Mar 18, 2025)
16ae575  fix (Mar 18, 2025)
186a347  Generate js and curl snippets using templates (#1291) (Mar 18, 2025)
633847b  Merge branch 'main' into fix-openai-inference-snippets (Mar 18, 2025)
7686739  Merge branch 'main' into fix-openai-inference-snippets (Mar 19, 2025)
Files changed:
3 changes: 2 additions & 1 deletion packages/inference/package.json
@@ -56,7 +56,8 @@
     "prepublishOnly": "pnpm run build",
     "test": "vitest run --config vitest.config.mts",
     "test:browser": "vitest run --browser.name=chrome --browser.headless --config vitest.config.mts",
-    "check": "tsc"
+    "check": "tsc",
+    "dev": "tsup src/index.ts --format cjs,esm --watch"
   },
   "dependencies": {
     "@huggingface/tasks": "workspace:^",
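(A convenience for local development: with this script, running pnpm run dev from packages/inference should rebuild the CJS and ESM bundles with tsup on every source change.)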
63 changes: 43 additions & 20 deletions packages/inference/src/lib/makeRequestOptions.ts
@@ -45,7 +45,8 @@ const providerConfigs: Record<InferenceProvider, ProviderConfig> = {
 };
 
 /**
- * Helper that prepares request arguments
+ * Helper that prepares request arguments.
+ * This async version handle the model ID resolution step.
  */
 export async function makeRequestOptions(
 	args: RequestArgs & {
@@ -56,17 +57,15 @@ export async function makeRequestOptions(
 		/** In most cases (unless we pass a endpointUrl) we know the task */
 		task?: InferenceTask;
 		chatCompletion?: boolean;
-		/* Used internally to generate inference snippets (in which case model mapping is done separately) */
-		skipModelIdResolution?: boolean;
 	}
 ): Promise<{ url: string; info: RequestInit }> {
-	const { accessToken, endpointUrl, provider: maybeProvider, model: maybeModel, ...remainingArgs } = args;
+	const { provider: maybeProvider, model: maybeModel } = args;
 	const provider = maybeProvider ?? "hf-inference";
 	const providerConfig = providerConfigs[provider];
+	const { task, chatCompletion } = options ?? {};
 
-	const { includeCredentials, task, chatCompletion, signal, skipModelIdResolution } = options ?? {};
-
-	if (endpointUrl && provider !== "hf-inference") {
+	// Validate inputs
+	if (args.endpointUrl && provider !== "hf-inference") {
 		throw new Error(`Cannot use endpointUrl with a third-party provider.`);
 	}
 	if (maybeModel && isUrl(maybeModel)) {
@@ -81,19 +80,43 @@
 	if (providerConfig.clientSideRoutingOnly && !maybeModel) {
 		throw new Error(`Provider ${provider} requires a model ID to be passed directly.`);
 	}
 
 	// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
 	const hfModel = maybeModel ?? (await loadDefaultModel(task!));
-	const model = skipModelIdResolution
-		? hfModel
-		: providerConfig.clientSideRoutingOnly
-		  ? // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
-		    removeProviderPrefix(maybeModel!, provider)
-		  : // For closed-models API providers, one needs to pass the model ID directly (e.g. "gpt-3.5-turbo")
-		    await getProviderModelId({ model: hfModel, provider }, args, {
-				task,
-				chatCompletion,
-				fetch: options?.fetch,
-		    });
+	const resolvedModel = providerConfig.clientSideRoutingOnly
+		? // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
+		  removeProviderPrefix(maybeModel!, provider)
+		: await getProviderModelId({ model: hfModel, provider }, args, {
+				task,
+				chatCompletion,
+				fetch: options?.fetch,
+		  });
+
+	// Use the sync version with the resolved model
+	return makeRequestOptionsFromResolvedModel(resolvedModel, args, options);
+}
+
+/**
+ * Helper that prepares request arguments. - for internal use only
+ * This sync version skips the model ID resolution step
+ */
+export function makeRequestOptionsFromResolvedModel(
+	resolvedModel: string,
+	args: RequestArgs & {
+		data?: Blob | ArrayBuffer;
+		stream?: boolean;
+	},
+	options?: Options & {
+		task?: InferenceTask;
+		chatCompletion?: boolean;
+	}
+): { url: string; info: RequestInit } {
+	const { accessToken, endpointUrl, provider: maybeProvider, model, ...remainingArgs } = args;
+
+	const provider = maybeProvider ?? "hf-inference";
+	const providerConfig = providerConfigs[provider];
+
+	const { includeCredentials, task, chatCompletion, signal } = options ?? {};
+
 	const authMethod = (() => {
 		if (providerConfig.clientSideRoutingOnly) {
@@ -123,7 +146,7 @@
 			authMethod !== "provider-key"
 				? HF_HUB_INFERENCE_PROXY_TEMPLATE.replace("{{PROVIDER}}", provider)
 				: providerConfig.baseUrl,
-		model,
+		model: resolvedModel,
 		chatCompletion,
 		task,
 	});
@@ -154,7 +177,7 @@
 		: JSON.stringify(
 				providerConfig.makeBody({
 					args: remainingArgs as Record<string, unknown>,
-				model,
+				model: resolvedModel,
 				task,
 				chatCompletion,
 			})
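This split is what enables snippet generation: once the provider model ID is known, a snippet renderer can call the sync helper to get the exact URL, headers, and body, and print them instead of sending the request. A minimal sketch of that idea (illustrative values and import path only, not the PR's actual snippet code):

import { makeRequestOptionsFromResolvedModel } from "../lib/makeRequestOptions";

// resolvedModel is assumed to have been mapped to the provider's namespace
// already (the async makeRequestOptions normally does this via getProviderModelId).
const { url, info } = makeRequestOptionsFromResolvedModel(
	"meta-llama/Llama-3.3-70B-Instruct", // example value
	{ accessToken: "hf_xxx", provider: "hf-inference", model: "meta-llama/Llama-3.3-70B-Instruct", inputs: "Hello" },
	{ task: "text-generation" }
);

// No network call has happened; url and info (method, headers, body) can be
// rendered directly into a curl / JS / Python snippet.
console.log(`curl '${url}' -X ${info.method ?? "POST"} -d '${String(info.body)}'`);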
2 changes: 1 addition & 1 deletion packages/inference/src/providers/fireworks-ai.ts
@@ -30,7 +30,7 @@ const makeHeaders = (params: HeaderParams): Record<string, string> => {
 };
 
 const makeUrl = (params: UrlParams): string => {
-	if (params.task === "text-generation" && params.chatCompletion) {
+	if (params.chatCompletion) {
 		return `${params.baseUrl}/inference/v1/chat/completions`;
 	}
 	return `${params.baseUrl}/inference`;
2 changes: 1 addition & 1 deletion packages/inference/src/providers/hf-inference.ts
@@ -29,7 +29,7 @@ const makeUrl = (params: UrlParams): string => {
 		/// when deployed on hf-inference, those two tasks are automatically compatible with one another.
 		return `${params.baseUrl}/pipeline/${params.task}/${params.model}`;
 	}
-	if (params.task === "text-generation" && params.chatCompletion) {
+	if (params.chatCompletion) {
 		return `${params.baseUrl}/models/${params.model}/v1/chat/completions`;
 	}
 	return `${params.baseUrl}/models/${params.model}`;
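Relaxing this condition (here and in fireworks-ai and sambanova) routes every chat-completion request to the OpenAI-compatible endpoint regardless of task, which is what lets conversational VLM requests work. A rough illustration of the resulting URL (hypothetical model and base URL; makeUrl itself is module-private):

// Before: task === "text-generation" was also required, so an image-text-to-text
// model with chatCompletion: true fell through to the generic
// `${baseUrl}/models/${model}` route.
const url = makeUrl({
	baseUrl: "https://router.huggingface.co/hf-inference", // hypothetical
	model: "Qwen/Qwen2.5-VL-7B-Instruct", // hypothetical
	task: "image-text-to-text",
	chatCompletion: true,
});
// -> ".../models/Qwen/Qwen2.5-VL-7B-Instruct/v1/chat/completions"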
6 changes: 3 additions & 3 deletions packages/inference/src/providers/nebius.ts
@@ -33,10 +33,10 @@ const makeUrl = (params: UrlParams): string => {
 	if (params.task === "text-to-image") {
 		return `${params.baseUrl}/v1/images/generations`;
 	}
-	if (params.chatCompletion) {
-		return `${params.baseUrl}/v1/chat/completions`;
-	}
 	if (params.task === "text-generation") {
+		if (params.chatCompletion) {
+			return `${params.baseUrl}/v1/chat/completions`;
+		}
 		return `${params.baseUrl}/v1/completions`;
 	}
 	return params.baseUrl;
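Note the nesting moves in the opposite direction here: for Nebius (and Together below), the chat route is now taken only when the task is text-generation, so a chatCompletion flag on an unrelated task no longer hijacks the URL. A quick sanity sketch (placeholder base URL and model):

const B = "https://nebius.example"; // placeholder base URL
const M = "some-model"; // placeholder model ID

makeUrl({ baseUrl: B, model: M, task: "text-generation", chatCompletion: true });
// -> `${B}/v1/chat/completions` (unchanged behavior)
makeUrl({ baseUrl: B, model: M, task: "feature-extraction", chatCompletion: true });
// -> B (previously this returned `${B}/v1/chat/completions`)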
7 changes: 3 additions & 4 deletions packages/inference/src/providers/novita.ts
@@ -30,10 +30,9 @@ const makeHeaders = (params: HeaderParams): Record<string, string> => {
 };
 
 const makeUrl = (params: UrlParams): string => {
-	if (params.task === "text-generation") {
-		if (params.chatCompletion) {
-			return `${params.baseUrl}/v3/openai/chat/completions`;
-		}
+	if (params.chatCompletion) {
+		return `${params.baseUrl}/v3/openai/chat/completions`;
+	} else if (params.task === "text-generation") {
 		return `${params.baseUrl}/v3/openai/completions`;
 	} else if (params.task === "text-to-video") {
 		return `${params.baseUrl}/v3/hf/${params.model}`;
2 changes: 1 addition & 1 deletion packages/inference/src/providers/sambanova.ts
@@ -30,7 +30,7 @@ const makeHeaders = (params: HeaderParams): Record<string, string> => {
 };
 
 const makeUrl = (params: UrlParams): string => {
-	if (params.task === "text-generation" && params.chatCompletion) {
+	if (params.chatCompletion) {
 		return `${params.baseUrl}/v1/chat/completions`;
 	}
 	return params.baseUrl;
6 changes: 3 additions & 3 deletions packages/inference/src/providers/together.ts
@@ -33,10 +33,10 @@ const makeUrl = (params: UrlParams): string => {
 	if (params.task === "text-to-image") {
 		return `${params.baseUrl}/v1/images/generations`;
 	}
-	if (params.chatCompletion) {
-		return `${params.baseUrl}/v1/chat/completions`;
-	}
 	if (params.task === "text-generation") {
+		if (params.chatCompletion) {
+			return `${params.baseUrl}/v1/chat/completions`;
+		}
 		return `${params.baseUrl}/v1/completions`;
 	}
 	return params.baseUrl;
177 changes: 0 additions & 177 deletions packages/inference/src/snippets/curl.ts

This file was deleted.
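With makeRequestOptionsFromResolvedModel acting as the single source of truth for request URLs and payloads, the handwritten curl snippet builder became redundant: per commit 186a347 above, js and curl snippets are now generated from templates instead (#1291).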
