diff --git a/src/huggingface_hub/inference/_client.py b/src/huggingface_hub/inference/_client.py index eb26e8e94e..a2576169f7 100644 --- a/src/huggingface_hub/inference/_client.py +++ b/src/huggingface_hub/inference/_client.py @@ -133,7 +133,7 @@ class InferenceClient: path will be appended to the base URL (see the [TGI Messages API](https://huggingface.co/docs/text-generation-inference/en/messages_api) documentation for details). When passing a URL as `model`, the client will not append any suffix path to it. provider (`str`, *optional*): - Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"cohere"`, `"fal-ai"`, `"fireworks-ai"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"openai"`, `"replicate"`, "sambanova"` or `"together"`. + Name of the provider to use for inference. Can be `"black-forest-labs"`, `"centml"`, `"cerebras"`, `"cohere"`, `"fal-ai"`, `"fireworks-ai"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"openai"`, `"replicate"`, `"sambanova"` or `"together"`. defaults to hf-inference (Hugging Face Serverless Inference API). If model is a URL or `base_url` is passed, then `provider` is not used. 
token (`str`, *optional*): diff --git a/src/huggingface_hub/inference/_providers/__init__.py b/src/huggingface_hub/inference/_providers/__init__.py index c96b8700e1..aa3d9b4392 100644 --- a/src/huggingface_hub/inference/_providers/__init__.py +++ b/src/huggingface_hub/inference/_providers/__init__.py @@ -19,10 +19,11 @@ from .replicate import ReplicateTask, ReplicateTextToSpeechTask from .sambanova import SambanovaConversationalTask from .together import TogetherConversationalTask, TogetherTextGenerationTask, TogetherTextToImageTask - +from .centml import CentmlConversationalTask, CentmlTextGenerationTask PROVIDER_T = Literal[ "black-forest-labs", + "centml", "cerebras", "cohere", "fal-ai", @@ -41,6 +42,10 @@ "black-forest-labs": { "text-to-image": BlackForestLabsTextToImageTask(), }, + "centml": { + "conversational": CentmlConversationalTask(), + "text-generation": CentmlTextGenerationTask(), + }, "cerebras": { "conversational": CerebrasConversationalTask(), }, diff --git a/src/huggingface_hub/inference/_providers/centml.py b/src/huggingface_hub/inference/_providers/centml.py new file mode 100644 index 0000000000..8619bf2b8f --- /dev/null +++ b/src/huggingface_hub/inference/_providers/centml.py @@ -0,0 +1,50 @@ +from typing import Optional + +from huggingface_hub.inference._providers._common import ( + BaseConversationalTask, + BaseTextGenerationTask, +) + + +class CentmlConversationalTask(BaseConversationalTask): + """ + Provider helper for centml conversational (chat completions) tasks. + This helper builds requests in the OpenAI API format. + """ + + def __init__(self): + # Set the provider name to "centml" and use the centml serverless endpoint URL. 
+ super().__init__(provider="centml", base_url="https://api.centml.com/openai") + + def _prepare_api_key(self, api_key: Optional[str]) -> str: + if api_key is None: + raise ValueError( + "An API key must be provided to use the centml provider.") + return api_key + + def _prepare_mapped_model(self, model: Optional[str]) -> str: + if model is None: + raise ValueError("Please provide a centml model ID.") + return model + + +class CentmlTextGenerationTask(BaseTextGenerationTask): + """ + Provider helper for centml text generation (completions) tasks. + This helper builds requests in the OpenAI API format. + """ + + def __init__(self): + super().__init__(provider="centml", base_url="https://api.centml.com/openai") + + def _prepare_api_key(self, api_key: Optional[str]) -> str: + if api_key is None: + raise ValueError( + "An API key must be provided to use the centml provider.") + return api_key + + def _prepare_mapped_model(self, model: Optional[str]) -> str: + if model is None: + raise ValueError("Please provide a centml model ID.") + return model + diff --git a/tests/cassettes/TestInferenceClient.test_chat_completion_no_stream[centml,conversational].yaml b/tests/cassettes/TestInferenceClient.test_chat_completion_no_stream[centml,conversational].yaml new file mode 100644 index 0000000000..b3a977f1de --- /dev/null +++ b/tests/cassettes/TestInferenceClient.test_chat_completion_no_stream[centml,conversational].yaml @@ -0,0 +1,75 @@ +interactions: +- request: + body: '{"messages": [{"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "What is deep learning?"}], "model": "meta-llama/Llama-3.3-70B-Instruct", + "stream": false}' + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '195' + Content-Type: + - application/json + X-Amzn-Trace-Id: + - 9f95510c-8aae-4df7-820e-eafbc8ad396f + method: POST + uri: https://api.centml.com/openai/v1/chat/completions + response: + body: 
+ string: '{"id":"chatcmpl-de1b282d4615cdcf51313490db81295a","object":"chat.completion","created":1742933815,"model":"meta-llama/Llama-3.3-70B-Instruct","choices":[{"index":0,"message":{"role":"assistant","reasoning_content":null,"content":"**Deep + Learning: An Overview**\n=====================================\n\nDeep learning + is a subset of machine learning that involves the use of artificial neural + networks to analyze and interpret data. These neural networks are designed + to mimic the structure and function of the human brain, with multiple layers + of interconnected nodes (neurons) that process and transmit information.\n\n**Key + Characteristics:**\n\n1. **Artificial Neural Networks**: Deep learning models + are based on artificial neural networks, which are composed of multiple layers + of nodes (neurons) that process and transmit information.\n2. **Multiple Layers**: + Deep learning models have multiple layers, each of which performs a specific + function, such as feature extraction, feature transformation, or classification.\n3. + **Hierarchical Representation**: Deep learning models learn hierarchical representations + of data, with early layers learning low-level features and later layers learning + higher-level features.\n4. **Large Amounts of Data**: Deep learning models + require large amounts of data to train, as they need to learn complex patterns + and relationships in the data.\n\n**Types of Deep Learning Models:**\n\n1. + **Convolutional Neural Networks (CNNs)**: Used for image and video processing, + CNNs are designed to extract features from spatially structured data.\n2. + **Recurrent Neural Networks (RNNs)**: Used for sequential data, such as speech + or text, RNNs are designed to model temporal relationships in data.\n3. **Autoencoders**: + Used for dimensionality reduction and generative modeling, autoencoders are + designed to learn compact representations of data.\n\n**Applications:**\n\n1. 
+ **Computer Vision**: Deep learning models are widely used in computer vision + applications, such as image classification, object detection, and segmentation.\n2. + **Natural Language Processing**: Deep learning models are used in NLP applications, + such as language modeling, text classification, and machine translation.\n3. + **Speech Recognition**: Deep learning models are used in speech recognition + applications, such as speech-to-text and voice recognition.\n\n**Advantages:**\n\n1. + **High Accuracy**: Deep learning models can achieve high accuracy in complex + tasks, such as image recognition and speech recognition.\n2. **Flexibility**: + Deep learning models can be used in a wide range of applications, from computer + vision to NLP.\n3. **Scalability**: Deep learning models can be trained on + large datasets and can scale to large applications.\n\n**Challenges:**\n\n1. + **Computational Requirements**: Deep learning models require significant computational + resources to train and deploy.\n2. **Data Requirements**: Deep learning models + require large amounts of data to train, which can be difficult to obtain.\n3. 
+ **Interpretability**: Deep learning models can be difficult to interpret, + making it challenging to understand why a particular decision was made.","tool_calls":[]},"logprobs":null,"finish_reason":"stop","stop_reason":null}],"usage":{"prompt_tokens":46,"total_tokens":595,"completion_tokens":549,"prompt_tokens_details":null},"prompt_logprobs":null}' + headers: + content-type: + - application/json + date: + - Tue, 25 Mar 2025 20:16:54 GMT + server: + - istio-envoy + transfer-encoding: + - chunked + x-envoy-upstream-service-time: + - '3844' + status: + code: 200 + message: OK +version: 1 diff --git a/tests/cassettes/TestInferenceClient.test_chat_completion_with_stream[centml,conversational].yaml b/tests/cassettes/TestInferenceClient.test_chat_completion_with_stream[centml,conversational].yaml new file mode 100644 index 0000000000..a1debc14af --- /dev/null +++ b/tests/cassettes/TestInferenceClient.test_chat_completion_with_stream[centml,conversational].yaml @@ -0,0 +1,70 @@ +interactions: +- request: + body: '{"messages": [{"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "What is deep learning?"}], "model": "meta-llama/Llama-3.3-70B-Instruct", + "max_tokens": 20, "stream": true}' + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '212' + Content-Type: + - application/json + X-Amzn-Trace-Id: + - ad425e9f-bc1f-48df-b84d-a5ebad74cd66 + method: POST + uri: https://api.centml.com/openai/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-7694e7d5663b4d0e22706f8260bae6df","object":"chat.completion.chunk","created":1742933819,"model":"meta-llama/Llama-3.3-70B-Instruct","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}]} + + + data: 
{"id":"chatcmpl-7694e7d5663b4d0e22706f8260bae6df","object":"chat.completion.chunk","created":1742933819,"model":"meta-llama/Llama-3.3-70B-Instruct","choices":[{"index":0,"delta":{"content":"**"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-7694e7d5663b4d0e22706f8260bae6df","object":"chat.completion.chunk","created":1742933819,"model":"meta-llama/Llama-3.3-70B-Instruct","choices":[{"index":0,"delta":{"content":"Deep"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-7694e7d5663b4d0e22706f8260bae6df","object":"chat.completion.chunk","created":1742933819,"model":"meta-llama/Llama-3.3-70B-Instruct","choices":[{"index":0,"delta":{"content":" + Learning Overview"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-7694e7d5663b4d0e22706f8260bae6df","object":"chat.completion.chunk","created":1742933819,"model":"meta-llama/Llama-3.3-70B-Instruct","choices":[{"index":0,"delta":{"content":"**\n=========================\n\nDeep + learning is a"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-7694e7d5663b4d0e22706f8260bae6df","object":"chat.completion.chunk","created":1742933819,"model":"meta-llama/Llama-3.3-70B-Instruct","choices":[{"index":0,"delta":{"content":" + subset"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-7694e7d5663b4d0e22706f8260bae6df","object":"chat.completion.chunk","created":1742933819,"model":"meta-llama/Llama-3.3-70B-Instruct","choices":[{"index":0,"delta":{"content":" + of machine learning that involves"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-7694e7d5663b4d0e22706f8260bae6df","object":"chat.completion.chunk","created":1742933819,"model":"meta-llama/Llama-3.3-70B-Instruct","choices":[{"index":0,"delta":{"content":" + the use"},"logprobs":null,"finish_reason":"length","stop_reason":null}]} + + + data: [DONE] + + + ' + headers: + content-type: + - text/event-stream; charset=utf-8; charset=utf-8 + date: + - Tue, 25 Mar 2025 
20:16:58 GMT + server: + - istio-envoy + transfer-encoding: + - chunked + x-envoy-upstream-service-time: + - '328' + status: + code: 200 + message: OK +version: 1 diff --git a/tests/test_inference_client.py b/tests/test_inference_client.py index 85b9593b55..4e328d519b 100644 --- a/tests/test_inference_client.py +++ b/tests/test_inference_client.py @@ -63,6 +63,10 @@ "black-forest-labs": { "text-to-image": "black-forest-labs/FLUX.1-dev", }, + "centml": { + "conversational": "meta-llama/Llama-3.3-70B-Instruct", + "text-generation": "meta-llama/Llama-3.2-3B-Instruct", + }, "cerebras": { "conversational": "meta-llama/Llama-3.3-70B-Instruct", },