xinnan-tech · Ballen2270 · Apr 11, 2025 · Apr 11, 2025 · Apr 14, 2025
diff --git a/main/xiaozhi-server/config.yaml b/main/xiaozhi-server/config.yaml
@@ -314,6 +314,13 @@ LLM:
     bot_id: "你的bot_id"
     user_id: "你的user_id"
     personal_access_token: 你的coze个人令牌
+  XaiLLM:
+    # 定义LLM API类型
+    type: openai
+    api_key: 你的Xai web key     # xAI API，需要先在xAI平台创建API密钥并获取api_key
+    model_name: "grok-3-fast-beta"
+    base_url: "https://api.x.ai/v1"
+    max_tokens: 131072
   LMStudioLLM:
     # 定义LLM API类型
     type: openai
@@ -344,7 +351,7 @@ LLM:
     model_name: qwen2.5:3b-AWQ  # 使用的小模型名称，用于意图识别
     base_url: http://localhost:9997  # Xinference服务地址
 TTS:
-  # 当前支持的type为edge、doubao，可自行适配
+  # 当前支持的type为edge、doubao、azure，可自行适配
   EdgeTTS:
     # 定义TTS API类型
     type: edge
@@ -365,6 +372,19 @@ TTS:
     appid: 你的火山引擎语音合成服务appid
     access_token: 你的火山引擎语音合成服务access_token
     cluster: volcano_tts
+  AzureTTS:
+    # 定义TTS API类型
+    type: azure
+    # Azure语音服务订阅密钥，可在Azure门户获取
+    # 创建资源地址：https://portal.azure.com/#create/Microsoft.CognitiveServicesSpeechServices
+    subscription_key: 你的Azure语音合成服务密钥
+    # 服务区域，如eastus、westus等
+    region: westus
+    # 语音名称，可选值参考：https://learn.microsoft.com/zh-cn/azure/cognitive-services/speech-service/language-support?tabs=tts
+    voice_name: zh-CN-XiaochenMultilingualNeural
+    # 输出格式，可选值参考：https://learn.microsoft.com/zh-cn/azure/cognitive-services/speech-service/rest-text-to-speech?tabs=streaming#audio-outputs
+    output_format: riff-16khz-16bit-mono-pcm
+    output_dir: tmp/
   CosyVoiceSiliconflow:
     type: siliconflow
     # 硅基流动TTS

diff --git a/main/xiaozhi-server/core/providers/tts/azure.py b/main/xiaozhi-server/core/providers/tts/azure.py
@@ -0,0 +1,80 @@
+import os
+import asyncio
+import aiohttp
+import time
+from .base import TTSProviderBase
+
+class TTSProvider(TTSProviderBase):
+    def __init__(self, config, delete_audio_file):
+        super().__init__(config, delete_audio_file)
+        self.subscription_key = config.get("subscription_key")
+        self.region = config.get("region", "eastus")
+        self.voice_name = config.get("voice_name", "zh-CN-YunxiNeural")
+        self.output_format = config.get("output_format", "audio-24khz-48kbitrate-mono-mp3")
+        self.api_url = f"https://{self.region}.tts.speech.microsoft.com/cognitiveservices/v1"
+        self.token_url = f"https://{self.region}.api.cognitive.microsoft.com/sts/v1.0/issueToken"
+        self.access_token = None
+        self.token_expiry = 0
+
+    def generate_filename(self, extension=".wav"):
+        """生成唯一的音频文件名"""
+        return os.path.join(self.output_file, f"azure_tts_{os.urandom(4).hex()}{extension}")
+
+    async def _get_access_token(self):
+        """获取Azure TTS访问令牌"""
+        if time.time() < self.token_expiry and self.access_token:
+            return self.access_token
+
+        headers = {
+            "Ocp-Apim-Subscription-Key": self.subscription_key,
+            "Content-Type": "application/x-www-form-urlencoded"
+        }
+
+        try:
+            async with aiohttp.ClientSession() as session:
+                async with session.post(
+                    self.token_url,
+                    headers=headers
+                ) as response:
+                    if response.status == 200:
+                        self.access_token = await response.text()
+                        self.token_expiry = time.time() + 540  # 令牌有效期9分钟(540秒)
+                        return self.access_token
+                    else:
+                        error = await response.text()
+                        raise Exception(f"获取Azure TTS令牌失败: {response.status} - {error}")
+        except Exception as e:
+            raise Exception(f"获取Azure TTS令牌异常: {e}")
+
+    async def text_to_speak(self, text, output_file):
+        """调用Azure TTS API将文本转换为语音"""
+        token = await self._get_access_token()
+        headers = {
+            "Authorization": f"Bearer {token}",
+            "Content-Type": "application/ssml+xml",
+            "X-Microsoft-OutputFormat": self.output_format,
+            "User-Agent": "xiaozhi-server"
+        }
+
+        ssml = f"""<speak version='1.0' xml:lang='zh-CN'>
+            <voice name='{self.voice_name}'>
+                {text}
+            </voice>
+        </speak>"""
+
+        try:
+            async with aiohttp.ClientSession() as session:
+                async with session.post(
+                    self.api_url,
+                    headers=headers,
+                    data=ssml.encode("utf-8")
+                ) as response:
+                    if response.status == 200:
+                        with open(output_file, "wb") as f:
+                            f.write(await response.read())
+                    else:
+                        error = await response.text()
+                        headers = response.headers
+                        raise Exception(f"Azure TTS请求失败: {response.status} - 错误信息: {error}, 完整响应: {response}")
+        except Exception as e:
+            raise Exception(f"Azure TTS请求异常: {e}")