diff --git a/main/xiaozhi-server/config.yaml b/main/xiaozhi-server/config.yaml
index 9649244ba..c1e122a02 100644
--- a/main/xiaozhi-server/config.yaml
+++ b/main/xiaozhi-server/config.yaml
@@ -314,6 +314,13 @@ LLM:
     bot_id: "你的bot_id"
     user_id: "你的user_id"
     personal_access_token: 你的coze个人令牌
+  XaiLLM:
+    # 定义LLM API类型
+    type: openai
+    api_key: 你的Xai web key # Xai API,需要先在xAI平台创建API密钥并获取api_key
+    model_name: "grok-3-fast-beta"
+    base_url: "https://api.x.ai/v1"
+    max_tokens: 131072
   LMStudioLLM:
     # 定义LLM API类型
     type: openai
@@ -344,7 +351,7 @@ LLM:
     model_name: qwen2.5:3b-AWQ # 使用的小模型名称,用于意图识别
     base_url: http://localhost:9997 # Xinference服务地址
 TTS:
-  # 当前支持的type为edge、doubao,可自行适配
+  # 当前支持的type为edge、doubao、azure,可自行适配
   EdgeTTS:
     # 定义TTS API类型
     type: edge
@@ -365,6 +372,19 @@ TTS:
     appid: 你的火山引擎语音合成服务appid
     access_token: 你的火山引擎语音合成服务access_token
     cluster: volcano_tts
+  AzureTTS:
+    # 定义TTS API类型
+    type: azure
+    # Azure语音服务订阅密钥,可在Azure门户获取
+    # 创建资源地址:https://portal.azure.com/#create/Microsoft.CognitiveServicesSpeechServices
+    subscription_key: 你的Azure语音合成服务密钥
+    # 服务区域,如eastus、westus等
+    region: westus
+    # 语音名称,可选值参考:https://learn.microsoft.com/zh-cn/azure/cognitive-services/speech-service/language-support?tabs=tts
+    voice_name: zh-CN-XiaochenMultilingualNeural
+    # 输出格式,可选值参考:https://learn.microsoft.com/zh-cn/azure/cognitive-services/speech-service/rest-text-to-speech?tabs=streaming#audio-outputs
+    output_format: riff-16khz-16bit-mono-pcm
+    output_dir: tmp/
   CosyVoiceSiliconflow:
     type: siliconflow
     # 硅基流动TTS
diff --git a/main/xiaozhi-server/core/providers/tts/azure.py b/main/xiaozhi-server/core/providers/tts/azure.py
new file mode 100644
index 000000000..8978b0315
--- /dev/null
+++ b/main/xiaozhi-server/core/providers/tts/azure.py
@@ -0,0 +1,139 @@
+import asyncio
+import os
+import time
+from xml.sax.saxutils import escape, quoteattr
+
+import aiohttp
+
+from .base import TTSProviderBase
+
+
+class TTSProvider(TTSProviderBase):
+    """Text-to-speech provider that calls the Azure Speech REST API.
+
+    The subscription key is exchanged for a short-lived bearer token, which
+    is cached on the instance and reused until it is close to expiring.
+    """
+
+    # Seconds a cached token is reused before a new one is requested. Azure
+    # documents a 10-minute token lifetime; 9 minutes leaves a safety margin.
+    TOKEN_TTL_SECONDS = 540
+
+    def __init__(self, config, delete_audio_file):
+        """Read the Azure settings from ``config`` and derive the endpoint URLs."""
+        super().__init__(config, delete_audio_file)
+        self.subscription_key = config.get("subscription_key")
+        self.region = config.get("region", "eastus")
+        self.voice_name = config.get("voice_name", "zh-CN-YunxiNeural")
+        self.output_format = config.get("output_format", "audio-24khz-48kbitrate-mono-mp3")
+        self.api_url = f"https://{self.region}.tts.speech.microsoft.com/cognitiveservices/v1"
+        self.token_url = f"https://{self.region}.api.cognitive.microsoft.com/sts/v1.0/issueToken"
+        # Cached bearer token and the time.time() value after which it is refreshed.
+        self.access_token = None
+        self.token_expiry = 0
+
+    def _default_extension(self):
+        """Return the file suffix that matches the configured output format."""
+        fmt = str(self.output_format).lower()
+        if fmt.endswith("-mp3"):
+            return ".mp3"
+        if fmt.startswith("ogg-"):
+            return ".ogg"
+        if fmt.startswith("webm-"):
+            return ".webm"
+        # riff-* formats are WAV files; anything unrecognised keeps the
+        # previous ".wav" default.
+        return ".wav"
+
+    def generate_filename(self, extension=None):
+        """Return a unique audio file path under ``self.output_file``.
+
+        Args:
+            extension: File suffix including the dot. When omitted it is
+                derived from ``output_format``; the former fixed ".wav"
+                default mislabelled the default MP3 output.
+        """
+        if extension is None:
+            extension = self._default_extension()
+        return os.path.join(self.output_file, f"azure_tts_{os.urandom(4).hex()}{extension}")
+
+    def _build_ssml(self, text):
+        """Wrap ``text`` in the SSML document sent to the synthesis endpoint."""
+        voice = str(self.voice_name)
+        # Voice names start with their locale ("zh-CN-YunxiNeural" -> "zh-CN").
+        parts = voice.split("-")
+        lang = "-".join(parts[:2]) if len(parts) >= 3 else "zh-CN"
+        # Escape the text so characters such as "&" or "<" cannot break the XML.
+        return (
+            "<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' "
+            f"xml:lang={quoteattr(lang)}>"
+            f"<voice name={quoteattr(voice)}>{escape(text)}</voice>"
+            "</speak>"
+        )
+
+    async def _get_access_token(self):
+        """Return a bearer token, requesting a new one when the cache is stale.
+
+        Raises:
+            Exception: If the token endpoint cannot be reached or does not
+                answer with HTTP 200.
+        """
+        if self.access_token and time.time() < self.token_expiry:
+            return self.access_token
+
+        headers = {
+            "Ocp-Apim-Subscription-Key": self.subscription_key,
+            "Content-Type": "application/x-www-form-urlencoded",
+        }
+
+        try:
+            async with aiohttp.ClientSession() as session:
+                async with session.post(self.token_url, headers=headers) as response:
+                    body = await response.text()
+                    if response.status != 200:
+                        # Plain Exception is not caught below, so the HTTP
+                        # failure is reported once instead of being re-wrapped.
+                        raise Exception(f"获取Azure TTS令牌失败: {response.status} - {body}")
+        except (aiohttp.ClientError, asyncio.TimeoutError) as e:
+            raise Exception(f"获取Azure TTS令牌异常: {e}") from e
+
+        self.access_token = body
+        self.token_expiry = time.time() + self.TOKEN_TTL_SECONDS
+        return self.access_token
+
+    async def text_to_speak(self, text, output_file):
+        """Synthesize ``text`` with Azure TTS and write the audio to ``output_file``.
+
+        Raises:
+            Exception: If the token cannot be obtained, the request fails at
+                the transport level, or Azure answers with a non-200 status.
+        """
+        token = await self._get_access_token()
+        headers = {
+            "Authorization": f"Bearer {token}",
+            "Content-Type": "application/ssml+xml",
+            "X-Microsoft-OutputFormat": self.output_format,
+            "User-Agent": "xiaozhi-server",
+        }
+        ssml = self._build_ssml(text)
+
+        try:
+            async with aiohttp.ClientSession() as session:
+                async with session.post(
+                    self.api_url, headers=headers, data=ssml.encode("utf-8")
+                ) as response:
+                    if response.status != 200:
+                        error = await response.text()
+                        if response.status == 401:
+                            # Drop the rejected token so the next call fetches a new one.
+                            self.access_token = None
+                            self.token_expiry = 0
+                        raise Exception(
+                            f"Azure TTS请求失败: {response.status} - 错误信息: {error}, 完整响应: {response}"
+                        )
+                    audio = await response.read()
+        except (aiohttp.ClientError, asyncio.TimeoutError) as e:
+            raise Exception(f"Azure TTS请求异常: {e}") from e
+
+        with open(output_file, "wb") as f:
+            f.write(audio)