diff --git a/TTS/testTtsModel.py b/TTS/testTtsModel.py
new file mode 100644
index 0000000..aa79d8a
--- /dev/null
+++ b/TTS/testTtsModel.py
@@ -0,0 +1,62 @@
+# coding=utf-8
+# Installation instructions for pyaudio:
+# CentOS
+# sudo yum install -y portaudio portaudio-devel && pip install pyaudio
+# Microsoft Windows
+# python -m pip install pyaudio
+
+import os
+import sys
+
+# Add the project root to sys.path so that the Util package can be imported.
+root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+if root_dir not in sys.path:
+    sys.path.append(root_dir)
+
+from Util.AlyTtsKit import QwenTTSManager
+
+# ======= Test configuration =======
+# Voice sample used for enrollment; expected next to this script.
+VOICE_FILE_PATH = os.path.join(os.path.dirname(__file__), "voice.mp3")
+
+
+def main():
+    """
+    Smoke test: enroll a voice from VOICE_FILE_PATH, then synthesize and
+    play a few sentences through QwenTTSManager.
+    """
+    print('[系统] 启动 Qwen TTS 测试 (使用 AlyTtsKit)...')
+
+    # Texts to synthesize
+    test_texts = [
+        '您好,我是您的 AI 助手。',
+        '现在正在测试抽取到 Util 目录下的 AlyTtsKit 功能。',
+        '希望这次重构能让代码更加模块化!'
+    ]
+
+    try:
+        # 1. Initialize the manager
+        tts_manager = QwenTTSManager()
+
+        # 2. Create the cloned voice from the sample file
+        if not os.path.exists(VOICE_FILE_PATH):
+            print(f'[错误] 测试音频文件不存在: {VOICE_FILE_PATH}')
+            return
+
+        print('[系统] 正在进行声音复刻...')
+        voice_id = tts_manager.create_voice_enrollment(VOICE_FILE_PATH)
+        print(f'[系统] 声音复刻成功,Voice ID: {voice_id}')
+
+        # 3. Synthesize and play
+        print('[系统] 开始语音合成...')
+        tts_manager.start_synthesis(voice_id, test_texts)
+
+        print('[系统] 测试完成。')
+
+    except Exception as e:
+        # Top-level boundary of a manual test script: report and exit.
+        print(f'[系统] 发生错误: {e}')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/Util/AlyTtsKit.py b/Util/AlyTtsKit.py
new file mode 100644
index 0000000..c724ea8
--- /dev/null
+++ b/Util/AlyTtsKit.py
@@ -0,0 +1,227 @@
+# coding=utf-8
+import pyaudio
+import os
+import sys
+import requests
+import base64
+import pathlib
+import threading
+import time
+import dashscope
+from dashscope.audio.qwen_tts_realtime import QwenTtsRealtime, QwenTtsRealtimeCallback, AudioFormat
+
+# Add the project root to sys.path so that the Config package can be imported.
+root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+if root_dir not in sys.path:
+    sys.path.append(root_dir)
+
+try:
+    from Config.Config import ALY_LLM_API_KEY
+except ImportError:
+    # Fall back to the environment when the project config cannot be imported.
+    ALY_LLM_API_KEY = os.getenv("DASHSCOPE_API_KEY")
+
+# ======= Constants =======
+DEFAULT_TARGET_MODEL = "qwen3-tts-vc-realtime-2026-01-15"
+DEFAULT_PREFERRED_NAME = "guanyu"
+DEFAULT_AUDIO_MIME_TYPE = "audio/mpeg"
+# Seconds to wait for the enrollment HTTP request before giving up.
+DEFAULT_HTTP_TIMEOUT = 60
+
+
+class MyCallback(QwenTtsRealtimeCallback):
+    """
+    Streaming TTS callback that plays received audio through PyAudio.
+
+    The output stream is opened as 16-bit mono at 24000 Hz, the same format
+    that QwenTTSManager.start_synthesis requests from the service.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.complete_event = threading.Event()
+        # Guards _stream/_player so playback and teardown never overlap;
+        # callbacks presumably run on an SDK thread, not the caller's.
+        self._lock = threading.Lock()
+        self._stream = None
+        self._player = pyaudio.PyAudio()
+        try:
+            self._stream = self._player.open(
+                format=pyaudio.paInt16, channels=1, rate=24000, output=True
+            )
+        except Exception:
+            # Do not leak the PyAudio instance when the output device fails to open.
+            self._player.terminate()
+            self._player = None
+            raise
+
+    def release(self) -> None:
+        """Stop playback and free the audio device; safe to call repeatedly."""
+        with self._lock:
+            stream, self._stream = self._stream, None
+            player, self._player = self._player, None
+        try:
+            if stream is not None:
+                stream.stop_stream()
+                stream.close()
+        finally:
+            if player is not None:
+                player.terminate()
+
+    def on_open(self) -> None:
+        """Log that the connection is established."""
+        print('[TTS] 连接已建立')
+
+    def on_close(self, close_status_code, close_msg) -> None:
+        """Release audio resources and wake any waiter when the connection closes."""
+        try:
+            self.release()
+        finally:
+            # The connection may close before 'session.finished' arrives;
+            # set the event anyway so wait_for_finished() cannot block forever.
+            self.complete_event.set()
+        print(f'[TTS] 连接关闭 code={close_status_code}, msg={close_msg}')
+
+    def on_event(self, response: dict) -> None:
+        """Handle one server event: play audio deltas and log lifecycle events."""
+        try:
+            event_type = response.get('type', '')
+            if event_type == 'session.created':
+                print(f'[TTS] 会话开始: {response["session"]["id"]}')
+            elif event_type == 'response.audio.delta':
+                audio_data = base64.b64decode(response['delta'])
+                with self._lock:
+                    # Skip late chunks once the stream has been released.
+                    if self._stream is not None:
+                        self._stream.write(audio_data)
+            elif event_type == 'response.done':
+                print('[TTS] 响应完成')
+            elif event_type == 'session.finished':
+                print('[TTS] 会话结束')
+                self.complete_event.set()
+        except Exception as e:
+            # Keep the session alive on a bad event; report and continue.
+            print(f'[Error] 处理回调事件异常: {e}')
+
+    def wait_for_finished(self, timeout=None):
+        """
+        Block until the session finishes or the connection closes.
+
+        Args:
+            timeout: Optional maximum wait in seconds; None waits indefinitely.
+
+        Returns:
+            True if the event was set, False if the timeout expired first.
+        """
+        return self.complete_event.wait(timeout)
+
+
+class QwenTTSManager:
+    """
+    Manager for Qwen realtime TTS: voice enrollment and streamed playback.
+    """
+
+    def __init__(self, api_key=ALY_LLM_API_KEY, model=DEFAULT_TARGET_MODEL):
+        """
+        Args:
+            api_key: DashScope API key; also assigned to ``dashscope.api_key``.
+            model: Target TTS model used for enrollment and synthesis.
+        """
+        self.api_key = api_key
+        self.model = model
+        dashscope.api_key = self.api_key
+        self.callback = None
+        self.qwen_tts_realtime = None
+
+    def create_voice_enrollment(self, file_path, preferred_name=DEFAULT_PREFERRED_NAME,
+                                audio_mime_type=DEFAULT_AUDIO_MIME_TYPE,
+                                timeout=DEFAULT_HTTP_TIMEOUT):
+        """
+        Create a cloned voice from an audio sample and return its voice id.
+
+        Args:
+            file_path: Path of the audio sample to upload.
+            preferred_name: Value sent as ``preferred_name`` in the request.
+            audio_mime_type: MIME type declared in the data URI; it should
+                match the actual encoding of the sample.
+            timeout: Seconds passed to ``requests.post`` as its timeout.
+
+        Returns:
+            The ``output.voice`` value of the service response.
+
+        Raises:
+            FileNotFoundError: If ``file_path`` does not exist.
+            RuntimeError: If no API key is configured, the service answers
+                with a non-200 status, or the body lacks ``output.voice``.
+        """
+        file_path_obj = pathlib.Path(file_path)
+        if not file_path_obj.exists():
+            raise FileNotFoundError(f"音频文件不存在: {file_path}")
+        if not self.api_key:
+            # Fail early instead of sending a "Bearer None" header.
+            raise RuntimeError("创建 voice 失败: 未配置 API Key")
+
+        base64_str = base64.b64encode(file_path_obj.read_bytes()).decode()
+        data_uri = f"data:{audio_mime_type};base64,{base64_str}"
+
+        url = "https://dashscope.aliyuncs.com/api/v1/services/audio/tts/customization"
+        payload = {
+            "model": "qwen-voice-enrollment",
+            "input": {
+                "action": "create",
+                "target_model": self.model,
+                "preferred_name": preferred_name,
+                "audio": {"data": data_uri}
+            }
+        }
+        headers = {
+            "Authorization": f"Bearer {self.api_key}",
+            "Content-Type": "application/json"
+        }
+
+        # Without a timeout, requests can block indefinitely on a stalled server.
+        resp = requests.post(url, json=payload, headers=headers, timeout=timeout)
+        if resp.status_code != 200:
+            raise RuntimeError(f"创建 voice 失败: {resp.status_code}, {resp.text}")
+
+        try:
+            return resp.json()["output"]["voice"]
+        except (KeyError, TypeError, ValueError) as err:
+            raise RuntimeError(f"创建 voice 失败: 响应格式异常, {resp.text}") from err
+
+    def start_synthesis(self, voice_id, text_list):
+        """
+        Synthesize the given texts in realtime and play the audio.
+
+        Blocks until the session finishes or the connection closes.
+
+        Args:
+            voice_id: Voice id to use, e.g. from create_voice_enrollment.
+            text_list: Iterable of text fragments, sent in order.
+        """
+        self.callback = MyCallback()
+        try:
+            self.qwen_tts_realtime = QwenTtsRealtime(
+                model=self.model,
+                callback=self.callback,
+                url='wss://dashscope.aliyuncs.com/api-ws/v1/realtime'
+            )
+            self.qwen_tts_realtime.connect()
+
+            self.qwen_tts_realtime.update_session(
+                voice=voice_id,
+                response_format=AudioFormat.PCM_24000HZ_MONO_16BIT,
+                mode='server_commit'
+            )
+
+            for text in text_list:
+                print(f'[发送文本]: {text}')
+                self.qwen_tts_realtime.append_text(text)
+                time.sleep(0.1)
+
+            self.qwen_tts_realtime.finish()
+            self.callback.wait_for_finished()
+        finally:
+            # on_close normally frees the audio device, but it may never run
+            # if connecting or sending fails; release() is idempotent.
+            self.callback.release()