Files
Fay/tts/volcano_tts.py
xszyou 2a204a13ed fay进化
1、支持mcp sse服务管理及动态工具调用
--对魔搭社区提供的sse对接的mcp工具做过兼容测试
2、llm模块统一
--严格匹配openai兼容接口范式,包括token计算及流式控制等
--把认知模式作为llm对接的方式,并对认知模型进行了优化升级,可控制各用户聊天认知是否隔离
--使用qwen3-4b作为默认llm,可以平衡速度、角色模拟与MCP工具调用
3、提供配置管理中心,便于多个fay做配置管理
--当根目录中system.conf或config.json不存在,启动时会自动载入config_util.py中指定的配置
--配置中心代码支持多项目管理,单项目也支持多fay对接,源码地址:https://github.com/xszyou/fay_config_server
4、多个bug修复
--修复重复反思和重复保存的问题
--修正认知模型的反思和检索算法
--处理pygame报错程序不继续执行问题
--表情不进行tts
--运行时再判断加载qt5
--修复文字交互接口流式输出时不会结束问题
--修复文字交互接口流式输出qa不输出问题
2025-05-17 01:25:45 +08:00

92 lines
3.1 KiB
Python

import base64
import json
import uuid
import requests
import time
from utils import util, config_util
from utils import config_util as cfg
import wave
class Speech:
    """Text-to-speech client for the HTTP API at openspeech.bytedance.com.

    The app id, access token and cluster are read from ``cfg`` when the
    instance is created.  Each successful synthesis is written to a WAV file
    under ``./samples`` and remembered, so that asking again for the same
    (voice, style, text) returns the existing file instead of calling the
    service a second time.
    """

    # Seconds to wait for the TTS service before giving up on a request.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        self.appid = cfg.volcano_tts_appid
        self.access_token = cfg.volcano_tts_access_token
        self.cluster = cfg.volcano_tts_cluster
        # List of (voice_name, style, text, file_url) tuples for clips that
        # have already been synthesized by this instance.
        self.__history_data = []

    def connect(self):
        """Do nothing; ``to_sample`` issues a self-contained HTTP request."""
        pass

    def __get_history(self, voice_name, style, text):
        """Return the remembered file path for this request, or ``None``."""
        for entry in self.__history_data:
            if entry[0] == voice_name and entry[1] == style and entry[2] == text:
                return entry[3]
        return None

    def __remember(self, voice_name, style, text, file_url):
        """Record *file_url* for this request, replacing any older entry."""
        self.__history_data = [
            entry
            for entry in self.__history_data
            if not (entry[0] == voice_name and entry[1] == style and entry[2] == text)
        ]
        self.__history_data.append((voice_name, style, text, file_url))

    @staticmethod
    def __write_wav(file_url, audio):
        """Write *audio* to *file_url* as a playable WAV file.

        If *audio* already is a complete RIFF/WAVE file it is stored verbatim;
        wrapping it again would turn its header into audible samples.
        Anything else is treated as raw mono 16-bit 24 kHz PCM and gets a WAV
        header, which is what the original implementation always did.
        """
        if audio[:4] == b"RIFF" and audio[8:12] == b"WAVE":
            with open(file_url, "wb") as out:
                out.write(audio)
            return
        with wave.open(file_url, "wb") as wf:
            wf.setnchannels(1)
            wf.setsampwidth(2)
            wf.setframerate(24000)
            wf.writeframes(audio)

    def to_sample(self, text, style):
        """Synthesize *text* and return the path of the resulting WAV file.

        Args:
            text: Plain text to synthesize.
            style: Used only as part of the cache key; it is not sent to the
                service.

        Returns:
            The path of a WAV file under ``./samples``, or ``None`` when the
            request failed or the response carried no ``data`` field.  Errors
            are logged through ``util.log`` and never propagate to the caller.
        """
        import os  # local import so the block does not depend on the file's import list

        # Voice selection: explicit TTS setting first, then the character's
        # configured voice, then a built-in default.
        if cfg.volcano_tts_voice_type not in (None, ''):
            voice = cfg.volcano_tts_voice_type
        else:
            configured = config_util.config["attribute"]["voice"]
            if configured is not None and configured.strip() != "":
                voice = configured
            else:
                voice = "爽快思思/Skye"

        try:
            cached = self.__get_history(voice, style, text)
            # Only reuse a cached clip whose file is still on disk.
            if cached is not None and os.path.exists(cached):
                return cached

            host = "openspeech.bytedance.com"
            api_url = f"https://{host}/api/v1/tts"
            header = {"Authorization": f"Bearer;{self.access_token}"}
            request_json = {
                "app": {
                    "appid": self.appid,
                    "token": "access_token",
                    "cluster": self.cluster,
                },
                "user": {
                    "uid": "388808087185088",
                },
                "audio": {
                    "voice_type": voice,
                    "encoding": "wav",
                    "speed_ratio": 1.0,
                    "volume_ratio": 1.0,
                    "pitch_ratio": 1.0,
                },
                "request": {
                    "reqid": str(uuid.uuid4()),
                    "text": text,
                    "text_type": "plain",
                    "operation": "query",
                    "with_frontend": 1,
                    "frontend_type": "unitTson",
                },
            }
            # A timeout keeps a stalled connection from blocking the caller
            # forever; a timeout error is handled by the except clause below.
            response = requests.post(
                api_url,
                json.dumps(request_json),
                headers=header,
                timeout=self.REQUEST_TIMEOUT,
            )
            payload = response.json()  # parse the body once
            if not isinstance(payload, dict) or "data" not in payload:
                util.log(1, "[x] 语音转换失败!")
                util.log(1, "[x] 原因: " + str(payload))
                return None

            audio = base64.b64decode(payload["data"])
            os.makedirs("./samples", exist_ok=True)
            file_url = './samples/sample-' + str(int(time.time() * 1000)) + '.wav'
            self.__write_wav(file_url, audio)
            self.__remember(voice, style, text, file_url)
            return file_url
        except Exception as e:
            # Best-effort API: report the failure and signal it with None.
            util.log(1, "[x] 语音转换失败!")
            util.log(1, "[x] 原因: " + str(e))
            return None

    def close(self):
        """Do nothing; there is no connection or handle to release."""
        pass