Files
Fay/core/fay_core.py
xszyou 4889583cc1 自然进化
1. 恢复文字、唤醒词、意图接口打断功能;
2. 新增支持本地mcp工具调用;
3. 支持mcp工具独立控制;
4. 内置mcp工具箱及日程管理mcp工具;
5. 结束fay时主动关闭(断开)mcp服务;
6. 优化线程管理逻辑;
7. 支持ctrl+c退出fay。
2025-08-28 00:24:21 +08:00

616 lines
27 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#作用是处理交互逻辑,文字输入,语音、文字及情绪的发送、播放及展示输出
import math
from operator import index
import os
import time
import socket
import requests
from pydub import AudioSegment
from queue import Queue
import re # 添加正则表达式模块用于过滤表情符号
import uuid
# 适应模型使用
import numpy as np
from ai_module import baidu_emotion
from core import wsa_server
from core.interact import Interact
from tts.tts_voice import EnumVoice
from scheduler.thread_manager import MyThread
from tts import tts_voice
from utils import util, config_util
from core import qa_service
from utils import config_util as cfg
from core import content_db
from ai_module import nlp_cemotion
from llm import nlp_cognitive_stream
from core import stream_manager
from core import member_db
import threading
# Load configuration before choosing a TTS backend.
cfg.load_config()

# Select the speech-synthesis implementation according to the configured
# tts_module; every backend module exposes the same `Speech` class, so the
# rest of this file is backend-agnostic.
if cfg.tts_module =='ali':
    from tts.ali_tss import Speech
elif cfg.tts_module == 'gptsovits':
    from tts.gptsovits import Speech
elif cfg.tts_module == 'gptsovits_v3':
    from tts.gptsovits_v3 import Speech
elif cfg.tts_module == 'volcano':
    from tts.volcano_tts import Speech
else:
    from tts.ms_tts_sdk import Speech

# On Windows, lip-sync (viseme) data is generated and pushed alongside audio.
import platform
if platform.system() == "Windows":
    import sys
    sys.path.append("test/ovr_lipsync")
    from test_olipsync import LipSyncGenerator

# Flag marking whether automatic broadcasting is currently allowed;
# guarded by auto_play_lock because it is toggled from several threads.
can_auto_play = True
auto_play_lock = threading.RLock()
class FeiFei:
    """
    Core interaction engine.

    Handles the interaction logic: text/voice input, Q&A and LLM replies,
    TTS synthesis, panel playback, remote-device audio push and output to
    the digital-human websocket interface.
    """

    def __init__(self):
        self.lock = threading.Lock()
        self.nlp_streams = {}  # user id -> sentence stream cache
        self.nlp_stream_lock = threading.Lock()  # guards nlp_streams
        self.mood = 0.0  # current mood value
        self.old_mood = 0.0
        self.item_index = 0
        self.X = np.array([1, 0, 0, 0, 0, 0, 0, 0]).reshape(1, -1)  # adaptation-model variable matrix
        # self.W = np.array([0.01577594,1.16119452,0.75828,0.207746,1.25017864,0.1044121,0.4294899,0.2770932]).reshape(-1,1)  # previous weights, kept for reference
        self.W = np.array([0.0, 0.6, 0.1, 0.7, 0.3, 0.0, 0.0, 0.0]).reshape(-1, 1)  # adaptation-model weight matrix
        self.wsParam = None
        self.wss = None
        self.sp = Speech()
        self.speaking = False  # True while audio is being played
        self.__running = True
        self.sp.connect()  # TODO pre-connect
        self.cemotion = None
        self.timer = None  # auto-play resume timer (see play_end/set_auto_play)
        self.sound_query = Queue()  # (file_url, audio_length, interact) tuples for panel playback
        self.think_mode_users = {}  # uid -> whether the user is currently inside a <think> block
        self.think_time_users = {}  # uid -> timestamp when the current think phase started
        self.user_conv_map = {}  # username -> {"conversation_id", "conversation_msg_no"}

    def __remove_emojis(self, text):
        """
        Strip emoji characters from *text* while avoiding accidental removal
        of normal Unicode characters.

        If more than 50% of the input would be removed, the original text is
        returned unchanged on the assumption that legitimate characters were
        matched by mistake.
        """
        # Precise emoji code-point ranges, chosen to avoid catching ordinary
        # CJK text or punctuation.
        emoji_pattern = re.compile(
            "["
            "\U0001F600-\U0001F64F"  # Emoticons
            "\U0001F300-\U0001F5FF"  # Miscellaneous Symbols and Pictographs
            "\U0001F680-\U0001F6FF"  # Transport and Map Symbols
            "\U0001F1E0-\U0001F1FF"  # Regional Indicator Symbols
            "\U0001F900-\U0001F9FF"  # Supplemental Symbols and Pictographs
            "\U0001FA70-\U0001FAFF"  # Symbols and Pictographs Extended-A
            "\U00002600-\U000026FF"  # Miscellaneous Symbols
            "\U00002700-\U000027BF"  # Dingbats
            "\U0000FE00-\U0000FE0F"  # Variation Selectors
            "\U0001F000-\U0001F02F"  # Mahjong Tiles
            "\U0001F0A0-\U0001F0FF"  # Playing Cards
            "]+",
            flags=re.UNICODE,
        )
        # NOTE(review): the original code also built a map of "protected"
        # punctuation positions here, but the map was never used (dead code,
        # and its character literal was corrupted in the source); it has been
        # removed with no behavioral effect.
        filtered_text = emoji_pattern.sub('', text)
        # Safety valve: if filtering removed more than half of the content we
        # probably deleted real characters — return the original text.
        if len(filtered_text) < len(text) * 0.5:
            return text
        return filtered_text

    def __process_qa_stream(self, text, username):
        """
        Split and send a Q&A answer in streaming fashion, using the safe
        stream text processor and stream state manager.
        """
        if not text or text.strip() == "":
            return
        # Local import keeps these helpers out of module-load time.
        from utils.stream_text_processor import get_processor
        from utils.stream_state_manager import get_state_manager
        processor = get_processor()
        state_manager = get_state_manager()
        # is_qa=True marks Q&A mode for the processor.
        success = processor.process_stream_text(text, username, is_qa=True, session_type="qa")
        if success:
            # Q&A mode: end the session; no extra end marker is needed.
            state_manager.end_session(username)
        else:
            util.log(1, f"Q&A流式处理失败文本长度: {len(text)}")
            # On failure make sure the user's stream state is fully reset.
            state_manager.force_reset_user_state(username)

    # Voice/text pre-processing: check whether the input hits a Q&A entry.
    def __get_answer(self, interleaver, text):
        """
        Look up *text* in the global Q&A store.

        :param interleaver: input channel tag (currently unused here)
        :param text: user input
        :return: (answer, type) on a hit, otherwise (None, None)
        """
        # Global Q&A lookup (renamed local: avoid shadowing builtin `type`).
        answer, answer_type = qa_service.QAService().question('qa', text)
        if answer is not None:
            return answer, answer_type
        return None, None

    # Voice/text message handling (runs on a worker thread, see on_interact).
    def __process_interact(self, interact: "Interact"):
        """
        Process one interaction request.

        interact_type == 1: voice/text interaction — record the question, try
        Q&A, otherwise ask the LLM, then output the reply everywhere.
        interact_type == 2: pass-through mode, used for auto-broadcast control
        and agent notification tools.

        :return: reply text, 'success', an exception, or a "not running" notice
        """
        if not self.__running:
            return "还没有开始运行"
        try:
            index = interact.interact_type
            username = interact.data.get("user", "User")
            uid = member_db.new_instance().find_user(username)
            if index == 1:  # voice/text interaction
                # Record the question so external tools (e.g. OBS) can read it.
                self.write_to_file("./logs", "asr_result.txt", interact.data["msg"])
                # Forward the question to the digital human.
                if wsa_server.get_instance().is_connected(username):
                    content = {'Topic': 'human', 'Data': {'Key': 'question', 'Value': interact.data["msg"]}, 'Username' : interact.data.get("user")}
                    wsa_server.get_instance().add_cmd(content)
                # Persist the question.
                content_id = content_db.new_instance().add_content('member','speak',interact.data["msg"], username, uid)
                if wsa_server.get_web_instance().is_connected(username):
                    wsa_server.get_web_instance().add_cmd({"panelReply": {"type":"member","content":interact.data["msg"], "username":username, "uid":uid, "id":content_id}, "Username" : username})
                # Check whether the input hits a Q&A entry.
                answer, answer_type = self.__get_answer(interact.interleaver, interact.data["msg"])
                # LLM reply (only when Q&A missed).
                text = ''
                if answer is None or answer_type != "qa":
                    if wsa_server.get_web_instance().is_connected(username):
                        wsa_server.get_web_instance().add_cmd({"panelMsg": "思考中...", "Username" : username, 'robot': f'{cfg.fay_url}/robot/Thinking.jpg'})
                    if wsa_server.get_instance().is_connected(username):
                        content = {'Topic': 'human', 'Data': {'Key': 'log', 'Value': "思考中..."}, 'Username' : username, 'robot': f'{cfg.fay_url}/robot/Thinking.jpg'}
                        wsa_server.get_instance().add_cmd(content)
                    text = nlp_cognitive_stream.question(interact.data["msg"], username, interact.data.get("observation", None))
                else:
                    text = answer
                    # Stream the Q&A answer out in sentence chunks.
                    self.__process_qa_stream(text, username)
                # Record the complete reply and push it to every terminal.
                self.__process_text_output(text, username, uid)
                return text
            elif index == 2:  # pass-through mode (auto-broadcast control / agent notifications)
                if interact.data.get("text"):
                    text = interact.data.get("text")
                    self.__process_text_output(text, username, uid)
                    MyThread(target=self.say, args=[interact, text]).start()
                return 'success'
        except BaseException as e:
            # NOTE(review): intentionally broad — this runs on a worker thread
            # that must not die silently; the exception object is returned.
            print(e)
            return e

    # Log a question/answer record to a file.
    def write_to_file(self, path, filename, content):
        """
        Overwrite path/filename with *content*, creating the directory if
        needed, and fsync so external consumers see the update immediately.
        """
        if not os.path.exists(path):
            os.makedirs(path)
        full_path = os.path.join(path, filename)
        with open(full_path, 'w', encoding='utf-8') as file:
            file.write(content)
            file.flush()
            os.fsync(file.fileno())

    # Entry point for an interaction.
    def on_interact(self, interact: "Interact"):
        """Ensure the user exists, then process the request on a worker thread."""
        username = interact.data.get("user", "User")
        if member_db.new_instance().is_username_exist(username) == "notexists":
            member_db.new_instance().add_user(username)
        MyThread(target=self.__process_interact, args=[interact]).start()
        return None

    # Pick the voice style matching the current mood.
    def __get_mood_voice(self):
        """Return the configured voice's "calm" style (falls back to XIAO_XIAO)."""
        voice = tts_voice.get_voice_of(config_util.config["attribute"]["voice"])
        if voice is None:
            voice = EnumVoice.XIAO_XIAO
        styleList = voice.value["styleList"]
        sayType = styleList["calm"]
        return sayType

    # Synthesize speech for a text chunk and dispatch the resulting audio.
    def say(self, interact, text, type = ""):
        """
        :param interact: interaction context (user, isfirst/isend flags, optional pass-through audio url)
        :param text: text chunk to speak (may contain <think> tags)
        :param type: message type ('qa' marks an adopted Q&A answer)
        :return: path of the produced audio file, or None
        """
        try:
            uid = member_db.new_instance().find_user(interact.data.get("user"))
            is_end = interact.data.get("isend", False)
            is_first = interact.data.get("isfirst", False)
            username = interact.data.get("user", "User")
            if is_first:
                # First chunk of a conversation: allocate an id, reset the counter.
                conv = "conv_" + str(uuid.uuid4())
                self.user_conv_map[username] = {"conversation_id": conv, "conversation_msg_no": 0}
            else:
                # Fix: lazily create the entry when no "first" chunk was ever
                # seen for this user (previously raised KeyError here).
                entry = self.user_conv_map.setdefault(
                    username,
                    {"conversation_id": "conv_" + str(uuid.uuid4()), "conversation_msg_no": -1})
                entry["conversation_msg_no"] += 1
            # Middle chunks with no content carry no information — drop them.
            if not is_first and not is_end and (text is None or text.strip() == ""):
                return None
            self.__send_panel_message(text, interact.data.get('user'), uid, 0, type)
            # --- <think> tag handling -------------------------------------
            is_start_think = False
            # Step 1: closing tag </think> ends think mode; keep only the
            # content after the LAST closing tag.
            if "</think>" in text:
                self.think_mode_users[uid] = False
                parts = text.split("</think>")
                text = parts[-1].strip()
                if text == "":
                    return None
            # Step 2: opening tag <think> (checked on the text left over
            # from step 1) enters think mode.
            if "<think>" in text:
                is_start_think = True
                self.think_mode_users[uid] = True
                self.think_time_users[uid] = time.time()
            if self.think_mode_users.get(uid, False) and is_start_think:
                if wsa_server.get_web_instance().is_connected(interact.data.get('user')):
                    wsa_server.get_web_instance().add_cmd({"panelMsg": "思考中...", "Username" : interact.data.get('user'), 'robot': f'{cfg.fay_url}/robot/Thinking.jpg'})
                if wsa_server.get_instance().is_connected(interact.data.get("user")):
                    content = {'Topic': 'human', 'Data': {'Key': 'log', 'Value': "思考中..."}, 'Username' : interact.data.get('user'), 'robot': f'{cfg.fay_url}/robot/Thinking.jpg'}
                    wsa_server.get_instance().add_cmd(content)
            if self.think_mode_users.get(uid, False) == True and time.time() - self.think_time_users[uid] >= 5:
                # While thinking, remind the user at most every 5 seconds.
                self.think_time_users[uid] = time.time()
                text = "请稍等..."
            elif self.think_mode_users.get(uid, False) == True and "</think>" not in text:
                # Swallow streamed content that is still inside the think block.
                return None
            result = None
            audio_url = interact.data.get('audio')  # pass-through audio
            if audio_url is not None:  # download pass-through audio instead of TTS
                file_name = 'sample-' + str(int(time.time() * 1000)) + audio_url[-4:]
                result = self.download_wav(audio_url, './samples/', file_name)
            elif config_util.config["interact"]["playSound"] or wsa_server.get_instance().is_connected(interact.data.get("user")) or self.__is_send_remote_device_audio(interact):  # TTS
                if text != None and text.replace("*", "").strip() != "":
                    # Filter emojis before handing the text to the synthesizer.
                    filtered_text = self.__remove_emojis(text.replace("*", ""))
                    if filtered_text is not None and filtered_text.strip() != "":
                        util.printInfo(1, interact.data.get('user'), '合成音频...')
                        tm = time.time()
                        result = self.sp.to_sample(filtered_text, self.__get_mood_voice())
                        util.printInfo(1, interact.data.get("user"), "合成音频完成. 耗时: {} ms 文件:{}".format(math.floor((time.time() - tm) * 1000), result))
            else:
                if is_end and wsa_server.get_web_instance().is_connected(interact.data.get('user')):
                    wsa_server.get_web_instance().add_cmd({"panelMsg": "", 'Username' : interact.data.get('user'), 'robot': f'{cfg.fay_url}/robot/Normal.jpg'})
            if result is not None or is_first or is_end:
                if is_end:
                    # Delay the end marker so it cannot overtake earlier TTS audio.
                    time.sleep(1)
                MyThread(target=self.__process_output_audio, args=[result, interact, text]).start()
            return result
        except BaseException as e:
            # Broad by design: say() runs on worker threads; log and give up.
            print(e)
            return None

    # Download a WAV file.
    def download_wav(self, url, save_directory, filename):
        """
        Stream-download *url* into save_directory/filename.

        :return: saved file path, or None on any request failure
        """
        try:
            # Stream so large files are not held in memory.
            response = requests.get(url, stream=True)
            response.raise_for_status()  # raise on HTTP error status
            if not os.path.exists(save_directory):
                os.makedirs(save_directory)
            save_path = os.path.join(save_directory, filename)
            with open(save_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)
            return save_path
        except requests.exceptions.RequestException as e:
            print(f"[Error] Failed to download file: {e}")
            return None

    # Panel audio playback loop (runs on its own thread, see start()).
    def __play_sound(self):
        try:
            import pygame
            # Initialize the mixer exactly once; if this fails the machine
            # cannot play audio and the loop never starts.
            pygame.mixer.init()
        except Exception as e:
            util.printInfo(1, "System", "音频播放初始化失败,本机无法播放音频")
            return
        while self.__running:
            time.sleep(0.01)
            if not self.sound_query.empty():  # play queued audio, if any
                file_url, audio_length, interact = self.sound_query.get()
                is_first = interact.data.get('isfirst') is True
                is_end = interact.data.get('isend') is True
                if file_url is not None:
                    util.printInfo(1, interact.data.get('user'), '播放音频...')
                # Any non-terminal chunk means we are (still) speaking.
                if is_first:
                    self.speaking = True
                elif not is_end:
                    self.speaking = True
                # Disable auto broadcast while real speech is playing.
                global auto_play_lock
                global can_auto_play
                with auto_play_lock:
                    if self.timer is not None:
                        self.timer.cancel()
                        self.timer = None
                    can_auto_play = False
                if wsa_server.get_web_instance().is_connected(interact.data.get('user')):
                    wsa_server.get_web_instance().add_cmd({"panelMsg": "播放中 ...", "Username" : interact.data.get('user'), 'robot': f'{cfg.fay_url}/robot/Speaking.jpg'})
                if file_url is not None:
                    pygame.mixer.music.load(file_url)
                    pygame.mixer.music.play()
                    # Busy-wait (10 ms ticks) until the audio duration elapses.
                    length = 0
                    while length < audio_length:
                        length += 0.01
                        time.sleep(0.01)
                if is_end:
                    self.play_end(interact)
                    if wsa_server.get_web_instance().is_connected(interact.data.get('user')):
                        wsa_server.get_web_instance().add_cmd({"panelMsg": "", "Username" : interact.data.get('user'), 'robot': f'{cfg.fay_url}/robot/Normal.jpg'})
                # Notify the panel that this chunk finished.
                if wsa_server.get_web_instance().is_connected(interact.data.get("user")):
                    wsa_server.get_web_instance().add_cmd({"panelMsg": "", 'Username': interact.data.get('user')})

    # Push audio to the user's registered remote output devices.
    def __send_remote_device_audio(self, file_url, interact):
        if file_url is None:
            return
        delkey = None
        for key, value in fay_booter.DeviceInputListenerDict.items():
            if value.username == interact.data.get("user") and value.isOutput:  # selected by username
                try:
                    # Start-of-audio marker; also probes whether the device is online.
                    value.deviceConnector.send(b"\x00\x01\x02\x03\x04\x05\x06\x07\x08")
                    # Fix: close the file when done (was left open before).
                    with open(os.path.abspath(file_url), "rb") as wavfile:
                        data = wavfile.read(102400)
                        total = 0
                        while data:
                            total += len(data)
                            value.deviceConnector.send(data)
                            data = wavfile.read(102400)
                            time.sleep(0.0001)
                    value.deviceConnector.send(b'\x08\x07\x06\x05\x04\x03\x02\x01\x00')  # end-of-audio marker
                    util.printInfo(1, value.username, "远程音频发送完成:{}".format(total))
                except socket.error as serr:
                    util.printInfo(1, value.username, "远程音频输入输出设备已经断开:{}".format(key))
                    value.stop()
                    delkey = key
        if delkey:
            # Drop the dead listener after the loop (safe: dict not mutated while iterating).
            value = fay_booter.DeviceInputListenerDict.pop(delkey)
            if wsa_server.get_web_instance().is_connected(interact.data.get('user')):
                wsa_server.get_web_instance().add_cmd({"remote_audio_connect": False, "Username" : interact.data.get('user')})

    def __is_send_remote_device_audio(self, interact):
        """Return True if any remote output device is registered for this user."""
        for key, value in fay_booter.DeviceInputListenerDict.items():
            if value.username == interact.data.get("user") and value.isOutput:
                return True
        return False

    # Dispatch one produced audio chunk to all outputs.
    def __process_output_audio(self, file_url, interact, text):
        try:
            # Fix: default so an unrecognized extension cannot leave
            # audio_length unbound below.
            audio_length = 0
            try:
                if file_url is None:
                    audio_length = 0
                elif file_url.endswith('.wav'):
                    audio = AudioSegment.from_wav(file_url)
                    audio_length = len(audio) / 1000.0  # duration in seconds
                elif file_url.endswith('.mp3'):
                    audio = AudioSegment.from_mp3(file_url)
                    audio_length = len(audio) / 1000.0  # duration in seconds
            except Exception as e:
                audio_length = 3  # nominal fallback duration
            # Push to remote devices (async).
            if file_url is not None:
                MyThread(target=self.__send_remote_device_audio, args=[file_url, interact]).start()
            # Send audio to the digital-human interface.
            if file_url is not None and wsa_server.get_instance().is_connected(interact.data.get("user")):
                content = {'Topic': 'human', 'Data': {'Key': 'audio', 'Value': os.path.abspath(file_url), 'HttpValue': f'{cfg.fay_url}/audio/' + os.path.basename(file_url), 'Text': text, 'Time': audio_length, 'Type': interact.interleaver, 'IsFirst': 1 if interact.data.get("isfirst", False) else 0, 'IsEnd': 1 if interact.data.get("isend", False) else 0, 'CONV_ID' : self.user_conv_map[interact.data.get("user", "User")]["conversation_id"], 'CONV_MSG_NO' : self.user_conv_map[interact.data.get("user", "User")]["conversation_msg_no"] }, 'Username' : interact.data.get('user'), 'robot': f'{cfg.fay_url}/robot/Speaking.jpg'}
                # Compute lip-sync visemes on Windows.
                if platform.system() == "Windows":
                    try:
                        lip_sync_generator = LipSyncGenerator()
                        viseme_list = lip_sync_generator.generate_visemes(os.path.abspath(file_url))
                        consolidated_visemes = lip_sync_generator.consolidate_visemes(viseme_list)
                        content["Data"]["Lips"] = consolidated_visemes
                    except Exception as e:
                        print(e)
                        util.printInfo(1, interact.data.get("user"), "唇型数据生成失败")
                wsa_server.get_instance().add_cmd(content)
                util.printInfo(1, interact.data.get("user"), "数字人接口发送音频数据成功")
            # Panel playback (reload config so the toggle takes effect live).
            config_util.load_config()
            if config_util.config["interact"]["playSound"]:
                self.sound_query.put((file_url, audio_length, interact))
            else:
                if wsa_server.get_web_instance().is_connected(interact.data.get('user')):
                    wsa_server.get_web_instance().add_cmd({"panelMsg": "", 'Username' : interact.data.get('user'), 'robot': f'{cfg.fay_url}/robot/Normal.jpg'})
        except Exception as e:
            print(e)

    def play_end(self, interact):
        """Mark speech finished and schedule/resume auto broadcast."""
        self.speaking = False
        global can_auto_play
        global auto_play_lock
        with auto_play_lock:
            if self.timer:
                self.timer.cancel()
                self.timer = None
            if interact.interleaver != 'auto_play':
                # After a real interaction, pause auto broadcast for 30 s.
                self.timer = threading.Timer(30, self.set_auto_play)
                self.timer.start()
            else:
                can_auto_play = True

    # Resume auto broadcast (timer callback).
    def set_auto_play(self):
        global auto_play_lock
        global can_auto_play
        with auto_play_lock:
            can_auto_play = True
            self.timer = None

    # Start the core service.
    def start(self):
        """Optionally load the cemotion model, then start the playback thread."""
        if cfg.ltp_mode == "cemotion":
            from cemotion import Cemotion
            self.cemotion = Cemotion()
        MyThread(target=self.__play_sound).start()

    # Stop the core service.
    def stop(self):
        """Stop loops, close the TTS backend and clear panel/digital-human state."""
        self.__running = False
        self.speaking = False
        self.sp.close()
        wsa_server.get_web_instance().add_cmd({"panelMsg": ""})
        content = {'Topic': 'human', 'Data': {'Key': 'log', 'Value': ""}}
        wsa_server.get_instance().add_cmd(content)

    def __record_response(self, text, username, uid):
        """
        Persist the AI reply.

        :param text: reply text
        :param username: user name
        :param uid: user id
        :return: content_id of the stored record
        """
        self.write_to_file("./logs", "answer_result.txt", text)
        return content_db.new_instance().add_content('fay', 'speak', text, username, uid)

    def __send_panel_message(self, text, username, uid, content_id=None, type=None):
        """
        Send a message to the web panel.

        :param text: message text
        :param username: user name
        :param uid: user id
        :param content_id: content id (a reply bubble is sent only when not None)
        :param type: message type ('qa' marks the reply as adopted)
        """
        if not wsa_server.get_web_instance().is_connected(username):
            return
        # Basic panel message.
        wsa_server.get_web_instance().add_cmd({
            "panelMsg": text,
            "Username": username
        })
        # With a content_id, also send the reply bubble.
        if content_id is not None:
            wsa_server.get_web_instance().add_cmd({
                "panelReply": {
                    "type": "fay",
                    "content": text,
                    "username": username,
                    "uid": uid,
                    "id": content_id,
                    "is_adopted": type == 'qa'
                },
                "Username": username
            })

    def __send_digital_human_message(self, text, username):
        """
        Send reply text to the digital human (speech is driven separately by say()).

        :param text: message text
        :param username: user name
        """
        full_text = self.__remove_emojis(text.replace("*", ""))
        if wsa_server.get_instance().is_connected(username):
            content = {
                'Topic': 'human',
                'Data': {
                    'Key': 'text',
                    'Value': full_text
                },
                'Username': username
            }
            wsa_server.get_instance().add_cmd(content)

    def __process_text_output(self, text, username, uid):
        """
        Record the reply and push it to every terminal.

        :param text: reply text
        :param username: user name
        :param uid: user id
        """
        if text:
            text = text.strip()
            # Persist the reply.
            content_id = self.__record_response(text, username, uid)
            # Push the reply to the digital human (panel push is handled
            # chunk-by-chunk in say()).
            # self.__send_panel_message(text, username, uid, content_id, type)
            self.__send_digital_human_message(text, username)
            # Log it.
            util.printInfo(1, username, '({}) {}'.format(self.__get_mood_voice(), text))
# Imported at the very bottom via importlib — presumably to break a circular
# import with fay_booter (which this module references in the remote-device
# helpers above); verify against fay_booter's own imports before changing.
import importlib
fay_booter = importlib.import_module('fay_booter')