年番更新

1、增加一组意图接口:唤醒、打招呼、打断;
2、增加一个自启动脚本列表;
3、修复aliyun asr错误后无法继续拾音问题;
4、优化本地拾音逻辑:麦克风启动时间减小、VAD判断时间减小、错误检查机制、拾音音量动态阈值、调整缓冲区大小;
5、补充自动播放时推送数据的检测;
6、更正qa.csv文件默认编码;
7、去掉http请求日志信息。
This commit is contained in:
xszyou
2025-01-09 01:36:33 +08:00
parent 421157c667
commit 9186f91afa
10 changed files with 252 additions and 88 deletions

View File

@@ -114,12 +114,11 @@ class ALiNls:
def on_close(self, ws, code, msg):
self.__endding = True
self.__is_close = True
if msg:
print("aliyun asr服务不太稳定:", msg)
# 收到websocket错误的处理
def on_error(self, ws, error):
print("aliyun asr error:", error)
self.started = True #避免在aliyun asr出错时recorder一直等待start状态返回
# 收到websocket连接建立的处理
def on_open(self, ws):

View File

@@ -445,7 +445,7 @@ class FeiFei:
#发送音频给数字人接口
if wsa_server.get_instance().is_connected(interact.data.get("user")):
content = {'Topic': 'Unreal', 'Data': {'Key': 'audio', 'Value': os.path.abspath(file_url), 'HttpValue': f'http://{cfg.fay_url}:5000/audio/' + os.path.basename(file_url), 'Text': text, 'Time': audio_length, 'Type': 'hello' if interact.interleaver == 'hello' else ('interact' if interact.interact_type == 1 else 'auto_play')}, 'Username' : interact.data.get('user')}
content = {'Topic': 'Unreal', 'Data': {'Key': 'audio', 'Value': os.path.abspath(file_url), 'HttpValue': f'http://{cfg.fay_url}:5000/audio/' + os.path.basename(file_url), 'Text': text, 'Time': audio_length, 'Type': interact.interleaver}, 'Username' : interact.data.get('user')}
#计算lips
if platform.system() == "Windows":
try:

View File

@@ -1,4 +1,4 @@
#inerleaver:mic、text、socket、auto_play。interact_type:1、语音/文字交互2、穿透。
#inerleaver:mic、text、socket、auto_play。interact_type:1、语音/文字交互2、透
class Interact:
def __init__(self, interleaver: str, interact_type: int, data: dict):

View File

@@ -16,11 +16,11 @@ import tempfile
import wave
from core import fay_core
from core import interact
# 启动时间 (秒)
_ATTACK = 0.2
# 麦克风启动时间 (秒)
_ATTACK = 0.1
# 释放时间 (秒)
_RELEASE = 0.7
# 麦克风释放时间 (秒)
_RELEASE = 0.5
class Recorder:
@@ -49,6 +49,10 @@ class Recorder:
self.is_reading = False
self.stream = None
self.__last_ws_notify_time = 0
self.__ws_notify_interval = 0.5 # 最小通知间隔(秒)
self.__ws_notify_thread = None
def asrclient(self):
if self.ASRMode == "ali":
asrcli = ALiNls(self.username)
@@ -196,8 +200,8 @@ class Recorder:
util.printInfo(1, self.username, "请检查设备是否有误,再重新启动!")
return
isSpeaking = False
last_mute_time = time.time()
last_speaking_time = time.time()
last_mute_time = time.time() #用户上次说话完话的时刻用于VAD的开始判断也会影响fay说完话到收听用户说话的时间间隔
last_speaking_time = time.time()#用户上次说话的时刻用于VAD的结束判断
data = None
concatenated_audio = bytearray()
audio_data_list = []
@@ -248,56 +252,84 @@ class Recorder:
self.__dynamic_threshold += (history_percentage - self.__dynamic_threshold) * 1
#激活拾音
if percentage > self.__dynamic_threshold:
last_speaking_time = time.time()
#用户正在说话,激活拾音
try:
if percentage > self.__dynamic_threshold:
last_speaking_time = time.time()
if not self.__processing and not isSpeaking and time.time() - last_mute_time > _ATTACK:
isSpeaking = True #用户正在说话
util.printInfo(1, self.username,"聆听中...")
if wsa_server.get_web_instance().is_connected(self.username):
wsa_server.get_web_instance().add_cmd({"panelMsg": "聆听中...", 'Username' : self.username, 'robot': f'http://{cfg.fay_url}:5000/robot/Listening.jpg'})
if wsa_server.get_instance().is_connected(self.username):
content = {'Topic': 'Unreal', 'Data': {'Key': 'log', 'Value': "聆听中..."}, 'Username' : self.username, 'robot': f'http://{cfg.fay_url}:5000/robot/Listening.jpg'}
wsa_server.get_instance().add_cmd(content)
concatenated_audio.clear()
self.__aLiNls = self.asrclient()
try:
if not self.__processing and not isSpeaking and time.time() - last_mute_time > _ATTACK:
isSpeaking = True #用户正在说话
util.printInfo(1, self.username,"聆听中...")
self.__notify_listening_status() # 使用新方法发送通知
concatenated_audio.clear()
self.__aLiNls = self.asrclient()
task_id = self.__aLiNls.start()
while not self.__aLiNls.started:
time.sleep(0.01)
except Exception as e:
print(e)
util.printInfo(1, self.username, "aliyun asr 连接受限")
for i in range(len(self.__history_data) - 1): #当前data在下面会做发送这里是发送激活前的音频数据以免漏掉信息
buf = self.__history_data[i]
audio_data_list.append(self.__process_audio_data(buf, self.channels))
if self.ASRMode == "ali":
self.__aLiNls.send(self.__process_audio_data(buf, self.channels).tobytes())
else:
concatenated_audio.extend(self.__process_audio_data(buf, self.channels).tobytes())
self.__history_data.clear()
else:#结束拾音
last_mute_time = time.time()
if isSpeaking:
if time.time() - last_speaking_time > _RELEASE: #TODO 更换的vad更靠谱
isSpeaking = False
self.__aLiNls.end()
util.printInfo(1, self.username, "语音处理中...")
self.__waitingResult(self.__aLiNls, concatenated_audio)
for i in range(len(self.__history_data) - 1): #当前data在下面会做发送这里是发送激活前的音频数据以免漏掉信息
buf = self.__history_data[i]
audio_data_list.append(self.__process_audio_data(buf, self.channels))
if self.ASRMode == "ali":
self.__aLiNls.send(self.__process_audio_data(buf, self.channels).tobytes())
else:
concatenated_audio.extend(self.__process_audio_data(buf, self.channels).tobytes())
self.__history_data.clear()
else:#结束拾音
last_mute_time = time.time()
if isSpeaking:
if time.time() - last_speaking_time > _RELEASE:
isSpeaking = False
self.__aLiNls.end()
util.printInfo(1, self.username, "语音处理中...")
self.__waitingResult(self.__aLiNls, concatenated_audio)
mono_data = self.__concatenate_audio_data(audio_data_list)
self.__save_audio_to_wav(mono_data, self.sample_rate, "cache_data/input.wav")
audio_data_list = []
#拾音中
if isSpeaking:
audio_data_list.append(self.__process_audio_data(data, self.channels))
if self.ASRMode == "ali":
self.__aLiNls.send(self.__process_audio_data(data, self.channels).tobytes())
else:
concatenated_audio.extend(self.__process_audio_data(data, self.channels).tobytes())
mono_data = self.__concatenate_audio_data(audio_data_list)
self.__save_audio_to_wav(mono_data, self.sample_rate, "cache_data/input.wav")
audio_data_list = []
#拾音中
if isSpeaking:
audio_data_list.append(self.__process_audio_data(data, self.channels))
if self.ASRMode == "ali":
self.__aLiNls.send(self.__process_audio_data(data, self.channels).tobytes())
else:
concatenated_audio.extend(self.__process_audio_data(data, self.channels).tobytes())
except Exception as e:
printInfo(1, self.username, "录音失败: " + str(e))
#异步发送 WebSocket 通知
def __notify_listening_status(self):
current_time = time.time()
if current_time - self.__last_ws_notify_time < self.__ws_notify_interval:
return
def send_ws_notification():
try:
if wsa_server.get_web_instance().is_connected(self.username):
wsa_server.get_web_instance().add_cmd({
"panelMsg": "聆听中...",
'Username': self.username,
'robot': f'http://{cfg.fay_url}:5000/robot/Listening.jpg'
})
if wsa_server.get_instance().is_connected(self.username):
content = {
'Topic': 'Unreal',
'Data': {'Key': 'log', 'Value': "聆听中..."},
'Username': self.username,
'robot': f'http://{cfg.fay_url}:5000/robot/Listening.jpg'
}
wsa_server.get_instance().add_cmd(content)
except Exception as e:
util.log(1, f"发送 WebSocket 通知失败: {e}")
# 如果之前的通知线程还在运行,就不启动新的
if self.__ws_notify_thread is None or not self.__ws_notify_thread.is_alive():
self.__ws_notify_thread = threading.Thread(target=send_ws_notification)
self.__ws_notify_thread.daemon = True
self.__ws_notify_thread.start()
self.__last_ws_notify_time = current_time
def __save_audio_to_wav(self, data, sample_rate, filename):
# 确保数据类型为 int16
if data.dtype != np.int16:

View File

@@ -1,4 +1,5 @@
#核心启动模块
import os
import time
import re
import pyaudio
@@ -15,6 +16,7 @@ from core.wsa_server import MyServer
from core import wsa_server
from core import socket_bridge_service
from llm.agent import agent_service
import subprocess
feiFei: fay_core.FeiFei = None
recorderListener: Recorder = None
@@ -34,13 +36,12 @@ class RecorderListener(Recorder):
def __init__(self, device, fei):
self.__device = device
self.__RATE = 16000
self.__FORMAT = pyaudio.paInt16
self.__running = False
self.username = 'User'
self.channels = 1
self.sample_rate = 16000
# 这两个参数会在 get_stream 中根据实际设备更新
self.channels = None
self.sample_rate = None
super().__init__(fei)
def on_speaking(self, text):
@@ -51,51 +52,59 @@ class RecorderListener(Recorder):
def get_stream(self):
try:
#是否录音的控制是放在recorder.py的这里的作用是防止没有麦克风的设备出错
while True:
record = config_util.config['source']['record']
if record['enabled']:
break
time.sleep(0.1)
self.paudio = pyaudio.PyAudio()
device_id = 0 # 或者根据需要选择其他设备
# 获取设备信息
device_info = self.paudio.get_device_info_by_index(device_id)
self.channels = device_info.get('maxInputChannels', 1) #很多麦克风只支持单声道录音
# self.sample_rate = int(device_info.get('defaultSampleRate', self.__RATE))
# 设置格式这里以16位深度为例
format = pyaudio.paInt16
# 打开音频流,使用设备的最大声道数和默认采样率
# 获取默认输入设备的信息
default_device = self.paudio.get_default_input_device_info()
self.channels = min(int(default_device.get('maxInputChannels', 1)), 2) # 最多使用2个通道
# self.sample_rate = int(default_device.get('defaultSampleRate', 16000))
util.printInfo(1, "系统", f"默认麦克风信息 - 采样率: {self.sample_rate}Hz, 通道数: {self.channels}")
# 使用系统默认麦克风
self.stream = self.paudio.open(
input_device_index=device_id,
rate=self.sample_rate,
format=format,
format=self.__FORMAT,
channels=self.channels,
rate=self.sample_rate,
input=True,
frames_per_buffer=4096
frames_per_buffer=1024
)
self.__running = True
MyThread(target=self.__pyaudio_clear).start()
except Exception as e:
print(f"Error: {e}")
util.log(1, f"打开麦克风时出错: {str(e)}")
util.printInfo(1, self.username, "请检查录音设备是否有误,再重新启动!")
time.sleep(10)
return self.stream
def __pyaudio_clear(self):
while self.__running:
time.sleep(30)
try:
while self.__running:
time.sleep(30)
except Exception as e:
util.log(1, f"音频清理线程出错: {str(e)}")
finally:
if hasattr(self, 'stream') and self.stream:
try:
self.stream.stop_stream()
self.stream.close()
except Exception as e:
util.log(1, f"关闭音频流时出错: {str(e)}")
def stop(self):
super().stop()
self.__running = False
time.sleep(0.1)#给清理线程一点处理时间
try:
while self.is_reading:
while self.is_reading:#是为了确保停止的时候麦克风没有刚好在读取音频的
time.sleep(0.1)
if self.stream is not None:
self.stream.stop_stream()
@@ -250,6 +259,8 @@ def start_auto_play_service(): #TODO 评估一下有无优化的空间
if not audio_url or audio_url.strip()[0:4] != "http":
audio_url = None
response_text = data.get('text')
if audio_url is None and (response_text is None or '' == response_text.strip()):
continue
timestamp = data.get('timestamp')
interact = Interact("auto_play", 2, {'user': user, 'text': response_text, 'audio': audio_url})
util.printInfo(1, user, '自动播放:{}{}'.format(response_text, audio_url), time.time())
@@ -276,6 +287,13 @@ def stop():
global socket_service_instance
global deviceSocketServer
#停止外部应用
util.log(1, '停止外部应用...')
startup_script = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'shell', 'run_startup.py')
if os.path.exists(startup_script):
from shell.run_startup import stop_all_processes
stop_all_processes()
util.log(1, '正在关闭服务...')
__running = False
if recorderListener is not None:
@@ -310,6 +328,14 @@ def start():
global recorderListener
global __running
global socket_service_instance
#启动外部应用
util.log(1,'启动外部应用...')
startup_script = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'shell', 'run_startup.py')
if os.path.exists(startup_script):
subprocess.Popen([sys.executable, startup_script],
creationflags=subprocess.CREATE_NEW_CONSOLE)
util.log(1, '开启服务...')
__running = True

View File

@@ -9,6 +9,7 @@ from flask_cors import CORS
import requests
import datetime
import pytz
import logging
import fay_booter
@@ -26,6 +27,13 @@ from flask_httpauth import HTTPBasicAuth
from core import qa_service
__app = Flask(__name__)
# 禁用 Flask 默认日志
__app.logger.disabled = True
log = logging.getLogger('werkzeug')
log.disabled = True
# 禁用请求日志中间件
__app.config['PROPAGATE_EXCEPTIONS'] = True
auth = HTTPBasicAuth()
CORS(__app, supports_credentials=True)
@@ -70,7 +78,6 @@ def __get_device_list():
print(f"Error getting device list: {e}")
return []
@__app.route('/api/submit', methods=['post'])
def api_submit():
data = request.values.get('data')
@@ -336,7 +343,7 @@ def api_get_Member_list():
except Exception as e:
return jsonify({'list': [], 'message': f'获取成员列表时出错: {e}'}), 500
@__app.route('/api/get_run_status', methods=['post'])
@__app.route('/api/get-run-status', methods=['post'])
def api_get_run_status():
# 获取运行状态
try:
@@ -486,6 +493,37 @@ def to_greet():
text = fay_booter.feiFei.on_interact(interact)
return jsonify({'status': 'success', 'data': text, 'msg': '已进行打招呼'}), 200
#唤醒:在普通唤醒模式,进行大屏交互才有意义
@__app.route('/to_wake', methods=['POST'])
def to_wake():
data = request.get_json()
username = data.get('username', 'User')
observation = data.get('observation', '')
fay_booter.recorderListener.wakeup_matched = True
return jsonify({'status': 'success', 'msg': '已唤醒'}), 200
#打断
@__app.route('/to_stop_talking', methods=['POST'])
def to_stop_talking():
try:
data = request.get_json()
username = data.get('username', 'User')
message = data.get('text', '你好,请说?')
observation = data.get('observation', '')
interact = Interact("stop_talking", 2, {'user': username, 'text': message, 'observation': str(observation)})
result = fay_booter.feiFei.on_interact(interact)
return jsonify({
'status': 'success',
'data': str(result) if result is not None else '',
'msg': '已停止说话'
}), 200
except Exception as e:
return jsonify({
'status': 'error',
'msg': str(e)
}), 500
#消息透传接口
@__app.route('/transparent_pass', methods=['post'])
def transparent_pass():
@@ -509,7 +547,14 @@ def transparent_pass():
def run():
server = pywsgi.WSGIServer(('0.0.0.0',5000), __app)
class NullLogHandler:
def write(self, *args, **kwargs):
pass
server = pywsgi.WSGIServer(
('0.0.0.0', 5000),
__app,
log=NullLogHandler()
)
server.serve_forever()
def start():

View File

@@ -62,9 +62,7 @@ def get_communication_history(uid=0):
messages.append({"role": "user", "content": message_content})
elif role == "fay":
messages.append({"role": "assistant", "content": message_content})
if messages:
messages[-1]["content"] += f" 当前时间:{thistime}"
return messages
def send_request(session, data):

2
qa.csv
View File

@@ -1 +1 @@
问题,答案,执行的脚本
问题,答案,脚本
1 问题 答案 执行的脚本 脚本

62
shell/run_startup.py Normal file
View File

@@ -0,0 +1,62 @@
import subprocess
import os
import signal
# Module-level registry of every subprocess launched by run_startup_apps(),
# kept so stop_all_processes() can terminate them on service shutdown.
running_processes = []
def run_startup_apps():
    """Launch every external application listed in ``startup.txt``.

    ``startup.txt`` is looked up next to this script.  Each non-empty line
    is treated as one command: the first whitespace-separated token is the
    program path, the remaining tokens are its arguments.  Every process
    started is appended to the module-level ``running_processes`` list so
    ``stop_all_processes`` can shut it down later.  A failure to start one
    entry is reported and does not stop the remaining entries.
    """
    # startup.txt lives in the same directory as this script.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    startup_file = os.path.join(script_dir, 'startup.txt')

    if not os.path.exists(startup_file):
        return

    with open(startup_file, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                # NOTE(review): a plain split() breaks program paths that
                # contain spaces (e.g. "C:\Program Files\..."), so such
                # entries in startup.txt would need quoting support —
                # confirm against the actual startup.txt contents.
                parts = line.split()
                program = parts[0]
                args = parts[1:]

                popen_kwargs = {}
                if os.name == 'nt':
                    # STARTUPINFO and the window/console flags exist only on
                    # Windows; guarding them keeps this module usable on
                    # other platforms instead of erroring on every entry.
                    startupinfo = subprocess.STARTUPINFO()
                    startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
                    startupinfo.wShowWindow = subprocess.SW_HIDE
                    popen_kwargs['startupinfo'] = startupinfo
                    popen_kwargs['creationflags'] = subprocess.CREATE_NEW_CONSOLE

                process = subprocess.Popen([program] + args, **popen_kwargs)
                running_processes.append(process)
                print(f"Started: {line}")
            except Exception as e:
                print(f"Error starting {line}: {str(e)}")
def stop_all_processes():
    """Terminate every process recorded in ``running_processes``.

    Each still-running process is first asked to terminate gracefully; if
    it has not exited within 3 seconds it is force-killed.  Failures for
    individual processes are reported without aborting shutdown of the
    rest, and the bookkeeping list is emptied afterwards.
    """
    for proc in running_processes:
        try:
            # poll() returning a value means the process already exited.
            if proc.poll() is not None:
                continue
            proc.terminate()
            try:
                proc.wait(timeout=3)
            except subprocess.TimeoutExpired:
                # Graceful shutdown took too long — force it.
                proc.kill()
            print(f"Stopped process with PID: {proc.pid}")
        except Exception as e:
            print(f"Error stopping process: {str(e)}")
    running_processes.clear()
if __name__ == "__main__":
    # Entry point when this script is launched as its own process (the
    # booter appears to spawn it via subprocess on service start).
    run_startup_apps()

2
shell/startup.txt Normal file
View File

@@ -0,0 +1,2 @@
C:\Program Files\bililive\livehime\livehime.exe
D:\anaconda3\envs\fay_cv\python.exe H:\fay_cv\gesture_detection.py