年番更新

1、增加一组意图接口:唤醒、打招呼、打断;
2、增加一个自启动脚本列表;
3、修复aliyun asr错误后无法继续拾音问题;
4、优化本地拾音逻辑:麦克风启动时间减小、VAD判断时间减小、错误检查机制、拾音音量动态阈值、调整缓冲区大小;
5、补充自动播放时推送数据的检测;
6、更正qa.csv文件默认编码;
7、去掉http请求日志信息。
This commit is contained in:
xszyou
2025-01-09 01:36:33 +08:00
parent 421157c667
commit 9186f91afa
10 changed files with 252 additions and 88 deletions

View File

@@ -114,12 +114,11 @@ class ALiNls:
def on_close(self, ws, code, msg):
self.__endding = True
self.__is_close = True
if msg:
print("aliyun asr服务不太稳定:", msg)
# 收到websocket错误的处理
def on_error(self, ws, error):
print("aliyun asr error:", error)
self.started = True #避免在aliyun asr出错时recorder一直等待start状态返回
# 收到websocket连接建立的处理
def on_open(self, ws):

View File

@@ -445,7 +445,7 @@ class FeiFei:
#发送音频给数字人接口
if wsa_server.get_instance().is_connected(interact.data.get("user")):
content = {'Topic': 'Unreal', 'Data': {'Key': 'audio', 'Value': os.path.abspath(file_url), 'HttpValue': f'http://{cfg.fay_url}:5000/audio/' + os.path.basename(file_url), 'Text': text, 'Time': audio_length, 'Type': 'hello' if interact.interleaver == 'hello' else ('interact' if interact.interact_type == 1 else 'auto_play')}, 'Username' : interact.data.get('user')}
content = {'Topic': 'Unreal', 'Data': {'Key': 'audio', 'Value': os.path.abspath(file_url), 'HttpValue': f'http://{cfg.fay_url}:5000/audio/' + os.path.basename(file_url), 'Text': text, 'Time': audio_length, 'Type': interact.interleaver}, 'Username' : interact.data.get('user')}
#计算lips
if platform.system() == "Windows":
try:

View File

@@ -1,4 +1,4 @@
#inerleaver:mic、text、socket、auto_play。interact_type:1、语音/文字交互2、穿透。
#inerleaver:mic、text、socket、auto_play。interact_type:1、语音/文字交互2、透
class Interact:
def __init__(self, interleaver: str, interact_type: int, data: dict):

View File

@@ -16,11 +16,11 @@ import tempfile
import wave
from core import fay_core
from core import interact
# 启动时间 (秒)
_ATTACK = 0.2
# 麦克风启动时间 (秒)
_ATTACK = 0.1
# 释放时间 (秒)
_RELEASE = 0.7
# 麦克风释放时间 (秒)
_RELEASE = 0.5
class Recorder:
@@ -49,6 +49,10 @@ class Recorder:
self.is_reading = False
self.stream = None
self.__last_ws_notify_time = 0
self.__ws_notify_interval = 0.5 # 最小通知间隔(秒)
self.__ws_notify_thread = None
def asrclient(self):
if self.ASRMode == "ali":
asrcli = ALiNls(self.username)
@@ -196,8 +200,8 @@ class Recorder:
util.printInfo(1, self.username, "请检查设备是否有误,再重新启动!")
return
isSpeaking = False
last_mute_time = time.time()
last_speaking_time = time.time()
last_mute_time = time.time() #用户上次说话完话的时刻用于VAD的开始判断也会影响fay说完话到收听用户说话的时间间隔
last_speaking_time = time.time()#用户上次说话的时刻用于VAD的结束判断
data = None
concatenated_audio = bytearray()
audio_data_list = []
@@ -248,56 +252,84 @@ class Recorder:
self.__dynamic_threshold += (history_percentage - self.__dynamic_threshold) * 1
#激活拾音
if percentage > self.__dynamic_threshold:
last_speaking_time = time.time()
#用户正在说话,激活拾音
try:
if percentage > self.__dynamic_threshold:
last_speaking_time = time.time()
if not self.__processing and not isSpeaking and time.time() - last_mute_time > _ATTACK:
isSpeaking = True #用户正在说话
util.printInfo(1, self.username,"聆听中...")
if wsa_server.get_web_instance().is_connected(self.username):
wsa_server.get_web_instance().add_cmd({"panelMsg": "聆听中...", 'Username' : self.username, 'robot': f'http://{cfg.fay_url}:5000/robot/Listening.jpg'})
if wsa_server.get_instance().is_connected(self.username):
content = {'Topic': 'Unreal', 'Data': {'Key': 'log', 'Value': "聆听中..."}, 'Username' : self.username, 'robot': f'http://{cfg.fay_url}:5000/robot/Listening.jpg'}
wsa_server.get_instance().add_cmd(content)
concatenated_audio.clear()
self.__aLiNls = self.asrclient()
try:
if not self.__processing and not isSpeaking and time.time() - last_mute_time > _ATTACK:
isSpeaking = True #用户正在说话
util.printInfo(1, self.username,"聆听中...")
self.__notify_listening_status() # 使用新方法发送通知
concatenated_audio.clear()
self.__aLiNls = self.asrclient()
task_id = self.__aLiNls.start()
while not self.__aLiNls.started:
time.sleep(0.01)
except Exception as e:
print(e)
util.printInfo(1, self.username, "aliyun asr 连接受限")
for i in range(len(self.__history_data) - 1): #当前data在下面会做发送这里是发送激活前的音频数据以免漏掉信息
buf = self.__history_data[i]
audio_data_list.append(self.__process_audio_data(buf, self.channels))
if self.ASRMode == "ali":
self.__aLiNls.send(self.__process_audio_data(buf, self.channels).tobytes())
else:
concatenated_audio.extend(self.__process_audio_data(buf, self.channels).tobytes())
self.__history_data.clear()
else:#结束拾音
last_mute_time = time.time()
if isSpeaking:
if time.time() - last_speaking_time > _RELEASE: #TODO 更换的vad更靠谱
isSpeaking = False
self.__aLiNls.end()
util.printInfo(1, self.username, "语音处理中...")
self.__waitingResult(self.__aLiNls, concatenated_audio)
for i in range(len(self.__history_data) - 1): #当前data在下面会做发送这里是发送激活前的音频数据以免漏掉信息
buf = self.__history_data[i]
audio_data_list.append(self.__process_audio_data(buf, self.channels))
if self.ASRMode == "ali":
self.__aLiNls.send(self.__process_audio_data(buf, self.channels).tobytes())
else:
concatenated_audio.extend(self.__process_audio_data(buf, self.channels).tobytes())
self.__history_data.clear()
else:#结束拾音
last_mute_time = time.time()
if isSpeaking:
if time.time() - last_speaking_time > _RELEASE:
isSpeaking = False
self.__aLiNls.end()
util.printInfo(1, self.username, "语音处理中...")
self.__waitingResult(self.__aLiNls, concatenated_audio)
mono_data = self.__concatenate_audio_data(audio_data_list)
self.__save_audio_to_wav(mono_data, self.sample_rate, "cache_data/input.wav")
audio_data_list = []
#拾音中
if isSpeaking:
audio_data_list.append(self.__process_audio_data(data, self.channels))
if self.ASRMode == "ali":
self.__aLiNls.send(self.__process_audio_data(data, self.channels).tobytes())
else:
concatenated_audio.extend(self.__process_audio_data(data, self.channels).tobytes())
mono_data = self.__concatenate_audio_data(audio_data_list)
self.__save_audio_to_wav(mono_data, self.sample_rate, "cache_data/input.wav")
audio_data_list = []
#拾音中
if isSpeaking:
audio_data_list.append(self.__process_audio_data(data, self.channels))
if self.ASRMode == "ali":
self.__aLiNls.send(self.__process_audio_data(data, self.channels).tobytes())
else:
concatenated_audio.extend(self.__process_audio_data(data, self.channels).tobytes())
except Exception as e:
printInfo(1, self.username, "录音失败: " + str(e))
#异步发送 WebSocket 通知
def __notify_listening_status(self):
current_time = time.time()
if current_time - self.__last_ws_notify_time < self.__ws_notify_interval:
return
def send_ws_notification():
try:
if wsa_server.get_web_instance().is_connected(self.username):
wsa_server.get_web_instance().add_cmd({
"panelMsg": "聆听中...",
'Username': self.username,
'robot': f'http://{cfg.fay_url}:5000/robot/Listening.jpg'
})
if wsa_server.get_instance().is_connected(self.username):
content = {
'Topic': 'Unreal',
'Data': {'Key': 'log', 'Value': "聆听中..."},
'Username': self.username,
'robot': f'http://{cfg.fay_url}:5000/robot/Listening.jpg'
}
wsa_server.get_instance().add_cmd(content)
except Exception as e:
util.log(1, f"发送 WebSocket 通知失败: {e}")
# 如果之前的通知线程还在运行,就不启动新的
if self.__ws_notify_thread is None or not self.__ws_notify_thread.is_alive():
self.__ws_notify_thread = threading.Thread(target=send_ws_notification)
self.__ws_notify_thread.daemon = True
self.__ws_notify_thread.start()
self.__last_ws_notify_time = current_time
def __save_audio_to_wav(self, data, sample_rate, filename):
# 确保数据类型为 int16
if data.dtype != np.int16:

View File

@@ -1,4 +1,5 @@
#核心启动模块
import os
import time
import re
import pyaudio
@@ -15,6 +16,7 @@ from core.wsa_server import MyServer
from core import wsa_server
from core import socket_bridge_service
from llm.agent import agent_service
import subprocess
feiFei: fay_core.FeiFei = None
recorderListener: Recorder = None
@@ -34,13 +36,12 @@ class RecorderListener(Recorder):
def __init__(self, device, fei):
self.__device = device
self.__RATE = 16000
self.__FORMAT = pyaudio.paInt16
self.__running = False
self.username = 'User'
self.channels = 1
self.sample_rate = 16000
# 这两个参数会在 get_stream 中根据实际设备更新
self.channels = None
self.sample_rate = None
super().__init__(fei)
def on_speaking(self, text):
@@ -51,51 +52,59 @@ class RecorderListener(Recorder):
def get_stream(self):
try:
#是否录音的控制是放在recorder.py的这里的作用是防止没有麦克风的设备出错
while True:
record = config_util.config['source']['record']
if record['enabled']:
break
time.sleep(0.1)
self.paudio = pyaudio.PyAudio()
device_id = 0 # 或者根据需要选择其他设备
# 获取设备信息
device_info = self.paudio.get_device_info_by_index(device_id)
self.channels = device_info.get('maxInputChannels', 1) #很多麦克风只支持单声道录音
# self.sample_rate = int(device_info.get('defaultSampleRate', self.__RATE))
# 设置格式这里以16位深度为例
format = pyaudio.paInt16
# 打开音频流,使用设备的最大声道数和默认采样率
# 获取默认输入设备的信息
default_device = self.paudio.get_default_input_device_info()
self.channels = min(int(default_device.get('maxInputChannels', 1)), 2) # 最多使用2个通道
# self.sample_rate = int(default_device.get('defaultSampleRate', 16000))
util.printInfo(1, "系统", f"默认麦克风信息 - 采样率: {self.sample_rate}Hz, 通道数: {self.channels}")
# 使用系统默认麦克风
self.stream = self.paudio.open(
input_device_index=device_id,
rate=self.sample_rate,
format=format,
format=self.__FORMAT,
channels=self.channels,
rate=self.sample_rate,
input=True,
frames_per_buffer=4096
frames_per_buffer=1024
)
self.__running = True
MyThread(target=self.__pyaudio_clear).start()
except Exception as e:
print(f"Error: {e}")
util.log(1, f"打开麦克风时出错: {str(e)}")
util.printInfo(1, self.username, "请检查录音设备是否有误,再重新启动!")
time.sleep(10)
return self.stream
def __pyaudio_clear(self):
while self.__running:
time.sleep(30)
try:
while self.__running:
time.sleep(30)
except Exception as e:
util.log(1, f"音频清理线程出错: {str(e)}")
finally:
if hasattr(self, 'stream') and self.stream:
try:
self.stream.stop_stream()
self.stream.close()
except Exception as e:
util.log(1, f"关闭音频流时出错: {str(e)}")
def stop(self):
super().stop()
self.__running = False
time.sleep(0.1)#给清理线程一点处理时间
try:
while self.is_reading:
while self.is_reading:#是为了确保停止的时候麦克风没有刚好在读取音频的
time.sleep(0.1)
if self.stream is not None:
self.stream.stop_stream()
@@ -250,6 +259,8 @@ def start_auto_play_service(): #TODO 评估一下有无优化的空间
if not audio_url or audio_url.strip()[0:4] != "http":
audio_url = None
response_text = data.get('text')
if audio_url is None and (response_text is None or '' == response_text.strip()):
continue
timestamp = data.get('timestamp')
interact = Interact("auto_play", 2, {'user': user, 'text': response_text, 'audio': audio_url})
util.printInfo(1, user, '自动播放:{}{}'.format(response_text, audio_url), time.time())
@@ -276,6 +287,13 @@ def stop():
global socket_service_instance
global deviceSocketServer
#停止外部应用
util.log(1, '停止外部应用...')
startup_script = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'shell', 'run_startup.py')
if os.path.exists(startup_script):
from shell.run_startup import stop_all_processes
stop_all_processes()
util.log(1, '正在关闭服务...')
__running = False
if recorderListener is not None:
@@ -310,6 +328,14 @@ def start():
global recorderListener
global __running
global socket_service_instance
#启动外部应用
util.log(1,'启动外部应用...')
startup_script = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'shell', 'run_startup.py')
if os.path.exists(startup_script):
subprocess.Popen([sys.executable, startup_script],
creationflags=subprocess.CREATE_NEW_CONSOLE)
util.log(1, '开启服务...')
__running = True

View File

@@ -9,6 +9,7 @@ from flask_cors import CORS
import requests
import datetime
import pytz
import logging
import fay_booter
@@ -26,6 +27,13 @@ from flask_httpauth import HTTPBasicAuth
from core import qa_service
__app = Flask(__name__)
# 禁用 Flask 默认日志
__app.logger.disabled = True
log = logging.getLogger('werkzeug')
log.disabled = True
# 禁用请求日志中间件
__app.config['PROPAGATE_EXCEPTIONS'] = True
auth = HTTPBasicAuth()
CORS(__app, supports_credentials=True)
@@ -70,7 +78,6 @@ def __get_device_list():
print(f"Error getting device list: {e}")
return []
@__app.route('/api/submit', methods=['post'])
def api_submit():
data = request.values.get('data')
@@ -336,7 +343,7 @@ def api_get_Member_list():
except Exception as e:
return jsonify({'list': [], 'message': f'获取成员列表时出错: {e}'}), 500
@__app.route('/api/get_run_status', methods=['post'])
@__app.route('/api/get-run-status', methods=['post'])
def api_get_run_status():
# 获取运行状态
try:
@@ -486,6 +493,37 @@ def to_greet():
text = fay_booter.feiFei.on_interact(interact)
return jsonify({'status': 'success', 'data': text, 'msg': '已进行打招呼'}), 200
#唤醒:在普通唤醒模式,进行大屏交互才有意义
@__app.route('/to_wake', methods=['POST'])
def to_wake():
data = request.get_json()
username = data.get('username', 'User')
observation = data.get('observation', '')
fay_booter.recorderListener.wakeup_matched = True
return jsonify({'status': 'success', 'msg': '已唤醒'}), 200
#打断
@__app.route('/to_stop_talking', methods=['POST'])
def to_stop_talking():
try:
data = request.get_json()
username = data.get('username', 'User')
message = data.get('text', '你好,请说?')
observation = data.get('observation', '')
interact = Interact("stop_talking", 2, {'user': username, 'text': message, 'observation': str(observation)})
result = fay_booter.feiFei.on_interact(interact)
return jsonify({
'status': 'success',
'data': str(result) if result is not None else '',
'msg': '已停止说话'
}), 200
except Exception as e:
return jsonify({
'status': 'error',
'msg': str(e)
}), 500
#消息透传接口
@__app.route('/transparent_pass', methods=['post'])
def transparent_pass():
@@ -509,7 +547,14 @@ def transparent_pass():
def run():
server = pywsgi.WSGIServer(('0.0.0.0',5000), __app)
class NullLogHandler:
def write(self, *args, **kwargs):
pass
server = pywsgi.WSGIServer(
('0.0.0.0', 5000),
__app,
log=NullLogHandler()
)
server.serve_forever()
def start():

View File

@@ -62,9 +62,7 @@ def get_communication_history(uid=0):
messages.append({"role": "user", "content": message_content})
elif role == "fay":
messages.append({"role": "assistant", "content": message_content})
if messages:
messages[-1]["content"] += f" 当前时间:{thistime}"
return messages
def send_request(session, data):

2
qa.csv
View File

@@ -1 +1 @@
问题,答案,执行的脚本
问题,答案,脚本
1 问题 答案 执行的脚本 脚本

62
shell/run_startup.py Normal file
View File

@@ -0,0 +1,62 @@
import subprocess
import os
import signal
# Module-level registry of every subprocess launched by run_startup_apps(),
# kept so stop_all_processes() can terminate them on service shutdown.
running_processes = []
def run_startup_apps():
    """Launch every external application listed in ``startup.txt``.

    ``startup.txt`` is looked up next to this script.  Each non-empty line
    is treated as one command: the first whitespace-separated token is the
    program path, the remaining tokens are its arguments.  Every process
    started is appended to the module-level ``running_processes`` list so
    ``stop_all_processes`` can shut it down later.  A failure to start one
    entry is reported and does not stop the remaining entries.
    """
    # startup.txt lives in the same directory as this script.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    startup_file = os.path.join(script_dir, 'startup.txt')

    if not os.path.exists(startup_file):
        return

    with open(startup_file, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                # NOTE(review): a plain split() breaks program paths that
                # contain spaces (e.g. "C:\Program Files\..."), so such
                # entries in startup.txt would need quoting support —
                # confirm against the actual startup.txt contents.
                parts = line.split()
                program = parts[0]
                args = parts[1:]

                popen_kwargs = {}
                if os.name == 'nt':
                    # STARTUPINFO and the window/console flags exist only on
                    # Windows; guarding them keeps this module usable on
                    # other platforms instead of erroring on every entry.
                    startupinfo = subprocess.STARTUPINFO()
                    startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
                    startupinfo.wShowWindow = subprocess.SW_HIDE
                    popen_kwargs['startupinfo'] = startupinfo
                    popen_kwargs['creationflags'] = subprocess.CREATE_NEW_CONSOLE

                process = subprocess.Popen([program] + args, **popen_kwargs)
                running_processes.append(process)
                print(f"Started: {line}")
            except Exception as e:
                print(f"Error starting {line}: {str(e)}")
def stop_all_processes():
    """Terminate every process recorded in ``running_processes``.

    Each still-running process is first asked to terminate gracefully; if
    it has not exited within 3 seconds it is force-killed.  Failures for
    individual processes are reported without aborting shutdown of the
    rest, and the bookkeeping list is emptied afterwards.
    """
    for proc in running_processes:
        try:
            # poll() returning a value means the process already exited.
            if proc.poll() is not None:
                continue
            proc.terminate()
            try:
                proc.wait(timeout=3)
            except subprocess.TimeoutExpired:
                # Graceful shutdown took too long — force it.
                proc.kill()
            print(f"Stopped process with PID: {proc.pid}")
        except Exception as e:
            print(f"Error stopping process: {str(e)}")
    running_processes.clear()
if __name__ == "__main__":
    # Entry point when this script is launched as its own process (the
    # booter appears to spawn it via subprocess on service start).
    run_startup_apps()

2
shell/startup.txt Normal file
View File

@@ -0,0 +1,2 @@
C:\Program Files\bililive\livehime\livehime.exe
D:\anaconda3\envs\fay_cv\python.exe H:\fay_cv\gesture_detection.py