mirror of
https://github.com/xszyou/Fay.git
synced 2026-03-12 17:51:28 +08:00
知识库模块
1. 优化 yueshen 知识库初始化逻辑;2. 更新 yueshen 知识库,默认使用 Fay 透传的 embedding 模型;3. 补充知识库目录“新知识库”。
This commit is contained in:
16
faymcp/data/mcp_prestart_tools.json
Normal file
16
faymcp/data/mcp_prestart_tools.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"4": {
|
||||
"query_yueshen": {
|
||||
"params": {
|
||||
"embedding_api_key": "",
|
||||
"embedding_base_url": "",
|
||||
"embedding_model": "",
|
||||
"query": "{{question}}",
|
||||
"top_k": 5,
|
||||
"where": {}
|
||||
},
|
||||
"include_history": true,
|
||||
"allow_function_call": false
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -3,7 +3,7 @@
|
||||
"id": 1,
|
||||
"name": "tools",
|
||||
"ip": "",
|
||||
"connection_time": "2025-12-10 21:16:35",
|
||||
"connection_time": "2026-01-22 15:59:12",
|
||||
"key": "",
|
||||
"transport": "stdio",
|
||||
"command": "python",
|
||||
@@ -47,7 +47,7 @@
|
||||
"id": 4,
|
||||
"name": "yueshen rag",
|
||||
"ip": "",
|
||||
"connection_time": "2025-12-10 21:16:44",
|
||||
"connection_time": "2026-01-27 16:01:24",
|
||||
"key": "",
|
||||
"transport": "stdio",
|
||||
"command": "C:\\Users\\Lenovo\\anaconda3\\envs\\rag\\python.exe",
|
||||
@@ -55,20 +55,13 @@
|
||||
"mcp_servers/yueshen_rag/server.py"
|
||||
],
|
||||
"cwd": "",
|
||||
"env": {
|
||||
"YUESHEN_AUTO_INGEST": "1",
|
||||
"YUESHEN_AUTO_INTERVAL": "300",
|
||||
"YUESHEN_AUTO_RESET_ON_START": "0",
|
||||
"YUESHEN_EMBED_API_KEY": "sk-izmvqrzyhjghzyghiofqfpusxprmfljntxzggkcovtneqpas",
|
||||
"YUESHEN_EMBED_BASE_URL": "https://api.siliconflow.cn/v1",
|
||||
"YUESHEN_EMBED_MODEL": "Qwen/Qwen3-Embedding-8B"
|
||||
}
|
||||
"env": {}
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"name": "window capture",
|
||||
"ip": "",
|
||||
"connection_time": "2025-12-10 21:16:45",
|
||||
"connection_time": "2025-12-17 22:26:55",
|
||||
"key": "",
|
||||
"transport": "stdio",
|
||||
"command": "python",
|
||||
@@ -77,5 +70,64 @@
|
||||
],
|
||||
"cwd": "",
|
||||
"env": {}
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"name": "broswermcp",
|
||||
"ip": "",
|
||||
"connection_time": "2026-01-15 14:18:38",
|
||||
"key": "",
|
||||
"transport": "stdio",
|
||||
"command": "npx",
|
||||
"args": [
|
||||
"@browsermcp/mcp@latest"
|
||||
],
|
||||
"cwd": "",
|
||||
"env": {}
|
||||
},
|
||||
{
|
||||
"id": 7,
|
||||
"name": "Todo Server",
|
||||
"ip": "",
|
||||
"connection_time": "2025-12-17 16:06:29",
|
||||
"key": "",
|
||||
"transport": "stdio",
|
||||
"command": "python",
|
||||
"args": [
|
||||
"-u",
|
||||
"server.py"
|
||||
],
|
||||
"cwd": "mcp_servers/mcp-todo-server",
|
||||
"env": {}
|
||||
},
|
||||
{
|
||||
"id": 8,
|
||||
"name": "KinEcho",
|
||||
"ip": "",
|
||||
"connection_time": "2025-12-24 11:20:24",
|
||||
"key": "",
|
||||
"transport": "stdio",
|
||||
"command": "python",
|
||||
"args": [
|
||||
"server.py"
|
||||
],
|
||||
"cwd": "mcp_servers\\elderly_mcp",
|
||||
"env": {}
|
||||
},
|
||||
{
|
||||
"id": 9,
|
||||
"name": "easydeal-trading",
|
||||
"ip": "",
|
||||
"connection_time": "2026-01-15 20:30:00",
|
||||
"key": "",
|
||||
"transport": "stdio",
|
||||
"command": "C:\\Users\\Lenovo\\anaconda3\\envs\\fxai\\python.exe",
|
||||
"args": [
|
||||
"D:/Projects/easy_deal_agent/easy-deal/easydeal_mcp_server.py"
|
||||
],
|
||||
"cwd": "D:/Projects/easy_deal_agent/easy-deal",
|
||||
"env": {
|
||||
"EA_PROFILE_PATH": "D:\\Projects\\easy_deal_agent\\config.set"
|
||||
}
|
||||
}
|
||||
]
|
||||
@@ -37,7 +37,11 @@
|
||||
"search_files": false,
|
||||
"write_file": false
|
||||
},
|
||||
"4": {},
|
||||
"4": {
|
||||
"ingest_yueshen": false,
|
||||
"yueshen_stats": false,
|
||||
"query_yueshen": false
|
||||
},
|
||||
"6": {
|
||||
"list_windows": true
|
||||
}
|
||||
|
||||
@@ -62,9 +62,10 @@ DEFAULT_PERSIST_DIR = os.getenv(
|
||||
os.path.join(PROJECT_ROOT, "cache_data", "chromadb_yueshen"),
|
||||
)
|
||||
COLLECTION_NAME = "yueshen_kb"
|
||||
DEFAULT_EMBED_BASE_URL = os.getenv("YUESHEN_EMBED_BASE_URL")
|
||||
DEFAULT_EMBED_API_KEY = os.getenv("YUESHEN_EMBED_API_KEY")
|
||||
DEFAULT_EMBED_MODEL = os.getenv("YUESHEN_EMBED_MODEL", "text-embedding-3-small")
|
||||
# Default to Fay's OpenAI-compatible passthrough embedding endpoint.
|
||||
DEFAULT_EMBED_BASE_URL = os.getenv("YUESHEN_EMBED_BASE_URL", "http://127.0.0.1:5000/v1")
|
||||
DEFAULT_EMBED_API_KEY = os.getenv("YUESHEN_EMBED_API_KEY", "sk-fay")
|
||||
DEFAULT_EMBED_MODEL = os.getenv("YUESHEN_EMBED_MODEL", "embedding")
|
||||
AUTO_INGEST_ENABLED = os.getenv("YUESHEN_AUTO_INGEST", "1") != "0"
|
||||
AUTO_INGEST_INTERVAL = int(os.getenv("YUESHEN_AUTO_INTERVAL", "300"))
|
||||
AUTO_RESET_ON_START = os.getenv("YUESHEN_AUTO_RESET_ON_START", "0") != "0"
|
||||
@@ -469,6 +470,7 @@ class AutoIngestor:
|
||||
self._stop = threading.Event()
|
||||
self._thread: Optional[threading.Thread] = None
|
||||
self._snapshot: Dict[str, Tuple[float, int]] = {}
|
||||
self._last_ingest_ok = False
|
||||
|
||||
def _take_snapshot(self) -> Dict[str, Tuple[float, int]]:
|
||||
snap: Dict[str, Tuple[float, int]] = {}
|
||||
@@ -490,7 +492,13 @@ class AutoIngestor:
|
||||
return True
|
||||
return False
|
||||
|
||||
def _ingest_once(self, reset: bool = False):
|
||||
def _vector_count(self) -> Optional[int]:
|
||||
try:
|
||||
return self.km.store.collection.count()
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
def _ingest_once(self, reset: bool = False) -> bool:
|
||||
try:
|
||||
res = self.km.ingest(
|
||||
corpus_dir=self.km.corpus_dir,
|
||||
@@ -500,8 +508,12 @@ class AutoIngestor:
|
||||
embedding_model=self.km.embedder.model,
|
||||
)
|
||||
logger.info("Auto-ingest done: %s", json.dumps(res, ensure_ascii=False))
|
||||
self._last_ingest_ok = True
|
||||
return True
|
||||
except Exception as exc:
|
||||
logger.error("Auto-ingest failed: %s", exc)
|
||||
self._last_ingest_ok = False
|
||||
return False
|
||||
|
||||
def _loop(self):
|
||||
# initial snapshot and optional first ingest
|
||||
@@ -514,8 +526,17 @@ class AutoIngestor:
|
||||
self._ingest_once(reset=False)
|
||||
|
||||
while not self._stop.wait(self.interval):
|
||||
if self._has_changes():
|
||||
logger.info("Detected corpus change, auto-ingest...")
|
||||
changed = self._has_changes()
|
||||
vectors = self._vector_count()
|
||||
needs_bootstrap = (vectors == 0) or (vectors is None and not self._last_ingest_ok)
|
||||
if changed or needs_bootstrap:
|
||||
if changed:
|
||||
reason = "corpus change"
|
||||
elif vectors == 0:
|
||||
reason = "empty vector store"
|
||||
else:
|
||||
reason = "retry after ingest failure"
|
||||
logger.info("Auto-ingest trigger: %s", reason)
|
||||
self._ingest_once(reset=False)
|
||||
|
||||
def start(self):
|
||||
@@ -636,6 +657,27 @@ async def call_tool(name: str, arguments: Dict[str, Any]) -> List[TextContent]:
|
||||
|
||||
if name == "query_yueshen":
|
||||
query_text = arguments.get("query", "")
|
||||
try:
|
||||
vector_count = manager.store.collection.count()
|
||||
except Exception:
|
||||
vector_count = None
|
||||
if vector_count == 0:
|
||||
return [
|
||||
TextContent(
|
||||
type="text",
|
||||
text=json.dumps(
|
||||
{
|
||||
"results": [],
|
||||
"count": 0,
|
||||
"skipped": True,
|
||||
"reason": "vector store empty; run ingest_yueshen first",
|
||||
"stats": manager.stats(),
|
||||
},
|
||||
ensure_ascii=False,
|
||||
indent=2,
|
||||
),
|
||||
)
|
||||
]
|
||||
# 跳过常见问候和简单回复,不进行知识库查询
|
||||
if _is_trivial_query(query_text):
|
||||
return [TextContent(type="text", text=json.dumps({
|
||||
|
||||
31
新知识库/README.md
Normal file
31
新知识库/README.md
Normal file
@@ -0,0 +1,31 @@
|
||||
# 新知识库(YueShen MCP 知识库目录)
|
||||
|
||||
这个目录是 `yueshen_rag` MCP 服务器默认扫描的知识库目录(默认等价于项目根目录下的 `新知识库`)。
|
||||
|
||||
## 怎么用(最少配置版)
|
||||
|
||||
1) 把文档放进本目录(可放子目录)。
|
||||
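2) 启动 Fay(确保 `http://127.0.0.1:5000` 可用)。`yueshen_rag` 默认通过 `http://127.0.0.1:5000/v1` 调用 Fay 透传的 embedding 接口;如需改用其他服务,可设置环境变量 `YUESHEN_EMBED_BASE_URL` / `YUESHEN_EMBED_API_KEY` / `YUESHEN_EMBED_MODEL`。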
|
||||
3) 在 Fay 的 MCP 中启用 `yueshen_rag` 服务器(在 MCP 服务器列表中的名称为 `yueshen rag`)。
|
||||
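4) 首次使用建议先执行一次工具:`ingest_yueshen`(立即建立索引)。默认已开启自动入库(`YUESHEN_AUTO_INGEST`,设为 `0` 可关闭):服务器会按 `YUESHEN_AUTO_INTERVAL`(默认 300 秒)周期检查,发现文档变化或向量库为空时自动入库。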
|
||||
5) 之后用工具:`query_yueshen` 进行检索。
|
||||
|
||||
> 提示:默认会把向量索引持久化到 `cache_data/chromadb_yueshen`。
|
||||
|
||||
## 支持的文件格式(以代码为准)
|
||||
|
||||
当前 `yueshen_rag` 实际会扫描并处理:
|
||||
|
||||
- `.pdf`
|
||||
- `.docx`
|
||||
|
||||
不在上述列表中的文件会被忽略(例如 `.doc` / `.txt`)。如需使用 `.doc`,请先转换为 `.docx`。
|
||||
|
||||
## 常用工具说明
|
||||
|
||||
- `ingest_yueshen`:扫描本目录文档并写入向量库
|
||||
- `query_yueshen`:向量检索(输入 `query`)
|
||||
- `yueshen_stats`:查看当前向量库状态(例如向量数量)
|
||||
|
||||
如果检索结果中 `skipped` 为 `true`,且 `reason` 为 “vector store empty; run ingest_yueshen first”,说明向量库为空、还没建立索引,先执行一次 `ingest_yueshen` 即可。
|
||||
|
||||
Reference in New Issue
Block a user