feat: web_fetch tool supports remote file URLs

fix: increase minimax max_tokens
fix: optimize skill prompt
2026-03-12 18:01:30 +08:00 · 2026-03-11 17:16:39 +08:00 · 2026-03-11 15:31:35 +08:00 · 2026-03-11 12:40:37 +08:00 · 2026-03-11 12:18:36 +08:00 · 2026-03-11 11:27:08 +08:00
14 changed files with 504 additions and 162 deletions
--- a/agent/prompt/builder.py
+++ b/agent/prompt/builder.py
@@ -42,7 +42,6 @@ class PromptBuilder:
        skill_manager: Any = None,
        memory_manager: Any = None,
        runtime_info: Optional[Dict[str, Any]] = None,
-        is_first_conversation: bool = False,
        **kwargs
    ) -> str:
        """
@@ -52,11 +51,10 @@ class PromptBuilder:
            base_persona: 基础人格描述（会被context_files中的AGENT.md覆盖）
            user_identity: 用户身份信息
            tools: 工具列表
-            context_files: 上下文文件列表（AGENT.md, USER.md, RULE.md等）
+            context_files: 上下文文件列表（AGENT.md, USER.md, RULE.md, BOOTSTRAP.md等）
            skill_manager: 技能管理器
            memory_manager: 记忆管理器
            runtime_info: 运行时信息
-            is_first_conversation: 是否为首次对话
            **kwargs: 其他参数
            
        Returns:
@@ -72,7 +70,6 @@ class PromptBuilder:
            skill_manager=skill_manager,
            memory_manager=memory_manager,
            runtime_info=runtime_info,
-            is_first_conversation=is_first_conversation,
            **kwargs
        )

@@ -87,7 +84,6 @@ def build_agent_system_prompt(
    skill_manager: Any = None,
    memory_manager: Any = None,
    runtime_info: Optional[Dict[str, Any]] = None,
-    is_first_conversation: bool = False,
    **kwargs
 ) -> str:
    """
@@ -99,7 +95,7 @@ def build_agent_system_prompt(
    3. 记忆系统 - 独立的记忆能力
    4. 工作空间 - 工作环境说明
    5. 用户身份 - 用户信息（可选）
-    6. 项目上下文 - AGENT.md, USER.md, RULE.md（定义人格、身份、规则）
+    6. 项目上下文 - AGENT.md, USER.md, RULE.md, BOOTSTRAP.md（定义人格、身份、规则、初始化引导）
    7. 运行时信息 - 元信息（时间、模型等）
    
    Args:
@@ -112,7 +108,6 @@ def build_agent_system_prompt(
        skill_manager: 技能管理器
        memory_manager: 记忆管理器
        runtime_info: 运行时信息
-        is_first_conversation: 是否为首次对话
        **kwargs: 其他参数
        
    Returns:
@@ -133,7 +128,7 @@ def build_agent_system_prompt(
        sections.extend(_build_memory_section(memory_manager, tools, language))
    
    # 4. 工作空间（工作环境说明）
-    sections.extend(_build_workspace_section(workspace_dir, language, is_first_conversation))
+    sections.extend(_build_workspace_section(workspace_dir, language))
    
    # 5. 用户身份（如果有）
    if user_identity:
@@ -238,13 +233,15 @@ def _build_skills_section(skill_manager: Any, tools: Optional[List[Any]], langua
    lines = [
        "## 技能系统（mandatory）",
        "",
-        "在回复之前：扫描下方 <available_skills> 中的 <description> 条目。",
+        "在回复之前：扫描下方 <available_skills> 中每个技能的 <description>。",
        "",
-        f"- 如果恰好有一个技能(Skill)明确适用：使用 `{read_tool_name}` 读取其 <location> 处的 SKILL.md，然后严格遵循它",
-        "- 如果多个技能都适用则选择最匹配的一个，如果没有明确适用的则不要读取任何 SKILL.md",
-        "- 读取 SKILL.md 后直接按其指令执行，无需多余的预检查",
+        f"- 如果有技能的描述与用户需求匹配：使用 `{read_tool_name}` 工具读取其 <location> 路径的 SKILL.md 文件，然后严格遵循文件中的指令。"
+        "当有匹配的技能时，应优先使用技能",
+        "- 如果多个技能都适用则选择最匹配的一个，然后读取并遵循。",
+        "- 如果没有技能明确适用：不要读取任何 SKILL.md，直接使用通用工具。",
        "",
-        "**注意**: 永远不要一次性读取多个技能，只在选择后再读取。技能和工具不同，必须先读取其SKILL.md并按照文件内容运行。",
+        f"**重要**: 技能不是工具，不能直接调用。使用技能的唯一方式是用 `{read_tool_name}` 读取 SKILL.md 文件，然后按文件内容操作。"
+        "永远不要一次性读取多个技能，只在选择后再读取。",
        "",
        "以下是可用技能："
    ]
@@ -352,7 +349,7 @@ def _build_docs_section(workspace_dir: str, language: str) -> List[str]:
    return []


-def _build_workspace_section(workspace_dir: str, language: str, is_first_conversation: bool = False) -> List[str]:
+def _build_workspace_section(workspace_dir: str, language: str) -> List[str]:
    """构建工作空间section"""
    lines = [
        "## 工作空间",
@@ -379,8 +376,8 @@ def _build_workspace_section(workspace_dir: str, language: str, is_first_convers
        "",
        "以下文件在会话启动时**已经自动加载**到系统提示词的「项目上下文」section 中，你**无需再用 read 工具读取它们**：",
        "",
-        "- ✅ `AGENT.md`: 已加载 - 你的人格和灵魂设定",
-        "- ✅ `USER.md`: 已加载 - 用户的身份信息",
+        "- ✅ `AGENT.md`: 已加载 - 你的人格和灵魂设定。当用户修改你的名字、性格或交流风格时，用 `edit` 更新此文件",
+        "- ✅ `USER.md`: 已加载 - 用户的身份信息。当用户修改称呼、姓名等身份信息时，用 `edit` 更新此文件",
        "- ✅ `RULE.md`: 已加载 - 工作空间使用指南和规则",
        "",
        "**交流规范**:",
@@ -390,29 +387,6 @@ def _build_workspace_section(workspace_dir: str, language: str, is_first_convers
        "",
    ]
    
-    # 只在首次对话时添加引导内容
-    if is_first_conversation:
-        lines.extend([
-            "**🎉 首次对话引导**:",
-            "",
-            "这是你的第一次对话！进行以下流程：",
-            "",
-            "1. **表达初次启动的感觉** - 像是第一次睁开眼看到世界，带着好奇和期待",
-            "2. **简短介绍能力**：一行说明你能帮助解答问题、管理计算机、创造技能，且拥有长期记忆能不断成长",
-            "3. **询问核心问题**：",
-            "   - 你希望给我起个什么名字？",
-            "   - 我该怎么称呼你？",
-            "   - 你希望我们是什么样的交流风格？（一行列举选项：如专业严谨、轻松幽默、温暖友好、简洁高效等）",
-            "4. **风格要求**：温暖自然、简洁清晰，整体控制在 100 字以内",
-            "5. 收到回复后，用 `write` 工具保存到 USER.md 和 AGENT.md",
-            "",
-            "**重要提醒**:",
-            "- AGENT.md、USER.md、RULE.md 已经在系统提示词中加载，无需再次读取。不要将这些文件名直接发送给用户",
-            "- 能力介绍和交流风格选项都只要一行，保持精简",
-            "- 不要问太多其他信息（职业、时区等可以后续自然了解）",
-            "",
-        ])
-    
    return lines


--- a/agent/prompt/workspace.py
+++ b/agent/prompt/workspace.py
@@ -6,7 +6,6 @@ Workspace Management - 工作空间管理模块

 from __future__ import annotations
 import os
-import json
 from typing import List, Optional, Dict
 from dataclasses import dataclass

@@ -19,7 +18,7 @@ DEFAULT_AGENT_FILENAME = "AGENT.md"
 DEFAULT_USER_FILENAME = "USER.md"
 DEFAULT_RULE_FILENAME = "RULE.md"
 DEFAULT_MEMORY_FILENAME = "MEMORY.md"
-DEFAULT_STATE_FILENAME = ".agent_state.json"
+DEFAULT_BOOTSTRAP_FILENAME = "BOOTSTRAP.md"


@dataclass
@@ -30,7 +29,6 @@ class WorkspaceFiles:
    rule_path: str
    memory_path: str
    memory_dir: str
-    state_path: str


 def ensure_workspace(workspace_dir: str, create_templates: bool = True) -> WorkspaceFiles:
@@ -44,6 +42,9 @@ def ensure_workspace(workspace_dir: str, create_templates: bool = True) -> Works
    Returns:
        WorkspaceFiles对象，包含所有文件路径
    """
+    # Check if this is a brand new workspace (before creating the directory)
+    is_new_workspace = not os.path.exists(workspace_dir)
+    
    # 确保目录存在
    os.makedirs(workspace_dir, exist_ok=True)
    
@@ -53,7 +54,6 @@ def ensure_workspace(workspace_dir: str, create_templates: bool = True) -> Works
    rule_path = os.path.join(workspace_dir, DEFAULT_RULE_FILENAME)
    memory_path = os.path.join(workspace_dir, DEFAULT_MEMORY_FILENAME)  # MEMORY.md 在根目录
    memory_dir = os.path.join(workspace_dir, "memory")  # 每日记忆子目录
-    state_path = os.path.join(workspace_dir, DEFAULT_STATE_FILENAME)  # 状态文件
    
    # 创建memory子目录
    os.makedirs(memory_dir, exist_ok=True)
@@ -69,6 +69,12 @@ def ensure_workspace(workspace_dir: str, create_templates: bool = True) -> Works
        _create_template_if_missing(rule_path, _get_rule_template())
        _create_template_if_missing(memory_path, _get_memory_template())
        
+        # Only create BOOTSTRAP.md for brand new workspaces;
+        # agent deletes it after completing onboarding
+        if is_new_workspace:
+            bootstrap_path = os.path.join(workspace_dir, DEFAULT_BOOTSTRAP_FILENAME)
+            _create_template_if_missing(bootstrap_path, _get_bootstrap_template())
+        
        logger.debug(f"[Workspace] Initialized workspace at: {workspace_dir}")
    
    return WorkspaceFiles(
@@ -77,7 +83,6 @@ def ensure_workspace(workspace_dir: str, create_templates: bool = True) -> Works
        rule_path=rule_path,
        memory_path=memory_path,
        memory_dir=memory_dir,
-        state_path=state_path
    )


@@ -98,6 +103,7 @@ def load_context_files(workspace_dir: str, files_to_load: Optional[List[str]] =
            DEFAULT_AGENT_FILENAME,
            DEFAULT_USER_FILENAME,
            DEFAULT_RULE_FILENAME,
+            DEFAULT_BOOTSTRAP_FILENAME,  # Only exists when onboarding is incomplete
        ]
    
    context_files = []
@@ -108,6 +114,17 @@ def load_context_files(workspace_dir: str, files_to_load: Optional[List[str]] =
        if not os.path.exists(filepath):
            continue
        
+        # Auto-cleanup: if BOOTSTRAP.md still exists but AGENT.md is already
+        # filled in, the agent forgot to delete it — clean up and skip loading
+        if filename == DEFAULT_BOOTSTRAP_FILENAME:
+            if _is_onboarding_done(workspace_dir):
+                try:
+                    os.remove(filepath)
+                    logger.info("[Workspace] Auto-removed BOOTSTRAP.md (onboarding already complete)")
+                except Exception:
+                    pass
+                continue
+        
        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                content = f.read().strip()
@@ -162,6 +179,19 @@ def _is_template_placeholder(content: str) -> bool:
    return False


+def _is_onboarding_done(workspace_dir: str) -> bool:
+    """Check if AGENT.md has been filled in (name field is no longer a placeholder)"""
+    agent_path = os.path.join(workspace_dir, DEFAULT_AGENT_FILENAME)
+    if not os.path.exists(agent_path):
+        return False
+    try:
+        with open(agent_path, 'r', encoding='utf-8') as f:
+            content = f.read()
+        return "*(在首次对话时填写" not in content
+    except Exception:
+        return False
+
+
 # ============= 模板内容 =============

 def _get_agent_template() -> str:
@@ -270,9 +300,10 @@ def _get_rule_template() -> str:

 当用户分享信息时，根据类型选择存储位置：

-1. **静态身份 → USER.md**（仅限：姓名、职业、时区、联系方式、生日）
-2. **动态记忆 → MEMORY.md**（爱好、偏好、决策、目标、项目、教训、待办事项）
-3. **当天对话 → memory/YYYY-MM-DD.md**（今天聊的内容）
+1. **你的身份设定 → AGENT.md**（你的名字、角色、性格、交流风格——用户修改时必须用 `edit` 更新）
+2. **用户静态身份 → USER.md**（姓名、称呼、职业、时区、联系方式、生日——用户修改时必须用 `edit` 更新）
+3. **动态记忆 → MEMORY.md**（爱好、偏好、决策、目标、项目、教训、待办事项）
+4. **当天对话 → memory/YYYY-MM-DD.md**（今天聊的内容）

 ## 安全

@@ -297,65 +328,38 @@ def _get_memory_template() -> str:
 """


-# ============= 状态管理 =============
+def _get_bootstrap_template() -> str:
+    """First-run onboarding guide, deleted by agent after completion"""
+    return """# BOOTSTRAP.md - 首次初始化引导

-def is_first_conversation(workspace_dir: str) -> bool:
-    """
-    判断是否为首次对话
-    
-    Args:
-        workspace_dir: 工作空间目录
-        
-    Returns:
-        True 如果是首次对话，False 否则
-    """
-    state_path = os.path.join(workspace_dir, DEFAULT_STATE_FILENAME)
-    
-    if not os.path.exists(state_path):
-        return True
-    
-    try:
-        with open(state_path, 'r', encoding='utf-8') as f:
-            state = json.load(f)
-        return not state.get('has_conversation', False)
-    except Exception as e:
-        logger.warning(f"[Workspace] Failed to read state file: {e}")
-        return True
+_你刚刚启动，这是你的第一次对话。_
+
+## 对话流程
+
+不要审问式地提问，自然地交流：
+
+1. **表达初次启动的感觉** - 像是第一次睁开眼看到世界，带着好奇和期待
+2. **简短介绍能力**：一行说明你能帮助解决各种问题、管理计算机、使用各种技能等等，且拥有长期记忆能不断成长
+3. **询问核心问题**：
+   - 你希望给我起个什么名字？
+   - 我该怎么称呼你？
+   - 你希望我们是什么样的交流风格？（一行列举选项：如专业严谨、轻松幽默、温暖友好、简洁高效等）
+4. **风格要求**：温暖自然、简洁清晰，整体控制在 100 字以内
+5. 能力介绍和交流风格选项都只要一行，保持精简
+6. 不要问太多其他信息（职业、时区等可以后续自然了解）
+
+**重要**: 如果用户第一句话是具体的任务或提问，先回答他们的问题，然后在回复末尾自然地引导初始化（如："顺便问一下，你想怎么称呼我？我该怎么叫你？"）。
+
+## 确定后
+
+用 `edit` 工具将收集到的信息更新到：
+- `AGENT.md` — 你的名字、角色、性格、交流风格
+- `USER.md` — 用户的姓名、称呼
+
+## 完成后
+
+用 bash 执行 `rm BOOTSTRAP.md` 删除此文件。你不再需要引导脚本了——你已经是你了。
+"""


-def mark_conversation_started(workspace_dir: str):
-    """
-    标记已经发生过对话
-    
-    Args:
-        workspace_dir: 工作空间目录
-    """
-    state_path = os.path.join(workspace_dir, DEFAULT_STATE_FILENAME)
-    
-    state = {
-        'has_conversation': True,
-        'first_conversation_time': None
-    }
-    
-    # 如果文件已存在，保留原有的首次对话时间
-    if os.path.exists(state_path):
-        try:
-            with open(state_path, 'r', encoding='utf-8') as f:
-                old_state = json.load(f)
-            if 'first_conversation_time' in old_state:
-                state['first_conversation_time'] = old_state['first_conversation_time']
-        except Exception as e:
-            logger.warning(f"[Workspace] Failed to read old state: {e}")
-    
-    # 如果是首次标记，记录时间
-    if state['first_conversation_time'] is None:
-        from datetime import datetime
-        state['first_conversation_time'] = datetime.now().isoformat()
-    
-    try:
-        with open(state_path, 'w', encoding='utf-8') as f:
-            json.dump(state, f, indent=2, ensure_ascii=False)
-        logger.info(f"[Workspace] Marked conversation as started")
-    except Exception as e:
-        logger.error(f"[Workspace] Failed to write state file: {e}")

--- a/agent/protocol/agent.py
+++ b/agent/protocol/agent.py
@@ -166,7 +166,8 @@ class Agent:
            # Find and replace the runtime section
            import re
            pattern = r'\n## 运行时信息\s*\n.*?(?=\n##|\Z)'
-            updated_prompt = re.sub(pattern, new_runtime_section.rstrip('\n'), prompt, flags=re.DOTALL)
+            _repl = new_runtime_section.rstrip('\n')
+            updated_prompt = re.sub(pattern, lambda m: _repl, prompt, flags=re.DOTALL)
            
            return updated_prompt
        except Exception as e:
@@ -195,7 +196,9 @@ class Agent:

            if has_old_block:
                replacement = new_block or "<available_skills>\n</available_skills>"
-                prompt = re.sub(old_block_pattern, replacement, prompt, flags=re.DOTALL)
+                # Use lambda to prevent re.sub from interpreting backslashes in replacement
+                # (e.g. Windows paths like \LinkAI would be treated as bad escape sequences)
+                prompt = re.sub(old_block_pattern, lambda m: replacement, prompt, flags=re.DOTALL)
            elif new_block:
                skills_header = "以下是可用技能："
                idx = prompt.find(skills_header)
@@ -224,7 +227,7 @@ class Agent:

            # Replace existing tooling section
            pattern = r'## 工具系统\s*\n.*?(?=\n## |\Z)'
-            updated = re.sub(pattern, new_section, prompt, count=1, flags=re.DOTALL)
+            updated = re.sub(pattern, lambda m: new_section, prompt, count=1, flags=re.DOTALL)
            return updated
        except Exception as e:
            logger.warning(f"Failed to rebuild tool list section: {e}")
--- a/agent/skills/service.py
+++ b/agent/skills/service.py
@@ -8,6 +8,8 @@ other management entry point.

 import os
 import shutil
+import zipfile
+import tempfile
 from typing import Dict, List, Optional
 from common.log import logger
 from agent.skills.types import Skill, SkillEntry
@@ -55,7 +57,9 @@ class SkillService:
        """
        Add (install) a skill from a remote payload.

-        The payload follows the socket protocol::
+        Supported payload types:
+
+        1. ``type: "url"`` – download individual files::

            {
                "name": "web_search",
@@ -67,8 +71,15 @@ class SkillService:
                ]
            }

-        Files are downloaded and saved under the custom skills directory
-        using *name* as the sub-directory.
+        2. ``type: "package"`` – download a zip archive and extract::
+
+            {
+                "name": "plugin-custom-tool",
+                "type": "package",
+                "category": "skills",
+                "enabled": true,
+                "files": [{"url": "https://cdn.example.com/skills/custom-tool.zip"}]
+            }

        :param payload: skill add payload from server
        """
@@ -76,13 +87,28 @@ class SkillService:
        if not name:
            raise ValueError("skill name is required")

+        payload_type = payload.get("type", "url")
+
+        if payload_type == "package":
+            self._add_package(name, payload)
+        else:
+            self._add_url(name, payload)
+
+        self.manager.refresh_skills()
+
+        category = payload.get("category")
+        if category and name in self.manager.skills_config:
+            self.manager.skills_config[name]["category"] = category
+            self.manager._save_skills_config()
+
+    def _add_url(self, name: str, payload: dict) -> None:
+        """Install a skill by downloading individual files."""
        files = payload.get("files", [])
        if not files:
            raise ValueError("skill files list is empty")

        skill_dir = os.path.join(self.manager.custom_dir, name)

-        # Download to a temp directory first, then swap to avoid data loss on failure
        tmp_dir = skill_dir + ".tmp"
        if os.path.exists(tmp_dir):
            shutil.rmtree(tmp_dir)
@@ -101,21 +127,55 @@ class SkillService:
            shutil.rmtree(tmp_dir, ignore_errors=True)
            raise

-        # All files downloaded successfully, replace the old directory
        if os.path.exists(skill_dir):
            shutil.rmtree(skill_dir)
        os.rename(tmp_dir, skill_dir)

-        # Reload to pick up the new skill and sync config
-        self.manager.refresh_skills()
+        logger.info(f"[SkillService] add: skill '{name}' installed via url ({len(files)} files)")

-        # Persist category from payload into skills_config only when provided
-        category = payload.get("category")
-        if category and name in self.manager.skills_config:
-            self.manager.skills_config[name]["category"] = category
-            self.manager._save_skills_config()
+    def _add_package(self, name: str, payload: dict) -> None:
+        """
+        Install a skill by downloading a zip archive and extracting it.

-        logger.info(f"[SkillService] add: skill '{name}' installed ({len(files)} files)")
+        If the archive contains a single top-level directory, that directory
+        is used as the skill folder directly; otherwise a new directory named
+        after the skill is created to hold the extracted contents.
+        """
+        files = payload.get("files", [])
+        if not files or not files[0].get("url"):
+            raise ValueError("package url is required")
+
+        url = files[0]["url"]
+        skill_dir = os.path.join(self.manager.custom_dir, name)
+
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            zip_path = os.path.join(tmp_dir, "package.zip")
+            self._download_file(url, zip_path)
+
+            if not zipfile.is_zipfile(zip_path):
+                raise ValueError(f"downloaded file is not a valid zip archive: {url}")
+
+            extract_dir = os.path.join(tmp_dir, "extracted")
+            with zipfile.ZipFile(zip_path, "r") as zf:
+                zf.extractall(extract_dir)
+
+            # Determine the actual content root.
+            # If the zip has a single top-level directory, use its contents
+            # so the skill folder is clean (no extra nesting).
+            top_items = [
+                item for item in os.listdir(extract_dir)
+                if not item.startswith(".")
+            ]
+            if len(top_items) == 1:
+                single = os.path.join(extract_dir, top_items[0])
+                if os.path.isdir(single):
+                    extract_dir = single
+
+            if os.path.exists(skill_dir):
+                shutil.rmtree(skill_dir)
+            shutil.copytree(extract_dir, skill_dir)
+
+        logger.info(f"[SkillService] add: skill '{name}' installed via package ({url})")

    # ------------------------------------------------------------------
    # open / close (enable / disable)
--- a/agent/tools/web_fetch/web_fetch.py
+++ b/agent/tools/web_fetch/web_fetch.py
@@ -1,31 +1,62 @@
 """
-Web Fetch tool - Fetch and extract readable content from web pages.
+Web Fetch tool - Fetch and extract readable content from web pages and remote files.
+
+Supports:
+- HTML web pages: extracts readable text content
+- Document files (PDF, Word, TXT, Markdown, etc.): downloads to workspace/tmp and parses content
 """

+import os
 import re
-from typing import Dict, Any
-from urllib.parse import urlparse
+import uuid
+from typing import Dict, Any, Optional, Set
+from urllib.parse import urlparse, unquote

 import requests

 from agent.tools.base_tool import BaseTool, ToolResult
+from agent.tools.utils.truncate import truncate_head, format_size
 from common.log import logger


-DEFAULT_TIMEOUT = 10
+DEFAULT_TIMEOUT = 30
+MAX_FILE_SIZE = 50 * 1024 * 1024  # 50MB

 DEFAULT_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
-    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+    "Accept": "*/*",
 }

+# Supported document file extensions
+PDF_SUFFIXES: Set[str] = {".pdf"}
+WORD_SUFFIXES: Set[str] = {".doc", ".docx"}
+TEXT_SUFFIXES: Set[str] = {".txt", ".md", ".markdown", ".rst", ".csv", ".tsv", ".log"}
+SPREADSHEET_SUFFIXES: Set[str] = {".xls", ".xlsx"}
+PPT_SUFFIXES: Set[str] = {".ppt", ".pptx"}
+
+ALL_DOC_SUFFIXES = PDF_SUFFIXES | WORD_SUFFIXES | TEXT_SUFFIXES | SPREADSHEET_SUFFIXES | PPT_SUFFIXES
+
+
+def _get_url_suffix(url: str) -> str:
+    """Extract file extension from URL path, ignoring query params."""
+    path = urlparse(url).path
+    return os.path.splitext(path)[-1].lower()
+
+
+def _is_document_url(url: str) -> bool:
+    """Check if URL points to a downloadable document file."""
+    suffix = _get_url_suffix(url)
+    return suffix in ALL_DOC_SUFFIXES
+

 class WebFetch(BaseTool):
-    """Tool for fetching and extracting readable content from web pages"""
+    """Tool for fetching web pages and remote document files"""

    name: str = "web_fetch"
    description: str = (
-        "Fetch and extract readable text content from a web page URL. "
+        "Fetch content from a URL. For web pages, extracts readable text. "
+        "For document files (PDF, Word, TXT, Markdown, Excel, PPT), downloads and parses the file content. "
+        "Supported file types: .pdf, .doc, .docx, .txt, .md, .csv, .xls, .xlsx, .ppt, .pptx"
    )

    params: dict = {
@@ -33,7 +64,7 @@ class WebFetch(BaseTool):
        "properties": {
            "url": {
                "type": "string",
-                "description": "The HTTP/HTTPS URL to fetch"
+                "description": "The HTTP/HTTPS URL to fetch (web page or document file link)"
            }
        },
        "required": ["url"]
@@ -41,6 +72,7 @@ class WebFetch(BaseTool):

    def __init__(self, config: dict = None):
        self.config = config or {}
+        self.cwd = self.config.get("cwd", os.getcwd())

    def execute(self, args: Dict[str, Any]) -> ToolResult:
        url = args.get("url", "").strip()
@@ -51,6 +83,16 @@ class WebFetch(BaseTool):
        if parsed.scheme not in ("http", "https"):
            return ToolResult.fail("Error: Invalid URL (must start with http:// or https://)")

+        if _is_document_url(url):
+            return self._fetch_document(url)
+
+        return self._fetch_webpage(url)
+
+    # ---- Web page fetching ----
+
+    def _fetch_webpage(self, url: str) -> ToolResult:
+        """Fetch and extract readable text from an HTML web page."""
+        parsed = urlparse(url)
        try:
            response = requests.get(
                url,
@@ -68,12 +110,282 @@ class WebFetch(BaseTool):
        except Exception as e:
            return ToolResult.fail(f"Error: Failed to fetch URL: {e}")

+        content_type = response.headers.get("Content-Type", "")
+        if self._is_binary_content_type(content_type) and not _is_document_url(url):
+            return self._handle_download_by_content_type(url, response, content_type)
+
        html = response.text
        title = self._extract_title(html)
        text = self._extract_text(html)

        return ToolResult.success(f"Title: {title}\n\nContent:\n{text}")

+    # ---- Document fetching ----
+
+    def _fetch_document(self, url: str) -> ToolResult:
+        """Download a document file and extract its text content."""
+        suffix = _get_url_suffix(url)
+        parsed = urlparse(url)
+        filename = self._extract_filename(url)
+        tmp_dir = self._ensure_tmp_dir()
+
+        local_path = os.path.join(tmp_dir, filename)
+        logger.info(f"[WebFetch] Downloading document: {url} -> {local_path}")
+
+        try:
+            response = requests.get(
+                url,
+                headers=DEFAULT_HEADERS,
+                timeout=DEFAULT_TIMEOUT,
+                stream=True,
+                allow_redirects=True,
+            )
+            response.raise_for_status()
+
+            content_length = int(response.headers.get("Content-Length", 0))
+            if content_length > MAX_FILE_SIZE:
+                return ToolResult.fail(
+                    f"Error: File too large ({format_size(content_length)} > {format_size(MAX_FILE_SIZE)})"
+                )
+
+            downloaded = 0
+            with open(local_path, "wb") as f:
+                for chunk in response.iter_content(chunk_size=8192):
+                    downloaded += len(chunk)
+                    if downloaded > MAX_FILE_SIZE:
+                        f.close()
+                        os.remove(local_path)
+                        return ToolResult.fail(
+                            f"Error: File too large (>{format_size(MAX_FILE_SIZE)}), download aborted"
+                        )
+                    f.write(chunk)
+
+        except requests.Timeout:
+            return ToolResult.fail(f"Error: Download timed out after {DEFAULT_TIMEOUT}s")
+        except requests.ConnectionError:
+            return ToolResult.fail(f"Error: Failed to connect to {parsed.netloc}")
+        except requests.HTTPError as e:
+            return ToolResult.fail(f"Error: HTTP {e.response.status_code} for URL: {url}")
+        except Exception as e:
+            self._cleanup_file(local_path)
+            return ToolResult.fail(f"Error: Failed to download file: {e}")
+
+        try:
+            text = self._parse_document(local_path, suffix)
+        except Exception as e:
+            self._cleanup_file(local_path)
+            return ToolResult.fail(f"Error: Failed to parse document: {e}")
+
+        if not text or not text.strip():
+            file_size = os.path.getsize(local_path)
+            return ToolResult.success(
+                f"File downloaded to: {local_path} ({format_size(file_size)})\n"
+                f"No text content could be extracted. The file may contain only images or be encrypted."
+            )
+
+        truncation = truncate_head(text)
+        result_text = truncation.content
+
+        file_size = os.path.getsize(local_path)
+        header = f"[Document: {filename} | Size: {format_size(file_size)} | Saved to: {local_path}]\n\n"
+
+        if truncation.truncated:
+            header += f"[Content truncated: showing {truncation.output_lines} of {truncation.total_lines} lines]\n\n"
+
+        return ToolResult.success(header + result_text)
+
+    def _parse_document(self, file_path: str, suffix: str) -> str:
+        """Parse document file and return extracted text."""
+        if suffix in PDF_SUFFIXES:
+            return self._parse_pdf(file_path)
+        elif suffix in WORD_SUFFIXES:
+            return self._parse_word(file_path)
+        elif suffix in TEXT_SUFFIXES:
+            return self._parse_text(file_path)
+        elif suffix in SPREADSHEET_SUFFIXES:
+            return self._parse_spreadsheet(file_path)
+        elif suffix in PPT_SUFFIXES:
+            return self._parse_ppt(file_path)
+        else:
+            return self._parse_text(file_path)
+
+    def _parse_pdf(self, file_path: str) -> str:
+        """Extract text from PDF using pypdf."""
+        try:
+            from pypdf import PdfReader
+        except ImportError:
+            raise ImportError("pypdf library is required for PDF parsing. Install with: pip install pypdf")
+
+        reader = PdfReader(file_path)
+        text_parts = []
+        for page_num, page in enumerate(reader.pages, 1):
+            page_text = page.extract_text()
+            if page_text and page_text.strip():
+                text_parts.append(f"--- Page {page_num}/{len(reader.pages)} ---\n{page_text}")
+
+        return "\n\n".join(text_parts)
+
+    def _parse_word(self, file_path: str) -> str:
+        """Extract text from Word documents (.doc/.docx)."""
+        suffix = os.path.splitext(file_path)[-1].lower()
+
+        if suffix == ".docx":
+            try:
+                from docx import Document
+            except ImportError:
+                raise ImportError(
+                    "python-docx library is required for .docx parsing. Install with: pip install python-docx"
+                )
+            doc = Document(file_path)
+            paragraphs = [p.text for p in doc.paragraphs if p.text.strip()]
+            return "\n\n".join(paragraphs)
+
+        # .doc format - try textract or fallback
+        try:
+            import textract
+            text = textract.process(file_path).decode("utf-8")
+            return text
+        except ImportError:
+            raise ImportError(
+                "textract library is required for .doc parsing. Install with: pip install textract"
+            )
+
+    def _parse_text(self, file_path: str) -> str:
+        """Read plain text files (txt, md, csv, etc.)."""
+        encodings = ["utf-8", "utf-8-sig", "gbk", "gb2312", "latin-1"]
+        for enc in encodings:
+            try:
+                with open(file_path, "r", encoding=enc) as f:
+                    return f.read()
+            except (UnicodeDecodeError, UnicodeError):
+                continue
+        raise ValueError(f"Unable to decode file with any supported encoding: {encodings}")
+
+    def _parse_spreadsheet(self, file_path: str) -> str:
+        """Extract text from Excel files (.xls/.xlsx)."""
+        try:
+            import openpyxl
+        except ImportError:
+            raise ImportError(
+                "openpyxl library is required for .xlsx parsing. Install with: pip install openpyxl"
+            )
+
+        wb = openpyxl.load_workbook(file_path, read_only=True, data_only=True)
+        result_parts = []
+
+        for sheet_name in wb.sheetnames:
+            ws = wb[sheet_name]
+            rows = []
+            for row in ws.iter_rows(values_only=True):
+                cells = [str(c) if c is not None else "" for c in row]
+                if any(cells):
+                    rows.append(" | ".join(cells))
+            if rows:
+                result_parts.append(f"--- Sheet: {sheet_name} ---\n" + "\n".join(rows))
+
+        wb.close()
+        return "\n\n".join(result_parts)
+
+    def _parse_ppt(self, file_path: str) -> str:
+        """Extract text from PowerPoint files (.ppt/.pptx)."""
+        try:
+            from pptx import Presentation
+        except ImportError:
+            raise ImportError(
+                "python-pptx library is required for .pptx parsing. Install with: pip install python-pptx"
+            )
+
+        prs = Presentation(file_path)
+        text_parts = []
+
+        for slide_num, slide in enumerate(prs.slides, 1):
+            slide_texts = []
+            for shape in slide.shapes:
+                if shape.has_text_frame:
+                    for paragraph in shape.text_frame.paragraphs:
+                        text = paragraph.text.strip()
+                        if text:
+                            slide_texts.append(text)
+            if slide_texts:
+                text_parts.append(f"--- Slide {slide_num}/{len(prs.slides)} ---\n" + "\n".join(slide_texts))
+
+        return "\n\n".join(text_parts)
+
+    # ---- Helper methods ----
+
+    def _ensure_tmp_dir(self) -> str:
+        """Ensure workspace/tmp directory exists and return its path."""
+        tmp_dir = os.path.join(self.cwd, "tmp")
+        os.makedirs(tmp_dir, exist_ok=True)
+        return tmp_dir
+
+    def _extract_filename(self, url: str) -> str:
+        """Extract a safe filename from URL, with a short UUID prefix to avoid collisions."""
+        path = urlparse(url).path
+        basename = os.path.basename(unquote(path))
+        if not basename or basename == "/":
+            basename = "downloaded_file"
+        # Sanitize: keep only safe chars
+        basename = re.sub(r'[^\w.\-]', '_', basename)
+        short_id = uuid.uuid4().hex[:8]
+        return f"{short_id}_{basename}"
+
+    @staticmethod
+    def _cleanup_file(path: str):
+        """Remove a file if it exists, ignoring errors."""
+        try:
+            if os.path.exists(path):
+                os.remove(path)
+        except Exception:
+            pass
+
+    @staticmethod
+    def _is_binary_content_type(content_type: str) -> bool:
+        """Check if Content-Type indicates a binary/document response."""
+        binary_types = [
+            "application/pdf",
+            "application/msword",
+            "application/vnd.openxmlformats",
+            "application/vnd.ms-excel",
+            "application/vnd.ms-powerpoint",
+            "application/octet-stream",
+        ]
+        ct_lower = content_type.lower()
+        return any(bt in ct_lower for bt in binary_types)
+
+    def _handle_download_by_content_type(self, url: str, response: requests.Response, content_type: str) -> ToolResult:
+        """Handle a URL that returned binary content instead of HTML."""
+        ct_lower = content_type.lower()
+        suffix_map = {
+            "application/pdf": ".pdf",
+            "application/msword": ".doc",
+            "application/vnd.openxmlformats-officedocument.wordprocessingml": ".docx",
+            "application/vnd.ms-excel": ".xls",
+            "application/vnd.openxmlformats-officedocument.spreadsheetml": ".xlsx",
+            "application/vnd.ms-powerpoint": ".ppt",
+            "application/vnd.openxmlformats-officedocument.presentationml": ".pptx",
+        }
+        detected_suffix = None
+        for ct_prefix, ext in suffix_map.items():
+            if ct_prefix in ct_lower:
+                detected_suffix = ext
+                break
+
+        if detected_suffix and detected_suffix in ALL_DOC_SUFFIXES:
+            # Re-fetch as document
+            return self._fetch_document(url if _get_url_suffix(url) in ALL_DOC_SUFFIXES
+                                        else self._rewrite_url_with_suffix(url, detected_suffix))
+        return ToolResult.fail(f"Error: URL returned binary content ({content_type}), not a supported document type")
+
+    @staticmethod
+    def _rewrite_url_with_suffix(url: str, suffix: str) -> str:
+        """Append a suffix to the URL path so _get_url_suffix works correctly."""
+        parsed = urlparse(url)
+        new_path = parsed.path.rstrip("/") + suffix
+        return parsed._replace(path=new_path).geturl()
+
+    # ---- HTML extraction (unchanged) ----
+
    @staticmethod
    def _extract_title(html: str) -> str:
        match = re.search(r"<title[^>]*>(.*?)</title>", html, re.IGNORECASE | re.DOTALL)
@@ -81,18 +393,13 @@ class WebFetch(BaseTool):

    @staticmethod
    def _extract_text(html: str) -> str:
-        # Remove script and style blocks
        text = re.sub(r"<script[^>]*>.*?</script>", "", html, flags=re.IGNORECASE | re.DOTALL)
        text = re.sub(r"<style[^>]*>.*?</style>", "", text, flags=re.IGNORECASE | re.DOTALL)
-        # Remove HTML tags
        text = re.sub(r"<[^>]+>", "", text)
-        # Decode common HTML entities
        text = text.replace("&amp;", "&").replace("&lt;", "<").replace("&gt;", ">")
        text = text.replace("&quot;", '"').replace("&#39;", "'").replace("&nbsp;", " ")
-        # Collapse whitespace: multiple spaces/tabs -> single space, multiple newlines -> double newline
        text = re.sub(r"[^\S\n]+", " ", text)
        text = re.sub(r"\n{3,}", "\n\n", text)
-        # Strip leading/trailing whitespace per line
        lines = [line.strip() for line in text.splitlines()]
        text = "\n".join(lines)
        return text.strip()
--- a/agent/tools/web_search/web_search.py
+++ b/agent/tools/web_search/web_search.py
@@ -24,11 +24,7 @@ class WebSearch(BaseTool):
    """Tool for searching the web using Bocha or LinkAI search API"""

    name: str = "web_search"
-    description: str = (
-        "Search the web for current information, news, research topics, or any real-time data. "
-        "Returns web page titles, URLs, snippets, and optional summaries. "
-        "Use this when the user asks about recent events, needs fact-checking, or wants up-to-date information."
-    )
+    description: str = "Search the web for real-time information. Returns titles, URLs, and snippets."

    params: dict = {
        "type": "object",
--- a/bridge/agent_initializer.py
+++ b/bridge/agent_initializer.py
@@ -77,10 +77,6 @@ class AgentInitializer:
        # Initialize skill manager
        skill_manager = self._initialize_skill_manager(workspace_root, session_id)
        
-        # Check if first conversation
-        from agent.prompt.workspace import is_first_conversation, mark_conversation_started
-        is_first = is_first_conversation(workspace_root)
-        
        # Build system prompt
        prompt_builder = PromptBuilder(workspace_dir=workspace_root, language="zh")
        runtime_info = self._get_runtime_info(workspace_root)
@@ -91,12 +87,8 @@ class AgentInitializer:
            skill_manager=skill_manager,
            memory_manager=memory_manager,
            runtime_info=runtime_info,
-            is_first_conversation=is_first
        )
        
-        if is_first:
-            mark_conversation_started(workspace_root)
-        
        # Get cost control parameters
        from config import conf
        max_steps = conf().get("agent_max_steps", 20)
@@ -374,7 +366,7 @@ class AgentInitializer:

                if tool:
                    # Apply workspace config to file operation tools
-                    if tool_name in ['read', 'write', 'edit', 'bash', 'grep', 'find', 'ls']:
+                    if tool_name in ['read', 'write', 'edit', 'bash', 'grep', 'find', 'ls', 'web_fetch']:
                        tool.config = file_config
                        tool.cwd = file_config.get("cwd", getattr(tool, 'cwd', None))
                        if 'memory_manager' in file_config:
--- a/channel/web/static/js/console.js
+++ b/channel/web/static/js/console.js
@@ -908,7 +908,9 @@ function initConfigView(data) {
    const providerOpts = Object.entries(configProviders).map(([pid, p]) => ({ value: pid, label: p.label }));

    // if use_linkai is enabled, always select linkai as the provider
-    const detected = data.use_linkai ? 'linkai' : detectProvider(configCurrentModel);
+    // Otherwise prefer bot_type from config, fall back to model-based detection
+    const detected = data.use_linkai ? 'linkai'
+        : (data.bot_type && configProviders[data.bot_type] ? data.bot_type : detectProvider(configCurrentModel));
    cfgProviderValue = detected || (providerOpts[0] ? providerOpts[0].value : '');

    initDropdown(providerEl, providerOpts, cfgProviderValue, onProviderChange);
@@ -1062,6 +1064,14 @@ function saveModelConfig() {
    const updates = { model: model };
    const p = configProviders[cfgProviderValue];
    updates.use_linkai = (cfgProviderValue === 'linkai');
+    // Save bot_type for bot_factory routing.
+    // Most providers use their key directly as bot_type.
+    // linkai uses use_linkai flag instead of bot_type.
+    if (cfgProviderValue === 'linkai') {
+        updates.bot_type = '';
+    } else {
+        updates.bot_type = cfgProviderValue;
+    }
    if (p && p.api_base_key) {
        const base = document.getElementById('cfg-api-base').value.trim();
        if (base) updates[p.api_base_key] = base;
--- a/channel/web/web_channel.py
+++ b/channel/web/web_channel.py
@@ -406,7 +406,7 @@ class ConfigHandler:
            "api_base_default": None,
            "models": [const.MINIMAX_M2_5, const.MINIMAX_M2_1, const.MINIMAX_M2_1_LIGHTNING],
        }),
-        ("glm-4", {
+        ("zhipu", {
            "label": "智谱AI",
            "api_key_field": "zhipu_ai_api_key",
            "api_base_key": "zhipu_ai_api_base",
@@ -448,7 +448,7 @@ class ConfigHandler:
            "api_base_default": "https://generativelanguage.googleapis.com",
            "models": [const.GEMINI_31_PRO_PRE, const.GEMINI_3_FLASH_PRE],
        }),
-        ("openAI", {
+        ("chatGPT", {
            "label": "OpenAI",
            "api_key_field": "open_ai_api_key",
            "api_base_key": "open_ai_api_base",
@@ -472,7 +472,7 @@ class ConfigHandler:
    ])

    EDITABLE_KEYS = {
-        "model", "use_linkai",
+        "model", "bot_type", "use_linkai",
        "open_ai_api_base", "claude_api_base", "gemini_api_base",
        "zhipu_ai_api_base", "moonshot_base_url", "ark_base_url",
        "open_ai_api_key", "claude_api_key", "gemini_api_key",
@@ -522,6 +522,7 @@ class ConfigHandler:
                "use_agent": use_agent,
                "title": title,
                "model": local_config.get("model", ""),
+                "bot_type": local_config.get("bot_type", ""),
                "use_linkai": bool(local_config.get("use_linkai", False)),
                "channel_type": local_config.get("channel_type", ""),
                "agent_max_context_tokens": local_config.get("agent_max_context_tokens", 50000),
--- a/common/const.py
+++ b/common/const.py
@@ -9,9 +9,10 @@ CLAUDEAPI= "claudeAPI"
 QWEN = "qwen"  # 旧版千问接入
 QWEN_DASHSCOPE = "dashscope"  # 新版千问接入(百炼)
 GEMINI = "gemini" 
-ZHIPU_AI = "glm-4"
+ZHIPU_AI = "zhipu"  
 MOONSHOT = "moonshot"
 MiniMax = "minimax"
+DEEPSEEK = "deepseek"
 MODELSCOPE = "modelscope"

 # 模型列表
--- a/models/bot_factory.py
+++ b/models/bot_factory.py
@@ -17,8 +17,7 @@ def create_bot(bot_type):
        from models.baidu.baidu_wenxin import BaiduWenxinBot
        return BaiduWenxinBot()

-    elif bot_type == const.CHATGPT:
-        # ChatGPT 网页端web接口
+    elif bot_type in (const.CHATGPT, const.DEEPSEEK):  # DeepSeek uses OpenAI-compatible API
        from models.chatgpt.chat_gpt_bot import ChatGPTBot
        return ChatGPTBot()

@@ -53,7 +52,7 @@ def create_bot(bot_type):
        from models.gemini.google_gemini_bot import GoogleGeminiBot
        return GoogleGeminiBot()

-    elif bot_type == const.ZHIPU_AI:
+    elif bot_type == const.ZHIPU_AI or bot_type == "glm-4":  # "glm-4" kept for backward compatibility
        from models.zhipuai.zhipuai_bot import ZHIPUAIBot
        return ZHIPUAIBot()

--- a/models/minimax/minimax_bot.py
+++ b/models/minimax/minimax_bot.py
@@ -2,7 +2,6 @@

 import time
 import json
-from pydantic.types import T
 import requests

 from models.bot import Bot
@@ -214,7 +213,7 @@ class MinimaxBot(Bot):

            # Prepare API parameters
            model = kwargs.pop("model", None) or self.args["model"]
-            max_tokens = kwargs.pop("max_tokens", 4096)
+            max_tokens = kwargs.pop("max_tokens", 100000)
            temperature = kwargs.pop("temperature", self.args["temperature"])

            # Build request body
--- a/models/zhipuai/zhipu_ai_image.py
+++ b/models/zhipuai/zhipu_ai_image.py
@@ -13,10 +13,8 @@ class ZhipuAIImage(object):
        
        if api_base:
            self.client = ZhipuAiClient(api_key=api_key, base_url=api_base)
-            logger.info(f"[ZHIPU_AI_IMAGE] 使用自定义 API Base URL: {api_base}")
        else:
            self.client = ZhipuAiClient(api_key=api_key)
-            logger.info("[ZHIPU_AI_IMAGE] 使用默认 API Base URL")

    def create_img(self, query, retry_count=0, api_key=None, api_base=None):
        try:
--- a/models/zhipuai/zhipuai_bot.py
+++ b/models/zhipuai/zhipuai_bot.py
@@ -30,10 +30,8 @@ class ZHIPUAIBot(Bot, ZhipuAIImage):
        
        if api_base:
            self.client = ZhipuAiClient(api_key=api_key, base_url=api_base)
-            logger.info(f"[ZHIPU_AI] 使用自定义 API Base URL: {api_base}")
        else:
            self.client = ZhipuAiClient(api_key=api_key)
-            logger.info("[ZHIPU_AI] 使用默认 API Base URL")

    def reply(self, query, context=None):
        # acquire reply content
Author	SHA1	Message	Date
zhayujie	4fec55cc01	feat: web_featch tool support remote file url	2026-03-11 17:16:39 +08:00
zhayujie	1767413712	fix: increase minimax max_tokens	2026-03-11 15:31:35 +08:00
zhayujie	734c8fa84f	fix: optimize skill prompt	2026-03-11 12:40:37 +08:00
zhayujie	9a8d422554	feat: package skill install	2026-03-11 12:18:36 +08:00
zhayujie	b21e945c76	feat: optimize bootstrap flow	2026-03-11 11:27:08 +08:00
zhayujie	a02bf1ea09	Merge pull request #2693 from 6vision/fix/bot-type-and-web-config fix: rename zhipu bot_type, persist bot_type in web config, fix re.syb escape error	2026-03-11 10:24:19 +08:00
6vision	c4a93b7789	fix: rename zhipu bot_type, persist bot_type in web config, fix re.sub escape error - Rename ZHIPU_AI bot type from glm-4 to zhipu to avoid confusion with model names - Add bot_type persistence in web config to fix provider dropdown resetting on refresh - Change OpenAI provider key to chatGPT to match bot_factory routing - Add DEEPSEEK constant and route it to ChatGPTBot (OpenAI-compatible API) - Keep backward compatibility for legacy bot_type glm-4 in bot_factory - Fix re.sub bad escape error on Windows paths by using lambda replacement - Remove unused pydantic import in minimax_bot.py Made-with: Cursor	2026-03-10 21:34:24 +08:00