fix: web_fetch encoding

This commit is contained in:
zhayujie
2026-03-11 19:42:37 +08:00
parent fa61744c6d
commit d8374d0fa5
5 changed files with 80 additions and 0 deletions

View File

@@ -386,10 +386,39 @@ def _build_workspace_section(workspace_dir: str, language: str) -> List[str]:
"- 例如用自然表达例如「我已记住」而不是「已更新 MEMORY.md」",
"",
]
# Cloud deployment: inject websites directory info and access URL
cloud_website_lines = _build_cloud_website_section(workspace_dir)
if cloud_website_lines:
lines.extend(cloud_website_lines)
return lines
def _build_cloud_website_section(workspace_dir: str) -> List[str]:
"""Build cloud website access prompt when cloud deployment is configured."""
try:
from common.cloud_client import get_website_base_url
except Exception:
return []
base_url = get_website_base_url()
if not base_url:
return []
return [
"**网页/网站生成规则** (非常重要):",
"",
f"- 当需要编写网页、网站、H5页面等前端代码时**必须**将所有文件统一放到工作空间的 `websites/` 目录中(路径: `{workspace_dir}/websites/`",
f"- 云端已为该目录配置好路由映射,访问地址为: `{base_url}`",
f" - 例如: 你在 `websites/index.html` 创建了页面,访问地址就是 `{base_url}/index.html`",
f" - 例如: 你在 `websites/my-app/index.html` 创建了页面,访问地址就是 `{base_url}/my-app/index.html`",
"- **生成网页后,必须将完整的访问链接直接发送给用户**,让用户可以直接点击访问",
"- 建议为每个独立项目在 `websites/` 下创建子目录,保持结构清晰",
"",
]
def _build_context_files_section(context_files: List[ContextFile], language: str) -> List[str]:
"""构建项目上下文文件section"""
if not context_files:

View File

@@ -61,6 +61,10 @@ def ensure_workspace(workspace_dir: str, create_templates: bool = True) -> Works
# 创建skills子目录 (for workspace-level skills installed by agent)
skills_dir = os.path.join(workspace_dir, "skills")
os.makedirs(skills_dir, exist_ok=True)
# 创建websites子目录 (for web pages / sites generated by agent)
websites_dir = os.path.join(workspace_dir, "websites")
os.makedirs(websites_dir, exist_ok=True)
# 如果需要,创建模板文件
if create_templates:

View File

@@ -114,6 +114,14 @@ class WebFetch(BaseTool):
if self._is_binary_content_type(content_type) and not _is_document_url(url):
return self._handle_download_by_content_type(url, response, content_type)
# Fix encoding: prefer the auto-detected apparent_encoding, but when detection reports a Windows-* code page keep the encoding already set on the response
if response.apparent_encoding and response.apparent_encoding.lower().startswith("windows"):
response.encoding = response.encoding
else:
response.encoding = response.apparent_encoding
if not response.encoding:
response.encoding = "utf-8"
html = response.text
title = self._extract_title(html)
text = self._extract_text(html)

View File

@@ -516,6 +516,44 @@ class CloudClient(LinkAIClient):
logger.error(f"[CloudClient] Failed to save configuration to config.json: {e}")
def get_root_domain(host: str = "") -> str:
    """Extract the root domain (last two labels) from a hostname or URL.

    If *host* is empty, the value is read from the ``CLOUD_HOST`` environment
    variable, falling back to the ``cloud_host`` config entry.

    Args:
        host: A bare hostname, ``host:port``, or a full URL with scheme/path.

    Returns:
        The last two dot-separated labels of the hostname (for example
        ``"link-ai.tech"`` for ``"client.link-ai.tech"``). A single-label
        host or an IP address is returned unchanged. Returns ``""`` when no
        host is available.

    Note:
        Multi-label public suffixes such as ``co.uk`` are not recognised;
        only the last two labels are kept.
    """
    # Local import so the block does not depend on a file-level import change.
    import ipaddress

    if not host:
        host = os.environ.get("CLOUD_HOST") or conf().get("cloud_host", "")
    if not host:
        return ""

    host = host.strip()
    # Drop the scheme first; trimming slashes before this step would turn a
    # bare "https://" into the bogus hostname "https".
    if "://" in host:
        host = host.split("://", 1)[1]
    # Keep only the authority part, then discard any ":port" suffix.
    host = host.split("/", 1)[0].split(":")[0]
    # A fully-qualified name may end with a dot ("example.com."); without
    # trimming it the last label would be empty and the result "com.".
    host = host.rstrip(".")

    try:
        ipaddress.ip_address(host)
    except ValueError:
        pass
    else:
        # IP literals have no root domain; slicing them would yield
        # nonsense such as "1.10" for "192.168.1.10".
        return host

    parts = host.split(".")
    if len(parts) >= 2:
        return ".".join(parts[-2:])
    return host
def get_deployment_id() -> str:
    """Look up the cloud deployment id.

    The ``CLOUD_DEPLOYMENT_ID`` environment variable takes precedence when it
    is set to a non-empty value; otherwise the ``cloud_deployment_id`` config
    entry is returned (``""`` if that entry is absent).
    """
    from_env = os.environ.get("CLOUD_DEPLOYMENT_ID")
    if from_env:
        return from_env
    return conf().get("cloud_deployment_id", "")
def get_website_base_url() -> str:
    """Compose the public URL prefix for the workspace ``websites/`` directory.

    The URL has the form ``https://app.<root-domain>/<deployment-id>``.
    An empty string is returned when either the deployment id or the root
    domain is unavailable.
    """
    deployment_id = get_deployment_id()
    # Resolve the domain only when a deployment id exists, as the lookup is
    # pointless otherwise.
    domain = get_root_domain() if deployment_id else ""
    if not (deployment_id and domain):
        return ""
    return f"https://app.{domain}/{deployment_id}"
def start(channel, channel_mgr=None):
global chat_client
chat_client = CloudClient(api_key=conf().get("linkai_api_key"), host=conf().get("cloud_host", ""), channel=channel)

View File

@@ -188,6 +188,7 @@ available_setting = {
"linkai_app_code": "",
"linkai_api_base": "https://api.link-ai.tech", # linkAI服务地址
"cloud_host": "client.link-ai.tech",
"cloud_deployment_id": "",
"minimax_api_key": "",
"Minimax_group_id": "",
"Minimax_base_url": "",