mirror of
https://github.com/zhayujie/chatgpt-on-wechat.git
synced 2026-03-12 18:01:30 +08:00
Compare commits
4 Commits
4fec55cc01
...
ee0c47ac1e
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ee0c47ac1e | ||
|
|
eba90e9343 | ||
|
|
d8374d0fa5 | ||
|
|
fa61744c6d |
@@ -386,10 +386,24 @@ def _build_workspace_section(workspace_dir: str, language: str) -> List[str]:
|
||||
"- 例如用自然表达例如「我已记住」而不是「已更新 MEMORY.md」",
|
||||
"",
|
||||
]
|
||||
|
||||
# Cloud deployment: inject websites directory info and access URL
|
||||
cloud_website_lines = _build_cloud_website_section(workspace_dir)
|
||||
if cloud_website_lines:
|
||||
lines.extend(cloud_website_lines)
|
||||
|
||||
return lines
|
||||
|
||||
|
||||
def _build_cloud_website_section(workspace_dir: str) -> List[str]:
|
||||
"""Build cloud website access prompt when cloud deployment is configured."""
|
||||
try:
|
||||
from common.cloud_client import build_website_prompt
|
||||
return build_website_prompt(workspace_dir)
|
||||
except Exception:
|
||||
return []
|
||||
|
||||
|
||||
def _build_context_files_section(context_files: List[ContextFile], language: str) -> List[str]:
|
||||
"""构建项目上下文文件section"""
|
||||
if not context_files:
|
||||
|
||||
@@ -42,14 +42,16 @@ def ensure_workspace(workspace_dir: str, create_templates: bool = True) -> Works
|
||||
Returns:
|
||||
WorkspaceFiles对象,包含所有文件路径
|
||||
"""
|
||||
# Check if this is a brand new workspace (before creating the directory)
|
||||
is_new_workspace = not os.path.exists(workspace_dir)
|
||||
# Check if this is a brand new workspace (AGENT.md not yet created).
|
||||
# Cannot rely on directory existence because other modules (e.g. ConversationStore)
|
||||
# may create the workspace directory before ensure_workspace is called.
|
||||
agent_path = os.path.join(workspace_dir, DEFAULT_AGENT_FILENAME)
|
||||
is_new_workspace = not os.path.exists(agent_path)
|
||||
|
||||
# 确保目录存在
|
||||
os.makedirs(workspace_dir, exist_ok=True)
|
||||
|
||||
# 定义文件路径
|
||||
agent_path = os.path.join(workspace_dir, DEFAULT_AGENT_FILENAME)
|
||||
user_path = os.path.join(workspace_dir, DEFAULT_USER_FILENAME)
|
||||
rule_path = os.path.join(workspace_dir, DEFAULT_RULE_FILENAME)
|
||||
memory_path = os.path.join(workspace_dir, DEFAULT_MEMORY_FILENAME) # MEMORY.md 在根目录
|
||||
@@ -61,6 +63,10 @@ def ensure_workspace(workspace_dir: str, create_templates: bool = True) -> Works
|
||||
# 创建skills子目录 (for workspace-level skills installed by agent)
|
||||
skills_dir = os.path.join(workspace_dir, "skills")
|
||||
os.makedirs(skills_dir, exist_ok=True)
|
||||
|
||||
# 创建websites子目录 (for web pages / sites generated by agent)
|
||||
websites_dir = os.path.join(workspace_dir, "websites")
|
||||
os.makedirs(websites_dir, exist_ok=True)
|
||||
|
||||
# 如果需要,创建模板文件
|
||||
if create_templates:
|
||||
|
||||
@@ -29,7 +29,7 @@ DEFAULT_HEADERS = {
|
||||
|
||||
# Supported document file extensions
|
||||
PDF_SUFFIXES: Set[str] = {".pdf"}
|
||||
WORD_SUFFIXES: Set[str] = {".doc", ".docx"}
|
||||
WORD_SUFFIXES: Set[str] = {".docx"}
|
||||
TEXT_SUFFIXES: Set[str] = {".txt", ".md", ".markdown", ".rst", ".csv", ".tsv", ".log"}
|
||||
SPREADSHEET_SUFFIXES: Set[str] = {".xls", ".xlsx"}
|
||||
PPT_SUFFIXES: Set[str] = {".ppt", ".pptx"}
|
||||
@@ -56,7 +56,7 @@ class WebFetch(BaseTool):
|
||||
description: str = (
|
||||
"Fetch content from a URL. For web pages, extracts readable text. "
|
||||
"For document files (PDF, Word, TXT, Markdown, Excel, PPT), downloads and parses the file content. "
|
||||
"Supported file types: .pdf, .doc, .docx, .txt, .md, .csv, .xls, .xlsx, .ppt, .pptx"
|
||||
"Supported file types: .pdf, .docx, .txt, .md, .csv, .xls, .xlsx, .ppt, .pptx"
|
||||
)
|
||||
|
||||
params: dict = {
|
||||
@@ -114,6 +114,14 @@ class WebFetch(BaseTool):
|
||||
if self._is_binary_content_type(content_type) and not _is_document_url(url):
|
||||
return self._handle_download_by_content_type(url, response, content_type)
|
||||
|
||||
# Fix encoding: use apparent_encoding to auto-detect, but keep Windows encodings as-is
|
||||
if response.apparent_encoding and response.apparent_encoding.lower().startswith("windows"):
|
||||
response.encoding = response.encoding
|
||||
else:
|
||||
response.encoding = response.apparent_encoding
|
||||
if not response.encoding:
|
||||
response.encoding = "utf-8"
|
||||
|
||||
html = response.text
|
||||
title = self._extract_title(html)
|
||||
text = self._extract_text(html)
|
||||
@@ -226,29 +234,16 @@ class WebFetch(BaseTool):
|
||||
return "\n\n".join(text_parts)
|
||||
|
||||
def _parse_word(self, file_path: str) -> str:
|
||||
"""Extract text from Word documents (.doc/.docx)."""
|
||||
suffix = os.path.splitext(file_path)[-1].lower()
|
||||
|
||||
if suffix == ".docx":
|
||||
try:
|
||||
from docx import Document
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"python-docx library is required for .docx parsing. Install with: pip install python-docx"
|
||||
)
|
||||
doc = Document(file_path)
|
||||
paragraphs = [p.text for p in doc.paragraphs if p.text.strip()]
|
||||
return "\n\n".join(paragraphs)
|
||||
|
||||
# .doc format - try textract or fallback
|
||||
"""Extract text from Word documents (.docx)."""
|
||||
try:
|
||||
import textract
|
||||
text = textract.process(file_path).decode("utf-8")
|
||||
return text
|
||||
from docx import Document
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"textract library is required for .doc parsing. Install with: pip install textract"
|
||||
"python-docx library is required for .docx parsing. Install with: pip install python-docx"
|
||||
)
|
||||
doc = Document(file_path)
|
||||
paragraphs = [p.text for p in doc.paragraphs if p.text.strip()]
|
||||
return "\n\n".join(paragraphs)
|
||||
|
||||
def _parse_text(self, file_path: str) -> str:
|
||||
"""Read plain text files (txt, md, csv, etc.)."""
|
||||
@@ -344,7 +339,6 @@ class WebFetch(BaseTool):
|
||||
"""Check if Content-Type indicates a binary/document response."""
|
||||
binary_types = [
|
||||
"application/pdf",
|
||||
"application/msword",
|
||||
"application/vnd.openxmlformats",
|
||||
"application/vnd.ms-excel",
|
||||
"application/vnd.ms-powerpoint",
|
||||
@@ -358,7 +352,6 @@ class WebFetch(BaseTool):
|
||||
ct_lower = content_type.lower()
|
||||
suffix_map = {
|
||||
"application/pdf": ".pdf",
|
||||
"application/msword": ".doc",
|
||||
"application/vnd.openxmlformats-officedocument.wordprocessingml": ".docx",
|
||||
"application/vnd.ms-excel": ".xls",
|
||||
"application/vnd.openxmlformats-officedocument.spreadsheetml": ".xlsx",
|
||||
|
||||
@@ -516,6 +516,79 @@ class CloudClient(LinkAIClient):
|
||||
logger.error(f"[CloudClient] Failed to save configuration to config.json: {e}")
|
||||
|
||||
|
||||
def get_root_domain(host: str = "") -> str:
    """Extract the root domain (last two labels) from a host or URL.

    If *host* is empty, reads the ``CLOUD_HOST`` env var, then the
    ``cloud_host`` config entry.

    A scheme, path and port are stripped before the last two dot-separated
    labels are taken, e.g. ``https://client.link-ai.tech:443/x`` ->
    ``link-ai.tech``. IP addresses have no root domain and are returned
    unchanged instead of being cut down to their last two octets.

    Args:
        host: Hostname or URL; may include scheme, port and path.

    Returns:
        The root domain, the bare host when it has a single label or is an
        IP address, or an empty string when no host is available.
    """
    import ipaddress

    if not host:
        host = os.environ.get("CLOUD_HOST") or conf().get("cloud_host", "")
    if not host:
        return ""

    host = host.strip().rstrip("/")
    if "://" in host:
        host = host.split("://", 1)[1]
    netloc = host.split("/", 1)[0]

    # Bracketed IPv6 literal such as "[::1]:8080": splitting on ":" would
    # destroy the address, so take the text between the brackets instead.
    if netloc.startswith("["):
        return netloc[1:].split("]", 1)[0]

    # Drop the port, and the trailing dot of a fully-qualified name.
    hostname = netloc.split(":")[0].rstrip(".")

    try:
        ipaddress.ip_address(hostname)
    except ValueError:
        pass  # Not an IP literal: fall through to label handling.
    else:
        return hostname

    labels = hostname.split(".")
    if len(labels) >= 2:
        return ".".join(labels[-2:])
    return hostname
|
||||
|
||||
|
||||
def get_deployment_id() -> str:
    """Look up the cloud deployment id.

    The ``CLOUD_DEPLOYMENT_ID`` environment variable is consulted first; when
    it is unset or empty, the ``cloud_deployment_id`` config entry is returned
    (an empty string if that entry is missing).
    """
    from_env = os.environ.get("CLOUD_DEPLOYMENT_ID")
    if from_env:
        return from_env
    return conf().get("cloud_deployment_id", "")
|
||||
|
||||
|
||||
def get_website_base_url() -> str:
    """Return the public URL prefix that maps to the workspace websites/ dir.

    Resolution order:

    1. No deployment id (see ``get_deployment_id``) -> empty string.
    2. A dedicated websites domain from the ``CLOUD_WEBSITES_DOMAIN`` env var
       or the ``cloud_websites_domain`` config entry ->
       ``https://<websites_domain>/<deployment_id>``.
    3. Otherwise the root domain from ``get_root_domain`` ->
       ``https://app.<root_domain>/<deployment_id>``.

    Returns:
        The URL prefix without a trailing slash, or an empty string when
        cloud deployment is not configured.
    """
    deployment_id = get_deployment_id()
    if not deployment_id:
        return ""

    websites_domain = os.environ.get("CLOUD_WEBSITES_DOMAIN") or conf().get("cloud_websites_domain", "")
    # Normalize BEFORE the emptiness check so a whitespace-only value falls
    # through to the root-domain form instead of producing "https:///<id>".
    websites_domain = (websites_domain or "").strip()
    if "://" in websites_domain:
        # A configured scheme would otherwise yield "https://https://...".
        websites_domain = websites_domain.split("://", 1)[1]
    websites_domain = websites_domain.rstrip("/")
    if websites_domain:
        return f"https://{websites_domain}/{deployment_id}"

    domain = get_root_domain()
    if not domain:
        return ""
    return f"https://app.{domain}/{deployment_id}"
|
||||
|
||||
|
||||
def build_website_prompt(workspace_dir: str) -> list:
    """Assemble the system prompt lines describing cloud website/file sharing.

    The lines tell the agent where to place generated pages and files
    (the workspace ``websites/`` directory) and which public URL prefix
    they are served under.

    Args:
        workspace_dir: Accepted for interface symmetry with callers; the
            prompt text is derived only from ``get_website_base_url()``.

    Returns:
        The prompt lines, or an empty list when ``get_website_base_url()``
        yields no URL, so callers can ``extend`` unconditionally.
    """
    url_prefix = get_website_base_url()
    if url_prefix:
        # Heading plus the public URL prefix announcement.
        heading = [
            "**文件分享与网页生成规则** (非常重要 — 当前为云部署模式):",
            "",
            f"云端已为工作空间的 `websites/` 目录配置好公网路由映射,访问地址前缀为: `{url_prefix}`",
            "",
        ]
        # Rule 1: web pages must live under websites/.
        page_rule = [
            "1. **网页/网站**: 编写网页、H5页面等前端代码时,**必须**将文件放到 `websites/` 目录中",
            f" - 例如: `websites/index.html` → `{url_prefix}/index.html`",
            f" - 例如: `websites/my-app/index.html` → `{url_prefix}/my-app/index.html`",
            "",
        ]
        # Rule 2: generated files may be shared through websites/ as well.
        file_rule = [
            "2. **生成文件分享** (PPT、PDF、图片、音视频等): 当你为用户生成了需要下载或查看的文件时,**可以**将文件保存到 `websites/` 目录中",
            f" - 例如: 生成的PPT保存到 `websites/files/report.pptx` → 下载链接为 `{url_prefix}/files/report.pptx`",
            " - 你仍然可以同时使用 `send` 工具发送文件(在飞书、钉钉等IM渠道中有效),但**必须同时在回复文本中提供下载链接**作为兜底,因为部分渠道(如网页端)无法通过 send 接收本地文件",
            "",
        ]
        # Rules 3 and 4: always send the link; keep one sub-directory per project.
        closing_rules = [
            "3. **必须发送链接**: 无论是网页还是文件,生成后**必须将完整的访问/下载链接直接写在回复文本中发送给用户**",
            "",
            "4. 建议为每个独立项目在 `websites/` 下创建子目录,保持结构清晰",
            "",
        ]
        return heading + page_rule + file_rule + closing_rules
    return []
|
||||
|
||||
def start(channel, channel_mgr=None):
|
||||
global chat_client
|
||||
chat_client = CloudClient(api_key=conf().get("linkai_api_key"), host=conf().get("cloud_host", ""), channel=channel)
|
||||
|
||||
@@ -188,6 +188,7 @@ available_setting = {
|
||||
"linkai_app_code": "",
|
||||
"linkai_api_base": "https://api.link-ai.tech", # linkAI服务地址
|
||||
"cloud_host": "client.link-ai.tech",
|
||||
"cloud_deployment_id": "",
|
||||
"minimax_api_key": "",
|
||||
"Minimax_group_id": "",
|
||||
"Minimax_base_url": "",
|
||||
|
||||
@@ -29,3 +29,9 @@ google-generativeai
|
||||
|
||||
# tencentcloud sdk
|
||||
tencentcloud-sdk-python>=3.0.0
|
||||
|
||||
# file parsing (web_fetch document support)
|
||||
pypdf
|
||||
python-docx
|
||||
openpyxl
|
||||
python-pptx
|
||||
|
||||
Reference in New Issue
Block a user