YuJunZhiXue
diff --git a/‎.dockerignore‎
Lines changed: 2 additions & 0 deletions b/‎.dockerignore‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 1 addition & 0 deletions b/‎.gitignore‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎Dockerfile‎
Lines changed: 3 additions & 3 deletions b/‎Dockerfile‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎backend/adapter/cli_proxy.py‎
Lines changed: 7 additions & 0 deletions b/‎backend/adapter/cli_proxy.py‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎backend/adapter/standard_request.py‎
Lines changed: 3 additions & 0 deletions b/‎backend/adapter/standard_request.py‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎backend/api/admin.py‎
Lines changed: 13 additions & 25 deletions b/‎backend/api/admin.py‎
Lines changed: 13 additions & 25 deletions
diff --git a/‎backend/api/anthropic.py‎
Lines changed: 79 additions & 19 deletions b/‎backend/api/anthropic.py‎
Lines changed: 79 additions & 19 deletions
@@ -67,3 +67,5 @@ test_*.py
 .dockerignore
 Dockerfile*
 docker-compose*.yml
+**/*.tsbuildinfo
+frontend/node_modules/.tmp/
@@ -43,3 +43,4 @@ fix_*.py
 # local cleanup
 tests/
 docs/superpowers/
+*.tsbuildinfo
@@ -55,19 +55,19 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     && rm -rf /var/lib/apt/lists/*
 
 COPY backend/requirements.txt /tmp/requirements.txt
-RUN pip install -r /tmp/requirements.txt
+RUN python -m pip install --upgrade pip && python -m pip install -r /tmp/requirements.txt
 
 # Download Camoufox browser at build time so runtime hosts do not need to fetch it again.
 RUN python -m camoufox fetch
 
 COPY backend/ ./backend/
 COPY start.py ./
-COPY --from=frontend-builder /app/dist ./frontend/dist
 RUN mkdir -p /workspace/data /workspace/logs /workspace/frontend
+COPY --from=frontend-builder /app/dist ./frontend/dist
 
 EXPOSE 7860
 
 HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
     CMD curl -fsS "http://127.0.0.1:${PORT:-7860}/healthz" || exit 1
 
-CMD ["sh", "-c", "python -m uvicorn backend.main:app --host 0.0.0.0 --port ${PORT:-7860} --workers ${WORKERS:-1}"]
+CMD ["sh", "-c", "exec python -m uvicorn backend.main:app --host 0.0.0.0 --port ${PORT:-7860} --workers ${WORKERS:-1}"]
@@ -8,6 +8,7 @@
 from backend.adapter.standard_request import StandardRequest, CLAUDE_CODE_OPENAI_PROFILE
 from backend.core.config import resolve_model
 from backend.services.prompt_builder import messages_to_prompt
+from backend.services.workspace_context import derive_workspace_root
 from backend.toolcall.normalize import build_tool_name_registry
 
 log = logging.getLogger("qwen2api.cli_proxy")
@@ -32,6 +33,8 @@ def from_openai(req_data: dict, *, client_profile: str = CLAUDE_CODE_OPENAI_PROF
             StandardRequest: 统一的标准请求对象
         """
         model_name = req_data.get("model", "gpt-4o")
+        workspace_root = derive_workspace_root(req_data)
+        req_data = {**req_data, "_workspace_root": workspace_root}
         prompt_result = messages_to_prompt(req_data, client_profile=client_profile)
 
         tools = prompt_result.tools
@@ -53,6 +56,7 @@ def from_openai(req_data: dict, *, client_profile: str = CLAUDE_CODE_OPENAI_PROF
             tool_names=tool_names,
             tool_name_registry=build_tool_name_registry(tool_names),
             tool_enabled=prompt_result.tool_enabled,
+            workspace_root=workspace_root,
         )
 
     @staticmethod
@@ -68,6 +72,8 @@ def from_anthropic(req_data: dict, *, client_profile: str = CLAUDE_CODE_OPENAI_P
             StandardRequest: 统一的标准请求对象
         """
         model_name = req_data.get("model", "claude-3-5-sonnet")
+        workspace_root = derive_workspace_root(req_data)
+        req_data = {**req_data, "_workspace_root": workspace_root}
         prompt_result = messages_to_prompt(req_data, client_profile=client_profile)
 
         tools = prompt_result.tools
@@ -89,6 +95,7 @@ def from_anthropic(req_data: dict, *, client_profile: str = CLAUDE_CODE_OPENAI_P
             tool_names=tool_names,
             tool_name_registry=build_tool_name_registry(tool_names),
             tool_enabled=prompt_result.tool_enabled,
+            workspace_root=workspace_root,
         )
 
     @staticmethod
 
@@ -36,3 +36,6 @@ class StandardRequest:
     persistent_session: bool = False
     session_message_hashes: list[str] = field(default_factory=list)
     session_chat_invalidated: bool = False
+    workspace_root: str | None = None
+    retry_blocked_tools: list[str] = field(default_factory=list)
+    retry_read_blocklist: list[str] = field(default_factory=list)
@@ -187,27 +187,25 @@ async def register_new_account(request: Request):
 
 @router.post("/verify", dependencies=[Depends(verify_admin)])
 async def verify_all_accounts(request: Request):
-    """验证所有账号的有效性 (完全复原单文件逻辑)"""
+    """Verify every account in the pool against chat.qwen.ai, one at a time.
+
+    The per-account check is delegated to ``client.verify_account``; per the
+    original docstring it also refreshes an expired token automatically, but
+    that logic is not visible here.
+
+    Returns:
+        A dict with ``ok``, the list of per-account ``results``, a ``summary``
+        of counts derived from those results, and ``concurrency`` fixed at 1.
+    """
     from backend.core.account_pool import AccountPool
     from backend.services.qwen_client import QwenClient
-    import logging
 
-    log = logging.getLogger("qwen2api.admin")
     pool: AccountPool = request.app.state.account_pool
     client: QwenClient = request.app.state.qwen_client
 
     results = []
     for acc in pool.accounts:
-        is_valid = await client.verify_token(acc.token)
-        if not is_valid and acc.password:
-            log.info(f"[校验] {acc.email} token失效，尝试自动刷新...")
-            is_valid = await client.auth_resolver.refresh_token(acc)
-
-        acc.valid = is_valid
-        results.append({"email": acc.email, "valid": is_valid, "refreshed": not is_valid})
-
-    await pool.save() # 直接保存全部状态，不调用 mark_invalid 以免熔断影响测试
-    return {"ok": True, "results": results}
+        results.append(await client.verify_account(acc))
+
+    # Tally the per-account results; an entry counts as "failed" whenever its
+    # "valid" value is missing or falsy.
+    # NOTE(review): a banned account presumably also has a falsy "valid" and
+    # would then be counted under both "banned" and "failed" - confirm.
+    summary = {
+        "total": len(results),
+        "valid": sum(1 for item in results if item.get("valid")),
+        "refreshed": sum(1 for item in results if item.get("refreshed")),
+        "banned": sum(1 for item in results if item.get("status_code") == "banned"),
+        "failed": sum(1 for item in results if not item.get("valid")),
+    }
+    return {"ok": True, "results": results, "summary": summary, "concurrency": 1}
 
 @router.post("/accounts/{email}/activate", dependencies=[Depends(verify_admin)])
 async def activate_account(email: str, request: Request):
@@ -238,28 +236,18 @@ async def activate_account(email: str, request: Request):
 
 @router.post("/accounts/{email}/verify", dependencies=[Depends(verify_admin)])
 async def verify_account(email: str, request: Request):
-    """单独验证某个账号的有效性 (完全复原单文件逻辑)"""
+    """Verify a single pooled account, selected by email, against chat.qwen.ai.
+
+    The check is delegated to ``client.verify_account``; per the original
+    docstring it also refreshes an expired token automatically, but that
+    logic is not visible here.
+
+    Returns:
+        Whatever ``client.verify_account`` returns for the matched account.
+
+    Raises:
+        HTTPException: 404 when no account in the pool has the given email.
+    """
     from backend.services.qwen_client import QwenClient
     from backend.core.account_pool import AccountPool
-    import logging
 
-    log = logging.getLogger("qwen2api.admin")
     pool: AccountPool = request.app.state.account_pool
     client: QwenClient = request.app.state.qwen_client
 
     acc = next((a for a in pool.accounts if a.email == email), None)
     if not acc:
         raise HTTPException(status_code=404, detail="Account not found")
 
-    is_valid = await client.verify_token(acc.token)
-    if not is_valid and acc.password:
-        log.info(f"[校验] {acc.email} token失效，尝试自动刷新...")
-        is_valid = await client.auth_resolver.refresh_token(acc)
-
-    acc.valid = is_valid
-    await pool.save() # 直接保存，不调用 mark_invalid 以免熔断影响正常测试
-
-    return {"email": acc.email, "valid": is_valid}
+    return await client.verify_account(acc)
 
 @router.delete("/accounts/{email}", dependencies=[Depends(verify_admin)])
 async def delete_account(email: str, request: Request):
 
@@ -32,12 +32,35 @@
     plan_persistent_session_turn,
 )
 from backend.services.token_calc import count_tokens
+from backend.services.workspace_context import derive_workspace_root
 from backend.toolcall.normalize import build_tool_name_registry
 
 log = logging.getLogger("qwen2api.anthropic")
 router = APIRouter()
 
 
+def _tool_input_preview(input_data, *, limit: int = 260) -> str:
+    """Return a compact, single-line preview of a tool call's input for logging.
+
+    Args:
+        input_data: The tool input; ``None`` is rendered as ``{}``.
+        limit: Maximum number of characters kept from the compacted text.
+
+    Returns:
+        The whitespace-collapsed text cut to ``limit`` characters, followed by
+        ``...[truncated]`` only when characters were actually dropped.
+    """
+    try:
+        raw = json.dumps(input_data if input_data is not None else {}, ensure_ascii=False, sort_keys=True)
+    except (TypeError, ValueError):
+        # Not JSON-serializable (or keys that cannot be sorted): use repr().
+        raw = repr(input_data)
+    # Collapse whitespace runs first so the truncation check measures the text
+    # that is actually emitted, not the longer uncollapsed form.
+    compact = " ".join(raw.split())
+    suffix = "...[truncated]" if len(compact) > limit else ""
+    return compact[:limit] + suffix
+
+
+def _log_response_tool_blocks(stage: str, blocks: list[dict]) -> None:
+    """Emit one INFO log line for each ``tool_use`` block in ``blocks``.
+
+    Args:
+        stage: Label written to the ``stage=`` field of every log line.
+        blocks: Response content blocks; entries that are not dicts or whose
+            ``type`` is not ``tool_use`` are ignored.
+    """
+    for position, candidate in enumerate(blocks, start=1):
+        # ``position`` is the 1-based index within the full list, so skipped
+        # entries still advance the number that gets logged.
+        if isinstance(candidate, dict) and candidate.get("type") == "tool_use":
+            tool_id = candidate.get("id", "-")
+            tool_name = candidate.get("name", "-")
+            preview = _tool_input_preview(candidate.get("input", {}))
+            log.info(
+                "[ANT-ToolOut] stage=%s index=%s id=%s name=%s input=%s",
+                stage,
+                position,
+                tool_id,
+                tool_name,
+                preview,
+            )
+
+
 class _AnthropicStreamState:
     def __init__(self, *, msg_id: str, model_name: str, prompt: str):
         self.msg_id = msg_id
@@ -125,8 +148,9 @@ def clear_answer_text(self) -> None:
 
 
 def _build_standard_request(req_data: dict) -> StandardRequest:
-    """使用 CLIProxy 进行协议转换"""
+    """Convert an Anthropic-style request into a StandardRequest via CLIProxy."""
     standard_request = CLIProxy.from_anthropic(req_data, client_profile=CLAUDE_CODE_OPENAI_PROFILE)
+    # NOTE(review): CLIProxy.from_anthropic in this same change set already
+    # passes workspace_root=derive_workspace_root(req_data), so this
+    # reassignment looks redundant - confirm before removing.
+    standard_request.workspace_root = derive_workspace_root(req_data)
     CLIProxy.log_conversion("anthropic", standard_request.response_model, len(standard_request.prompt), len(standard_request.tools))
     return standard_request
 
@@ -149,7 +173,7 @@ async def _run_anthropic_attempt(
     max_attempts: int,
 ):
     update_request_context(stream_attempt=stream_attempt + 1)
-    execution = await collect_completion_run(client, standard_request, current_prompt)
+    execution = await collect_completion_run(client, standard_request, current_prompt, history_messages=history_messages)
     retry = evaluate_retry_directive(
         request=standard_request,
         current_prompt=current_prompt,
@@ -198,9 +222,9 @@ async def anthropic_count_tokens(request: Request):
     prompt_result = messages_to_prompt(req_data, client_profile=CLAUDE_CODE_OPENAI_PROFILE)
     base_tokens = count_tokens(prompt_result.prompt)
     # Context Pressure Inflation:
-    # Claude Code 假设 context window=200K，到 ~80%(160K) 触发自动压缩。
-    # 但 Qwen 实际上游 window 只有 ~150K，到 ~120K 时就开始挤压输出预算。
-    # 虚增 input_tokens 1.35x 让 CC 提前触发压缩，避免爆 window。
+    # Claude Code 假设 context window=200K，到 ~80%(160K) 触发自动压缩。
+    # 但 Qwen 实际上游 window 只有 ~150K，到 ~120K 时就开始挤压输出预算。
+    # 虚增 input_tokens 1.35x 让 CC 提前触发压缩，避免爆 window。
     inflation = 1.35
     inflated = int(base_tokens * inflation)
     return JSONResponse({"input_tokens": inflated})
@@ -288,7 +312,17 @@ async def generate():
                 async with app.state.session_locks.hold(session_key):
                     standard_request, effective_payload, model_name, qwen_model, prompt, msg_id = await prepare_locked_request(req_data)
                     update_request_context(requested_model=model_name, resolved_model=qwen_model)
-                    log.info(f"[ANT] model={qwen_model}, stream={standard_request.stream}, tool_enabled={standard_request.tool_enabled}, tools={[t.get('name') for t in standard_request.tools]}, prompt_len={len(prompt)}")
+                    tool_names = [t.get('name') for t in standard_request.tools]
+                    log.info(
+                        "[ANT] model=%s stream=%s tool_enabled=%s tools=%s mcp_tools=%s workspace=%s prompt_len=%s",
+                        qwen_model,
+                        standard_request.stream,
+                        standard_request.tool_enabled,
+                        tool_names,
+                        [name for name in tool_names if isinstance(name, str) and name.startswith("mcp__")],
+                        standard_request.workspace_root or "-",
+                        len(prompt),
+                    )
                     history_messages = original_history_messages
                     current_prompt = prompt
                     max_attempts = request_max_attempts(standard_request)
@@ -329,6 +363,7 @@ async def on_delta(evt, text_chunk, _):
                                 max_continuation=2,
                                 warmup_chars=64,
                                 guard_chars=96,
+                                history_messages=history_messages,
                             )
                             retry = evaluate_retry_directive(
                                 request=standard_request,
@@ -341,11 +376,11 @@ async def on_delta(evt, text_chunk, _):
                             )
                             if retry.retry:
                                 reused_persistent_chat = bool(standard_request.persistent_session and standard_request.upstream_chat_id)
-                                # 如果正在复用会话，重试时保留会话，避免删除后重建导致上下文丢失
+                                # 如果正在复用会话，重试时保留会话，避免删除后重建导致上下文丢失
                                 preserve_chat = reused_persistent_chat
                                 await cleanup_runtime_resources(client, execution.acc, execution.chat_id, preserve_chat=preserve_chat)
                                 if reused_persistent_chat:
-                                    # 保留 upstream_chat_id，在同一会话中重试
+                                    # 保留 upstream_chat_id，在同一会话中重试
                                     # standard_request.session_chat_invalidated = True
                                     # standard_request.upstream_chat_id = None
                                     current_prompt = build_retry_rebase_prompt(standard_request, reason=retry.reason)
@@ -357,13 +392,28 @@ async def on_delta(evt, text_chunk, _):
                             if not stream_state.pending_chunks:
                                 stream_state.pending_chunks.append(_message_start_event(msg_id, model_name, current_prompt, execution.state.answer_text))
 
-                            stream_state.close_current_block()
-                            directive = build_tool_directive(standard_request, execution.state)
+                            directive = build_tool_directive(standard_request, execution.state, history_messages=history_messages)
+                            if (
+                                directive.stop_reason != "tool_use"
+                                and not stream_state.answer_text_buffer
+                                and execution.state.answer_text
+                            ):
+                                # ToolSieve may hold short normal replies until stream end to
+                                # avoid leaking partial tool markup. If no live text delta was
+                                # emitted, replay the finalized visible answer here.
+                                stream_state.buffer_answer_text(execution.state.answer_text)
+                            visible_answer_length = _visible_answer_text_length(
+                                directive=directive,
+                                execution=execution,
+                                stream_state=stream_state,
+                            )
                             if directive.stop_reason == "tool_use":
                                 stream_state.clear_answer_text()
+                                stream_state.close_current_block()
                                 stream_state.current_block = {"type": None, "index": None, "tool_call_id": None}
                             else:
                                 stream_state.flush_answer_text()
+                                stream_state.close_current_block()
                             expected_tool_ids = {
                                 block.get("id")
                                 for block in directive.tool_blocks
@@ -381,11 +431,8 @@ async def on_delta(evt, text_chunk, _):
                                 )
                                 stream_state.close_current_block()
 
-                            visible_answer_length = _visible_answer_text_length(
-                                directive=directive,
-                                execution=execution,
-                                stream_state=stream_state,
-                            )
+                            _log_response_tool_blocks("stream_response", directive.tool_blocks)
+
                             stop_reason = "tool_use" if expected_tool_ids else "end_turn"
                             stream_state.pending_chunks.append(stream_presenter.anthropic_message_delta(stop_reason, visible_answer_length))
                             stream_state.pending_chunks.append(stream_presenter.anthropic_message_stop())
@@ -435,7 +482,17 @@ async def on_delta(evt, text_chunk, _):
         async with app.state.session_locks.hold(session_key):
             standard_request, effective_payload, model_name, qwen_model, prompt, msg_id = await prepare_locked_request(req_data)
             update_request_context(requested_model=model_name, resolved_model=qwen_model)
-            log.info(f"[ANT] model={qwen_model}, stream={standard_request.stream}, tool_enabled={standard_request.tool_enabled}, tools={[t.get('name') for t in standard_request.tools]}, prompt_len={len(prompt)}")
+            tool_names = [t.get('name') for t in standard_request.tools]
+            log.info(
+                "[ANT] model=%s stream=%s tool_enabled=%s tools=%s mcp_tools=%s workspace=%s prompt_len=%s",
+                qwen_model,
+                standard_request.stream,
+                standard_request.tool_enabled,
+                tool_names,
+                [name for name in tool_names if isinstance(name, str) and name.startswith("mcp__")],
+                standard_request.workspace_root or "-",
+                len(prompt),
+            )
             history_messages = original_history_messages
             current_prompt = prompt
             max_attempts = request_max_attempts(standard_request)
@@ -451,11 +508,11 @@ async def on_delta(evt, text_chunk, _):
                     )
                     if retry.retry:
                         reused_persistent_chat = bool(standard_request.persistent_session and standard_request.upstream_chat_id)
-                        # 如果正在复用会话，重试时保留会话，避免删除后重建导致上下文丢失
+                        # 如果正在复用会话，重试时保留会话，避免删除后重建导致上下文丢失
                         preserve_chat = reused_persistent_chat
                         await cleanup_runtime_resources(client, execution.acc, execution.chat_id, preserve_chat=preserve_chat)
                         if reused_persistent_chat:
-                            # 保留 upstream_chat_id，在同一会话中重试
+                            # 保留 upstream_chat_id，在同一会话中重试
                             # standard_request.session_chat_invalidated = True
                             # standard_request.upstream_chat_id = None
                             current_prompt = build_retry_rebase_prompt(standard_request, reason=retry.reason)
@@ -464,7 +521,8 @@ async def on_delta(evt, text_chunk, _):
                         await _reacquire_bound_account_if_needed(client=client, standard_request=standard_request)
                         continue
 
-                    directive = build_tool_directive(standard_request, execution.state)
+                    directive = build_tool_directive(standard_request, execution.state, history_messages=history_messages)
+                    _log_response_tool_blocks("json_response", directive.tool_blocks)
                     content_blocks: list[dict] = []
                     if execution.state.reasoning_text:
                         content_blocks.append({"type": "thinking", "thinking": execution.state.reasoning_text})
@@ -511,3 +569,5 @@ async def on_delta(evt, text_chunk, _):
                     if stream_attempt == max_attempts - 1:
                         await clear_invalidated_session_chat(app=app, request=standard_request)
                         raise HTTPException(status_code=500, detail=str(e))
+
+