3232 plan_persistent_session_turn ,
3333)
3434from backend .services .token_calc import count_tokens
35+ from backend .services .workspace_context import derive_workspace_root
3536from backend .toolcall .normalize import build_tool_name_registry
3637
3738log = logging .getLogger ("qwen2api.anthropic" )
3839router = APIRouter ()
3940
4041
42+ def _tool_input_preview (input_data , * , limit : int = 260 ) -> str :
43+ try :
44+ raw = json .dumps (input_data if input_data is not None else {}, ensure_ascii = False , sort_keys = True )
45+ except (TypeError , ValueError ):
46+ raw = repr (input_data )
47+ return " " .join (raw .split ())[:limit ] + ("...[truncated]" if len (raw ) > limit else "" )
48+
49+
50+ def _log_response_tool_blocks (stage : str , blocks : list [dict ]) -> None :
51+ for idx , block in enumerate (blocks , start = 1 ):
52+ if not isinstance (block , dict ) or block .get ("type" ) != "tool_use" :
53+ continue
54+ log .info (
55+ "[ANT-ToolOut] stage=%s index=%s id=%s name=%s input=%s" ,
56+ stage ,
57+ idx ,
58+ block .get ("id" , "-" ),
59+ block .get ("name" , "-" ),
60+ _tool_input_preview (block .get ("input" , {})),
61+ )
62+
63+
4164class _AnthropicStreamState :
4265 def __init__ (self , * , msg_id : str , model_name : str , prompt : str ):
4366 self .msg_id = msg_id
@@ -125,8 +148,9 @@ def clear_answer_text(self) -> None:
125148
126149
127150def _build_standard_request (req_data : dict ) -> StandardRequest :
128- """使用 CLIProxy 进行协议转换 """
151+ """浣跨敤 CLIProxy 杩涜鍗忚杞崲 """
129152 standard_request = CLIProxy .from_anthropic (req_data , client_profile = CLAUDE_CODE_OPENAI_PROFILE )
153+ standard_request .workspace_root = derive_workspace_root (req_data )
130154 CLIProxy .log_conversion ("anthropic" , standard_request .response_model , len (standard_request .prompt ), len (standard_request .tools ))
131155 return standard_request
132156
@@ -149,7 +173,7 @@ async def _run_anthropic_attempt(
149173 max_attempts : int ,
150174):
151175 update_request_context (stream_attempt = stream_attempt + 1 )
152- execution = await collect_completion_run (client , standard_request , current_prompt )
176+ execution = await collect_completion_run (client , standard_request , current_prompt , history_messages = history_messages )
153177 retry = evaluate_retry_directive (
154178 request = standard_request ,
155179 current_prompt = current_prompt ,
@@ -198,9 +222,9 @@ async def anthropic_count_tokens(request: Request):
198222 prompt_result = messages_to_prompt (req_data , client_profile = CLAUDE_CODE_OPENAI_PROFILE )
199223 base_tokens = count_tokens (prompt_result .prompt )
200224 # Context Pressure Inflation:
201- # Claude Code 假设 context window=200K,到 ~80%(160K) 触发自动压缩。
202- # 但 Qwen 实际上游 window 只有 ~150K,到 ~120K 时就开始挤压输出预算。
203- # 虚增 input_tokens 1.35x 让 CC 提前触发压缩,避免爆 window。
225+ # Claude Code 鍋囪 context window=200K锛屽埌 ~80%(160K) 瑙﹀彂鑷姩鍘嬬缉銆?
226+ # 浣? Qwen 瀹為檯涓婃父 window 鍙湁 ~150K锛屽埌 ~120K 鏃跺氨寮€濮嬫尋鍘嬭緭鍑洪绠椼€?
227+ # 铏氬 input_tokens 1.35x 璁? CC 鎻愬墠瑙﹀彂鍘嬬缉锛岄伩鍏嶇垎 window銆?
204228 inflation = 1.35
205229 inflated = int (base_tokens * inflation )
206230 return JSONResponse ({"input_tokens" : inflated })
@@ -288,7 +312,17 @@ async def generate():
288312 async with app .state .session_locks .hold (session_key ):
289313 standard_request , effective_payload , model_name , qwen_model , prompt , msg_id = await prepare_locked_request (req_data )
290314 update_request_context (requested_model = model_name , resolved_model = qwen_model )
291- log .info (f"[ANT] model={ qwen_model } , stream={ standard_request .stream } , tool_enabled={ standard_request .tool_enabled } , tools={ [t .get ('name' ) for t in standard_request .tools ]} , prompt_len={ len (prompt )} " )
315+ tool_names = [t .get ('name' ) for t in standard_request .tools ]
316+ log .info (
317+ "[ANT] model=%s stream=%s tool_enabled=%s tools=%s mcp_tools=%s workspace=%s prompt_len=%s" ,
318+ qwen_model ,
319+ standard_request .stream ,
320+ standard_request .tool_enabled ,
321+ tool_names ,
322+ [name for name in tool_names if isinstance (name , str ) and name .startswith ("mcp__" )],
323+ standard_request .workspace_root or "-" ,
324+ len (prompt ),
325+ )
292326 history_messages = original_history_messages
293327 current_prompt = prompt
294328 max_attempts = request_max_attempts (standard_request )
@@ -329,6 +363,7 @@ async def on_delta(evt, text_chunk, _):
329363 max_continuation = 2 ,
330364 warmup_chars = 64 ,
331365 guard_chars = 96 ,
366+ history_messages = history_messages ,
332367 )
333368 retry = evaluate_retry_directive (
334369 request = standard_request ,
@@ -341,11 +376,11 @@ async def on_delta(evt, text_chunk, _):
341376 )
342377 if retry .retry :
343378 reused_persistent_chat = bool (standard_request .persistent_session and standard_request .upstream_chat_id )
344- # 如果正在复用会话,重试时保留会话,避免删除后重建导致上下文丢失
379+ # 濡傛灉姝e湪澶嶇敤浼氳瘽锛岄噸璇曟椂淇濈暀浼氳瘽锛岄伩鍏嶅垹闄ゅ悗閲嶅缓瀵艰嚧涓婁笅鏂囦涪澶?
345380 preserve_chat = reused_persistent_chat
346381 await cleanup_runtime_resources (client , execution .acc , execution .chat_id , preserve_chat = preserve_chat )
347382 if reused_persistent_chat :
348- # 保留 upstream_chat_id,在同一会话中重试
383+ # 淇濈暀 upstream_chat_id锛屽湪鍚屼竴浼氳瘽涓噸璇?
349384 # standard_request.session_chat_invalidated = True
350385 # standard_request.upstream_chat_id = None
351386 current_prompt = build_retry_rebase_prompt (standard_request , reason = retry .reason )
@@ -357,13 +392,28 @@ async def on_delta(evt, text_chunk, _):
357392 if not stream_state .pending_chunks :
358393 stream_state .pending_chunks .append (_message_start_event (msg_id , model_name , current_prompt , execution .state .answer_text ))
359394
360- stream_state .close_current_block ()
361- directive = build_tool_directive (standard_request , execution .state )
395+ directive = build_tool_directive (standard_request , execution .state , history_messages = history_messages )
396+ if (
397+ directive .stop_reason != "tool_use"
398+ and not stream_state .answer_text_buffer
399+ and execution .state .answer_text
400+ ):
401+ # ToolSieve may hold short normal replies until stream end to
402+ # avoid leaking partial tool markup. If no live text delta was
403+ # emitted, replay the finalized visible answer here.
404+ stream_state .buffer_answer_text (execution .state .answer_text )
405+ visible_answer_length = _visible_answer_text_length (
406+ directive = directive ,
407+ execution = execution ,
408+ stream_state = stream_state ,
409+ )
362410 if directive .stop_reason == "tool_use" :
363411 stream_state .clear_answer_text ()
412+ stream_state .close_current_block ()
364413 stream_state .current_block = {"type" : None , "index" : None , "tool_call_id" : None }
365414 else :
366415 stream_state .flush_answer_text ()
416+ stream_state .close_current_block ()
367417 expected_tool_ids = {
368418 block .get ("id" )
369419 for block in directive .tool_blocks
@@ -381,11 +431,8 @@ async def on_delta(evt, text_chunk, _):
381431 )
382432 stream_state .close_current_block ()
383433
384- visible_answer_length = _visible_answer_text_length (
385- directive = directive ,
386- execution = execution ,
387- stream_state = stream_state ,
388- )
434+ _log_response_tool_blocks ("stream_response" , directive .tool_blocks )
435+
389436 stop_reason = "tool_use" if expected_tool_ids else "end_turn"
390437 stream_state .pending_chunks .append (stream_presenter .anthropic_message_delta (stop_reason , visible_answer_length ))
391438 stream_state .pending_chunks .append (stream_presenter .anthropic_message_stop ())
@@ -435,7 +482,17 @@ async def on_delta(evt, text_chunk, _):
435482 async with app .state .session_locks .hold (session_key ):
436483 standard_request , effective_payload , model_name , qwen_model , prompt , msg_id = await prepare_locked_request (req_data )
437484 update_request_context (requested_model = model_name , resolved_model = qwen_model )
438- log .info (f"[ANT] model={ qwen_model } , stream={ standard_request .stream } , tool_enabled={ standard_request .tool_enabled } , tools={ [t .get ('name' ) for t in standard_request .tools ]} , prompt_len={ len (prompt )} " )
485+ tool_names = [t .get ('name' ) for t in standard_request .tools ]
486+ log .info (
487+ "[ANT] model=%s stream=%s tool_enabled=%s tools=%s mcp_tools=%s workspace=%s prompt_len=%s" ,
488+ qwen_model ,
489+ standard_request .stream ,
490+ standard_request .tool_enabled ,
491+ tool_names ,
492+ [name for name in tool_names if isinstance (name , str ) and name .startswith ("mcp__" )],
493+ standard_request .workspace_root or "-" ,
494+ len (prompt ),
495+ )
439496 history_messages = original_history_messages
440497 current_prompt = prompt
441498 max_attempts = request_max_attempts (standard_request )
@@ -451,11 +508,11 @@ async def on_delta(evt, text_chunk, _):
451508 )
452509 if retry .retry :
453510 reused_persistent_chat = bool (standard_request .persistent_session and standard_request .upstream_chat_id )
454- # 如果正在复用会话,重试时保留会话,避免删除后重建导致上下文丢失
511+ # 濡傛灉姝e湪澶嶇敤浼氳瘽锛岄噸璇曟椂淇濈暀浼氳瘽锛岄伩鍏嶅垹闄ゅ悗閲嶅缓瀵艰嚧涓婁笅鏂囦涪澶?
455512 preserve_chat = reused_persistent_chat
456513 await cleanup_runtime_resources (client , execution .acc , execution .chat_id , preserve_chat = preserve_chat )
457514 if reused_persistent_chat :
458- # 保留 upstream_chat_id,在同一会话中重试
515+ # 淇濈暀 upstream_chat_id锛屽湪鍚屼竴浼氳瘽涓噸璇?
459516 # standard_request.session_chat_invalidated = True
460517 # standard_request.upstream_chat_id = None
461518 current_prompt = build_retry_rebase_prompt (standard_request , reason = retry .reason )
@@ -464,7 +521,8 @@ async def on_delta(evt, text_chunk, _):
464521 await _reacquire_bound_account_if_needed (client = client , standard_request = standard_request )
465522 continue
466523
467- directive = build_tool_directive (standard_request , execution .state )
524+ directive = build_tool_directive (standard_request , execution .state , history_messages = history_messages )
525+ _log_response_tool_blocks ("json_response" , directive .tool_blocks )
468526 content_blocks : list [dict ] = []
469527 if execution .state .reasoning_text :
470528 content_blocks .append ({"type" : "thinking" , "thinking" : execution .state .reasoning_text })
@@ -511,3 +569,5 @@ async def on_delta(evt, text_chunk, _):
511569 if stream_attempt == max_attempts - 1 :
512570 await clear_invalidated_session_chat (app = app , request = standard_request )
513571 raise HTTPException (status_code = 500 , detail = str (e ))
572+
573+
0 commit comments