rtc-voice-chat/backend/routes/debug.py
2026-03-30 10:39:19 +08:00

88 lines
2.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Debug endpoints: POST /debug/chat, GET /debug/rag
"""
import json
import time
from fastapi import APIRouter
from fastapi.responses import StreamingResponse
from schemas.chat import DebugChatRequest
from services.local_llm_service import local_llm_service
from services.rag_service import rag_service
# Router for the debug endpoints; every route registered on it is served under /debug.
router = APIRouter(prefix="/debug")
@router.post("/chat")
async def debug_chat(request: DebugChatRequest):
    """Stream the model's answer to a debug question as plain text.

    The messages handed to ``local_llm_service.chat_stream`` are the request's
    history followed by the new question as a ``user`` turn, together with the
    context that ``rag_service.retrieve`` returns for that question.

    Each non-empty ``delta.content`` of the streamed chunks is forwarded to the
    client as soon as it arrives. After the stream is exhausted the generator
    prints the elapsed time, the token usage (when a chunk carried one) and a
    JSON ``history`` payload that includes the assistant's full reply.

    Returns:
        A ``StreamingResponse`` with media type ``text/plain``.
    """

    def history_as_dicts():
        # Builds a fresh list on every call so the list given to the LLM
        # service and the one printed at the end never share state.
        return [
            {"role": turn.role, "content": turn.content} for turn in request.history
        ]

    conversation = [
        *history_as_dicts(),
        {"role": "user", "content": request.question},
    ]

    # The clock starts before retrieval, so the reported duration covers both
    # the RAG lookup and the complete LLM stream.
    started_at = time.time()
    rag_context = await rag_service.retrieve(request.question)
    stream_iterator = local_llm_service.chat_stream(
        history_messages=conversation,
        rag_context=rag_context,
    )

    def generate_text():
        pieces = []
        last_usage = None

        for chunk in stream_iterator:
            if chunk is None:
                continue

            choices = getattr(chunk, "choices", None)
            if choices:
                delta = getattr(choices[0], "delta", None)
                text = getattr(delta, "content", None)
                if text:
                    pieces.append(text)
                    yield text

            # NOTE(review): usage is inspected on every chunk, including ones
            # without choices; the most recent truthy value is kept.
            reported = getattr(chunk, "usage", None)
            if reported:
                last_usage = reported

        elapsed = time.time() - started_at
        print(f"DEBUG: LLM 调用耗时: {elapsed:.2f}s")

        if last_usage:
            print(
                f"🎫 Token 统计: Total={last_usage.total_tokens} "
                f"(P:{last_usage.prompt_tokens}, C:{last_usage.completion_tokens})"
            )

        # Rebuilt from the request rather than reusing ``conversation``.
        replay = history_as_dicts()
        replay.append({"role": "user", "content": request.question})
        replay.append({"role": "assistant", "content": "".join(pieces)})

        divider = "=" * 50
        print("\n" + divider)
        print("🐞 调试完成!以下是可用于下次请求的 history 结构:")
        print(json.dumps({"history": replay}, ensure_ascii=False, indent=2))
        print(divider + "\n")

    return StreamingResponse(generate_text(), media_type="text/plain")
@router.get("/rag")
async def debug_rag(query: str):
    """Run a knowledge-base retrieval for ``query`` and report what came back.

    Args:
        query: Text passed to ``rag_service.retrieve``.

    Returns:
        ``{"error": ...}`` when ``query`` is empty. Otherwise a dict with the
        query, the retrieved context, its length (0 when the context is
        falsy) and a status of ``"success"`` or ``"no_results_or_error"``.
    """
    if not query:
        return {"error": "请提供 query 参数"}

    print(f"🔍 [Debug] 正在检索知识库: {query}")
    context = await rag_service.retrieve(query)

    # A falsy context (None, empty) is reported as length 0 with a non-success status.
    if context:
        length = len(context)
        status = "success"
    else:
        length = 0
        status = "no_results_or_error"

    return {
        "query": query,
        "retrieved_context": context,
        "length": length,
        "status": status,
    }