# rtc-voice-chat/backend/routes/v1/chat_callback.py
# Snapshot: 2026-04-02 20:15:15 +08:00 (140 lines, 4.6 KiB)
# NOTE: this file intentionally contains CJK text (summaries, error messages);
# the "ambiguous Unicode" viewer warning can be ignored.
"""
POST /api/chat_callback — 自定义 LLM 回调SSE 流式响应)
"""
import json
import logging

from fastapi import APIRouter, Request
from fastapi.responses import StreamingResponse

from schemas.chat import ChatCallbackRequest
from services.local_llm_service import local_llm_service
from services.scene_service import ensure_custom_llm_authorized, get_custom_llm_callback_settings
from services.session_store import get_room_history
from utils.responses import custom_llm_error_response
# APIRouter grouping this module's endpoint under the "LLM 回调" docs tag;
# included into the application elsewhere — presumably the main app factory (verify).
router = APIRouter(tags=["LLM 回调"])
# Module-level logger (replaces ad-hoc print() error reporting).
logger = logging.getLogger(__name__)


def _empty_messages_response():
    """Build the standard 400 error response for an empty `messages` list."""
    return custom_llm_error_response(
        "messages 不能为空",
        code="BadRequest",
        status_code=400,
    )


@router.post(
    "/api/chat_callback",
    summary="自定义 LLM 回调SSE 流式)",
    description=(
        "由**火山引擎 RTC 平台**在用户发言后自动回调,返回 OpenAI 兼容格式的 SSE 流。\n\n"
        "处理逻辑:\n"
        "1. 校验 `Authorization: Bearer <CUSTOM_LLM_API_KEY>`\n"
        "2. 过滤掉 RTC 平台发送的 `欢迎语` 触发词(非真实用户输入)\n"
        "3. 若携带 `room_id` Query 参数,自动从缓存取历史并 prepend 到 messages 前\n"
        "4. 调用本地 LLM工具调用 / RAG 按需触发),以 SSE 流返回结果\n\n"
        "**鉴权**`Authorization: Bearer <CUSTOM_LLM_API_KEY>`"
    ),
    responses={
        401: {"description": "API Key 无效"},
        400: {"description": "messages 为空或最后一条不是 user 角色"},
        500: {"description": "LLM 初始化失败"},
    },
)
async def chat_callback(request: Request, body: ChatCallbackRequest):
    """Handle the custom-LLM callback from the RTC platform.

    Validates the bearer token, filters the platform's synthetic "欢迎语"
    trigger message, optionally prepends cached room history, and streams
    the local LLM reply back as OpenAI-compatible SSE frames.

    Returns:
        StreamingResponse with ``text/event-stream`` on success, or a
        JSON error response (401 / 400 / 500) from
        ``custom_llm_error_response`` on failure.
    """
    # --- Authentication & settings resolution ---------------------------
    try:
        settings = get_custom_llm_callback_settings()
        ensure_custom_llm_authorized(request, settings["api_key"])
    except PermissionError as exc:
        # Missing/invalid bearer token.
        return custom_llm_error_response(
            str(exc),
            code="AuthenticationError",
            status_code=401,
        )
    except ValueError as exc:
        # Configuration/validation problem; default error code & status.
        return custom_llm_error_response(str(exc))
    except Exception as exc:
        return custom_llm_error_response(
            f"解析请求失败: {exc}",
            code="InternalError",
            status_code=500,
        )

    # --- Message normalization ------------------------------------------
    messages = [m.model_dump() for m in body.messages]
    if not messages:
        return _empty_messages_response()

    # Drop the RTC platform's "欢迎语" trigger message — it is injected by
    # the platform, not typed by a real user.
    messages = [m for m in messages if not (m["role"] == "user" and m["content"] == "欢迎语")]

    # Prepend cached conversation history when a room_id query param is given.
    room_id = request.query_params.get("room_id", "")
    if room_id:
        history = get_room_history(room_id)
        if history:
            messages = history + messages

    # The welcome-word filter may have removed every message.
    if not messages:
        return _empty_messages_response()

    last_message = messages[-1]
    if last_message.get("role") != "user":
        return custom_llm_error_response(
            "最后一条消息必须是用户消息",
            code="BadRequest",
            status_code=400,
        )

    # --- LLM streaming ---------------------------------------------------
    # RAG is triggered on demand via tool calls — no pre-retrieval here.
    try:
        stream_iterator = local_llm_service.chat_stream(
            history_messages=messages,
            request_options={
                "temperature": body.temperature,
                "max_tokens": body.max_tokens,
                "top_p": body.top_p,
            },
        )
    except Exception as exc:
        return custom_llm_error_response(
            f"初始化本地 LLM 流式调用失败: {exc}",
            code="InternalError",
            status_code=500,
        )

    def generate_sse():
        """Yield OpenAI-compatible SSE frames, always terminating with [DONE]."""
        try:
            for chunk in stream_iterator:
                if chunk is None:
                    continue
                if hasattr(chunk, "model_dump_json"):
                    # Pydantic model chunk (e.g. OpenAI SDK object).
                    chunk_json = chunk.model_dump_json()
                else:
                    chunk_json = json.dumps(chunk, ensure_ascii=False)
                yield f"data: {chunk_json}\n\n"
        except GeneratorExit:
            # Client disconnected — propagate so the generator closes cleanly.
            raise
        except Exception:
            # Log with traceback and end the stream early; the trailing
            # [DONE] frame below is still emitted.
            logger.exception("/api/chat_callback 流式输出失败")
            logger.warning("已提前结束当前 SSE 流")
        yield "data: [DONE]\n\n"

    return StreamingResponse(
        generate_sse(),
        status_code=200,
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            # NOTE(review): wildcard CORS on a server-to-server callback —
            # confirm this is intentional.
            "Access-Control-Allow-Origin": "*",
        },
    )