rtc-voice-chat/backend/config/custom_scene.py

206 lines
7.5 KiB
Python

"""
Copyright 2025 Beijing Volcano Engine Technology Co., Ltd. All Rights Reserved.
SPDX-license-identifier: BSD-3-Clause
"""
from typing import Any
from utils.env import (
env_bool,
env_int,
env_json_object,
env_list,
env_number,
env_optional_bool,
env_optional_int,
env_optional_number,
env_str,
require_env,
set_if_present,
)
# --- Scene identity and display defaults -----------------------------------
CUSTOM_SCENE_ID = "Custom"
DEFAULT_SCENE_NAME = "自定义助手"
DEFAULT_SCENE_ICON = (
    "https://lf3-rtc-demo.volccdn.com/obj/rtc-aigc-assets/DoubaoAvatar.png"
)

# --- LLM / RTC OpenAPI defaults --------------------------------------------
# Passed as the default argument to the env_* lookups in this module.
DEFAULT_LLM_THINKING_TYPE = "disabled"
DEFAULT_RTC_OPENAPI_VERSION = "2025-06-01"

# --- Speech recognition (ASR) defaults -------------------------------------
DEFAULT_ASR_PROVIDER = "volcano"
DEFAULT_ASR_MODE = "smallmodel"
DEFAULT_ASR_CLUSTER = "volcengine_streaming_common"

# --- Speech synthesis (TTS) defaults ---------------------------------------
DEFAULT_TTS_PROVIDER = "volcano"
DEFAULT_TTS_CLUSTER = "volcano_tts"
DEFAULT_TTS_VOICE_TYPE = "BV001_streaming"

# --- Avatar defaults ---------------------------------------------------------
DEFAULT_AVATAR_TYPE = "3min"
DEFAULT_AVATAR_ROLE = "250623-zhibo-linyunzhi"
DEFAULT_AVATAR_VIDEO_BITRATE = 2000
def get_rtc_openapi_version() -> str:
    """Return the RTC OpenAPI version string to use.

    The value is looked up via ``env_str`` under ``RTC_OPENAPI_VERSION``,
    with ``DEFAULT_RTC_OPENAPI_VERSION`` supplied as the default argument.
    """
    version = env_str("RTC_OPENAPI_VERSION", DEFAULT_RTC_OPENAPI_VERSION)
    return version
def build_llm_settings_from_env(missing: list[str]) -> dict[str, Any]:
    """Collect the custom-LLM settings from ``CUSTOM_LLM_*`` variables.

    Args:
        missing: List handed to ``require_env`` for each mandatory variable.
            Presumably ``require_env`` appends the variable name to it when
            the variable is unset -- confirm against ``utils.env``.

    Returns:
        A dict of settings keyed by snake_case name (``system_message``,
        ``url``, ``api_key``, ...), in the order they are read below.
    """
    settings: dict[str, Any] = {}

    # Core fields: the system message and URL are mandatory.
    settings["system_message"] = require_env("CUSTOM_LLM_SYSTEM_MESSAGE", missing)
    settings["vision_enable"] = env_bool("CUSTOM_LLM_VISION_ENABLE", False)
    settings["thinking_type"] = env_str(
        "CUSTOM_LLM_THINKING_TYPE", DEFAULT_LLM_THINKING_TYPE
    )
    settings["url"] = require_env("CUSTOM_LLM_URL", missing)

    # Optional request parameters, read without an explicit default.
    settings["api_key"] = env_str("CUSTOM_LLM_API_KEY")
    settings["model_name"] = env_str("CUSTOM_LLM_MODEL_NAME")
    settings["history_length"] = env_optional_int("CUSTOM_LLM_HISTORY_LENGTH")
    settings["prefill"] = env_optional_bool("CUSTOM_LLM_PREFILL")
    settings["custom"] = env_str("CUSTOM_LLM_CUSTOM")
    settings["extra_header"] = env_json_object("CUSTOM_LLM_EXTRA_HEADER_JSON")
    settings["enable_parallel_tool_calls"] = env_optional_bool(
        "CUSTOM_LLM_ENABLE_PARALLEL_TOOL_CALLS"
    )
    settings["temperature"] = env_optional_number("CUSTOM_LLM_TEMPERATURE")
    settings["top_p"] = env_optional_number("CUSTOM_LLM_TOP_P")
    settings["max_tokens"] = env_optional_int("CUSTOM_LLM_MAX_TOKENS")

    # NOTE(review): these two are checked as required but their values are
    # discarded here; presumably they are consumed elsewhere -- confirm.
    for required_name in ("LOCAL_LLM_API_KEY", "LOCAL_LLM_MODEL"):
        require_env(required_name, missing)

    return settings
def build_llm_config(llm_settings: dict[str, Any]) -> dict[str, Any]:
    """Translate collected LLM settings into the ``LLMConfig`` payload.

    Args:
        llm_settings: Settings dict with the snake_case keys read below
            (``system_message``, ``vision_enable``, ``url``, ``api_key``,
            and the optional request parameters).

    Returns:
        A dict with ``Mode`` fixed to ``"CustomLLM"``, the system message
        wrapped in a one-element list, the vision switch and URL, ``APIKey``
        when the API key is truthy, plus whatever optional fields
        ``set_if_present`` chooses to add.
    """
    llm_config: dict[str, Any] = {"Mode": "CustomLLM"}
    llm_config["SystemMessages"] = [llm_settings["system_message"]]
    llm_config["VisionConfig"] = {"Enable": llm_settings["vision_enable"]}
    llm_config["Url"] = llm_settings["url"]

    api_key = llm_settings["api_key"]
    if api_key:
        llm_config["APIKey"] = api_key

    # Resolve every optional value before applying any of them, so that all
    # lookups happen ahead of the first set_if_present call.
    optional_pairs = [
        (payload_key, llm_settings[settings_key])
        for payload_key, settings_key in (
            ("ModelName", "model_name"),
            ("HistoryLength", "history_length"),
            ("Prefill", "prefill"),
            ("Custom", "custom"),
            ("ExtraHeader", "extra_header"),
            ("EnableParallelToolCalls", "enable_parallel_tool_calls"),
            ("Temperature", "temperature"),
            ("TopP", "top_p"),
            ("MaxTokens", "max_tokens"),
        )
    ]
    for payload_key, payload_value in optional_pairs:
        # Presumably a no-op when the value is absent -- see utils.env.
        set_if_present(llm_config, payload_key, payload_value)

    return llm_config
def build_custom_scene_from_env() -> dict[str, Any]:
    """Assemble the full custom-scene configuration from the environment.

    Returns:
        A dict with the top-level sections ``SceneConfig``,
        ``AccountConfig``, ``RTCConfig`` and ``VoiceChat``.

    Raises:
        ValueError: If any variable name was collected in the ``missing``
            list. ``CUSTOM_RTC_APP_KEY`` is added to that list when neither
            an RTC token nor an RTC app key is provided.
    """
    missing: list[str] = []

    # Mandatory values; `missing` is shared with the LLM settings builder so
    # that one error can report everything at once.
    access_key_id = require_env("CUSTOM_ACCESS_KEY_ID", missing)
    secret_key = require_env("CUSTOM_SECRET_KEY", missing)
    rtc_app_id = require_env("CUSTOM_RTC_APP_ID", missing)
    task_id = require_env("CUSTOM_TASK_ID", missing)
    agent_user_id = require_env("CUSTOM_AGENT_USER_ID", missing)
    welcome_message = require_env("CUSTOM_AGENT_WELCOME_MESSAGE", missing)
    asr_app_id = require_env("CUSTOM_ASR_APP_ID", missing)
    tts_app_id = require_env("CUSTOM_TTS_APP_ID", missing)
    llm_settings = build_llm_settings_from_env(missing)

    rtc_app_key = env_str("CUSTOM_RTC_APP_KEY")
    rtc_room_id = env_str("CUSTOM_RTC_ROOM_ID")
    rtc_user_id = env_str("CUSTOM_RTC_USER_ID")
    rtc_token = env_str("CUSTOM_RTC_TOKEN")

    # The app key is only demanded when no token was supplied.
    if not (rtc_token or rtc_app_key):
        missing.append("CUSTOM_RTC_APP_KEY")

    if missing:
        # dict.fromkeys removes duplicates while keeping first-seen order.
        unique_names = ", ".join(dict.fromkeys(missing))
        raise ValueError(f"Custom 场景缺少以下环境变量: {unique_names}")

    interrupt_mode = env_int("CUSTOM_INTERRUPT_MODE", 0)
    avatar_enabled = env_bool("CUSTOM_AVATAR_ENABLED", False)
    # Fall back to the RTC user id (or an empty string) as the only target.
    target_user_ids = env_list("CUSTOM_AGENT_TARGET_USER_ID") or [rtc_user_id or ""]

    # The sections below are built in the same order in which they appear in
    # the returned structure.
    scene_config = {
        "icon": env_str("CUSTOM_SCENE_ICON", DEFAULT_SCENE_ICON),
        "name": env_str("CUSTOM_SCENE_NAME", DEFAULT_SCENE_NAME),
    }
    account_config = {"accessKeyId": access_key_id, "secretKey": secret_key}
    rtc_config = {
        "AppId": rtc_app_id,
        "AppKey": rtc_app_key,
        "RoomId": rtc_room_id,
        "UserId": rtc_user_id,
        "Token": rtc_token,
    }
    agent_config = {
        "TargetUserId": target_user_ids,
        "WelcomeMessage": welcome_message,
        "UserId": agent_user_id,
        "EnableConversationStateCallback": True,
    }
    asr_config = {
        "Provider": env_str("CUSTOM_ASR_PROVIDER", DEFAULT_ASR_PROVIDER),
        "ProviderParams": {
            "Mode": env_str("CUSTOM_ASR_MODE", DEFAULT_ASR_MODE),
            "AppId": asr_app_id,
            "Cluster": env_str("CUSTOM_ASR_CLUSTER", DEFAULT_ASR_CLUSTER),
        },
    }
    tts_config = {
        "Provider": env_str("CUSTOM_TTS_PROVIDER", DEFAULT_TTS_PROVIDER),
        "ProviderParams": {
            "app": {
                "appid": tts_app_id,
                "cluster": env_str("CUSTOM_TTS_CLUSTER", DEFAULT_TTS_CLUSTER),
            },
            "audio": {
                "voice_type": env_str(
                    "CUSTOM_TTS_VOICE_TYPE", DEFAULT_TTS_VOICE_TYPE
                ),
                "speed_ratio": env_number("CUSTOM_TTS_SPEED_RATIO", 1),
                "pitch_ratio": env_number("CUSTOM_TTS_PITCH_RATIO", 1),
                "volume_ratio": env_number("CUSTOM_TTS_VOLUME_RATIO", 1),
            },
        },
    }
    pipeline_config = {
        "ASRConfig": asr_config,
        "TTSConfig": tts_config,
        "LLMConfig": build_llm_config(llm_settings),
        "InterruptMode": interrupt_mode,
    }
    avatar_config = {
        "Enabled": avatar_enabled,
        "AvatarType": env_str("CUSTOM_AVATAR_TYPE", DEFAULT_AVATAR_TYPE),
        "AvatarRole": env_str("CUSTOM_AVATAR_ROLE", DEFAULT_AVATAR_ROLE),
        "BackgroundUrl": env_str("CUSTOM_AVATAR_BACKGROUND_URL"),
        "VideoBitrate": env_int(
            "CUSTOM_AVATAR_VIDEO_BITRATE", DEFAULT_AVATAR_VIDEO_BITRATE
        ),
        "AvatarAppID": env_str("CUSTOM_AVATAR_APP_ID"),
        "AvatarToken": env_str("CUSTOM_AVATAR_TOKEN"),
    }
    voice_chat = {
        "AppId": rtc_app_id,
        "RoomId": rtc_room_id,
        "TaskId": task_id,
        "AgentConfig": agent_config,
        "Config": pipeline_config,
        "AvatarConfig": avatar_config,
        # The interrupt mode is emitted both here and inside "Config".
        "InterruptMode": interrupt_mode,
    }

    return {
        "SceneConfig": scene_config,
        "AccountConfig": account_config,
        "RTCConfig": rtc_config,
        "VoiceChat": voice_chat,
    }