rtc-voice-chat/simple-frontend/aigc-voice-client.js
2026-04-02 09:40:23 +08:00

640 lines
18 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* 火山引擎 RTC AIGC 语音对话 - 原生 JS 接入实现
*
* 依赖:@volcengine/rtc (通过 <script> 或 npm 引入)
*
* 用法:
* 1. 在 HTML 中引入此文件和 RTC SDK
* 2. const client = new AigcVoiceClient({ serverUrl: 'http://localhost:3001' });
* 3. await client.init(); // 获取场景配置
* 4. await client.start(); // 开始语音对话
* 5. await client.stop(); // 结束语音对话
* 6. client.interrupt(); // 打断 AI
*/
// ============================================================
// TLV 编解码工具
// ============================================================
/**
 * Encode a string as a TLV (type-length-value) frame.
 *
 * Layout: | type (4 bytes) | length (4 bytes, big-endian) | value (UTF-8) |
 *
 * @param {string} str - Text to place in the value section (encoded as UTF-8).
 * @param {string} type - Frame tag; at most the first 4 characters are used, one byte per
 *   character, and unused tag bytes stay zero.
 * @returns {ArrayBuffer} The encoded frame.
 */
function string2tlv(str, type) {
  const HEADER_SIZE = 8;
  const payload = new TextEncoder().encode(str);
  const frame = new Uint8Array(HEADER_SIZE + payload.length);

  // Tag: one byte per character, capped at four characters.
  const tagLength = Math.min(type.length, 4);
  for (let pos = 0; pos < tagLength; pos += 1) {
    frame[pos] = type.charCodeAt(pos);
  }

  // Payload byte count, written big-endian right after the tag.
  new DataView(frame.buffer).setUint32(4, payload.length, false);

  frame.set(payload, HEADER_SIZE);
  return frame.buffer;
}
/**
 * Decode a TLV (type-length-value) frame into `{ type, value }`.
 *
 * Layout: | type (4 bytes) | length (4 bytes, big-endian) | value (UTF-8) |
 *
 * @param {ArrayBuffer|ArrayBufferView} tlvBuffer - The raw frame. Typed-array / DataView inputs
 *   are honoured with their own byteOffset and byteLength.
 * @returns {{ type: string, value: string }} The tag (zero bytes skipped) and the decoded text.
 * @throws {RangeError} If the frame is shorter than its 8-byte header, or the declared length
 *   exceeds the bytes actually present.
 */
function tlv2String(tlvBuffer) {
  const HEADER_SIZE = 8;

  // Normalise to one byte view. Passing a typed array straight to
  // `new Uint8Array(view, offset, length)` would ignore offset/length and copy the whole view,
  // so views are re-wrapped over their underlying buffer instead.
  const bytes = ArrayBuffer.isView(tlvBuffer)
    ? new Uint8Array(tlvBuffer.buffer, tlvBuffer.byteOffset, tlvBuffer.byteLength)
    : new Uint8Array(tlvBuffer);

  if (bytes.byteLength < HEADER_SIZE) {
    throw new RangeError(
      `TLV frame too short: expected at least ${HEADER_SIZE} bytes, got ${bytes.byteLength}`
    );
  }

  // Read the length as an unsigned 32-bit big-endian integer (bit shifts would yield a
  // negative number when the top bit is set).
  const length = new DataView(bytes.buffer, bytes.byteOffset, HEADER_SIZE).getUint32(4, false);
  const available = bytes.byteLength - HEADER_SIZE;
  if (length > available) {
    throw new RangeError(
      `TLV frame truncated: header declares ${length} bytes but only ${available} are present`
    );
  }

  let type = '';
  for (let i = 0; i < 4; i += 1) {
    if (bytes[i] !== 0) type += String.fromCharCode(bytes[i]);
  }

  const value = new TextDecoder().decode(bytes.subarray(HEADER_SIZE, HEADER_SIZE + length));
  return { type, value };
}
// ============================================================
// AI Agent 状态枚举
// ============================================================
/**
 * Numeric stage codes for the AI agent.
 *
 * Frozen so that code sharing this exported constant cannot reassign or add codes by accident.
 */
const AgentStage = Object.freeze({
  UNKNOWN: 0,
  LISTENING: 1,
  THINKING: 2,
  SPEAKING: 3,
  INTERRUPTED: 4,
  FINISHED: 5,
});
// ============================================================
// 主类
// ============================================================
/**
 * Browser client for a Volcengine RTC AIGC voice conversation.
 *
 * Talks to a backend (`serverUrl`) for scene configuration and for starting/stopping the AI bot,
 * and drives the RTC engine exposed as `window.VERTC` for the audio session itself.
 */
class AigcVoiceClient {
  /**
   * @param {Object} options
   * @param {string} options.serverUrl - Backend base URL, e.g. 'http://localhost:3001'
   * @param {string} [options.sceneId] - Scene ID to use; when omitted the first scene is used
   */
  constructor(options = {}) {
    this.serverUrl = options.serverUrl || 'http://localhost:3001';
    this.preferredSceneId = options.sceneId || null;
    // RTC session parameters (filled from the selected scene's `rtc` config)
    this.engine = null;
    this.appId = '';
    this.roomId = '';
    this.userId = '';
    this.token = '';
    // Scene data
    this.sceneId = '';
    this.botName = '';
    this.scenes = [];
    this.sceneConfigMap = {};
    this.rtcConfigMap = {};
    // Session state
    this.isJoined = false;
    this.audioBotEnabled = false;
    this.isMicOn = false;
    // RequestId of the most recent StartVoiceChat response (set by _post)
    this._requestId = null;
    // Conversation history
    this.msgHistory = [];
    // Event callbacks (assign to override)
    this.onAIThinking = null; // () => void
    this.onAISpeaking = null; // () => void
    this.onAIFinished = null; // () => void
    this.onAIInterrupted = null; // () => void
    this.onSubtitle = null; // ({ text, userId, definite, paragraph }) => void
    this.onFunctionCall = null; // (toolCalls) => void
    this.onUserJoined = null; // (userId, username) => void
    this.onUserLeft = null; // (userId) => void
    this.onError = null; // (error) => void
    this.onStateChange = null; // (state: { isJoined, audioBotEnabled, isMicOn }) => void
    this.onMessageHistory = null; // (msgHistory) => void
  }

  // ----------------------------------------------------------
  // Backend API calls
  // ----------------------------------------------------------

  /**
   * POST a JSON body to `${serverUrl}${path}?Action=${action}` and return the `Result` field.
   *
   * @param {string} path - Backend path, e.g. '/proxy'
   * @param {string} action - Value of the `Action` query parameter
   * @param {Object} [body] - JSON request body
   * @returns {Promise<*>} The `Result` field of the response
   * @throws {Error} When the response carries `ResponseMetadata.Error`, is not valid JSON,
   *   or has a non-2xx status. These failures are also logged and passed to `onError`.
   *   A rejection from `fetch` itself propagates unchanged.
   */
  async _post(path, action, body = {}) {
    const url = `${this.serverUrl}${path}?Action=${action}`;
    const res = await fetch(url, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(body),
    });

    let json;
    try {
      json = await res.json();
    } catch (cause) {
      // e.g. an HTML error page from a proxy: surface the HTTP status instead of a bare SyntaxError.
      throw this._reportError(
        `[${action}] failed: response is not valid JSON (HTTP ${res.status})`,
        cause
      );
    }

    const { ResponseMetadata, Result } = json || {};
    if (ResponseMetadata?.Error) {
      throw this._reportError(
        `[${ResponseMetadata.Action ?? action}] failed: ${ResponseMetadata.Error.Message}`
      );
    }
    if (!res.ok) {
      throw this._reportError(`[${action}] failed: HTTP ${res.status}`);
    }
    if (ResponseMetadata?.Action === 'StartVoiceChat' && ResponseMetadata?.RequestId) {
      this._requestId = ResponseMetadata.RequestId;
    }
    return Result;
  }

  /**
   * Build an Error, log it, and hand it to `onError`; the caller is expected to throw it.
   *
   * @param {string} message
   * @param {*} [cause] - Underlying error, attached as `error.cause`
   * @returns {Error}
   */
  _reportError(message, cause) {
    const error = new Error(message);
    if (cause !== undefined) error.cause = cause;
    console.error(message);
    this.onError?.(error);
    return error;
  }

  async _getScenes() {
    return this._post('/getScenes', 'getScenes');
  }

  async _startVoiceChat(sceneId) {
    return this._post('/proxy', 'StartVoiceChat', { SceneID: sceneId });
  }

  async _stopVoiceChat(sceneId) {
    return this._post('/proxy', 'StopVoiceChat', { SceneID: sceneId });
  }

  // ----------------------------------------------------------
  // Initialisation: fetch scene configuration
  // ----------------------------------------------------------

  /**
   * Fetch the scene list from the backend, index it by scene id, and select the active scene
   * (`options.sceneId` if given, otherwise the first scene).
   *
   * @returns {Promise<{ sceneId: string, scenes: Array }>}
   * @throws {Error} When the backend returns no usable scene, or the selected scene is unknown
   *   or lacks its `rtc` config.
   */
  async init() {
    const data = await this._getScenes();
    this.scenes = data?.scenes || data;
    // Accept either an array of scenes or a single scene object; drop entries without an id.
    const rawScenes = Array.isArray(this.scenes) ? this.scenes : [this.scenes];
    const scenesArr = rawScenes.filter((s) => s?.scene?.id != null);
    if (scenesArr.length === 0) {
      throw new Error('[AigcVoiceClient] init failed: backend returned no scenes');
    }

    // Rebuild the lookup tables so a repeated init() does not keep stale scenes.
    this.sceneConfigMap = {};
    this.rtcConfigMap = {};
    scenesArr.forEach((s) => {
      this.sceneConfigMap[s.scene.id] = s.scene;
      this.rtcConfigMap[s.scene.id] = s.rtc;
    });

    const wantedSceneId = this.preferredSceneId || scenesArr[0].scene.id;
    if (!this._applyScene(wantedSceneId)) {
      throw new Error(
        `[AigcVoiceClient] init failed: scene "${wantedSceneId}" not found or missing RTC config`
      );
    }
    console.log('[AigcVoiceClient] init done, scene:', this.sceneId, ', room:', this.roomId);
    return { sceneId: this.sceneId, scenes: scenesArr };
  }

  /**
   * Copy the RTC parameters and bot name of `sceneId` onto this client.
   *
   * @param {string} sceneId
   * @returns {boolean} false (and no state change) when the scene or its rtc config is unknown
   */
  _applyScene(sceneId) {
    const rtc = this.rtcConfigMap[sceneId];
    const scene = this.sceneConfigMap[sceneId];
    if (!rtc || !scene) return false;
    this.sceneId = sceneId;
    this.appId = rtc.AppId;
    this.roomId = rtc.RoomId;
    this.userId = rtc.UserId;
    this.token = rtc.Token;
    this.botName = scene.botName;
    return true;
  }

  /**
   * Switch to another scene (call before start() or after stop()).
   * Logs and leaves the current scene untouched when joined or when `sceneId` is unknown.
   *
   * @param {string} sceneId
   */
  switchScene(sceneId) {
    if (this.isJoined) {
      console.warn('请先 stop() 再切换场景');
      return;
    }
    if (!this._applyScene(sceneId)) {
      console.error('未找到场景:', sceneId);
    }
  }

  // ----------------------------------------------------------
  // Start the call
  // ----------------------------------------------------------

  /**
   * Create the RTC engine, join the room, enable the microphone and start the AI bot.
   * If any step after engine creation fails, the engine is released again and the error is
   * rethrown, so a failed start leaves no half-open session behind.
   *
   * @returns {Promise<void>}
   * @throws {Error} When the SDK is missing, the browser is unsupported, or a step fails
   */
  async start() {
    if (this.isJoined) {
      console.warn('已在通话中');
      return;
    }
    const VERTC = window.VERTC;
    if (!VERTC) {
      throw new Error('未检测到 @volcengine/rtc SDK, 请先引入');
    }
    // 1. Check browser support
    const isSupported = await VERTC.isSupported();
    if (!isSupported) {
      throw new Error('当前浏览器不支持 RTC');
    }
    // 2. Create the engine
    this.engine = VERTC.createEngine(this.appId);
    try {
      // 3. Try to register AI noise suppression (optional; failure is tolerated)
      try {
        if (window.RTCAIAnsExtension) {
          const ext = new window.RTCAIAnsExtension();
          await this.engine.registerExtension(ext);
          ext.enable();
        }
      } catch (e) {
        console.warn('AI 降噪不可用,不影响使用:', e.message);
      }
      // 4. Bind events
      this._bindEvents();
      // 5. Join the room
      await this.engine.joinRoom(
        this.token,
        this.roomId,
        {
          userId: this.userId,
          extraInfo: JSON.stringify({
            call_scene: 'RTC-AIGC',
            user_name: this.userId,
            user_id: this.userId,
          }),
        },
        {
          isAutoPublish: true,
          isAutoSubscribeAudio: true,
          roomProfileType: VERTC.RoomProfileType?.chat ?? 0,
        }
      );
      this.isJoined = true;
      this._emitStateChange();
      // 6. Turn on the microphone
      await this.enableMic();
      // 7. Start the AI bot
      await this._startAgent();
    } catch (err) {
      // Roll back: without this the engine created above would be leaked, because stop()
      // returns early when the join never completed.
      await this._stopCaptureSafely();
      await this._releaseEngine();
      this._emitStateChange();
      throw err;
    }
    console.log('[AigcVoiceClient] 通话已开始');
  }

  // ----------------------------------------------------------
  // End the call
  // ----------------------------------------------------------

  /**
   * Stop capture, stop the AI bot, leave the room and destroy the engine.
   * Local teardown always runs to completion; if stopping the bot failed, that error is
   * rethrown afterwards.
   *
   * @returns {Promise<void>}
   */
  async stop() {
    if (!this.isJoined && !this.audioBotEnabled) return;
    // 1. Stop capture
    await this._stopCaptureSafely();
    // 2. Stop the AI bot; remember a failure but keep tearing down
    let agentError = null;
    try {
      await this._stopAgent();
    } catch (err) {
      agentError = err;
    }
    // 3. Leave the room and destroy the engine
    await this._releaseEngine();
    this.msgHistory = [];
    this._emitStateChange();
    console.log('[AigcVoiceClient] 通话已结束');
    if (agentError) throw agentError;
  }

  /** Best-effort `stopAudioCapture`; a failure is logged, never thrown. */
  async _stopCaptureSafely() {
    try {
      await this.engine?.stopAudioCapture();
    } catch (e) {
      console.warn('[AigcVoiceClient] stopAudioCapture failed:', e);
    }
  }

  /**
   * Best-effort leave + destroy of the current engine, then reset the local session flags.
   * Each step is guarded separately so a failing `leaveRoom` cannot skip `destroyEngine`.
   */
  async _releaseEngine() {
    const engine = this.engine;
    if (engine) {
      try {
        await engine.leaveRoom();
      } catch (e) {
        console.warn('[AigcVoiceClient] leaveRoom failed:', e);
      }
      try {
        window.VERTC?.destroyEngine(engine);
      } catch (e) {
        console.warn('[AigcVoiceClient] destroyEngine failed:', e);
      }
    }
    this.engine = null;
    this.isJoined = false;
    this.isMicOn = false;
  }

  // ----------------------------------------------------------
  // Microphone control
  // ----------------------------------------------------------

  async enableMic() {
    if (!this.engine) return;
    await this.engine.startAudioCapture();
    this.engine.publishStream(1); // MediaType.AUDIO = 1
    this.isMicOn = true;
    this._emitStateChange();
  }

  async disableMic() {
    if (!this.engine) return;
    await this.engine.stopAudioCapture();
    this.engine.unpublishStream(1);
    this.isMicOn = false;
    this._emitStateChange();
  }

  async toggleMic() {
    if (this.isMicOn) {
      await this.disableMic();
    } else {
      await this.enableMic();
    }
  }

  // ----------------------------------------------------------
  // Interrupt the AI
  // ----------------------------------------------------------

  /** Send an `interrupt` control command to the bot; warns and does nothing when no bot is active. */
  interrupt() {
    if (!this.audioBotEnabled || !this.engine) {
      console.warn('Bot 未启用,无法打断');
      return;
    }
    this._sendToBot({ Command: 'interrupt', InterruptMode: 0, Message: '' }, 'ctrl');
  }

  // ----------------------------------------------------------
  // Send text to the AI (drives TTS or the LLM)
  // ----------------------------------------------------------

  /**
   * Send text for the AI to read aloud directly (TTS).
   * @param {string} text
   */
  sendTextToTTS(text) {
    this._sendCommand('ExternalTextToSpeech', text, 1);
  }

  /**
   * Send text for the AI to answer after LLM processing.
   * @param {string} text
   */
  sendTextToLLM(text) {
    this._sendCommand('ExternalTextToLLM', text, 1);
  }

  /**
   * Send a `ctrl` command frame to the bot; warns and does nothing when no bot is active.
   *
   * @param {string} command - Value of the `Command` field
   * @param {string} [message] - Value of the `Message` field
   * @param {number} [interruptMode] - Value of the `InterruptMode` field
   */
  _sendCommand(command, message = '', interruptMode = 0) {
    if (!this.audioBotEnabled || !this.engine) {
      console.warn('Bot 未启用');
      return;
    }
    this._sendToBot({ Command: command, InterruptMode: interruptMode, Message: message }, 'ctrl');
  }

  /**
   * JSON-encode `payload`, wrap it in a TLV frame tagged `type`, and send it to the bot user.
   * Callers are responsible for checking that the engine and bot are available.
   */
  _sendToBot(payload, type) {
    this.engine.sendUserBinaryMessage(this.botName, string2tlv(JSON.stringify(payload), type));
  }

  // ----------------------------------------------------------
  // AI bot management
  // ----------------------------------------------------------

  async _startAgent() {
    if (this.audioBotEnabled) {
      await this._stopAgent();
    }
    await this._startVoiceChat(this.sceneId);
    this.audioBotEnabled = true;
    this._emitStateChange();
  }

  /**
   * Ask the backend to stop the bot (only if one is marked active). The local flag is cleared
   * and a state change emitted even when the request fails; the failure is still thrown.
   */
  async _stopAgent() {
    try {
      if (this.audioBotEnabled) {
        await this._stopVoiceChat(this.sceneId);
      }
    } finally {
      this.audioBotEnabled = false;
      this._emitStateChange();
    }
  }

  // ----------------------------------------------------------
  // Event binding
  // ----------------------------------------------------------

  /** Register this client's handlers on the current engine using `window.VERTC.events`. */
  _bindEvents() {
    const engine = this.engine;
    const events = window.VERTC.events;
    engine.on(events.onError, (e) => {
      console.error('[RTC Error]', e);
      this.onError?.(e);
    });
    engine.on(events.onUserJoined, (e) => {
      // extraInfo is supplied by the remote user; malformed JSON must not throw out of the
      // event handler, so fall back to the plain userId in that case.
      let extra = {};
      try {
        const parsed = JSON.parse(e.userInfo.extraInfo || '{}');
        if (parsed && typeof parsed === 'object') extra = parsed;
      } catch (err) {
        console.warn('[AigcVoiceClient] invalid extraInfo from user', e.userInfo.userId, err);
      }
      const userId = extra.user_id || e.userInfo.userId;
      const username = extra.user_name || e.userInfo.userId;
      this.onUserJoined?.(userId, username);
    });
    engine.on(events.onUserLeave, (e) => {
      this.onUserLeft?.(e.userInfo.userId);
    });
    engine.on(events.onUserPublishStream, (e) => {
      // The AI bot or another remote user published a stream
    });
    engine.on(events.onUserUnpublishStream, (e) => {
      // The AI bot or another remote user unpublished a stream
    });
    // Core: binary messages from the AI bot (subtitles, status, function calls)
    engine.on(events.onRoomBinaryMessageReceived, (e) => {
      this._handleBinaryMessage(e.message);
    });
    engine.on(events.onLocalAudioPropertiesReport, (infos) => {
      // Local volume info, usable for UI animation
      // infos[0]?.audioPropertiesInfo?.linearVolume
    });
    engine.on(events.onRemoteAudioPropertiesReport, (infos) => {
      // Remote volume info
    });
    engine.on(events.onNetworkQuality, (up, down) => {
      // Network quality: 0=Unknown, 1=Excellent, 2=Good, 3=Poor, 4=Bad, 5=VeryBad
    });
    engine.on(events.onAutoplayFailed, (e) => {
      // Fired when the browser's autoplay policy blocks playback;
      // the user should be prompted to click the page to resume audio.
      console.warn('[AutoPlay Failed]', e.userId, '- 请点击页面任意位置以恢复音频播放');
    });
    engine.on(events.onPlayerEvent, (e) => {
      // Player events
    });
    engine.on(events.onAudioDeviceStateChanged, async (device) => {
      // Device hot-plug handling
      console.log('[Device Changed]', device.mediaDeviceInfo.kind, device.deviceState);
    });
  }

  // ----------------------------------------------------------
  // Binary message handling
  // ----------------------------------------------------------

  /**
   * Decode a TLV frame, parse its value as JSON, and dispatch on the frame type.
   * Decode/parse failures are logged and swallowed.
   */
  _handleBinaryMessage(buffer) {
    try {
      const { type, value } = tlv2String(buffer);
      const parsed = JSON.parse(value);
      switch (type) {
        case 'conv': // status brief
          this._handleBrief(parsed);
          break;
        case 'subv': // subtitle
          this._handleSubtitle(parsed);
          break;
        case 'tool': // function call
          this._handleFunctionCall(parsed);
          break;
        default:
          console.log('[Unknown message type]', type, parsed);
      }
    } catch (e) {
      console.warn('[Message parse error]', e);
    }
  }

  /** Map `parsed.Stage.Code` to the matching onAI* callback; other codes are ignored. */
  _handleBrief(parsed) {
    const code = parsed?.Stage?.Code;
    switch (code) {
      case AgentStage.THINKING:
        this.onAIThinking?.();
        break;
      case AgentStage.SPEAKING:
        this.onAISpeaking?.();
        break;
      case AgentStage.FINISHED:
        this.onAIFinished?.();
        break;
      case AgentStage.INTERRUPTED:
        this.onAIInterrupted?.();
        break;
    }
  }

  /** Record the first subtitle entry in history and forward it to `onSubtitle` (bot must be active). */
  _handleSubtitle(parsed) {
    const data = parsed?.data?.[0];
    if (!data || !this.audioBotEnabled) return;
    const { text, definite, userId, paragraph } = data;
    // Update the message history
    this._appendMessage({ text, userId, definite, paragraph });
    // Notify the consumer
    this.onSubtitle?.({ text, userId, definite, paragraph });
  }

  /** Forward `parsed.tool_calls` (or an empty array) to `onFunctionCall`. */
  _handleFunctionCall(parsed) {
    const toolCalls = parsed?.tool_calls || [];
    console.log('[Function Call]', toolCalls);
    this.onFunctionCall?.(toolCalls);
  }

  /**
   * Reply to the AI bot with the result of a function call.
   * @param {string} toolCallId - id of the tool call
   * @param {string} content - text content to return
   */
  respondFunctionCall(toolCallId, content) {
    if (!this.engine || !this.audioBotEnabled) return;
    this._sendToBot({ ToolCallID: toolCallId, Content: content }, 'func');
  }

  // ----------------------------------------------------------
  // Message history
  // ----------------------------------------------------------

  /**
   * Append a subtitle to the history, or update the last entry in place while the same speaker's
   * sentence is still unfinished. Then notify `onMessageHistory`.
   */
  _appendMessage({ text, userId, definite, paragraph }) {
    const lastMsg = this.msgHistory[this.msgHistory.length - 1];
    // A speaker change also starts a new entry; otherwise an unfinished message would keep its
    // old userId while receiving another speaker's text.
    const isNewSentence =
      !lastMsg || lastMsg.definite || lastMsg.paragraph || lastMsg.userId !== userId;
    if (isNewSentence) {
      this.msgHistory.push({
        text,
        userId,
        definite: !!definite,
        paragraph: !!paragraph,
        time: new Date().toISOString(),
      });
    } else {
      // Sentence still in progress: refresh its content
      lastMsg.text = text;
      lastMsg.definite = !!definite;
      lastMsg.paragraph = !!paragraph;
      lastMsg.time = new Date().toISOString();
    }
    this.onMessageHistory?.(this.msgHistory);
  }

  // ----------------------------------------------------------
  // Device management
  // ----------------------------------------------------------

  /**
   * List the available audio input devices.
   * @returns {Promise<Array>} Devices with a deviceId and kind 'audioinput'; [] when the SDK is absent
   */
  async getAudioDevices() {
    const VERTC = window.VERTC;
    if (!VERTC) return [];
    await VERTC.enableDevices({ audio: true, video: false });
    const devices = await VERTC.enumerateAudioCaptureDevices();
    return devices.filter((d) => d.deviceId && d.kind === 'audioinput');
  }

  /**
   * Switch the microphone device.
   * @param {string} deviceId
   * @returns {*} Whatever the engine's `setAudioCaptureDevice` returns, so callers may await it;
   *   undefined when there is no engine
   */
  switchMicrophone(deviceId) {
    if (!this.engine) return undefined;
    return this.engine.setAudioCaptureDevice(deviceId);
  }

  // ----------------------------------------------------------
  // Helpers
  // ----------------------------------------------------------

  /** Report the current `{ isJoined, audioBotEnabled, isMicOn }` snapshot to `onStateChange`. */
  _emitStateChange() {
    this.onStateChange?.({
      isJoined: this.isJoined,
      audioBotEnabled: this.audioBotEnabled,
      isMicOn: this.isMicOn,
    });
  }
}
// ============================================================
// 导出
// ============================================================
// Expose the public API both to CommonJS consumers (via `module.exports`) and to pages that
// load this file with a plain <script> tag (via properties on `window`).
if (typeof module !== 'undefined') {
  if (module.exports) {
    module.exports = { AigcVoiceClient, string2tlv, tlv2String, AgentStage };
  }
}
if (typeof window !== 'undefined') {
  Object.assign(window, {
    AigcVoiceClient,
    AigcVoiceTLV: { string2tlv, tlv2String },
  });
}