rtc-voice-chat/simple-frontend/aigc-voice-client.js
2026-04-02 20:15:15 +08:00

738 lines
21 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* 火山引擎 RTC AIGC 语音对话 - 原生 JS 接入实现
*
* 依赖:@volcengine/rtc (通过 <script> 或 npm 引入)
*
* 用法:
* 1. 在 HTML 中引入此文件和 RTC SDK
* 2. const client = new AigcVoiceClient({ serverUrl: 'http://localhost:3001' });
* 3. await client.init(); // 获取场景配置
* 4. await client.start(); // 开始语音对话
* 5. await client.stop(); // 结束语音对话
* 6. client.interrupt(); // 打断 AI
*/
// ============================================================
// TLV 编解码工具
// ============================================================
/**
 * Encode a string as a TLV frame.
 *
 * Layout: | type (4 bytes) | length (4 bytes, big-endian) | value (UTF-8) |
 *
 * @param {string} str - Payload text; it is UTF-8 encoded into the value field.
 * @param {string} type - Frame tag. Only the first four characters are used and
 *   each is stored as a single byte; shorter tags are zero-padded.
 * @returns {ArrayBuffer} A buffer of exactly 8 + (UTF-8 byte length of str) bytes.
 */
function string2tlv(str, type) {
  const HEADER_SIZE = 8;
  const payload = new TextEncoder().encode(str);
  const frame = new Uint8Array(HEADER_SIZE + payload.length);

  // Tag: at most four characters, one byte each.
  const tagLength = Math.min(type.length, 4);
  for (let pos = 0; pos < tagLength; pos += 1) {
    frame[pos] = type.charCodeAt(pos);
  }

  // Payload size as a big-endian unsigned 32-bit integer at offset 4.
  new DataView(frame.buffer).setUint32(4, payload.length, false);

  frame.set(payload, HEADER_SIZE);
  return frame.buffer;
}
/**
 * Decode a TLV frame into its tag and text payload.
 *
 * Layout: | type (4 bytes) | length (4 bytes, big-endian) | value (UTF-8) |
 *
 * @param {ArrayBuffer|ArrayBufferView} tlvBuffer - The raw frame. Typed-array
 *   views (e.g. a Uint8Array slice) are honoured including their byte offset.
 * @returns {{ type: string, value: string }} The tag with zero padding bytes
 *   removed, and the UTF-8 decoded payload.
 * @throws {RangeError} If the frame is shorter than the 8-byte header or the
 *   declared length exceeds the bytes actually present.
 */
function tlv2String(tlvBuffer) {
  const HEADER_SIZE = 8;

  // Normalise to a Uint8Array window over the same memory. Passing a view
  // straight to `new Uint8Array(view, offset, length)` would copy the whole
  // view and ignore offset/length, so views need explicit handling.
  const bytes = ArrayBuffer.isView(tlvBuffer)
    ? new Uint8Array(tlvBuffer.buffer, tlvBuffer.byteOffset, tlvBuffer.byteLength)
    : new Uint8Array(tlvBuffer);

  if (bytes.byteLength < HEADER_SIZE) {
    throw new RangeError(`TLV frame too short: ${bytes.byteLength} bytes, need at least ${HEADER_SIZE}`);
  }

  // Read the length as an unsigned 32-bit value; bit shifting would yield a
  // negative number when the top bit is set.
  const header = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
  const length = header.getUint32(4, false);
  const available = bytes.byteLength - HEADER_SIZE;
  if (length > available) {
    throw new RangeError(`TLV frame truncated: declared ${length} bytes, only ${available} available`);
  }

  let type = '';
  for (let i = 0; i < 4; i++) {
    if (bytes[i] !== 0) type += String.fromCharCode(bytes[i]);
  }

  const value = new TextDecoder().decode(bytes.subarray(HEADER_SIZE, HEADER_SIZE + length));
  return { type, value };
}
// ============================================================
// AI Agent 状态枚举
// ============================================================
/**
 * Stage codes compared against `Stage.Code` in the bot's 'conv' status
 * messages. Frozen so that consumers of the exported enum cannot alter the
 * values the client itself dispatches on.
 */
const AgentStage = Object.freeze({
  UNKNOWN: 0,
  LISTENING: 1,
  THINKING: 2,
  SPEAKING: 3,
  INTERRUPTED: 4,
  FINISHED: 5,
});
// ============================================================
// 主类
// ============================================================
class AigcVoiceClient {
  /**
   * Browser client that joins an RTC room, starts the AI voice bot through the
   * backend, and relays subtitles / status / function calls to callbacks.
   *
   * @param {Object} [options]
   * @param {string} [options.serverUrl] - Backend base URL, e.g. 'http://localhost:8080' (the default).
   * @param {string} [options.sceneId] - Preferred scene ID; when omitted, init() selects the first scene.
   * @param {string} [options.authToken] - JWT token (obtained from java-mock /api/auth/login),
   *   sent as a Bearer token on backend requests when present.
   */
  constructor(options = {}) {
    this.serverUrl = options.serverUrl || 'http://localhost:8080';
    this.preferredSceneId = options.sceneId || null;
    this.authToken = options.authToken || null;

    // RTC connection parameters, filled in from the selected scene.
    this.engine = null;
    this.appId = '';
    this.roomId = '';
    this.userId = '';
    this.token = '';

    // Scene data returned by the backend, indexed by scene ID.
    this.sceneId = '';
    this.botName = '';
    this.scenes = [];
    this.sceneConfigMap = {};
    this.rtcConfigMap = {};

    // Call state.
    this.isJoined = false;
    this.audioBotEnabled = false;
    this.isMicOn = false;

    // RequestId of the most recent successful StartVoiceChat response.
    this._requestId = null;

    // Conversation transcript for the current call.
    this.msgHistory = [];
    // History messages cached by loadHistory(); start() POSTs them to the backend.
    this._historyMessages = null;
    // ID of the conversation being continued; stop() appends to it instead of creating a new record.
    this._continueFromId = null;

    // Event callbacks (assign to override).
    this.onAIThinking = null; // () => void
    this.onAISpeaking = null; // () => void
    this.onAIFinished = null; // () => void
    this.onAIInterrupted = null; // () => void
    this.onSubtitle = null; // ({ text, userId, definite, paragraph }) => void
    this.onFunctionCall = null; // (toolCalls) => void
    this.onUserJoined = null; // (userId, username) => void
    this.onUserLeft = null; // (userId) => void
    this.onError = null; // (error) => void
    this.onStateChange = null; // (state: { isJoined, audioBotEnabled, isMicOn }) => void
    this.onMessageHistory = null; // (msgHistory) => void
  }

  // ----------------------------------------------------------
  // Backend API calls
  // ----------------------------------------------------------

  /**
   * Build JSON request headers. The Authorization header is only included when
   * a token is configured, so anonymous requests never send "Bearer null".
   * @returns {Object<string, string>}
   */
  _jsonHeaders() {
    const headers = { 'Content-Type': 'application/json' };
    if (this.authToken) headers['Authorization'] = `Bearer ${this.authToken}`;
    return headers;
  }

  /**
   * Log a failure, notify onError, and throw.
   * @param {string} message
   * @param {*} [cause] - Underlying error, attached as `error.cause`.
   * @throws {Error} Always.
   */
  _fail(message, cause) {
    const error = new Error(message);
    if (cause !== undefined) error.cause = cause;
    console.error(message);
    this.onError?.(error);
    throw error;
  }

  /**
   * POST a JSON body to `${serverUrl}${path}?Action=${action}` and unwrap the
   * `{ ResponseMetadata, Result }` envelope.
   *
   * @param {string} path
   * @param {string} action
   * @param {Object} [body]
   * @returns {Promise<*>} The `Result` field of the response.
   * @throws {Error} On network failure, a non-JSON body, an HTTP error status,
   *   or when the envelope carries `ResponseMetadata.Error`. onError is
   *   notified before the error is thrown.
   */
  async _post(path, action, body = {}) {
    const url = `${this.serverUrl}${path}?Action=${action}`;

    let res;
    try {
      res = await fetch(url, {
        method: 'POST',
        headers: this._jsonHeaders(),
        body: JSON.stringify(body),
      });
    } catch (e) {
      this._fail(`[${action}] failed: ${e?.message ?? e}`, e);
    }

    let json = null;
    try {
      json = await res.json();
    } catch (e) {
      this._fail(`[${action}] failed: HTTP ${res.status}, response is not valid JSON`, e);
    }

    const { ResponseMetadata, Result } = json || {};
    if (ResponseMetadata?.Error) {
      this._fail(`[${ResponseMetadata.Action ?? action}] failed: ${ResponseMetadata.Error.Message}`);
    }
    if (res.ok === false) {
      this._fail(`[${action}] failed: HTTP ${res.status}`);
    }

    if (ResponseMetadata?.Action === 'StartVoiceChat' && ResponseMetadata?.RequestId) {
      this._requestId = ResponseMetadata.RequestId;
    }
    return Result;
  }

  async _getScenes() {
    return this._post('/api/ai/getScenes', 'getScenes');
  }

  async _startVoiceChat(sceneId) {
    return this._post('/api/ai/proxy', 'StartVoiceChat', { SceneID: sceneId });
  }

  async _stopVoiceChat(sceneId) {
    return this._post('/api/ai/proxy', 'StopVoiceChat', { SceneID: sceneId });
  }

  // ----------------------------------------------------------
  // Initialisation: fetch scene configuration
  // ----------------------------------------------------------

  /**
   * Copy the RTC parameters and bot name of a known scene onto the client.
   * @param {string} sceneId
   * @returns {boolean} false when the scene (or its rtc config) is unknown.
   */
  _applyScene(sceneId) {
    const rtc = this.rtcConfigMap[sceneId];
    const scene = this.sceneConfigMap[sceneId];
    if (!rtc || !scene) return false;
    this.sceneId = sceneId;
    this.appId = rtc.AppId;
    this.roomId = rtc.RoomId;
    this.userId = rtc.UserId;
    this.token = rtc.Token;
    this.botName = scene.botName;
    return true;
  }

  /**
   * Fetch the scene list from the backend and select a scene: the preferred
   * one if configured, otherwise the first scene returned.
   *
   * @returns {Promise<{ sceneId: string, scenes: Array }>}
   * @throws {Error} If the request fails, no scene is returned, or the
   *   selected scene is not in the returned list.
   */
  async init() {
    const data = await this._getScenes();
    this.scenes = data?.scenes || data || [];
    // The backend may return either an array of scenes or a single scene object.
    const scenesArr = Array.isArray(this.scenes) ? this.scenes : [this.scenes];

    // Rebuild the lookup tables from scratch so a repeated init() does not
    // keep scenes the backend no longer reports.
    const sceneConfigMap = {};
    const rtcConfigMap = {};
    let firstSceneId = null;
    for (const entry of scenesArr) {
      const id = entry?.scene?.id;
      if (id == null) continue;
      if (firstSceneId === null) firstSceneId = id;
      sceneConfigMap[id] = entry.scene;
      rtcConfigMap[id] = entry.rtc;
    }
    this.sceneConfigMap = sceneConfigMap;
    this.rtcConfigMap = rtcConfigMap;

    const selectedId = this.preferredSceneId || firstSceneId;
    if (selectedId == null) {
      throw new Error('[AigcVoiceClient] init failed: backend returned no scenes');
    }
    if (!this._applyScene(selectedId)) {
      throw new Error(`[AigcVoiceClient] init failed: scene not found: ${selectedId}`);
    }

    console.log('[AigcVoiceClient] init done, scene:', this.sceneId, ', room:', this.roomId);
    return { sceneId: this.sceneId, scenes: scenesArr };
  }

  /**
   * Load a previous conversation as context (call after init() and before start()).
   * Does nothing when `messages` is not a non-empty array.
   *
   * @param {Array} messages - The messages array from /api/ai/conversations/:id.
   * @param {string} [conversationId] - Original conversation ID; when given,
   *   stop() appends to that record instead of creating a new one.
   */
  loadHistory(messages, conversationId) {
    if (!Array.isArray(messages) || messages.length === 0) return;
    this._historyMessages = messages.map((m) => ({ role: m.role, content: m.content }));
    this._continueFromId = conversationId || null;
    // Show the old messages in the transcript; the _historical flag keeps them
    // out of what _saveConversation() persists.
    this.msgHistory = messages.map((m) => ({
      role: m.role,
      content: m.content,
      definite: true,
      paragraph: true,
      time: m.createdAt,
      _historical: true,
    }));
  }

  /**
   * Switch to another scene (call before start() or after stop()).
   * Logs and returns without changes when a call is active or the scene is unknown.
   * @param {string} sceneId
   */
  switchScene(sceneId) {
    if (this.isJoined) {
      console.warn('请先 stop() 再切换场景');
      return;
    }
    if (!this._applyScene(sceneId)) {
      console.error('未找到场景:', sceneId);
    }
  }

  // ----------------------------------------------------------
  // Start a call
  // ----------------------------------------------------------

  /**
   * Create the RTC engine, join the room, enable the microphone, upload any
   * loaded history, and start the AI bot.
   *
   * If any step after engine creation fails, the engine is torn down again
   * before the error is rethrown, so a failed start() can simply be retried.
   *
   * @returns {Promise<void>}
   * @throws {Error} If the SDK is missing, the browser is unsupported, or any
   *   start-up step fails.
   */
  async start() {
    if (this.isJoined) {
      console.warn('已在通话中');
      return;
    }
    const VERTC = window.VERTC;
    if (!VERTC) {
      throw new Error('未检测到 @volcengine/rtc SDK, 请先引入');
    }

    // 1. Check browser support.
    const isSupported = await VERTC.isSupported();
    if (!isSupported) {
      throw new Error('当前浏览器不支持 RTC');
    }

    // 2. Create the engine.
    this.engine = VERTC.createEngine(this.appId);

    try {
      // 3. Try to register AI noise suppression (optional; failure is tolerated).
      try {
        if (window.RTCAIAnsExtension) {
          const ext = new window.RTCAIAnsExtension();
          await this.engine.registerExtension(ext);
          ext.enable();
        }
      } catch (e) {
        console.warn('AI 降噪不可用,不影响使用:', e.message);
      }

      // 4. Bind events.
      this._bindEvents();

      // 5. Join the room.
      await this.engine.joinRoom(
        this.token,
        this.roomId,
        {
          userId: this.userId,
          extraInfo: JSON.stringify({
            call_scene: 'RTC-AIGC',
            user_name: this.userId,
            user_id: this.userId,
          }),
        },
        {
          isAutoPublish: true,
          isAutoSubscribeAudio: true,
          roomProfileType: VERTC.RoomProfileType?.chat ?? 0,
        }
      );
      this.isJoined = true;
      this._emitStateChange();

      // 6. Enable the microphone.
      await this.enableMic();

      // 7. Upload history context, if any, before the bot starts.
      await this._uploadHistory();

      // 8. Start the AI bot.
      await this._startAgent();
    } catch (err) {
      // Roll back so a failed start neither leaks the engine nor leaves the
      // client half-joined. Loaded history is kept for a retry.
      await this._stopCaptureQuietly();
      await this._releaseEngine();
      this._emitStateChange();
      throw err;
    }
    console.log('[AigcVoiceClient] 通话已开始');
  }

  /**
   * POST the messages cached by loadHistory() to the backend for this room.
   * Best effort: failures are logged and do not abort start().
   */
  async _uploadHistory() {
    if (!this._historyMessages || this._historyMessages.length === 0) return;
    try {
      const res = await fetch(`${this.serverUrl}/api/ai/session/history`, {
        method: 'POST',
        headers: this._jsonHeaders(),
        body: JSON.stringify({
          room_id: this.roomId,
          messages: this._historyMessages,
        }),
      });
      if (res.ok === false) throw new Error(`HTTP ${res.status}`);
    } catch (e) {
      console.warn('[AigcVoiceClient] 上传历史失败:', e);
    }
  }

  // ----------------------------------------------------------
  // End a call
  // ----------------------------------------------------------

  /**
   * Stop capture, stop the AI bot, save the transcript, leave the room and
   * destroy the engine. Every step is best effort: a failure in one step is
   * logged and the remaining cleanup still runs, so the client always ends
   * up in the idle state.
   *
   * @returns {Promise<void>}
   */
  async stop() {
    if (!this.isJoined && !this.audioBotEnabled) return;

    // 1. Stop audio capture.
    await this._stopCaptureQuietly();

    // 2. Stop the AI bot. A backend failure must not prevent leaving the room.
    try {
      await this._stopAgent();
    } catch (e) {
      console.warn('[AigcVoiceClient] StopVoiceChat failed, continuing cleanup:', e);
      this.audioBotEnabled = false;
    }

    // 3. Save the transcript.
    if (this.msgHistory.length > 0) {
      await this._saveConversation();
    }

    // 4. Leave the room and destroy the engine.
    await this._releaseEngine();

    this.msgHistory = [];
    this._historyMessages = null;
    this._continueFromId = null;
    this._emitStateChange();
    console.log('[AigcVoiceClient] 通话已结束');
  }

  /** Stop audio capture, ignoring errors (capture may never have started). */
  async _stopCaptureQuietly() {
    try {
      await this.engine?.stopAudioCapture();
    } catch (e) {
      /* best effort */
    }
  }

  /**
   * Leave the room, destroy the engine, and reset the connection flags.
   * Leaving and destroying are attempted independently so that a failing
   * leaveRoom() does not skip destroyEngine(). Does not emit a state change.
   */
  async _releaseEngine() {
    const engine = this.engine;
    if (engine) {
      try {
        await engine.leaveRoom();
      } catch (e) {
        console.warn('[AigcVoiceClient] leaveRoom failed:', e);
      }
      try {
        window.VERTC?.destroyEngine(engine);
      } catch (e) {
        console.warn('[AigcVoiceClient] destroyEngine failed:', e);
      }
    }
    this.engine = null;
    this.isJoined = false;
    this.isMicOn = false;
  }

  /**
   * Persist the messages produced during this call. Skipped without an auth
   * token. Historical messages and empty messages are not saved. Appends to
   * the continued conversation when one was loaded, otherwise creates a new
   * record. Failures (including HTTP error statuses) are logged, not thrown.
   */
  async _saveConversation() {
    if (!this.authToken) return;
    try {
      const newMessages = this.msgHistory
        .filter((m) => !m._historical && m.content)
        .map((m) => ({ role: m.role, content: m.content, time: m.time }));
      if (newMessages.length === 0) return;

      const base = `${this.serverUrl}/api/ai/conversations`;
      const isContinuation = Boolean(this._continueFromId);
      const url = isContinuation
        ? `${base}/${encodeURIComponent(this._continueFromId)}/append`
        : base;
      const payload = isContinuation
        ? { messages: newMessages }
        : { sceneId: this.sceneId, roomId: this.roomId, messages: newMessages };

      const res = await fetch(url, {
        method: 'POST',
        headers: this._jsonHeaders(),
        body: JSON.stringify(payload),
      });
      if (res.ok === false) throw new Error(`HTTP ${res.status}`);
      console.log('[AigcVoiceClient] 对话历史已保存');
    } catch (e) {
      console.warn('[AigcVoiceClient] 保存对话历史失败:', e);
    }
  }

  // ----------------------------------------------------------
  // Microphone control
  // ----------------------------------------------------------

  /** Start audio capture and publish the audio stream. No-op without an engine. */
  async enableMic() {
    if (!this.engine) return;
    await this.engine.startAudioCapture();
    this.engine.publishStream(1); // MediaType.AUDIO = 1
    this.isMicOn = true;
    this._emitStateChange();
  }

  /** Stop audio capture and unpublish the audio stream. No-op without an engine. */
  async disableMic() {
    if (!this.engine) return;
    await this.engine.stopAudioCapture();
    this.engine.unpublishStream(1);
    this.isMicOn = false;
    this._emitStateChange();
  }

  /** Flip the microphone between enabled and disabled. */
  async toggleMic() {
    if (this.isMicOn) {
      await this.disableMic();
    } else {
      await this.enableMic();
    }
  }

  // ----------------------------------------------------------
  // Commands to the AI bot
  // ----------------------------------------------------------

  /**
   * Send a JSON payload to the bot as a TLV-framed binary message.
   * Callers must have checked that the engine exists.
   * @param {string} type - TLV tag, e.g. 'ctrl' or 'func'.
   * @param {Object} payload
   */
  _sendToBot(type, payload) {
    this.engine.sendUserBinaryMessage(this.botName, string2tlv(JSON.stringify(payload), type));
  }

  /** Interrupt the AI. Logs a warning and does nothing when the bot is not running. */
  interrupt() {
    if (!this.audioBotEnabled || !this.engine) {
      console.warn('Bot 未启用,无法打断');
      return;
    }
    this._sendToBot('ctrl', { Command: 'interrupt', InterruptMode: 0, Message: '' });
  }

  /**
   * Send text for the AI to read aloud directly (TTS).
   * @param {string} text
   */
  sendTextToTTS(text) {
    this._sendCommand('ExternalTextToSpeech', text, 1);
  }

  /**
   * Send text for the AI to answer after LLM processing.
   * @param {string} text
   */
  sendTextToLLM(text) {
    this._sendCommand('ExternalTextToLLM', text, 1);
  }

  /**
   * Send a 'ctrl' command to the bot. Logs a warning and does nothing when the
   * bot is not running.
   * @param {string} command
   * @param {string} [message]
   * @param {number} [interruptMode]
   */
  _sendCommand(command, message = '', interruptMode = 0) {
    if (!this.audioBotEnabled || !this.engine) {
      console.warn('Bot 未启用');
      return;
    }
    this._sendToBot('ctrl', { Command: command, InterruptMode: interruptMode, Message: message });
  }

  // ----------------------------------------------------------
  // AI bot lifecycle
  // ----------------------------------------------------------

  /** Start the bot for the current scene, stopping a running one first. */
  async _startAgent() {
    if (this.audioBotEnabled) {
      await this._stopAgent();
    }
    await this._startVoiceChat(this.sceneId);
    this.audioBotEnabled = true;
    this._emitStateChange();
  }

  /** Stop the bot if it is running, then mark it disabled and emit a state change. */
  async _stopAgent() {
    if (this.audioBotEnabled) {
      await this._stopVoiceChat(this.sceneId);
    }
    this.audioBotEnabled = false;
    this._emitStateChange();
  }

  // ----------------------------------------------------------
  // Event binding
  // ----------------------------------------------------------

  /** Subscribe to engine events. Several handlers are placeholders for integrators. */
  _bindEvents() {
    const engine = this.engine;
    const events = window.VERTC.events;

    engine.on(events.onError, (e) => {
      console.error('[RTC Error]', e);
      this.onError?.(e);
    });

    engine.on(events.onUserJoined, (e) => {
      // extraInfo comes from the remote side; malformed JSON must not throw
      // out of the event handler.
      let extra = {};
      try {
        extra = JSON.parse(e.userInfo.extraInfo || '{}') || {};
      } catch (err) {
        console.warn('[AigcVoiceClient] invalid extraInfo from user', e.userInfo.userId, err);
      }
      const userId = extra.user_id || e.userInfo.userId;
      const username = extra.user_name || e.userInfo.userId;
      this.onUserJoined?.(userId, username);
    });

    engine.on(events.onUserLeave, (e) => {
      this.onUserLeft?.(e.userInfo.userId);
    });

    engine.on(events.onUserPublishStream, () => {
      // The AI bot or another remote user published a stream.
    });

    engine.on(events.onUserUnpublishStream, () => {
      // The AI bot or another remote user unpublished a stream.
    });

    // Core: binary messages from the AI bot (subtitles, status, function calls).
    engine.on(events.onRoomBinaryMessageReceived, (e) => {
      this._handleBinaryMessage(e.message);
    });

    engine.on(events.onLocalAudioPropertiesReport, () => {
      // Local volume info, usable for UI animation:
      // infos[0]?.audioPropertiesInfo?.linearVolume
    });

    engine.on(events.onRemoteAudioPropertiesReport, () => {
      // Remote volume info.
    });

    engine.on(events.onNetworkQuality, () => {
      // Network quality: 0=Unknown, 1=Excellent, 2=Good, 3=Poor, 4=Bad, 5=VeryBad
    });

    engine.on(events.onAutoplayFailed, (e) => {
      // Fired when the browser's autoplay policy blocks playback; the user
      // should be prompted to click the page to resume audio.
      console.warn('[AutoPlay Failed]', e.userId, '- 请点击页面任意位置以恢复音频播放');
    });

    engine.on(events.onPlayerEvent, () => {
      // Player events.
    });

    engine.on(events.onAudioDeviceStateChanged, (device) => {
      // Device hot-plug handling.
      console.log('[Device Changed]', device.mediaDeviceInfo.kind, device.deviceState);
    });
  }

  // ----------------------------------------------------------
  // Binary message handling
  // ----------------------------------------------------------

  /**
   * Decode a TLV frame from the bot and dispatch on its tag. Decode and JSON
   * errors are logged, never thrown.
   * @param {ArrayBuffer} buffer
   */
  _handleBinaryMessage(buffer) {
    try {
      const { type, value } = tlv2String(buffer);
      const parsed = JSON.parse(value);
      switch (type) {
        case 'conv': // status brief
          this._handleBrief(parsed);
          break;
        case 'subv': // subtitle
          this._handleSubtitle(parsed);
          break;
        case 'tool': // function call
          this._handleFunctionCall(parsed);
          break;
        default:
          console.log('[Unknown message type]', type, parsed);
      }
    } catch (e) {
      console.warn('[Message parse error]', e);
    }
  }

  /** Map the stage code of a status brief to the matching callback. */
  _handleBrief(parsed) {
    const code = parsed?.Stage?.Code;
    switch (code) {
      case AgentStage.THINKING:
        this.onAIThinking?.();
        break;
      case AgentStage.SPEAKING:
        this.onAISpeaking?.();
        break;
      case AgentStage.FINISHED:
        this.onAIFinished?.();
        break;
      case AgentStage.INTERRUPTED:
        this.onAIInterrupted?.();
        break;
    }
  }

  /**
   * Record the first subtitle entry in the transcript and forward it to
   * onSubtitle. Ignored while the bot is not enabled or when the text is empty.
   */
  _handleSubtitle(parsed) {
    const data = parsed?.data?.[0];
    if (!data || !this.audioBotEnabled) return;
    const { text, definite, userId, paragraph } = data;
    if (!text) return; // skip empty text frames
    this._appendMessage({ text, userId, definite, paragraph });
    this.onSubtitle?.({ text, userId, definite, paragraph });
  }

  /** Forward the tool calls of a function-call message to onFunctionCall. */
  _handleFunctionCall(parsed) {
    const toolCalls = parsed?.tool_calls || [];
    console.log('[Function Call]', toolCalls);
    if (this.onFunctionCall) {
      this.onFunctionCall(toolCalls);
    }
  }

  /**
   * Send a function-call result back to the AI bot. No-op when the bot is not running.
   * @param {string} toolCallId - ID of the tool call being answered.
   * @param {string} content - Text content to return.
   */
  respondFunctionCall(toolCallId, content) {
    if (!this.engine || !this.audioBotEnabled) return;
    this._sendToBot('func', { ToolCallID: toolCallId, Content: content });
  }

  // ----------------------------------------------------------
  // Transcript management
  // ----------------------------------------------------------

  /**
   * Merge a subtitle frame into msgHistory and notify onMessageHistory.
   *
   * A frame starts a new message when there is no previous message, the
   * previous one is finished (definite or paragraph), or it belongs to the
   * other speaker. Otherwise it replaces the text of the unfinished message.
   */
  _appendMessage({ text, userId, definite, paragraph }) {
    const role = userId === this.userId ? 'user' : 'assistant';
    // Filter out the system string the RTC platform uses to trigger the welcome message.
    if (text === '欢迎语') return;

    const lastMsg = this.msgHistory[this.msgHistory.length - 1];
    // The role check keeps an unfinished message from one speaker from being
    // overwritten by text from the other.
    const isNewSentence =
      !lastMsg || lastMsg.definite || lastMsg.paragraph || lastMsg.role !== role;

    if (isNewSentence) {
      this.msgHistory.push({
        role,
        content: text,
        definite: !!definite,
        paragraph: !!paragraph,
        time: new Date().toISOString(),
      });
    } else {
      // Sentence still in progress: update in place.
      lastMsg.content = text;
      lastMsg.definite = !!definite;
      lastMsg.paragraph = !!paragraph;
      lastMsg.time = new Date().toISOString();
    }
    this.onMessageHistory?.(this.msgHistory);
  }

  // ----------------------------------------------------------
  // Device management
  // ----------------------------------------------------------

  /**
   * List the available audio input devices.
   * @returns {Promise<Array>} Empty when the SDK is not loaded.
   */
  async getAudioDevices() {
    const VERTC = window.VERTC;
    if (!VERTC) return [];
    await VERTC.enableDevices({ audio: true, video: false });
    const devices = await VERTC.enumerateAudioCaptureDevices();
    return devices.filter((d) => d.deviceId && d.kind === 'audioinput');
  }

  /**
   * Switch the microphone device. No-op without an engine.
   * @param {string} deviceId
   */
  switchMicrophone(deviceId) {
    if (this.engine) {
      this.engine.setAudioCaptureDevice(deviceId);
    }
  }

  // ----------------------------------------------------------
  // Helpers
  // ----------------------------------------------------------

  /** Notify onStateChange with the current { isJoined, audioBotEnabled, isMicOn }. */
  _emitStateChange() {
    this.onStateChange?.({
      isJoined: this.isJoined,
      audioBotEnabled: this.audioBotEnabled,
      isMicOn: this.isMicOn,
    });
  }
}
// ============================================================
// 导出
// ============================================================
// Expose the public API both to CommonJS consumers (module.exports) and to
// pages that load this file through a plain <script> tag (globals on window).
if (typeof module !== 'undefined' && module.exports) {
  module.exports = { AigcVoiceClient, string2tlv, tlv2String, AgentStage };
}
if (typeof window !== 'undefined') {
  Object.assign(window, {
    AigcVoiceClient,
    AigcVoiceTLV: { string2tlv, tlv2String },
  });
}