// VoiceAssistant component — records speech, runs it through an ASR → LLM → TTS
// backend pipeline, and renders a realtime audio-bar visualizer.
import { useState, useRef, useCallback } from "react";
|
|||
|
|
|||
|
// 新增音频源类型,playback表示音频播放,mic表示麦克风
|
|||
|
type AudioSourceType = "mic" | "playback";
|
|||
|
|
|||
|
interface ProcessState {
|
|||
|
recording: boolean;
|
|||
|
transcribing: boolean;
|
|||
|
generating: boolean;
|
|||
|
synthesizing: boolean;
|
|||
|
error?: string;
|
|||
|
}
|
|||
|
|
|||
|
interface VoiceAssistantProps {
|
|||
|
greeting: string;
|
|||
|
}
|
|||
|
|
|||
|
const ANALYSER_FFT_SIZE = 128;
|
|||
|
const VOLUME_SENSITIVITY = 1.5;
|
|||
|
const SMOOTHING_FACTOR = 0.7;
|
|||
|
const BAR_COUNT = 12;
|
|||
|
|
|||
|
const VoiceAssistant = ({ greeting }: VoiceAssistantProps) => {
|
|||
|
const [isListening, setIsListening] = useState(false);
|
|||
|
const [processState, setProcessState] = useState<ProcessState>({
|
|||
|
recording: false,
|
|||
|
transcribing: false,
|
|||
|
generating: false,
|
|||
|
synthesizing: false,
|
|||
|
});
|
|||
|
const [asrText, setAsrText] = useState("");
|
|||
|
const [answerText, setAnswerText] = useState("");
|
|||
|
const mediaRecorder = useRef<MediaRecorder | null>(null);
|
|||
|
const audioChunks = useRef<Blob[]>([]);
|
|||
|
const audioElement = useRef<HTMLAudioElement>(null);
|
|||
|
|
|||
|
const barsRef = useRef<HTMLDivElement>(null);
|
|||
|
|
|||
|
const mediaStreamRef = useRef<MediaStream | null>(null);
|
|||
|
const audioContextRef = useRef<AudioContext | null>(null);
|
|||
|
const analyserRef = useRef<AnalyserNode | null>(null);
|
|||
|
const animationFrameRef = useRef<number | null>(null);
|
|||
|
|
|||
|
const dataArrayRef = useRef<Uint8Array | null>(null);
|
|||
|
const lastValuesRef = useRef<number[]>(new Array(BAR_COUNT).fill(10));
|
|||
|
const [audioSourceType, setAudioSourceType] =
|
|||
|
useState<AudioSourceType>("mic");
|
|||
|
|
|||
|
const updateState = (newState: Partial<ProcessState>) => {
|
|||
|
setProcessState((prev) => ({ ...prev, ...newState }));
|
|||
|
};
|
|||
|
|
|||
|
const cleanupAudio = useCallback(async () => {
|
|||
|
mediaStreamRef.current?.getTracks().forEach((track) => track.stop());
|
|||
|
if (audioContextRef.current?.state !== "closed") {
|
|||
|
await audioContextRef.current?.close();
|
|||
|
}
|
|||
|
if (animationFrameRef.current) {
|
|||
|
cancelAnimationFrame(animationFrameRef.current);
|
|||
|
animationFrameRef.current = null;
|
|||
|
}
|
|||
|
}, []);
|
|||
|
const initializeAudioContext = useCallback(() => {
|
|||
|
const AudioContextClass =
|
|||
|
window.AudioContext || (window as any).webkitAudioContext;
|
|||
|
audioContextRef.current = new AudioContextClass();
|
|||
|
analyserRef.current = audioContextRef.current.createAnalyser();
|
|||
|
analyserRef.current.fftSize = ANALYSER_FFT_SIZE;
|
|||
|
analyserRef.current.smoothingTimeConstant = SMOOTHING_FACTOR;
|
|||
|
dataArrayRef.current = new Uint8Array(
|
|||
|
analyserRef.current.frequencyBinCount
|
|||
|
);
|
|||
|
}, []);
|
|||
|
|
|||
|
const startRecording = async () => {
|
|||
|
try {
|
|||
|
const stream = await navigator.mediaDevices.getUserMedia({
|
|||
|
audio: { sampleRate: 16000, channelCount: 1, sampleSize: 16 },
|
|||
|
});
|
|||
|
|
|||
|
mediaRecorder.current = new MediaRecorder(stream);
|
|||
|
audioChunks.current = [];
|
|||
|
|
|||
|
mediaRecorder.current.ondataavailable = (e) => {
|
|||
|
audioChunks.current.push(e.data);
|
|||
|
};
|
|||
|
|
|||
|
mediaRecorder.current.start(500);
|
|||
|
updateState({ recording: true, error: undefined });
|
|||
|
} catch (err) {
|
|||
|
updateState({ error: "麦克风访问失败,请检查权限设置" });
|
|||
|
}
|
|||
|
};
|
|||
|
|
|||
|
// 新增切换音频源的函数
|
|||
|
const stopRecording = async () => {
|
|||
|
// 如果当前没有录音器,则返回
|
|||
|
if (!mediaRecorder.current) return;
|
|||
|
// 停止录音器
|
|||
|
mediaRecorder.current.stop();
|
|||
|
// 更新状态为未录音
|
|||
|
updateState({ recording: false });
|
|||
|
// 等待录音器停止录音
|
|||
|
mediaRecorder.current.onstop = async () => {
|
|||
|
try {
|
|||
|
// 停止录音器
|
|||
|
const audioBlob = new Blob(audioChunks.current, { type: "audio/wav" });
|
|||
|
await processAudio(audioBlob);
|
|||
|
} finally {
|
|||
|
audioChunks.current = [];
|
|||
|
}
|
|||
|
};
|
|||
|
};
|
|||
|
|
|||
|
|
|||
|
const processAudio = async (audioBlob: Blob) => {
|
|||
|
const formData = new FormData();
|
|||
|
formData.append("audio", audioBlob, "recording.wav");
|
|||
|
|
|||
|
try {
|
|||
|
updateState({ transcribing: true });
|
|||
|
// 发送请求到后端
|
|||
|
const asrResponse = await fetch("http://localhost:5000/asr", {
|
|||
|
method: "POST",
|
|||
|
body: formData,
|
|||
|
});
|
|||
|
// 如果请求失败,则抛出错误
|
|||
|
if (!asrResponse.ok) throw new Error("语音识别失败");
|
|||
|
// 获取后端返回的文本
|
|||
|
const asrData = await asrResponse.json();
|
|||
|
setAsrText(asrData.asr_text);
|
|||
|
updateState({ transcribing: false, generating: true });
|
|||
|
|
|||
|
// 发送请求到后端,生成回答
|
|||
|
const generateResponse = await fetch("http://localhost:5000/generate", {
|
|||
|
method: "POST",
|
|||
|
headers: {
|
|||
|
"Content-Type": "application/json",
|
|||
|
},
|
|||
|
body: JSON.stringify({ asr_text: asrData.asr_text }),
|
|||
|
});
|
|||
|
|
|||
|
if (!generateResponse.ok) throw new Error("生成回答失败");
|
|||
|
|
|||
|
const generateData = await generateResponse.json();
|
|||
|
setAnswerText(generateData.answer_text);
|
|||
|
updateState({ generating: false, synthesizing: true });
|
|||
|
|
|||
|
// 播放合成的音频,增加可视化效果
|
|||
|
if (audioElement.current) {
|
|||
|
startVisualization();
|
|||
|
// 播放合成的音频
|
|||
|
audioElement.current.src = `http://localhost:5000${generateData.audio_url}`;
|
|||
|
// 播放音频
|
|||
|
audioElement.current.play()
|
|||
|
.catch((err) => {
|
|||
|
console.error("播放失败:", err);
|
|||
|
updateState({ error: "音频播放失败" });
|
|||
|
});
|
|||
|
}
|
|||
|
} catch (err) {
|
|||
|
updateState({ error: err instanceof Error ? err.message : "未知错误" });
|
|||
|
} finally {
|
|||
|
updateState({
|
|||
|
transcribing: false,
|
|||
|
generating: false,
|
|||
|
synthesizing: false,
|
|||
|
});
|
|||
|
}
|
|||
|
};
|
|||
|
|
|||
|
const getStatusText = () => {
|
|||
|
if (processState.error) return processState.error;
|
|||
|
if (processState.recording) return "录音中... 🎤";
|
|||
|
if (processState.transcribing) return "语音识别中... 🔍";
|
|||
|
if (processState.generating) return "生成回答中... 💡";
|
|||
|
if (processState.synthesizing) return "语音合成中... 🎵";
|
|||
|
return "点击开始对话";
|
|||
|
};
|
|||
|
|
|||
|
const startVisualization = useCallback(() => {
|
|||
|
if (!analyserRef.current || !dataArrayRef.current || !barsRef.current) {
|
|||
|
console.warn("可视化组件未就绪");
|
|||
|
return;
|
|||
|
}
|
|||
|
|
|||
|
if (animationFrameRef.current) {
|
|||
|
cancelAnimationFrame(animationFrameRef.current);
|
|||
|
animationFrameRef.current = null;
|
|||
|
}
|
|||
|
|
|||
|
const bufferLength = analyserRef.current.frequencyBinCount;
|
|||
|
const updateBars = () => {
|
|||
|
try {
|
|||
|
analyserRef.current!.getByteFrequencyData(dataArrayRef.current!);
|
|||
|
|
|||
|
const bars = barsRef.current!.children;
|
|||
|
for (let i = 0; i < bars.length; i++) {
|
|||
|
const bar = bars[i] as HTMLElement;
|
|||
|
const dataIndex = Math.floor((i / BAR_COUNT) * (bufferLength / 2));
|
|||
|
const rawValue =
|
|||
|
(dataArrayRef.current![dataIndex] / 255) * 100 * VOLUME_SENSITIVITY;
|
|||
|
|
|||
|
const smoothValue = Math.min(
|
|||
|
100,
|
|||
|
Math.max(10, rawValue * 0.6 + lastValuesRef.current[i] * 0.4)
|
|||
|
);
|
|||
|
lastValuesRef.current[i] = smoothValue;
|
|||
|
|
|||
|
bar.style.cssText = `
|
|||
|
height: ${smoothValue}%;
|
|||
|
transform: scaleY(${0.8 + (smoothValue / 100) * 0.6});
|
|||
|
transition: ${i === 0 ? "none" : "height 50ms linear"};
|
|||
|
`;
|
|||
|
}
|
|||
|
|
|||
|
animationFrameRef.current = requestAnimationFrame(updateBars);
|
|||
|
} catch (err) {
|
|||
|
console.error("可视化更新失败:", err);
|
|||
|
}
|
|||
|
};
|
|||
|
|
|||
|
animationFrameRef.current = requestAnimationFrame(updateBars);
|
|||
|
}, [analyserRef, dataArrayRef, barsRef]);
|
|||
|
|
|||
|
// 切换监听状态
|
|||
|
const toggleListening = useCallback(async () => {
|
|||
|
if (isListening) { // 如果正在监听
|
|||
|
await cleanupAudio(); // 清理现有音频
|
|||
|
} else { // 否则
|
|||
|
try { // 尝试
|
|||
|
await cleanupAudio(); // 清理现有音频
|
|||
|
initializeAudioContext(); // 初始化音频上下文
|
|||
|
|
|||
|
if (audioSourceType === "mic") { // 如果音频源类型是麦克风
|
|||
|
const stream = await navigator.mediaDevices.getUserMedia({ // 获取用户媒体
|
|||
|
audio: { noiseSuppression: true, echoCancellation: true }, // 音频配置
|
|||
|
}); // 等待获取用户媒体
|
|||
|
mediaStreamRef.current = stream; // 设置媒体流
|
|||
|
const source = // 创建音频源
|
|||
|
audioContextRef.current!.createMediaStreamSource(stream); // 创建音频源
|
|||
|
source.connect(analyserRef.current!); // 连接到分析器
|
|||
|
} else {
|
|||
|
const audio = new Audio("/test-audio.mp3"); // 创建音频元素
|
|||
|
const source = // 创建音频源
|
|||
|
audioContextRef.current!.createMediaElementSource(audio); // 创建音频源
|
|||
|
source.connect(analyserRef.current!); // 连接到分析器
|
|||
|
audio.play(); // 播放音频
|
|||
|
}
|
|||
|
|
|||
|
analyserRef.current!.connect(audioContextRef.current!.destination); // 连接到目标
|
|||
|
startVisualization(); // 开始可视化
|
|||
|
} catch (err) {
|
|||
|
console.error("初始化失败:", err);
|
|||
|
updateState({ error: "音频初始化失败" });
|
|||
|
}
|
|||
|
}
|
|||
|
setIsListening((prev) => !prev);
|
|||
|
}, [
|
|||
|
isListening,
|
|||
|
audioSourceType,
|
|||
|
cleanupAudio,
|
|||
|
initializeAudioContext,
|
|||
|
startVisualization,
|
|||
|
]);
|
|||
|
|
|||
|
// 示例音频播放
|
|||
|
const handlePlaySample = async () => {
|
|||
|
try {
|
|||
|
await cleanupAudio(); // 清理现有音频
|
|||
|
initializeAudioContext(); // 初始化音频上下文
|
|||
|
|
|||
|
const audio = new Audio("/test-audio.mp3"); // 创建音频元素
|
|||
|
const source = audioContextRef.current!.createMediaElementSource(audio); // 创建音频源
|
|||
|
source.connect(analyserRef.current!); // 连接到分析器
|
|||
|
analyserRef.current!.connect(audioContextRef.current!.destination); // 连接到目标
|
|||
|
|
|||
|
await audio.play(); // 播放音频
|
|||
|
startVisualization(); // 开始可视化
|
|||
|
|
|||
|
audio.onended = () => {
|
|||
|
setIsListening(false);
|
|||
|
if (animationFrameRef.current) {
|
|||
|
cancelAnimationFrame(animationFrameRef.current);
|
|||
|
animationFrameRef.current = null;
|
|||
|
}
|
|||
|
};
|
|||
|
} catch (err) {
|
|||
|
console.error("播放示例失败:", err);
|
|||
|
updateState({ error: "示例播放失败" });
|
|||
|
}
|
|||
|
};
|
|||
|
|
|||
|
return (
|
|||
|
<div className="absolute top-1/2 left-1/2 -translate-x-1/2 -translate-y-1/2 text-center w-full px-4">
|
|||
|
{/* 问候语 */}
|
|||
|
<h1 className="text-6xl font-light mb-8 drop-shadow-glow">{greeting}</h1>
|
|||
|
{/* 较小较细的字体显示{asrText || "等待语音输入..."}*/}
|
|||
|
<h3 className="text-sm font-light mb-8">{asrText || "等待中..."}</h3>
|
|||
|
{/*较小较细的字体显示{answerText || "等待生成回答..."}*/}
|
|||
|
<h2 className="text-sm font-light mb-8">
|
|||
|
{answerText || "AI助手待命中"}
|
|||
|
</h2>
|
|||
|
|
|||
|
{/* 音频源切换 */}
|
|||
|
<div className="mb-4 flex justify-center gap-4">
|
|||
|
<button
|
|||
|
onClick={() => setAudioSourceType("mic")}
|
|||
|
className={`px-4 py-2 rounded-lg ${
|
|||
|
audioSourceType === "mic" ? "bg-blue-500 text-white" : "bg-gray-200"
|
|||
|
}`}
|
|||
|
>
|
|||
|
麦克风
|
|||
|
</button>
|
|||
|
<button
|
|||
|
onClick={() => setAudioSourceType("playback")}
|
|||
|
className={`px-4 py-2 rounded-lg ${
|
|||
|
audioSourceType === "playback"
|
|||
|
? "bg-blue-500 text-white"
|
|||
|
: "bg-gray-200"
|
|||
|
}`}
|
|||
|
>
|
|||
|
音频播放
|
|||
|
</button>
|
|||
|
</div>
|
|||
|
{/* 示例播放按钮 */}
|
|||
|
{audioSourceType === "playback" && (
|
|||
|
<button
|
|||
|
onClick={handlePlaySample}
|
|||
|
className="mt-4 px-6 py-2 bg-green-500 text-white rounded-lg hover:bg-green-600 transition-colors"
|
|||
|
>
|
|||
|
播放示例音频
|
|||
|
</button>
|
|||
|
)}
|
|||
|
|
|||
|
{/* 优化后的音频波形可视化 */}
|
|||
|
<div className="relative inline-block">
|
|||
|
<button
|
|||
|
onClick={() => {
|
|||
|
toggleListening();
|
|||
|
processState.recording ? stopRecording() : startRecording();
|
|||
|
}}
|
|||
|
className={[
|
|||
|
"group relative flex h-20 items-end gap-1.5 rounded-3xl p-6",
|
|||
|
"bg-gradient-to-b from-black/80 to-gray-900/90",
|
|||
|
"shadow-[0_0_20px_0_rgba(34,211,238,0.2)] hover:shadow-[0_0_30px_0_rgba(109,213,237,0.3)]",
|
|||
|
"transition-shadow duration-300 ease-out",
|
|||
|
isListening ? "ring-2 ring-cyan-400/20" : "",
|
|||
|
].join(" ")}
|
|||
|
style={{
|
|||
|
// 禁用will-change优化(经测试反而降低性能)
|
|||
|
backdropFilter: "blur(12px)", // 直接使用CSS属性
|
|||
|
WebkitBackdropFilter: "blur(12px)",
|
|||
|
}}
|
|||
|
>
|
|||
|
{/* 优化后的柱状图容器 */}
|
|||
|
<div ref={barsRef} className="flex h-full w-full items-end gap-1.5">
|
|||
|
{[...Array(BAR_COUNT)].map((_, index) => (
|
|||
|
<div
|
|||
|
key={index}
|
|||
|
className={[
|
|||
|
"w-2.5 rounded-full",
|
|||
|
"bg-gradient-to-t from-cyan-400/90 via-blue-400/90 to-purple-500/90",
|
|||
|
"transition-transform duration-150 ease-out",
|
|||
|
].join(" ")}
|
|||
|
style={{
|
|||
|
willChange: "height, transform", // 提示浏览器优化
|
|||
|
boxShadow: "0 0 8px -2px rgba(52,211,254,0.4)",
|
|||
|
height: "10%", // 初始高度
|
|||
|
}}
|
|||
|
/>
|
|||
|
))}
|
|||
|
</div>
|
|||
|
</button>
|
|||
|
</div>
|
|||
|
|
|||
|
{/* 底部状态信息 */}
|
|||
|
<div className="mt-8 text-xs text-gray-500 space-y-1">
|
|||
|
<p>支持唤醒词:"魔镜魔镜"</p>
|
|||
|
<div className="flex items-center justify-center gap-1.5">
|
|||
|
<div className="relative flex items-center">
|
|||
|
{/* 呼吸圆点指示器 */}
|
|||
|
<div
|
|||
|
className={`w-2 h-2 rounded-full ${
|
|||
|
isListening ? "bg-green-400 animate-breath" : "bg-gray-400"
|
|||
|
}`}
|
|||
|
/>
|
|||
|
{/* 扩散波纹效果 */}
|
|||
|
{isListening && (
|
|||
|
<div className="absolute inset-0 rounded-full bg-green-400/20 animate-ping" />
|
|||
|
)}
|
|||
|
</div>
|
|||
|
<span>{getStatusText()}</span>
|
|||
|
</div>
|
|||
|
|
|||
|
{/* 音频播放 */}
|
|||
|
<audio
|
|||
|
ref={audioElement}
|
|||
|
controls={process.env.NODE_ENV === "development"} // 开发环境显示 controls
|
|||
|
onEnded={() => updateState({ synthesizing: false })}
|
|||
|
/>
|
|||
|
</div>
|
|||
|
</div>
|
|||
|
);
|
|||
|
};
|
|||
|
|
|||
|
export default VoiceAssistant;
|