// VoiceAssistant — React component: record speech, send it through an
// ASR -> answer-generation -> TTS backend, and visualize audio levels.
import { useState, useRef, useCallback } from "react";
// Audio source feeding the visualizer: "mic" for microphone input,
// "playback" for audio-file playback.
type AudioSourceType = "mic" | "playback";
// Per-stage flags for the voice pipeline; the stages run in sequence
// (record -> transcribe -> generate -> synthesize), so at most one flag
// is expected to be true at a time.
interface ProcessState {
  recording: boolean; // microphone capture in progress
  transcribing: boolean; // ASR request in flight
  generating: boolean; // answer-generation request in flight
  synthesizing: boolean; // TTS audio being fetched/played
  error?: string; // user-facing error message from the last failed stage
}
// Props accepted by the VoiceAssistant component.
interface VoiceAssistantProps {
  greeting: string; // headline text rendered above the assistant UI
}
// Tuning constants for the audio-bar visualization.
const ANALYSER_FFT_SIZE = 128; // AnalyserNode.fftSize => 64 frequency bins
const VOLUME_SENSITIVITY = 1.5; // gain applied to normalized bin values
const SMOOTHING_FACTOR = 0.7; // AnalyserNode.smoothingTimeConstant
const BAR_COUNT = 12; // number of bars rendered in the visualizer
const VoiceAssistant = ({ greeting }: VoiceAssistantProps) => {
|
||
const [isListening, setIsListening] = useState(false);
|
||
const [processState, setProcessState] = useState<ProcessState>({
|
||
recording: false,
|
||
transcribing: false,
|
||
generating: false,
|
||
synthesizing: false,
|
||
});
|
||
const [asrText, setAsrText] = useState("");
|
||
const [answerText, setAnswerText] = useState("");
|
||
const mediaRecorder = useRef<MediaRecorder | null>(null);
|
||
const audioChunks = useRef<Blob[]>([]);
|
||
const audioElement = useRef<HTMLAudioElement>(null);
|
||
|
||
const barsRef = useRef<HTMLDivElement>(null);
|
||
|
||
const mediaStreamRef = useRef<MediaStream | null>(null);
|
||
const audioContextRef = useRef<AudioContext | null>(null);
|
||
const analyserRef = useRef<AnalyserNode | null>(null);
|
||
const animationFrameRef = useRef<number | null>(null);
|
||
|
||
const dataArrayRef = useRef<Uint8Array | null>(null);
|
||
const lastValuesRef = useRef<number[]>(new Array(BAR_COUNT).fill(10));
|
||
const [audioSourceType, setAudioSourceType] =
|
||
useState<AudioSourceType>("mic");
|
||
|
||
const updateState = (newState: Partial<ProcessState>) => {
|
||
setProcessState((prev) => ({ ...prev, ...newState }));
|
||
};
|
||
|
||
const cleanupAudio = useCallback(async () => {
|
||
mediaStreamRef.current?.getTracks().forEach((track) => track.stop());
|
||
if (audioContextRef.current?.state !== "closed") {
|
||
await audioContextRef.current?.close();
|
||
}
|
||
if (animationFrameRef.current) {
|
||
cancelAnimationFrame(animationFrameRef.current);
|
||
animationFrameRef.current = null;
|
||
}
|
||
}, []);
|
||
const initializeAudioContext = useCallback(() => {
|
||
const AudioContextClass =
|
||
window.AudioContext || (window as any).webkitAudioContext;
|
||
audioContextRef.current = new AudioContextClass();
|
||
analyserRef.current = audioContextRef.current.createAnalyser();
|
||
analyserRef.current.fftSize = ANALYSER_FFT_SIZE;
|
||
analyserRef.current.smoothingTimeConstant = SMOOTHING_FACTOR;
|
||
dataArrayRef.current = new Uint8Array(
|
||
analyserRef.current.frequencyBinCount
|
||
);
|
||
}, []);
|
||
|
||
const startRecording = async () => {
|
||
try {
|
||
const stream = await navigator.mediaDevices.getUserMedia({
|
||
audio: { sampleRate: 16000, channelCount: 1, sampleSize: 16 },
|
||
});
|
||
|
||
mediaRecorder.current = new MediaRecorder(stream);
|
||
audioChunks.current = [];
|
||
|
||
mediaRecorder.current.ondataavailable = (e) => {
|
||
audioChunks.current.push(e.data);
|
||
};
|
||
|
||
mediaRecorder.current.start(500);
|
||
updateState({ recording: true, error: undefined });
|
||
} catch (err) {
|
||
updateState({ error: "麦克风访问失败,请检查权限设置" });
|
||
}
|
||
};
|
||
|
||
// 新增切换音频源的函数
|
||
const stopRecording = async () => {
|
||
// 如果当前没有录音器,则返回
|
||
if (!mediaRecorder.current) return;
|
||
// 停止录音器
|
||
mediaRecorder.current.stop();
|
||
// 更新状态为未录音
|
||
updateState({ recording: false });
|
||
// 等待录音器停止录音
|
||
mediaRecorder.current.onstop = async () => {
|
||
try {
|
||
// 停止录音器
|
||
const audioBlob = new Blob(audioChunks.current, { type: "audio/wav" });
|
||
await processAudio(audioBlob);
|
||
} finally {
|
||
audioChunks.current = [];
|
||
}
|
||
};
|
||
};
|
||
|
||
|
||
const processAudio = async (audioBlob: Blob) => {
|
||
const formData = new FormData();
|
||
formData.append("audio", audioBlob, "recording.wav");
|
||
|
||
try {
|
||
updateState({ transcribing: true });
|
||
// 发送请求到后端
|
||
const asrResponse = await fetch("http://localhost:5000/asr", {
|
||
method: "POST",
|
||
body: formData,
|
||
});
|
||
// 如果请求失败,则抛出错误
|
||
if (!asrResponse.ok) throw new Error("语音识别失败");
|
||
// 获取后端返回的文本
|
||
const asrData = await asrResponse.json();
|
||
setAsrText(asrData.asr_text);
|
||
updateState({ transcribing: false, generating: true });
|
||
|
||
// 发送请求到后端,生成回答
|
||
const generateResponse = await fetch("http://localhost:5000/generate", {
|
||
method: "POST",
|
||
headers: {
|
||
"Content-Type": "application/json",
|
||
},
|
||
body: JSON.stringify({ asr_text: asrData.asr_text }),
|
||
});
|
||
|
||
if (!generateResponse.ok) throw new Error("生成回答失败");
|
||
|
||
const generateData = await generateResponse.json();
|
||
setAnswerText(generateData.answer_text);
|
||
updateState({ generating: false, synthesizing: true });
|
||
|
||
// 播放合成的音频,增加可视化效果
|
||
if (audioElement.current) {
|
||
startVisualization();
|
||
// 播放合成的音频
|
||
audioElement.current.src = `http://localhost:5000${generateData.audio_url}`;
|
||
// 播放音频
|
||
audioElement.current.play()
|
||
.catch((err) => {
|
||
console.error("播放失败:", err);
|
||
updateState({ error: "音频播放失败" });
|
||
});
|
||
}
|
||
} catch (err) {
|
||
updateState({ error: err instanceof Error ? err.message : "未知错误" });
|
||
} finally {
|
||
updateState({
|
||
transcribing: false,
|
||
generating: false,
|
||
synthesizing: false,
|
||
});
|
||
}
|
||
};
|
||
|
||
const getStatusText = () => {
|
||
if (processState.error) return processState.error;
|
||
if (processState.recording) return "录音中... 🎤";
|
||
if (processState.transcribing) return "语音识别中... 🔍";
|
||
if (processState.generating) return "生成回答中... 💡";
|
||
if (processState.synthesizing) return "语音合成中... 🎵";
|
||
return "点击开始对话";
|
||
};
|
||
|
||
const startVisualization = useCallback(() => {
|
||
if (!analyserRef.current || !dataArrayRef.current || !barsRef.current) {
|
||
console.warn("可视化组件未就绪");
|
||
return;
|
||
}
|
||
|
||
if (animationFrameRef.current) {
|
||
cancelAnimationFrame(animationFrameRef.current);
|
||
animationFrameRef.current = null;
|
||
}
|
||
|
||
const bufferLength = analyserRef.current.frequencyBinCount;
|
||
const updateBars = () => {
|
||
try {
|
||
analyserRef.current!.getByteFrequencyData(dataArrayRef.current!);
|
||
|
||
const bars = barsRef.current!.children;
|
||
for (let i = 0; i < bars.length; i++) {
|
||
const bar = bars[i] as HTMLElement;
|
||
const dataIndex = Math.floor((i / BAR_COUNT) * (bufferLength / 2));
|
||
const rawValue =
|
||
(dataArrayRef.current![dataIndex] / 255) * 100 * VOLUME_SENSITIVITY;
|
||
|
||
const smoothValue = Math.min(
|
||
100,
|
||
Math.max(10, rawValue * 0.6 + lastValuesRef.current[i] * 0.4)
|
||
);
|
||
lastValuesRef.current[i] = smoothValue;
|
||
|
||
bar.style.cssText = `
|
||
height: ${smoothValue}%;
|
||
transform: scaleY(${0.8 + (smoothValue / 100) * 0.6});
|
||
transition: ${i === 0 ? "none" : "height 50ms linear"};
|
||
`;
|
||
}
|
||
|
||
animationFrameRef.current = requestAnimationFrame(updateBars);
|
||
} catch (err) {
|
||
console.error("可视化更新失败:", err);
|
||
}
|
||
};
|
||
|
||
animationFrameRef.current = requestAnimationFrame(updateBars);
|
||
}, [analyserRef, dataArrayRef, barsRef]);
|
||
|
||
// 切换监听状态
|
||
const toggleListening = useCallback(async () => {
|
||
if (isListening) { // 如果正在监听
|
||
await cleanupAudio(); // 清理现有音频
|
||
} else { // 否则
|
||
try { // 尝试
|
||
await cleanupAudio(); // 清理现有音频
|
||
initializeAudioContext(); // 初始化音频上下文
|
||
|
||
if (audioSourceType === "mic") { // 如果音频源类型是麦克风
|
||
const stream = await navigator.mediaDevices.getUserMedia({ // 获取用户媒体
|
||
audio: { noiseSuppression: true, echoCancellation: true }, // 音频配置
|
||
}); // 等待获取用户媒体
|
||
mediaStreamRef.current = stream; // 设置媒体流
|
||
const source = // 创建音频源
|
||
audioContextRef.current!.createMediaStreamSource(stream); // 创建音频源
|
||
source.connect(analyserRef.current!); // 连接到分析器
|
||
} else {
|
||
const audio = new Audio("/test-audio.mp3"); // 创建音频元素
|
||
const source = // 创建音频源
|
||
audioContextRef.current!.createMediaElementSource(audio); // 创建音频源
|
||
source.connect(analyserRef.current!); // 连接到分析器
|
||
audio.play(); // 播放音频
|
||
}
|
||
|
||
analyserRef.current!.connect(audioContextRef.current!.destination); // 连接到目标
|
||
startVisualization(); // 开始可视化
|
||
} catch (err) {
|
||
console.error("初始化失败:", err);
|
||
updateState({ error: "音频初始化失败" });
|
||
}
|
||
}
|
||
setIsListening((prev) => !prev);
|
||
}, [
|
||
isListening,
|
||
audioSourceType,
|
||
cleanupAudio,
|
||
initializeAudioContext,
|
||
startVisualization,
|
||
]);
|
||
|
||
// 示例音频播放
|
||
const handlePlaySample = async () => {
|
||
try {
|
||
await cleanupAudio(); // 清理现有音频
|
||
initializeAudioContext(); // 初始化音频上下文
|
||
|
||
const audio = new Audio("/test-audio.mp3"); // 创建音频元素
|
||
const source = audioContextRef.current!.createMediaElementSource(audio); // 创建音频源
|
||
source.connect(analyserRef.current!); // 连接到分析器
|
||
analyserRef.current!.connect(audioContextRef.current!.destination); // 连接到目标
|
||
|
||
await audio.play(); // 播放音频
|
||
startVisualization(); // 开始可视化
|
||
|
||
audio.onended = () => {
|
||
setIsListening(false);
|
||
if (animationFrameRef.current) {
|
||
cancelAnimationFrame(animationFrameRef.current);
|
||
animationFrameRef.current = null;
|
||
}
|
||
};
|
||
} catch (err) {
|
||
console.error("播放示例失败:", err);
|
||
updateState({ error: "示例播放失败" });
|
||
}
|
||
};
|
||
|
||
return (
|
||
<div className="absolute top-1/2 left-1/2 -translate-x-1/2 -translate-y-1/2 text-center w-full px-4">
|
||
{/* 问候语 */}
|
||
<h1 className="text-6xl font-light mb-8 drop-shadow-glow">{greeting}</h1>
|
||
{/* 较小较细的字体显示{asrText || "等待语音输入..."}*/}
|
||
<h3 className="text-sm font-light mb-8">{asrText || "等待中..."}</h3>
|
||
{/*较小较细的字体显示{answerText || "等待生成回答..."}*/}
|
||
<h2 className="text-sm font-light mb-8">
|
||
{answerText || "AI助手待命中"}
|
||
</h2>
|
||
|
||
{/* 音频源切换 */}
|
||
<div className="mb-4 flex justify-center gap-4">
|
||
<button
|
||
onClick={() => setAudioSourceType("mic")}
|
||
className={`px-4 py-2 rounded-lg ${
|
||
audioSourceType === "mic" ? "bg-blue-500 text-white" : "bg-gray-200"
|
||
}`}
|
||
>
|
||
麦克风
|
||
</button>
|
||
<button
|
||
onClick={() => setAudioSourceType("playback")}
|
||
className={`px-4 py-2 rounded-lg ${
|
||
audioSourceType === "playback"
|
||
? "bg-blue-500 text-white"
|
||
: "bg-gray-200"
|
||
}`}
|
||
>
|
||
音频播放
|
||
</button>
|
||
</div>
|
||
{/* 示例播放按钮 */}
|
||
{audioSourceType === "playback" && (
|
||
<button
|
||
onClick={handlePlaySample}
|
||
className="mt-4 px-6 py-2 bg-green-500 text-white rounded-lg hover:bg-green-600 transition-colors"
|
||
>
|
||
播放示例音频
|
||
</button>
|
||
)}
|
||
|
||
{/* 优化后的音频波形可视化 */}
|
||
<div className="relative inline-block">
|
||
<button
|
||
onClick={() => {
|
||
toggleListening();
|
||
processState.recording ? stopRecording() : startRecording();
|
||
}}
|
||
className={[
|
||
"group relative flex h-20 items-end gap-1.5 rounded-3xl p-6",
|
||
"bg-gradient-to-b from-black/80 to-gray-900/90",
|
||
"shadow-[0_0_20px_0_rgba(34,211,238,0.2)] hover:shadow-[0_0_30px_0_rgba(109,213,237,0.3)]",
|
||
"transition-shadow duration-300 ease-out",
|
||
isListening ? "ring-2 ring-cyan-400/20" : "",
|
||
].join(" ")}
|
||
style={{
|
||
// 禁用will-change优化(经测试反而降低性能)
|
||
backdropFilter: "blur(12px)", // 直接使用CSS属性
|
||
WebkitBackdropFilter: "blur(12px)",
|
||
}}
|
||
>
|
||
{/* 优化后的柱状图容器 */}
|
||
<div ref={barsRef} className="flex h-full w-full items-end gap-1.5">
|
||
{[...Array(BAR_COUNT)].map((_, index) => (
|
||
<div
|
||
key={index}
|
||
className={[
|
||
"w-2.5 rounded-full",
|
||
"bg-gradient-to-t from-cyan-400/90 via-blue-400/90 to-purple-500/90",
|
||
"transition-transform duration-150 ease-out",
|
||
].join(" ")}
|
||
style={{
|
||
willChange: "height, transform", // 提示浏览器优化
|
||
boxShadow: "0 0 8px -2px rgba(52,211,254,0.4)",
|
||
height: "10%", // 初始高度
|
||
}}
|
||
/>
|
||
))}
|
||
</div>
|
||
</button>
|
||
</div>
|
||
|
||
{/* 底部状态信息 */}
|
||
<div className="mt-8 text-xs text-gray-500 space-y-1">
|
||
<p>支持唤醒词:"魔镜魔镜"</p>
|
||
<div className="flex items-center justify-center gap-1.5">
|
||
<div className="relative flex items-center">
|
||
{/* 呼吸圆点指示器 */}
|
||
<div
|
||
className={`w-2 h-2 rounded-full ${
|
||
isListening ? "bg-green-400 animate-breath" : "bg-gray-400"
|
||
}`}
|
||
/>
|
||
{/* 扩散波纹效果 */}
|
||
{isListening && (
|
||
<div className="absolute inset-0 rounded-full bg-green-400/20 animate-ping" />
|
||
)}
|
||
</div>
|
||
<span>{getStatusText()}</span>
|
||
</div>
|
||
|
||
{/* 音频播放 */}
|
||
<audio
|
||
ref={audioElement}
|
||
controls={process.env.NODE_ENV === "development"} // 开发环境显示 controls
|
||
onEnded={() => updateState({ synthesizing: false })}
|
||
/>
|
||
</div>
|
||
</div>
|
||
);
|
||
};
|
||
// Module's sole export.
export default VoiceAssistant;