/*
 * mirror3/src/components/VoiceAssistant copy 2.tsx
 * Snapshot: 2025-03-07 02:22:25 +08:00 — 408 lines, 14 KiB, TypeScript.
 *
 * NOTE(review): the lines above were Git web-UI chrome ("Raw Blame History",
 * ambiguous-Unicode warning) captured together with the file contents; they
 * are preserved here as a comment so the file remains compilable.
 */

import { useState, useRef, useCallback } from "react";
// Audio source selector: "playback" = sample-file playback, "mic" = live microphone.
type AudioSourceType = "mic" | "playback";
// Flags for each stage of the voice pipeline; at most one stage is active at a time.
interface ProcessState {
recording: boolean; // microphone capture in progress
transcribing: boolean; // waiting on the ASR backend
generating: boolean; // waiting on the answer-generation backend
synthesizing: boolean; // TTS audio is being fetched/played
error?: string; // user-facing error message, shown in the status line
}
interface VoiceAssistantProps {
greeting: string; // headline text rendered at the top of the component
}
// Analyser configuration for the bar visualizer.
const ANALYSER_FFT_SIZE = 128; // frequencyBinCount will be 64
const VOLUME_SENSITIVITY = 1.5; // gain applied to raw frequency magnitudes
const SMOOTHING_FACTOR = 0.7; // analyser smoothingTimeConstant
const BAR_COUNT = 12; // number of visualizer bars rendered
// Voice assistant UI: records speech, sends it through ASR -> answer
// generation -> TTS (localhost backend) and renders an audio-bar visualizer.
const VoiceAssistant = ({ greeting }: VoiceAssistantProps) => {
// Whether the visualizer/listening session is active.
const [isListening, setIsListening] = useState(false);
const [processState, setProcessState] = useState<ProcessState>({
recording: false,
transcribing: false,
generating: false,
synthesizing: false,
});
// Latest ASR transcript and generated answer, shown in the header area.
const [asrText, setAsrText] = useState("");
const [answerText, setAnswerText] = useState("");
// Recorder + buffered chunks for the current utterance.
const mediaRecorder = useRef<MediaRecorder | null>(null);
const audioChunks = useRef<Blob[]>([]);
// Hidden <audio> element used to play the synthesized reply.
const audioElement = useRef<HTMLAudioElement>(null);
// Container whose child divs are the visualizer bars.
const barsRef = useRef<HTMLDivElement>(null);
// Web Audio graph pieces, kept in refs so cleanup can reach them.
const mediaStreamRef = useRef<MediaStream | null>(null);
const audioContextRef = useRef<AudioContext | null>(null);
const analyserRef = useRef<AnalyserNode | null>(null);
const animationFrameRef = useRef<number | null>(null);
const dataArrayRef = useRef<Uint8Array | null>(null);
// Previous bar heights, used for exponential smoothing between frames.
const lastValuesRef = useRef<number[]>(new Array(BAR_COUNT).fill(10));
const [audioSourceType, setAudioSourceType] =
useState<AudioSourceType>("mic");
// Shallow-merges a partial state into processState.
const updateState = (newState: Partial<ProcessState>) => {
setProcessState((prev) => ({ ...prev, ...newState }));
};
/**
 * Tears down the current audio session: stops microphone tracks, closes the
 * AudioContext and cancels the visualization animation frame.
 * Safe to call repeatedly (idempotent).
 */
const cleanupAudio = useCallback(async () => {
  // Stop live input tracks so the browser releases the microphone, and clear
  // the ref so a later cleanup does not re-stop stale tracks.
  mediaStreamRef.current?.getTracks().forEach((track) => track.stop());
  mediaStreamRef.current = null;
  // Only close an existing, not-yet-closed context; the original's
  // `?.state !== "closed"` check also "passed" when the ref was null.
  // Calling close() on an already-closed context throws.
  const ctx = audioContextRef.current;
  if (ctx && ctx.state !== "closed") {
    await ctx.close();
  }
  if (animationFrameRef.current) {
    cancelAnimationFrame(animationFrameRef.current);
    animationFrameRef.current = null;
  }
}, []);
const initializeAudioContext = useCallback(() => {
const AudioContextClass =
window.AudioContext || (window as any).webkitAudioContext;
audioContextRef.current = new AudioContextClass();
analyserRef.current = audioContextRef.current.createAnalyser();
analyserRef.current.fftSize = ANALYSER_FFT_SIZE;
analyserRef.current.smoothingTimeConstant = SMOOTHING_FACTOR;
dataArrayRef.current = new Uint8Array(
analyserRef.current.frequencyBinCount
);
}, []);
/**
 * Requests microphone access and starts a MediaRecorder that buffers audio
 * into `audioChunks` in 500 ms slices. Sets `recording` on success and a
 * user-facing error message on failure.
 */
const startRecording = async () => {
  try {
    const stream = await navigator.mediaDevices.getUserMedia({
      // 16 kHz mono / 16-bit — presumably what the ASR backend expects.
      audio: { sampleRate: 16000, channelCount: 1, sampleSize: 16 },
    });
    mediaRecorder.current = new MediaRecorder(stream);
    audioChunks.current = [];
    mediaRecorder.current.ondataavailable = (e) => {
      audioChunks.current.push(e.data);
    };
    // Emit a chunk every 500 ms instead of one blob at stop time.
    mediaRecorder.current.start(500);
    updateState({ recording: true, error: undefined });
  } catch (err) {
    // Log the underlying cause (permission denied, no device, …); the
    // original swallowed it, making mic failures undiagnosable.
    console.error("麦克风访问失败:", err);
    updateState({ error: "麦克风访问失败,请检查权限设置" });
  }
};
/**
 * Stops the active MediaRecorder, hands the buffered chunks to processAudio
 * as a single blob, and releases the microphone tracks.
 */
const stopRecording = async () => {
  const recorder = mediaRecorder.current;
  // Nothing to do if no recorder was ever started.
  if (!recorder) return;
  // Register the handler BEFORE calling stop(): the original assigned
  // `onstop` after stop(), relying on the event firing asynchronously.
  recorder.onstop = async () => {
    try {
      // NOTE(review): MediaRecorder typically emits webm/ogg, not wav; the
      // "audio/wav" label is preserved since the backend may depend on it.
      const audioBlob = new Blob(audioChunks.current, { type: "audio/wav" });
      await processAudio(audioBlob);
    } finally {
      audioChunks.current = [];
      // Release the microphone — the original never stopped these tracks,
      // leaving the browser's recording indicator on.
      recorder.stream.getTracks().forEach((track) => track.stop());
    }
  };
  recorder.stop();
  updateState({ recording: false });
};
/**
 * Runs the full pipeline for one utterance:
 *   1. POST the audio to /asr for transcription,
 *   2. POST the transcript to /generate for an answer + TTS audio URL,
 *   3. play the synthesized reply through the hidden <audio> element.
 * Stage flags are advanced as each step completes; `synthesizing` stays true
 * until playback ends (cleared by the element's onEnded handler) or fails.
 * @param audioBlob recorded audio from the MediaRecorder
 */
const processAudio = async (audioBlob: Blob) => {
  const formData = new FormData();
  formData.append("audio", audioBlob, "recording.wav");
  try {
    updateState({ transcribing: true });
    const asrResponse = await fetch("http://localhost:5000/asr", {
      method: "POST",
      body: formData,
    });
    if (!asrResponse.ok) throw new Error("语音识别失败");
    const asrData = await asrResponse.json();
    setAsrText(asrData.asr_text);
    updateState({ transcribing: false, generating: true });
    const generateResponse = await fetch("http://localhost:5000/generate", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({ asr_text: asrData.asr_text }),
    });
    if (!generateResponse.ok) throw new Error("生成回答失败");
    const generateData = await generateResponse.json();
    setAnswerText(generateData.answer_text);
    updateState({ generating: false, synthesizing: true });
    if (audioElement.current) {
      startVisualization();
      audioElement.current.src = `http://localhost:5000${generateData.audio_url}`;
      audioElement.current.play().catch((err) => {
        console.error("播放失败:", err);
        // Playback failed, so onEnded will never fire — clear the flag here.
        updateState({ error: "音频播放失败", synthesizing: false });
      });
    } else {
      // No audio element mounted; nothing will play, so clear the flag.
      updateState({ synthesizing: false });
    }
  } catch (err) {
    // An exception aborts the pipeline: surface the message and reset every
    // stage flag. The original's `finally` reset `synthesizing` immediately
    // after play() STARTED, hiding the status while audio was still playing.
    updateState({
      error: err instanceof Error ? err.message : "未知错误",
      transcribing: false,
      generating: false,
      synthesizing: false,
    });
  }
};
/** Maps the current pipeline state to the status line shown at the bottom. */
const getStatusText = () => {
  const { error, recording, transcribing, generating, synthesizing } =
    processState;
  // Error wins over any in-progress stage.
  if (error) return error;
  if (recording) return "录音中... 🎤";
  if (transcribing) return "语音识别中... 🔍";
  if (generating) return "生成回答中... 💡";
  if (synthesizing) return "语音合成中... 🎵";
  return "点击开始对话";
};
// Drives the bar visualizer: each animation frame, samples analyser frequency
// data and writes smoothed heights onto the BAR_COUNT divs inside barsRef.
const startVisualization = useCallback(() => {
if (!analyserRef.current || !dataArrayRef.current || !barsRef.current) {
console.warn("可视化组件未就绪");
return;
}
// Cancel any previous loop so two rAF chains never run concurrently.
if (animationFrameRef.current) {
cancelAnimationFrame(animationFrameRef.current);
animationFrameRef.current = null;
}
const bufferLength = analyserRef.current.frequencyBinCount;
const updateBars = () => {
try {
analyserRef.current!.getByteFrequencyData(dataArrayRef.current!);
const bars = barsRef.current!.children;
for (let i = 0; i < bars.length; i++) {
const bar = bars[i] as HTMLElement;
// Map bar index onto the lower half of the spectrum (higher bins
// carry little speech energy).
const dataIndex = Math.floor((i / BAR_COUNT) * (bufferLength / 2));
// Normalize 0-255 byte to a 0-100% height, then apply sensitivity gain.
const rawValue =
(dataArrayRef.current![dataIndex] / 255) * 100 * VOLUME_SENSITIVITY;
// Blend 60% new / 40% previous value, clamped to [10, 100]%.
const smoothValue = Math.min(
100,
Math.max(10, rawValue * 0.6 + lastValuesRef.current[i] * 0.4)
);
lastValuesRef.current[i] = smoothValue;
bar.style.cssText = `
height: ${smoothValue}%;
transform: scaleY(${0.8 + (smoothValue / 100) * 0.6});
transition: ${i === 0 ? "none" : "height 50ms linear"};
`;
}
// Schedule the next frame only on success; an exception above ends the loop.
animationFrameRef.current = requestAnimationFrame(updateBars);
} catch (err) {
console.error("可视化更新失败:", err);
}
};
animationFrameRef.current = requestAnimationFrame(updateBars);
// NOTE(review): refs are stable across renders; these deps are harmless but
// unnecessary.
}, [analyserRef, dataArrayRef, barsRef]);
/**
 * Starts or stops a listening session. Starting tears down any previous
 * audio graph, builds a new AudioContext/analyser chain for the selected
 * source (mic or sample playback) and launches the visualizer; stopping
 * just cleans up. On initialization failure the state is left unchanged.
 */
const toggleListening = useCallback(async () => {
  if (isListening) {
    await cleanupAudio();
  } else {
    try {
      await cleanupAudio();
      initializeAudioContext();
      if (audioSourceType === "mic") {
        const stream = await navigator.mediaDevices.getUserMedia({
          audio: { noiseSuppression: true, echoCancellation: true },
        });
        mediaStreamRef.current = stream;
        const source =
          audioContextRef.current!.createMediaStreamSource(stream);
        source.connect(analyserRef.current!);
      } else {
        const audio = new Audio("/test-audio.mp3");
        const source =
          audioContextRef.current!.createMediaElementSource(audio);
        source.connect(analyserRef.current!);
        // Handle the play() promise — the original left rejections unhandled.
        audio.play().catch((err) => console.error("播放失败:", err));
      }
      // Route through destination so playback-source audio remains audible.
      // NOTE(review): for the mic source this also feeds the mic to the
      // speakers (possible feedback); kept as in the original.
      analyserRef.current!.connect(audioContextRef.current!.destination);
      startVisualization();
    } catch (err) {
      console.error("初始化失败:", err);
      updateState({ error: "音频初始化失败" });
      // BUGFIX: the original fell through and flipped isListening to true
      // even when initialization failed. Bail out without toggling.
      return;
    }
  }
  setIsListening((prev) => !prev);
}, [
  isListening,
  audioSourceType,
  cleanupAudio,
  initializeAudioContext,
  startVisualization,
]);
// 示例音频播放
const handlePlaySample = async () => {
try {
await cleanupAudio(); // 清理现有音频
initializeAudioContext(); // 初始化音频上下文
const audio = new Audio("/test-audio.mp3"); // 创建音频元素
const source = audioContextRef.current!.createMediaElementSource(audio); // 创建音频源
source.connect(analyserRef.current!); // 连接到分析器
analyserRef.current!.connect(audioContextRef.current!.destination); // 连接到目标
await audio.play(); // 播放音频
startVisualization(); // 开始可视化
audio.onended = () => {
setIsListening(false);
if (animationFrameRef.current) {
cancelAnimationFrame(animationFrameRef.current);
animationFrameRef.current = null;
}
};
} catch (err) {
console.error("播放示例失败:", err);
updateState({ error: "示例播放失败" });
}
};
// Render: greeting + transcript/answer text, source-toggle buttons, the
// visualizer button (which also starts/stops recording), and a status row.
return (
<div className="absolute top-1/2 left-1/2 -translate-x-1/2 -translate-y-1/2 text-center w-full px-4">
{/* Greeting headline */}
<h1 className="text-6xl font-light mb-8 drop-shadow-glow">{greeting}</h1>
{/* ASR transcript in a small, light font */}
<h3 className="text-sm font-light mb-8">{asrText || "等待中..."}</h3>
{/* Generated answer in a small, light font */}
<h2 className="text-sm font-light mb-8">
{answerText || "AI助手待命中"}
</h2>
{/* Audio-source toggle (mic vs. sample playback).
    NOTE(review): both buttons below have no child text/icon, so they render
    as empty pills — presumably labels were lost; TODO confirm and restore. */}
<div className="mb-4 flex justify-center gap-4">
<button
onClick={() => setAudioSourceType("mic")}
className={`px-4 py-2 rounded-lg ${
audioSourceType === "mic" ? "bg-blue-500 text-white" : "bg-gray-200"
}`}
>
</button>
<button
onClick={() => setAudioSourceType("playback")}
className={`px-4 py-2 rounded-lg ${
audioSourceType === "playback"
? "bg-blue-500 text-white"
: "bg-gray-200"
}`}
>
</button>
</div>
{/* Sample-playback button, only rendered in playback mode (also unlabeled) */}
{audioSourceType === "playback" && (
<button
onClick={handlePlaySample}
className="mt-4 px-6 py-2 bg-green-500 text-white rounded-lg hover:bg-green-600 transition-colors"
>
</button>
)}
{/* Audio waveform visualizer; the whole panel doubles as start/stop button */}
<div className="relative inline-block">
<button
onClick={() => {
toggleListening();
processState.recording ? stopRecording() : startRecording();
}}
className={[
"group relative flex h-20 items-end gap-1.5 rounded-3xl p-6",
"bg-gradient-to-b from-black/80 to-gray-900/90",
"shadow-[0_0_20px_0_rgba(34,211,238,0.2)] hover:shadow-[0_0_30px_0_rgba(109,213,237,0.3)]",
"transition-shadow duration-300 ease-out",
isListening ? "ring-2 ring-cyan-400/20" : "",
].join(" ")}
style={{
// will-change deliberately not set here — testing showed it hurt performance
backdropFilter: "blur(12px)", // plain CSS property
WebkitBackdropFilter: "blur(12px)",
}}
>
{/* Bar container; startVisualization writes bar heights directly */}
<div ref={barsRef} className="flex h-full w-full items-end gap-1.5">
{[...Array(BAR_COUNT)].map((_, index) => (
<div
key={index}
className={[
"w-2.5 rounded-full",
"bg-gradient-to-t from-cyan-400/90 via-blue-400/90 to-purple-500/90",
"transition-transform duration-150 ease-out",
].join(" ")}
style={{
willChange: "height, transform", // hint the browser to optimize
boxShadow: "0 0 8px -2px rgba(52,211,254,0.4)",
height: "10%", // initial height
}}
/>
))}
</div>
</button>
</div>
{/* Bottom status row */}
<div className="mt-8 text-xs text-gray-500 space-y-1">
<p>"魔镜魔镜"</p>
<div className="flex items-center justify-center gap-1.5">
<div className="relative flex items-center">
{/* Breathing-dot indicator */}
<div
className={`w-2 h-2 rounded-full ${
isListening ? "bg-green-400 animate-breath" : "bg-gray-400"
}`}
/>
{/* Expanding ripple while listening */}
{isListening && (
<div className="absolute inset-0 rounded-full bg-green-400/20 animate-ping" />
)}
</div>
<span>{getStatusText()}</span>
</div>
{/* Hidden audio element used for TTS playback */}
<audio
ref={audioElement}
controls={process.env.NODE_ENV === "development"} // show controls in dev only
onEnded={() => updateState({ synthesizing: false })}
/>
</div>
</div>
);
};
export default VoiceAssistant;