// mirror3/src/components/VoiceAssistant copy 2.tsx
import { useState, useRef, useCallback } from "react";
// Audio source type: "playback" for pre-recorded audio, "mic" for the microphone
type AudioSourceType = "mic" | "playback";
interface ProcessState {
  recording: boolean;
  transcribing: boolean;
  generating: boolean;
  synthesizing: boolean;
  error?: string;
}
interface VoiceAssistantProps {
  greeting: string;
}
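// Visualization tuning: the FFT size trades frequency resolution for latency,
// while sensitivity and smoothing control how strongly the bars react to volume.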
const ANALYSER_FFT_SIZE = 128;
const VOLUME_SENSITIVITY = 1.5;
const SMOOTHING_FACTOR = 0.7;
const BAR_COUNT = 12;
const VoiceAssistant = ({ greeting }: VoiceAssistantProps) => {
  const [isListening, setIsListening] = useState(false);
  const [processState, setProcessState] = useState<ProcessState>({
    recording: false,
    transcribing: false,
    generating: false,
    synthesizing: false,
  });
  const [asrText, setAsrText] = useState("");
  const [answerText, setAnswerText] = useState("");
  const mediaRecorder = useRef<MediaRecorder | null>(null);
  const audioChunks = useRef<Blob[]>([]);
  const audioElement = useRef<HTMLAudioElement>(null);
  const barsRef = useRef<HTMLDivElement>(null);
  const mediaStreamRef = useRef<MediaStream | null>(null);
  const audioContextRef = useRef<AudioContext | null>(null);
  const analyserRef = useRef<AnalyserNode | null>(null);
  const animationFrameRef = useRef<number | null>(null);
  const dataArrayRef = useRef<Uint8Array | null>(null);
  const lastValuesRef = useRef<number[]>(new Array(BAR_COUNT).fill(10));
  const [audioSourceType, setAudioSourceType] =
    useState<AudioSourceType>("mic");
  const updateState = (newState: Partial<ProcessState>) => {
    setProcessState((prev) => ({ ...prev, ...newState }));
  };
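  // Release everything the audio pipeline may hold: stop capture tracks,
  // close the AudioContext, and cancel any pending animation frame.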
  const cleanupAudio = useCallback(async () => {
    mediaStreamRef.current?.getTracks().forEach((track) => track.stop());
    if (audioContextRef.current?.state !== "closed") {
      await audioContextRef.current?.close();
    }
    if (animationFrameRef.current) {
      cancelAnimationFrame(animationFrameRef.current);
      animationFrameRef.current = null;
    }
  }, []);
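  // Create an AudioContext and an AnalyserNode sized for the bar
  // visualization (webkitAudioContext covers older Safari).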
  const initializeAudioContext = useCallback(() => {
    const AudioContextClass =
      window.AudioContext || (window as any).webkitAudioContext;
    audioContextRef.current = new AudioContextClass();
    analyserRef.current = audioContextRef.current.createAnalyser();
    analyserRef.current.fftSize = ANALYSER_FFT_SIZE;
    analyserRef.current.smoothingTimeConstant = SMOOTHING_FACTOR;
    dataArrayRef.current = new Uint8Array(
      analyserRef.current.frequencyBinCount
    );
  }, []);
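  // Request a mono 16 kHz microphone stream (constraints are hints the
  // browser may not honor exactly) and buffer recorded chunks every 500 ms.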
  const startRecording = async () => {
    try {
      const stream = await navigator.mediaDevices.getUserMedia({
        audio: { sampleRate: 16000, channelCount: 1, sampleSize: 16 },
      });
      mediaRecorder.current = new MediaRecorder(stream);
      audioChunks.current = [];
      mediaRecorder.current.ondataavailable = (e) => {
        audioChunks.current.push(e.data);
      };
      mediaRecorder.current.start(500);
      updateState({ recording: true, error: undefined });
    } catch (err) {
      updateState({ error: "Microphone access failed; check permission settings" });
    }
  };
  const stopRecording = async () => {
    // Nothing to do if no recorder is active.
    if (!mediaRecorder.current) return;
    // Assemble the buffered chunks once the recorder has fully stopped.
    // Register the handler before calling stop() so the event cannot be missed.
    mediaRecorder.current.onstop = async () => {
      try {
        // Note: MediaRecorder typically emits webm/ogg; the "audio/wav" label
        // here matches what the backend expects, not the actual container.
        const audioBlob = new Blob(audioChunks.current, { type: "audio/wav" });
        await processAudio(audioBlob);
      } finally {
        audioChunks.current = [];
      }
    };
    mediaRecorder.current.stop();
    updateState({ recording: false });
  };
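  // Run the full pipeline against the local backend: upload the recording
  // for ASR, feed the transcript to /generate, then play the synthesized
  // reply while the visualization runs.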
  const processAudio = async (audioBlob: Blob) => {
    const formData = new FormData();
    formData.append("audio", audioBlob, "recording.wav");
    try {
      updateState({ transcribing: true });
      // Send the recording to the backend for speech recognition.
      const asrResponse = await fetch("http://localhost:5000/asr", {
        method: "POST",
        body: formData,
      });
      if (!asrResponse.ok) throw new Error("Speech recognition failed");
      const asrData = await asrResponse.json();
      setAsrText(asrData.asr_text);
      updateState({ transcribing: false, generating: true });
      // Ask the backend to generate an answer from the transcript.
      const generateResponse = await fetch("http://localhost:5000/generate", {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
        },
        body: JSON.stringify({ asr_text: asrData.asr_text }),
      });
      if (!generateResponse.ok) throw new Error("Answer generation failed");
      const generateData = await generateResponse.json();
      setAnswerText(generateData.answer_text);
      updateState({ generating: false, synthesizing: true });
      // Play the synthesized audio with the bar visualization running.
      if (audioElement.current) {
        startVisualization();
        audioElement.current.src = `http://localhost:5000${generateData.audio_url}`;
        audioElement.current.play().catch((err) => {
          console.error("Playback failed:", err);
          updateState({ error: "Audio playback failed" });
        });
      }
    } catch (err) {
      updateState({ error: err instanceof Error ? err.message : "Unknown error" });
    } finally {
      updateState({
        transcribing: false,
        generating: false,
        synthesizing: false,
      });
    }
  };
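  // Map the pipeline state to a one-line status message, errors first.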
  const getStatusText = () => {
    if (processState.error) return processState.error;
    if (processState.recording) return "Recording... 🎤";
    if (processState.transcribing) return "Transcribing... 🔍";
    if (processState.generating) return "Generating answer... 💡";
    if (processState.synthesizing) return "Synthesizing speech... 🎵";
    return "Click to start a conversation";
  };
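  // Drive the bars from the analyser's frequency data on each animation
  // frame, blending each bar with its previous value to smooth jitter.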
  const startVisualization = useCallback(() => {
    if (!analyserRef.current || !dataArrayRef.current || !barsRef.current) {
      console.warn("Visualization components not ready");
      return;
    }
    if (animationFrameRef.current) {
      cancelAnimationFrame(animationFrameRef.current);
      animationFrameRef.current = null;
    }
    const bufferLength = analyserRef.current.frequencyBinCount;
    const updateBars = () => {
      try {
        analyserRef.current!.getByteFrequencyData(dataArrayRef.current!);
        const bars = barsRef.current!.children;
        for (let i = 0; i < bars.length; i++) {
          const bar = bars[i] as HTMLElement;
          // Sample the lower half of the spectrum, spread across the bars.
          const dataIndex = Math.floor((i / BAR_COUNT) * (bufferLength / 2));
          const rawValue =
            (dataArrayRef.current![dataIndex] / 255) * 100 * VOLUME_SENSITIVITY;
          // Blend with the previous value and clamp to the 10–100% range.
          const smoothValue = Math.min(
            100,
            Math.max(10, rawValue * 0.6 + lastValuesRef.current[i] * 0.4)
          );
          lastValuesRef.current[i] = smoothValue;
          bar.style.cssText = `
            height: ${smoothValue}%;
            transform: scaleY(${0.8 + (smoothValue / 100) * 0.6});
            transition: ${i === 0 ? "none" : "height 50ms linear"};
          `;
        }
        animationFrameRef.current = requestAnimationFrame(updateBars);
      } catch (err) {
        console.error("Visualization update failed:", err);
      }
    };
    animationFrameRef.current = requestAnimationFrame(updateBars);
  }, []); // refs are stable, so no dependencies are needed
  // Toggle the listening state, wiring the chosen audio source into the analyser.
  const toggleListening = useCallback(async () => {
    if (isListening) {
      await cleanupAudio();
    } else {
      try {
        await cleanupAudio(); // start from a clean slate
        initializeAudioContext();
        if (audioSourceType === "mic") {
          const stream = await navigator.mediaDevices.getUserMedia({
            audio: { noiseSuppression: true, echoCancellation: true },
          });
          mediaStreamRef.current = stream;
          const source =
            audioContextRef.current!.createMediaStreamSource(stream);
          source.connect(analyserRef.current!);
        } else {
          const audio = new Audio("/test-audio.mp3");
          const source =
            audioContextRef.current!.createMediaElementSource(audio);
          source.connect(analyserRef.current!);
          audio.play();
        }
        // Route through destination so playback is audible; note that with
        // the mic source this also plays the microphone back.
        analyserRef.current!.connect(audioContextRef.current!.destination);
        startVisualization();
      } catch (err) {
        console.error("Initialization failed:", err);
        updateState({ error: "Audio initialization failed" });
      }
    }
    setIsListening((prev) => !prev);
  }, [
    isListening,
    audioSourceType,
    cleanupAudio,
    initializeAudioContext,
    startVisualization,
  ]);
  // Play the bundled sample audio through the analyser as a visualization demo.
  const handlePlaySample = async () => {
    try {
      await cleanupAudio();
      initializeAudioContext();
      const audio = new Audio("/test-audio.mp3");
      const source = audioContextRef.current!.createMediaElementSource(audio);
      source.connect(analyserRef.current!);
      analyserRef.current!.connect(audioContextRef.current!.destination);
      await audio.play();
      startVisualization();
      audio.onended = () => {
        setIsListening(false);
        if (animationFrameRef.current) {
          cancelAnimationFrame(animationFrameRef.current);
          animationFrameRef.current = null;
        }
      };
    } catch (err) {
      console.error("Sample playback failed:", err);
      updateState({ error: "Sample playback failed" });
    }
  };
  return (
    <div className="absolute top-1/2 left-1/2 -translate-x-1/2 -translate-y-1/2 text-center w-full px-4">
      {/* Greeting */}
      <h1 className="text-6xl font-light mb-8 drop-shadow-glow">{greeting}</h1>
      {/* Recognized speech, in a smaller, lighter font */}
      <h3 className="text-sm font-light mb-8">{asrText || "Waiting..."}</h3>
      {/* Generated answer, in a smaller, lighter font */}
      <h2 className="text-sm font-light mb-8">
        {answerText || "AI assistant standing by"}
      </h2>
      {/* Audio source switch (the original button labels were missing;
          "Microphone" / "Playback" are assumed) */}
      <div className="mb-4 flex justify-center gap-4">
        <button
          onClick={() => setAudioSourceType("mic")}
          className={`px-4 py-2 rounded-lg ${
            audioSourceType === "mic" ? "bg-blue-500 text-white" : "bg-gray-200"
          }`}
        >
          Microphone
        </button>
        <button
          onClick={() => setAudioSourceType("playback")}
          className={`px-4 py-2 rounded-lg ${
            audioSourceType === "playback"
              ? "bg-blue-500 text-white"
              : "bg-gray-200"
          }`}
        >
          Playback
        </button>
      </div>
      {/* Sample playback button */}
      {audioSourceType === "playback" && (
        <button
          onClick={handlePlaySample}
          className="mt-4 px-6 py-2 bg-green-500 text-white rounded-lg hover:bg-green-600 transition-colors"
        >
          Play sample
        </button>
      )}
      {/* Audio waveform visualization */}
      <div className="relative inline-block">
        <button
          onClick={() => {
            toggleListening();
            if (processState.recording) {
              stopRecording();
            } else {
              startRecording();
            }
          }}
          className={[
            "group relative flex h-20 items-end gap-1.5 rounded-3xl p-6",
            "bg-gradient-to-b from-black/80 to-gray-900/90",
            "shadow-[0_0_20px_0_rgba(34,211,238,0.2)] hover:shadow-[0_0_30px_0_rgba(109,213,237,0.3)]",
            "transition-shadow duration-300 ease-out",
            isListening ? "ring-2 ring-cyan-400/20" : "",
          ].join(" ")}
          style={{
            // will-change is deliberately not set here; testing showed it hurt performance
            backdropFilter: "blur(12px)",
            WebkitBackdropFilter: "blur(12px)",
          }}
        >
          {/* Bar container */}
          <div ref={barsRef} className="flex h-full w-full items-end gap-1.5">
            {[...Array(BAR_COUNT)].map((_, index) => (
              <div
                key={index}
                className={[
                  "w-2.5 rounded-full",
                  "bg-gradient-to-t from-cyan-400/90 via-blue-400/90 to-purple-500/90",
                  "transition-transform duration-150 ease-out",
                ].join(" ")}
                style={{
                  willChange: "height, transform", // hint for the browser to optimize
                  boxShadow: "0 0 8px -2px rgba(52,211,254,0.4)",
                  height: "10%", // initial height
                }}
              />
            ))}
          </div>
        </button>
      </div>
      {/* Status footer */}
      <div className="mt-8 text-xs text-gray-500 space-y-1">
        <p>"Mirror, mirror"</p>
        <div className="flex items-center justify-center gap-1.5">
          <div className="relative flex items-center">
            {/* Breathing dot indicator */}
            <div
              className={`w-2 h-2 rounded-full ${
                isListening ? "bg-green-400 animate-breath" : "bg-gray-400"
              }`}
            />
            {/* Expanding ripple effect */}
            {isListening && (
              <div className="absolute inset-0 rounded-full bg-green-400/20 animate-ping" />
            )}
          </div>
          <span>{getStatusText()}</span>
        </div>
        {/* Audio playback element */}
        <audio
          ref={audioElement}
          controls={process.env.NODE_ENV === "development"} // show controls in development
          onEnded={() => updateState({ synthesizing: false })}
        />
      </div>
    </div>
  );
};
export default VoiceAssistant;
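// Usage sketch (hypothetical parent component):
//   <VoiceAssistant greeting="Hello" />
// The component expects the backend at http://localhost:5000 (/asr and
// /generate) and a /test-audio.mp3 asset for the playback source.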