/*
 * Files
 * ai-app-skr/components/AudioRecorder.tsx
 * 2025-11-21 00:24:18 +08:00
 *
 * 235 lines
 * 7.5 KiB
 * TypeScript
 */

import React, { useState, useRef, useEffect } from 'react';
import { Mic, Square, Loader2 } from 'lucide-react';
/** Props for the AudioRecorder push-to-talk button. */
interface AudioRecorderProps {
// Called with the recording as a bare base64 string (the "data:audio/wav;base64," prefix is stripped); audio is a 16 kHz mono 16-bit WAV.
onAudioCaptured: (base64Audio: string) => void;
// Externally disables the button (in addition to the internal processing lock).
disabled?: boolean;
// Tooltip shown while idle, before recording starts.
titleStart?: string;
// Tooltip shown while actively recording.
titleStop?: string;
}
/**
 * Push-to-talk microphone button.
 *
 * Captures raw PCM from the user's microphone via the Web Audio API,
 * encodes it into a 16 kHz mono 16-bit WAV, and reports it to the parent
 * as a bare base64 string (the "data:audio/wav;base64," prefix is stripped).
 *
 * NOTE(review): ScriptProcessorNode is deprecated in favour of AudioWorklet;
 * presumably kept here to avoid shipping a separate worklet file — confirm
 * before migrating.
 */
const AudioRecorder: React.FC<AudioRecorderProps> = ({
  onAudioCaptured,
  disabled,
  titleStart = "Start Voice Input",
  titleStop = "Stop Recording"
}) => {
  const [isRecording, setIsRecording] = useState(false);
  // True while the captured PCM is being encoded / base64-read; locks the button.
  const [isProcessing, setIsProcessing] = useState(false);
  const audioContextRef = useRef<AudioContext | null>(null);
  const streamRef = useRef<MediaStream | null>(null);
  const processorRef = useRef<ScriptProcessorNode | null>(null);
  const inputRef = useRef<MediaStreamAudioSourceNode | null>(null);
  // PCM chunks accumulated by onaudioprocess, at the context's native rate.
  const audioDataRef = useRef<Float32Array[]>([]);

  // Release the microphone and audio graph if we unmount mid-recording.
  useEffect(() => {
    return () => {
      cleanup();
    };
  }, []);

  /** Stops all tracks, disconnects the graph, and closes the AudioContext. */
  const cleanup = () => {
    if (streamRef.current) {
      streamRef.current.getTracks().forEach(track => track.stop());
      streamRef.current = null;
    }
    if (processorRef.current) {
      processorRef.current.disconnect();
      processorRef.current = null;
    }
    if (inputRef.current) {
      inputRef.current.disconnect();
      inputRef.current = null;
    }
    if (audioContextRef.current) {
      if (audioContextRef.current.state !== 'closed') {
        audioContextRef.current.close();
      }
      audioContextRef.current = null;
    }
  };

  /** Requests the microphone and starts buffering PCM chunks. */
  const startRecording = async () => {
    try {
      audioDataRef.current = [];
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      streamRef.current = stream;
      const audioContext = new (window.AudioContext || (window as any).webkitAudioContext)();
      // Some browsers create contexts suspended until a user gesture.
      if (audioContext.state === 'suspended') {
        await audioContext.resume();
      }
      audioContextRef.current = audioContext;
      const input = audioContext.createMediaStreamSource(stream);
      inputRef.current = input;
      // Buffer size 4096, 1 input channel, 1 output channel
      const processor = audioContext.createScriptProcessor(4096, 1, 1);
      processorRef.current = processor;
      processor.onaudioprocess = (e) => {
        const channelData = e.inputBuffer.getChannelData(0);
        // Clone the data: the browser reuses the underlying buffer between ticks.
        audioDataRef.current.push(new Float32Array(channelData));
      };
      input.connect(processor);
      // ScriptProcessorNode only fires when connected to a destination.
      processor.connect(audioContext.destination);
      setIsRecording(true);
    } catch (err) {
      console.error("Error accessing microphone:", err);
      // FIX: release partially-acquired resources (e.g. the stream was granted
      // but AudioContext setup failed) so the mic indicator turns off.
      cleanup();
      alert("Could not access microphone. Please check permissions.");
    }
  };

  /** Stops capture, encodes the buffered PCM to WAV, and emits it as base64. */
  const stopRecording = async () => {
    if (!isRecording) return;
    setIsRecording(false);
    setIsProcessing(true);
    // Stop capturing
    if (streamRef.current) {
      streamRef.current.getTracks().forEach(track => track.stop());
    }
    if (processorRef.current) {
      processorRef.current.disconnect();
    }
    if (inputRef.current) {
      inputRef.current.disconnect();
    }
    // Small delay to allow last process tick
    setTimeout(() => {
      try {
        if (audioDataRef.current.length === 0) {
          setIsProcessing(false);
          cleanup();
          return;
        }
        const sampleRate = audioContextRef.current?.sampleRate || 44100;
        const blob = exportWAV(audioDataRef.current, sampleRate);
        cleanup();
        const reader = new FileReader();
        // FIX: attach the handler BEFORE starting the read, and guard the
        // result: onloadend also fires after a failed read with result === null,
        // which previously threw on .split() and left isProcessing stuck at
        // true — permanently disabling the button.
        reader.onloadend = () => {
          const result = reader.result;
          if (typeof result === 'string') {
            // result is "data:audio/wav;base64,..."
            const base64String = result.split(',')[1];
            onAudioCaptured(base64String);
          } else {
            console.error("Audio read error", reader.error);
          }
          setIsProcessing(false);
        };
        reader.readAsDataURL(blob);
      } catch (e) {
        console.error("WAV Encoding Error", e);
        setIsProcessing(false);
        cleanup();
      }
    }, 100);
  };

  return (
    <button
      onClick={isRecording ? stopRecording : startRecording}
      disabled={disabled || isProcessing}
      className={`p-3 rounded-full transition-all duration-300 ${
        isRecording
          ? 'bg-red-500 hover:bg-red-600 text-white animate-pulse shadow-lg shadow-red-200 ring-4 ring-red-100'
          : 'bg-slate-200 hover:bg-slate-300 text-slate-700 hover:shadow-md'
      } disabled:opacity-50 disabled:cursor-not-allowed flex items-center justify-center`}
      title={isRecording ? titleStop : titleStart}
    >
      {isProcessing ? <Loader2 size={20} className="animate-spin" /> : (isRecording ? <Square size={20} fill="currentColor" /> : <Mic size={20} />)}
    </button>
  );
};
// --- WAV ENCODER HELPERS ---
const exportWAV = (audioData: Float32Array[], sampleRate: number) => {
const mergedBuffers = mergeBuffers(audioData);
const downsampledBuffer = downsampleBuffer(mergedBuffers, sampleRate);
const buffer = encodeWAV(downsampledBuffer);
return new Blob([buffer], { type: 'audio/wav' });
};
/**
 * Concatenates a list of PCM chunks into one contiguous Float32Array.
 * An empty list yields a zero-length array.
 */
const mergeBuffers = (audioData: Float32Array[]) => {
  let totalLength = 0;
  for (const chunk of audioData) {
    totalLength += chunk.length;
  }
  const merged = new Float32Array(totalLength);
  let writeIndex = 0;
  audioData.forEach((chunk) => {
    merged.set(chunk, writeIndex);
    writeIndex += chunk.length;
  });
  return merged;
};
/**
 * Resamples `buffer` from `sampleRate` down to `targetRate` by averaging each
 * window of adjacent source samples (box filter).
 *
 * FIX: rates at or below the target are returned unchanged. The previous code
 * only special-cased exactly 16000; for lower rates the averaging loop produced
 * zero-filled gaps (empty windows) — i.e. garbage audio — instead of
 * passing the samples through.
 *
 * @param buffer      mono PCM samples
 * @param sampleRate  rate the samples were captured at
 * @param targetRate  desired output rate (default 16 kHz, matching the WAV
 *                    header written by encodeWAV)
 */
const downsampleBuffer = (buffer: Float32Array, sampleRate: number, targetRate = 16000) => {
  // Box-filter averaging can only reduce the rate; pass through otherwise.
  if (sampleRate <= targetRate) return buffer;
  const sampleRateRatio = sampleRate / targetRate;
  const newLength = Math.ceil(buffer.length / sampleRateRatio);
  const result = new Float32Array(newLength);
  let offsetResult = 0;
  let offsetBuffer = 0;
  while (offsetResult < result.length) {
    const nextOffsetBuffer = Math.round((offsetResult + 1) * sampleRateRatio);
    let accum = 0, count = 0;
    for (let i = offsetBuffer; i < nextOffsetBuffer && i < buffer.length; i++) {
      accum += buffer[i];
      count++;
    }
    // Guard against an empty window (rounding can skip an index): avoid NaN.
    result[offsetResult] = count > 0 ? accum / count : 0;
    offsetResult++;
    offsetBuffer = nextOffsetBuffer;
  }
  return result;
};
/**
 * Serializes mono float samples into a complete 16-bit PCM WAV file layout
 * (44-byte RIFF/WAVE header + data chunk).
 *
 * Generalized: the sample rate written into the header is now a parameter
 * (default 16000, preserving the previous hard-coded behavior); it must match
 * the rate the samples were actually resampled to.
 *
 * @param samples     mono samples, expected in [-1, 1] (clamped)
 * @param sampleRate  rate recorded in the fmt chunk (default 16 kHz)
 * @returns DataView over the full WAV byte buffer
 */
const encodeWAV = (samples: Float32Array, sampleRate = 16000) => {
  const buffer = new ArrayBuffer(44 + samples.length * 2);
  const view = new DataView(buffer);
  const writeString = (view: DataView, offset: number, string: string) => {
    for (let i = 0; i < string.length; i++) {
      view.setUint8(offset + i, string.charCodeAt(i));
    }
  };
  // RIFF container header
  writeString(view, 0, 'RIFF');
  view.setUint32(4, 36 + samples.length * 2, true); // file size minus the 8-byte RIFF preamble
  writeString(view, 8, 'WAVE');
  // fmt chunk: uncompressed PCM, mono, 16-bit
  writeString(view, 12, 'fmt ');
  view.setUint32(16, 16, true);              // fmt chunk size
  view.setUint16(20, 1, true);               // audio format: 1 = PCM
  view.setUint16(22, 1, true);               // channel count: mono
  view.setUint32(24, sampleRate, true);      // sample rate
  view.setUint32(28, sampleRate * 2, true);  // byte rate = rate * block align
  view.setUint16(32, 2, true);               // block align: 1 channel * 2 bytes
  view.setUint16(34, 16, true);              // bits per sample
  // data chunk
  writeString(view, 36, 'data');
  view.setUint32(40, samples.length * 2, true);
  // Clamp each float to [-1, 1] and scale to little-endian signed 16-bit PCM.
  const floatTo16BitPCM = (output: DataView, offset: number, input: Float32Array) => {
    for (let i = 0; i < input.length; i++, offset += 2) {
      const s = Math.max(-1, Math.min(1, input[i]));
      output.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
    }
  };
  floatTo16BitPCM(view, 44, samples);
  return view;
};
export default AudioRecorder;