import React, { useState, useRef, useEffect } from 'react';
import { Mic, Square, Loader2 } from 'lucide-react';

interface AudioRecorderProps {
  onAudioCaptured: (base64Audio: string) => void;
  disabled?: boolean;
  titleStart?: string;
  titleStop?: string;
}
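
// Captures microphone audio with the Web Audio API and delivers it to the
// parent as a base64-encoded mono 16 kHz 16-bit PCM WAV string.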
const AudioRecorder: React.FC<AudioRecorderProps> = ({
  onAudioCaptured,
  disabled,
  titleStart = "Start Voice Input",
  titleStop = "Stop Recording"
}) => {
  const [isRecording, setIsRecording] = useState(false);
  const [isProcessing, setIsProcessing] = useState(false);
  const audioContextRef = useRef<AudioContext | null>(null);
  const streamRef = useRef<MediaStream | null>(null);
  const processorRef = useRef<ScriptProcessorNode | null>(null);
  const inputRef = useRef<MediaStreamAudioSourceNode | null>(null);
  const audioDataRef = useRef<Float32Array[]>([]);

  useEffect(() => {
    return () => {
      cleanup();
    };
  }, []);
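
  // Stop all microphone tracks, disconnect the audio graph, and close the
  // AudioContext so the browser releases the device.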
  const cleanup = () => {
    if (streamRef.current) {
      streamRef.current.getTracks().forEach(track => track.stop());
      streamRef.current = null;
    }
    if (processorRef.current) {
      processorRef.current.disconnect();
      processorRef.current = null;
    }
    if (inputRef.current) {
      inputRef.current.disconnect();
      inputRef.current = null;
    }
    if (audioContextRef.current) {
      if (audioContextRef.current.state !== 'closed') {
        audioContextRef.current.close();
      }
      audioContextRef.current = null;
    }
  };
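
  // Request microphone access and wire stream -> source -> ScriptProcessorNode,
  // buffering raw Float32 PCM chunks on every audio tick. ScriptProcessorNode
  // is deprecated in favor of AudioWorkletNode, but it remains widely supported.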
  const startRecording = async () => {
    try {
      audioDataRef.current = [];
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      streamRef.current = stream;

      const audioContext = new (window.AudioContext || (window as any).webkitAudioContext)();
      if (audioContext.state === 'suspended') {
        await audioContext.resume();
      }
      audioContextRef.current = audioContext;

      const input = audioContext.createMediaStreamSource(stream);
      inputRef.current = input;

      // Buffer size 4096, 1 input channel, 1 output channel
      const processor = audioContext.createScriptProcessor(4096, 1, 1);
      processorRef.current = processor;

      processor.onaudioprocess = (e) => {
        const channelData = e.inputBuffer.getChannelData(0);
        // Clone the data; the underlying buffer is reused between callbacks.
        audioDataRef.current.push(new Float32Array(channelData));
      };

      input.connect(processor);
      processor.connect(audioContext.destination);

      setIsRecording(true);
    } catch (err) {
      console.error("Error accessing microphone:", err);
      alert("Could not access microphone. Please check permissions.");
    }
  };
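
  // Stop capture, wait briefly for the final onaudioprocess callback, then
  // encode the buffered audio to WAV and hand it back as base64.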
  const stopRecording = () => {
    if (!isRecording) return;
    setIsRecording(false);
    setIsProcessing(true);

    // Stop capturing
    if (streamRef.current) {
      streamRef.current.getTracks().forEach(track => track.stop());
    }
    if (processorRef.current) {
      processorRef.current.disconnect();
    }
    if (inputRef.current) {
      inputRef.current.disconnect();
    }

    // Small delay to let the last onaudioprocess tick flush its buffer
    setTimeout(() => {
      try {
        if (audioDataRef.current.length === 0) {
          setIsProcessing(false);
          cleanup();
          return;
        }

        const sampleRate = audioContextRef.current?.sampleRate || 44100;
        const blob = exportWAV(audioDataRef.current, sampleRate);
        cleanup();

        const reader = new FileReader();
        reader.readAsDataURL(blob);
        reader.onloadend = () => {
          const result = reader.result as string;
          // result is "data:audio/wav;base64,..."
          const base64String = result.split(',')[1];
          onAudioCaptured(base64String);
          setIsProcessing(false);
        };
      } catch (e) {
        console.error("WAV Encoding Error", e);
        setIsProcessing(false);
        cleanup();
      }
    }, 100);
  };

  return (
    <button
      onClick={isRecording ? stopRecording : startRecording}
      disabled={disabled || isProcessing}
      className={`p-3 rounded-full transition-all duration-300 ${
        isRecording
          ? 'bg-red-500 hover:bg-red-600 text-white animate-pulse shadow-lg shadow-red-200 ring-4 ring-red-100'
          : 'bg-slate-200 hover:bg-slate-300 text-slate-700 hover:shadow-md'
      } disabled:opacity-50 disabled:cursor-not-allowed flex items-center justify-center`}
      title={isRecording ? titleStop : titleStart}
    >
      {isProcessing ? <Loader2 size={20} className="animate-spin" /> : (isRecording ? <Square size={20} fill="currentColor" /> : <Mic size={20} />)}
    </button>
  );
};

// --- WAV ENCODER HELPERS ---
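
// Merge the recorded chunks, downsample to 16 kHz, and wrap the samples in a
// WAV container.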
const exportWAV = (audioData: Float32Array[], sampleRate: number) => {
  const mergedBuffers = mergeBuffers(audioData);
  const downsampledBuffer = downsampleBuffer(mergedBuffers, sampleRate);
  const buffer = encodeWAV(downsampledBuffer);
  return new Blob([buffer], { type: 'audio/wav' });
};
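
// Concatenate the per-callback Float32Array chunks into one contiguous buffer.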
const mergeBuffers = (audioData: Float32Array[]) => {
  const totalLength = audioData.reduce((acc, val) => acc + val.length, 0);
  const result = new Float32Array(totalLength);
  let offset = 0;
  for (const arr of audioData) {
    result.set(arr, offset);
    offset += arr.length;
  }
  return result;
};
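
// Reduce the sample rate to 16 kHz by averaging each window of source samples.
// Averaging acts as a crude low-pass filter, generally adequate for speech.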
const downsampleBuffer = (buffer: Float32Array, sampleRate: number) => {
  const targetRate = 16000;
  if (sampleRate === targetRate) return buffer;
  const sampleRateRatio = sampleRate / targetRate;
  const newLength = Math.ceil(buffer.length / sampleRateRatio);
  const result = new Float32Array(newLength);
  let offsetResult = 0;
  let offsetBuffer = 0;

  while (offsetResult < result.length) {
    const nextOffsetBuffer = Math.round((offsetResult + 1) * sampleRateRatio);
    let accum = 0, count = 0;
    for (let i = offsetBuffer; i < nextOffsetBuffer && i < buffer.length; i++) {
      accum += buffer[i];
      count++;
    }

    // Guard against NaN: only divide when the window contained source samples.
    result[offsetResult] = count > 0 ? accum / count : 0;

    offsetResult++;
    offsetBuffer = nextOffsetBuffer;
  }
  return result;
};
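
// Write a canonical 44-byte WAV header (PCM, mono, 16 kHz, 16-bit) followed by
// the samples converted to little-endian signed 16-bit integers.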
const encodeWAV = (samples: Float32Array) => {
  const buffer = new ArrayBuffer(44 + samples.length * 2);
  const view = new DataView(buffer);

  const writeString = (view: DataView, offset: number, string: string) => {
    for (let i = 0; i < string.length; i++) {
      view.setUint8(offset + i, string.charCodeAt(i));
    }
  };

  // RIFF header
  writeString(view, 0, 'RIFF');
  view.setUint32(4, 36 + samples.length * 2, true); // file size minus 8 bytes
  writeString(view, 8, 'WAVE');
  // fmt chunk
  writeString(view, 12, 'fmt ');
  view.setUint32(16, 16, true);        // fmt chunk length
  view.setUint16(20, 1, true);         // audio format: PCM
  view.setUint16(22, 1, true);         // channels: 1 (mono)
  view.setUint32(24, 16000, true);     // sample rate
  view.setUint32(28, 16000 * 2, true); // byte rate = sampleRate * blockAlign
  view.setUint16(32, 2, true);         // block align = channels * bytesPerSample
  view.setUint16(34, 16, true);        // bits per sample
  // data chunk
  writeString(view, 36, 'data');
  view.setUint32(40, samples.length * 2, true);

  const floatTo16BitPCM = (output: DataView, offset: number, input: Float32Array) => {
    for (let i = 0; i < input.length; i++, offset += 2) {
      // Clamp to [-1, 1] and scale to the signed 16-bit range.
      const s = Math.max(-1, Math.min(1, input[i]));
      output.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
    }
  };

  floatTo16BitPCM(view, 44, samples);
  return view;
};

export default AudioRecorder;
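
// Example usage (a minimal sketch; `ChatInput` and `handleTranscription` are
// hypothetical names, not part of this file):
//
//   const ChatInput = () => {
//     const handleTranscription = (base64Wav: string) => {
//       // e.g. POST the base64 WAV to a speech-to-text endpoint
//     };
//     return <AudioRecorder onAudioCaptured={handleTranscription} />;
//   };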