// AudioManager.js — microphone capture, silence-based utterance detection,
// and PCM/WAV encoding helpers.
import {state} from "./state.js";
import {oai001B01} from "./api.js";
import {elements} from "./elements.js";

export const AudioManager = {
    /**
     * Starts capturing microphone audio at 16 kHz mono, converts each audio
     * buffer to 16-bit PCM, and — after SILENCE_DURATION ms of silence that
     * follows detected speech — sends the accumulated PCM to the server via
     * oai001B01() and displays the returned transcription.
     * Captured handles are stored on `state` so cleanup() can release them.
     */
    async initWAVMediaRecorder() {
        let audioChunks = [];   // Int16Array chunks of the current utterance
        let speaking = false;   // true once speech has been detected
        let silenceStart = 0;   // ms timestamp when current silence began; 0 = not in silence
        const SILENCE_THRESHOLD = 0.02; // RMS below this counts as silence
        const SILENCE_DURATION = 1000;  // ms of continuous silence that ends an utterance

        try {
            // Acquire the microphone stream.
            const userMedia = await navigator.mediaDevices.getUserMedia({audio: true});
            // 16 kHz sample rate — presumably what the server-side ASR expects.
            const audioContext = new AudioContext({sampleRate: 16000});
            const mediaStreamSource = audioContext.createMediaStreamSource(userMedia);
            // NOTE(review): ScriptProcessorNode is deprecated; consider migrating
            // to AudioWorklet (cleanup() already anticipates state.audioWorkletNode).
            const processor = audioContext.createScriptProcessor(4096, 1, 1);

            let isTransmitting = false; // guards against re-entering the send path while a request is in flight

            processor.onaudioprocess = async (event) => {
                if (!state.isRecording) return;

                const inputData = event.inputBuffer.getChannelData(0); // Float32Array
                const rms = Math.sqrt(
                    inputData.reduce((sum, sample) => sum + sample * sample, 0) / inputData.length
                );
                const pcmData = getPCMData(inputData); // convert to Int16Array
                console.log('### rms', new Date().toLocaleTimeString(), rms);

                if (rms > SILENCE_THRESHOLD) {
                    // Voice detected: buffer the chunk and cancel any pending silence window.
                    speaking = true;
                    audioChunks.push(pcmData);
                    isTransmitting = false; // voice resumed — clear the transmit latch
                    silenceStart = 0;
                } else if (speaking && !isTransmitting) {
                    if (silenceStart === 0) silenceStart = Date.now();
                    if (Date.now() - silenceStart > SILENCE_DURATION) {
                        isTransmitting = true; // latch: block re-entry while sending
                        // Concatenate all buffered chunks into a single Int16Array.
                        const totalLength = audioChunks.reduce((sum, chunk) => sum + chunk.length, 0);
                        const combinedData = new Int16Array(totalLength);
                        let offset = 0;
                        for (const chunk of audioChunks) {
                            combinedData.set(chunk, offset);
                            offset += chunk.length;
                        }
                        // Send to the server.
                        console.log("묵음 감지! 서버로 전송");
                        try {
                            elements.transcriptionDiv.textContent = await oai001B01(combinedData.buffer);
                        } catch (err) {
                            // A failed request must not become an unhandled rejection,
                            // and must not leave the recorder wedged with stale buffers.
                            console.error("Transcription request failed:", err);
                        } finally {
                            audioChunks = [];
                            speaking = false;
                            silenceStart = 0;
                        }
                    } else {
                        // Inside the silence window: keep buffering trailing audio.
                        audioChunks.push(pcmData);
                    }
                }
            };

            // Wire the graph: mic -> processor -> destination
            // (connecting to destination is required for onaudioprocess to fire).
            mediaStreamSource.connect(processor);
            processor.connect(audioContext.destination);

            // Persist handles so cleanup() can tear everything down.
            state.isRecording = true;
            state.userMedia = userMedia;
            state.audioContext = audioContext;
            state.mediaStreamSource = mediaStreamSource;
            state.processor = processor;
        } catch (error) {
            console.error("Failed to initialize WAV Media Recorder:", error);
            await AudioManager.cleanup(); // release any partially-acquired resources
        }
    },

    /**
     * Stops recording and releases every audio resource referenced from `state`:
     * timers, ScriptProcessorNode, MediaRecorder, source node, AudioContext,
     * media tracks, AudioWorklet node, and buffered audio.
     */
    async cleanup() {
        state.isRecording = false;

        // Periodic-send timer (5-second interval transmitter).
        if (state.sendInterval) {
            clearInterval(state.sendInterval);
            state.sendInterval = null;
        }

        // 1. Disconnect the ScriptProcessorNode and drop its handler.
        if (state.processor) {
            state.processor.disconnect();
            state.processor.onaudioprocess = null;
            state.processor = null;
        }

        // MediaRecorder teardown (used by other capture paths).
        if (state.mediaRecorder) {
            state.mediaRecorder.stop();
            state.mediaRecorder.ondataavailable = null;
            state.mediaRecorder.onstop = null;
            if (state.mediaRecorder.stream) {
                state.mediaRecorder.stream.getTracks().forEach(track => track.stop());
            }
            state.mediaRecorder = null;
        }

        // 2. Disconnect the MediaStreamAudioSourceNode.
        if (state.mediaStreamSource) {
            state.mediaStreamSource.disconnect();
            state.mediaStreamSource = null;
        }

        // 3. Close the AudioContext if still open, and ALWAYS drop the
        //    reference (the original only nulled it when close() ran, so an
        //    already-closed context was retained forever).
        if (state.audioContext) {
            if (state.audioContext.state !== "closed") {
                await state.audioContext.close();
            }
            state.audioContext = null;
        }

        // 4. Stop all MediaStream tracks.
        if (state.userMedia) {
            state.userMedia.getTracks().forEach(track => track.stop());
            state.userMedia = null;
        }

        // AudioWorklet node, if one was created elsewhere.
        if (state.audioWorkletNode) {
            state.audioWorkletNode.port.postMessage('stop');
            state.audioWorkletNode.disconnect();
            state.audioWorkletNode = null;
        }

        // Remaining timers and buffers.
        if (state.silenceTimeout) {
            clearTimeout(state.silenceTimeout);
            state.silenceTimeout = null;
        }
        state.audioBuffer = [];

        console.log('AudioManager fully destroyed');
    },
};

/**
 * Converts a Float32Array of samples in [-1, 1] into 16-bit signed PCM.
 * Samples are clamped to [-1, 1] before scaling by 32767.
 * @param {Float32Array} inputBuffer - raw audio samples.
 * @returns {Int16Array} 16-bit PCM samples.
 */
const getPCMData = (inputBuffer) => {
    const pcmData = new Int16Array(inputBuffer.length);
    for (let i = 0; i < inputBuffer.length; i++) {
        const sample = Math.max(-1, Math.min(1, inputBuffer[i])); // clip to [-1, 1]
        pcmData[i] = sample * 32767; // scale into the int16 range
    }
    return pcmData;
};

/**
 * Wraps raw 16-bit mono little-endian PCM in a RIFF/WAVE container.
 * @param {Int16Array} pcmData - PCM samples.
 * @param {number} [sampleRate=16000] - sample rate written into the header.
 * @returns {Blob} a Blob of MIME type 'audio/wav'.
 */
export function encodeWAV(pcmData, sampleRate = 16000) {
    const numChannels = 1;
    const bytesPerSample = 2;
    const blockAlign = numChannels * bytesPerSample;
    const byteRate = sampleRate * blockAlign;
    const dataLength = pcmData.length * bytesPerSample;

    const buffer = new ArrayBuffer(44 + dataLength);
    const view = new DataView(buffer);

    // RIFF header
    writeString(view, 0, 'RIFF');
    view.setUint32(4, 36 + dataLength, true); // chunk size
    writeString(view, 8, 'WAVE');

    // fmt sub-chunk
    writeString(view, 12, 'fmt ');
    view.setUint32(16, 16, true);          // sub-chunk 1 size
    view.setUint16(20, 1, true);           // audio format (1 = linear PCM)
    view.setUint16(22, numChannels, true);
    view.setUint32(24, sampleRate, true);
    view.setUint32(28, byteRate, true);
    view.setUint16(32, blockAlign, true);
    view.setUint16(34, bytesPerSample * 8, true); // bits per sample

    // data sub-chunk
    writeString(view, 36, 'data');
    view.setUint32(40, dataLength, true);

    // PCM payload, little-endian.
    let offset = 44;
    for (let i = 0; i < pcmData.length; i++, offset += 2) {
        view.setInt16(offset, pcmData[i], true);
    }

    return new Blob([buffer], { type: 'audio/wav' });
}

/** Writes an ASCII string byte-by-byte into the DataView at the given offset. */
function writeString(view, offset, string) {
    for (let i = 0; i < string.length; i++) {
        view.setUint8(offset + i, string.charCodeAt(i));
    }
}