// AudioManager.js (audio processing)
  2. import {state} from "./state.js";
  3. import {oai001B01} from "./api.js";
  4. import {elements} from "./elements.js";
  5. export const AudioManager = {
  6. async initWAVMediaRecorder() {
  7. let audioChunks = [];
  8. let speaking = false;
  9. let silenceStart = 0;
  10. const SILENCE_THRESHOLD = 0.02; // 음성 감지 임계값
  11. const SILENCE_DURATION = 1000; // 묵음 유지 시간(ms)
  12. try {
  13. // 마이크 스트림 생성
  14. const userMedia = await navigator.mediaDevices.getUserMedia({audio: true});
  15. const audioContext = new AudioContext({sampleRate: 16000}); // 16kHz 샘플레이트
  16. const mediaStreamSource = audioContext.createMediaStreamSource(userMedia);
  17. const processor = audioContext.createScriptProcessor(4096, 1, 1);
  18. let isTransmitting = false;
  19. processor.onaudioprocess = async (event) => {
  20. if (!state.isRecording) return;
  21. const inputData = event.inputBuffer.getChannelData(0); // Float32Array
  22. const rms = Math.sqrt(inputData.reduce((sum, sample) => sum + sample * sample, 0) / inputData.length);
  23. const pcmData = getPCMData(inputData); // Int16Array로 변환
  24. console.log('### rms', new Date().toLocaleTimeString(), rms);
  25. if (rms > SILENCE_THRESHOLD) {
  26. speaking = true;
  27. audioChunks.push(pcmData);
  28. isTransmitting = false; // 음성 다시 감지되면 전송 상태 해제
  29. silenceStart = 0;
  30. } else if (speaking && !isTransmitting) {
  31. if (silenceStart === 0) silenceStart = Date.now();
  32. if (Date.now() - silenceStart > SILENCE_DURATION) {
  33. isTransmitting = true; // 전송 중 표시
  34. const totalLength = audioChunks.reduce((sum, chunk) => sum + chunk.length, 0);
  35. const combinedData = new Int16Array(totalLength);
  36. let offset = 0;
  37. for (const chunk of audioChunks) {
  38. combinedData.set(chunk, offset);
  39. offset += chunk.length;
  40. }
  41. // 서버로 전송
  42. console.log("묵음 감지! 서버로 전송");
  43. elements.transcriptionDiv.textContent = await oai001B01(combinedData.buffer);
  44. audioChunks = [];
  45. speaking = false;
  46. silenceStart = 0;
  47. } else {
  48. audioChunks.push(pcmData);
  49. }
  50. }
  51. };
  52. // 노드 연결
  53. mediaStreamSource.connect(processor);
  54. processor.connect(audioContext.destination);
  55. // 상태 저장
  56. state.isRecording = true;
  57. state.userMedia = userMedia;
  58. state.audioContext = audioContext;
  59. state.mediaStreamSource = mediaStreamSource;
  60. state.processor = processor;
  61. } catch (error) {
  62. console.error("Failed to initialize WAV Media Recorder:", error);
  63. await AudioManager.cleanup(); // 초기화 실패 시 자원 정리
  64. }
  65. },
  66. async cleanup() {
  67. state.isRecording = false;
  68. // 5초 간격 전송 타이머 해제
  69. if (state.sendInterval) {
  70. clearInterval(state.sendInterval);
  71. state.sendInterval = null;
  72. }
  73. // 1. ScriptProcessorNode 연결 해제
  74. if (state.processor) {
  75. state.processor.disconnect(); // 모든 연결 끊기
  76. state.processor.onaudioprocess = null; // 이벤트 핸들러 제거
  77. state.processor = null;
  78. }
  79. // MediaRecorder 정리
  80. if (state.mediaRecorder) {
  81. state.mediaRecorder.stop();
  82. state.mediaRecorder.ondataavailable = null; // 이벤트 핸들러 제거
  83. state.mediaRecorder.onstop = null;
  84. if (state.mediaRecorder.stream) {
  85. state.mediaRecorder.stream.getTracks().forEach(track => track.stop());
  86. }
  87. state.mediaRecorder = null;
  88. }
  89. // 2. MediaStreamAudioSourceNode 연결 해제 (필요 시)
  90. if (state.mediaStreamSource) {
  91. state.mediaStreamSource.disconnect();
  92. state.mediaStreamSource = null;
  93. }
  94. // 3. AudioContext 종료
  95. if (state.audioContext && state.audioContext.state !== "closed") {
  96. await state.audioContext.close();
  97. state.audioContext = null;
  98. }
  99. // 4. MediaStream 트랙 종료
  100. if (state.userMedia) {
  101. state.userMedia.getTracks().forEach(track => track.stop());
  102. state.userMedia = null;
  103. }
  104. // AudioWorklet 정리
  105. if (state.audioWorkletNode) {
  106. state.audioWorkletNode.port.postMessage('stop');
  107. state.audioWorkletNode.disconnect();
  108. state.audioWorkletNode = null;
  109. }
  110. // 기타 상태 초기화
  111. if (state.silenceTimeout) {
  112. clearTimeout(state.silenceTimeout);
  113. state.silenceTimeout = null;
  114. }
  115. state.audioBuffer = [];
  116. console.log('AudioManager fully destroyed');
  117. },
  118. };
  119. const getPCMData = (inputBuffer) => {
  120. const pcmData = new Int16Array(inputBuffer.length);
  121. // Float32를 Int16으로 변환 (클리핑 포함)
  122. for (let i = 0; i < inputBuffer.length; i++) {
  123. const sample = Math.max(-1, Math.min(1, inputBuffer[i])); // 클리핑
  124. pcmData[i] = sample * 32767; // -32768 ~ 32767 범위로 스케일링
  125. }
  126. // 원시 PCM 데이터를 ArrayBuffer로 변환
  127. return pcmData;
  128. }
  129. export function encodeWAV(pcmData, sampleRate = 16000) {
  130. const numChannels = 1;
  131. const bytesPerSample = 2;
  132. const blockAlign = numChannels * bytesPerSample;
  133. const byteRate = sampleRate * blockAlign;
  134. const dataLength = pcmData.length * bytesPerSample;
  135. const buffer = new ArrayBuffer(44 + dataLength);
  136. const view = new DataView(buffer);
  137. // RIFF 헤더
  138. writeString(view, 0, 'RIFF');
  139. view.setUint32(4, 36 + dataLength, true); // chunk size
  140. writeString(view, 8, 'WAVE');
  141. // fmt 서브 청크
  142. writeString(view, 12, 'fmt ');
  143. view.setUint32(16, 16, true); // 서브청크1 크기
  144. view.setUint16(20, 1, true); // 오디오 포맷(1: PCM)
  145. view.setUint16(22, numChannels, true);
  146. view.setUint32(24, sampleRate, true);
  147. view.setUint32(28, byteRate, true);
  148. view.setUint16(32, blockAlign, true);
  149. view.setUint16(34, bytesPerSample * 8, true);
  150. // data 서브 청크
  151. writeString(view, 36, 'data');
  152. view.setUint32(40, dataLength, true);
  153. // PCM 데이터 복사
  154. let offset = 44;
  155. for (let i = 0; i < pcmData.length; i++, offset += 2) {
  156. view.setInt16(offset, pcmData[i], true);
  157. }
  158. return new Blob([buffer], { type: 'audio/wav' });
  159. }
  160. function writeString(view, offset, string) {
  161. for (let i = 0; i < string.length; i++) {
  162. view.setUint8(offset + i, string.charCodeAt(i));
  163. }
  164. }