8 months ago · 6ea182479e
--- a/.DS_Store
+++ b/.DS_Store
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,14 @@
 
				+### venv template
			
 
				+# Virtualenv
			
 
				+# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
			
 
				+.Python
			
 
				+[Bb]in
			
 
				+[Ii]nclude
			
 
				+[Ll]ib
			
 
				+[Ll]ib64
			
 
				+[Ll]ocal
			
 
				+[Ss]cripts
			
 
				+pyvenv.cfg
			
 
				+.venv
			
 
				+pip-selfcheck.json
			
 
				+
			
--- a/.idea/.gitignore
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
 
				+# 디폴트 무시된 파일
			
 
				+/shelf/
			
 
				+/workspace.xml
			
 
				+# 에디터 기반 HTTP 클라이언트 요청
			
 
				+/httpRequests/
			
 
				+# Datasource local storage ignored files
			
 
				+/dataSources/
			
 
				+/dataSources.local.xml
			
--- a/.idea/git_toolbox_blame.xml
+++ b/.idea/git_toolbox_blame.xml
@@ -0,0 +1,6 @@
 
				+<?xml version="1.0" encoding="UTF-8"?>
			
 
				+<project version="4">
			
 
				+  <component name="GitToolBoxBlameSettings">
			
 
				+    <option name="version" value="2" />
			
 
				+  </component>
			
 
				+</project>
			
--- a/.idea/inspectionProfiles/Project_Default.xml
+++ b/.idea/inspectionProfiles/Project_Default.xml
@@ -0,0 +1,14 @@
 
				+<component name="InspectionProjectProfileManager">
			
 
				+  <profile version="1.0">
			
 
				+    <option name="myName" value="Project Default" />
			
 
				+    <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
			
 
				+      <option name="ignoredPackages">
			
 
				+        <value>
			
 
				+          <list size="1">
			
 
				+            <item index="0" class="java.lang.String" itemvalue="numpy" />
			
 
				+          </list>
			
 
				+        </value>
			
 
				+      </option>
			
 
				+    </inspection_tool>
			
 
				+  </profile>
			
 
				+</component>
			
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -0,0 +1,12 @@
 
				+<?xml version="1.0" encoding="UTF-8"?>
			
 
				+<project version="4">
			
 
				+  <component name="Black">
			
 
				+    <option name="sdkName" value="Python 3.11" />
			
 
				+  </component>
			
 
				+  <component name="ComposerSettings">
			
 
				+    <execution />
			
 
				+  </component>
			
 
				+  <component name="ProjectRootManager" version="2" languageLevel="JDK_21" default="true" project-jdk-name="Python 3.11 (whisper)" project-jdk-type="Python SDK">
			
 
				+    <output url="file://$PROJECT_DIR$/out" />
			
 
				+  </component>
			
 
				+</project>
			
--- a/.idea/modules.xml
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
 
				+<?xml version="1.0" encoding="UTF-8"?>
			
 
				+<project version="4">
			
 
				+  <component name="ProjectModuleManager">
			
 
				+    <modules>
			
 
				+      <module fileurl="file://$PROJECT_DIR$/.idea/whisper.iml" filepath="$PROJECT_DIR$/.idea/whisper.iml" />
			
 
				+    </modules>
			
 
				+  </component>
			
 
				+</project>
			
--- a/.idea/whisper.iml
+++ b/.idea/whisper.iml
@@ -0,0 +1,11 @@
 
				+<?xml version="1.0" encoding="UTF-8"?>
			
 
				+<module type="JAVA_MODULE" version="4">
			
 
				+  <component name="NewModuleRootManager" inherit-compiler-output="true">
			
 
				+    <exclude-output />
			
 
				+    <content url="file://$MODULE_DIR$">
			
 
				+      <excludeFolder url="file://$MODULE_DIR$/.venv" />
			
 
				+    </content>
			
 
				+    <orderEntry type="inheritedJdk" />
			
 
				+    <orderEntry type="sourceFolder" forTests="false" />
			
 
				+  </component>
			
 
				+</module>
			
--- a/__pycache__/stt_vosk.cpython-311.pyc
+++ b/__pycache__/stt_vosk.cpython-311.pyc
--- a/__pycache__/whisper_api.cpython-311.pyc
+++ b/__pycache__/whisper_api.cpython-311.pyc
--- a/backup/__pycache__/whisper_api.cpython-311.pyc
+++ b/backup/__pycache__/whisper_api.cpython-311.pyc
--- a/backup/whisper_api.py
+++ b/backup/whisper_api.py
@@ -0,0 +1,61 @@
 
				+from fastapi import FastAPI, WebSocket
			
 
				+import whisper
			
 
				+import numpy as np
			
 
				+import scipy.io.wavfile as wavfile
			
 
				+import base64
			
 
				+import logging
			
 
				+import os
			
 
				+
			
 
# ASGI application; the websocket and HTTP routes below are registered on it.
app = FastAPI()
# Whisper checkpoint, loaded once at import time and shared by every connection.
model = whisper.load_model("large")  # NOTE(review): the original comment said "use medium for better accuracy", but the code loads "large" - confirm which is intended
logging.basicConfig(level=logging.INFO)
			
 
				+
			
 
				+@app.websocket("/audio-stream")
			
 
				+async def transcribe_audio(websocket: WebSocket):
			
 
				+    await websocket.accept()
			
 
				+    buffer = bytearray()  # 전체 오디오 데이터 저장
			
 
				+    sample_rate = 16000
			
 
				+
			
 
				+    try:
			
 
				+        while True:
			
 
				+            data = await websocket.receive_text()
			
 
				+            if data == "STOP":  # 종료 신호 수신
			
 
				+                break
			
 
				+            audio_data = base64.b64decode(data)
			
 
				+            buffer.extend(audio_data)
			
 
				+            logging.info(f"Received audio chunk of length: {len(audio_data)}")
			
 
				+
			
 
				+        # 녹음 종료 후 파일 저장 및 처리
			
 
				+        if len(buffer) > 0:
			
 
				+            audio_np = np.frombuffer(buffer, dtype=np.int16)
			
 
				+            output_file = "recorded_audio.wav"
			
 
				+            wavfile.write(output_file, sample_rate, audio_np)
			
 
				+            logging.info(f"Saved audio file: {output_file}, size: {os.path.getsize(output_file)} bytes")
			
 
				+
			
 
				+            # STT: 한국어 음성 인식
			
 
				+            stt_result = model.transcribe(output_file, language="ko")
			
 
				+            transcription = stt_result["text"]
			
 
				+            language = stt_result["language"]
			
 
				+
			
 
				+            if transcription.strip():
			
 
				+                logging.info(f"Transcription: {transcription} [lang: {language}]")
			
 
				+                await websocket.send_json({
			
 
				+                    "text": transcription,
			
 
				+                    "language": language,
			
 
				+                    # "translated_text": translated_text,
			
 
				+                    "audio_url": "/audio"
			
 
				+                })
			
 
				+            else:
			
 
				+                await websocket.send_json({"text": "No transcription available", "language": "unknown"})
			
 
				+        else:
			
 
				+            await websocket.send_json({"text": "No audio data received", "language": "unknown"})
			
 
				+
			
 
				+    except Exception as e:
			
 
				+        logging.error(f"Error: {e}")
			
 
				+        await websocket.send_json({"text": f"Error: {str(e)}", "language": "unknown"})
			
 
				+    finally:
			
 
				+        await websocket.close()
			
 
				+
			
 
				+@app.get("/health")
			
 
				+async def health_check():
			
 
				+    return {"status": "healthy"}
			
--- a/backup/whisper_api_v2.py
+++ b/backup/whisper_api_v2.py
@@ -0,0 +1,57 @@
 
				+import asyncio
			
 
				+import numpy as np
			
 
				+import soundfile as sf
			
 
				+import torch
			
 
				+import websockets
			
 
				+import whisper
			
 
				+from io import BytesIO
			
 
				+
			
 
# Load the Whisper model once at import time; it is shared by all connections.
model = whisper.load_model("large")  # pick the desired model size here
			
 
				+
			
 
				+async def process_audio(websocket):
			
 
				+    print("클라이언가 연결되었습니다.")
			
 
				+    
			
 
				+    transcription = ['']
			
 
				+    try:
			
 
				+        async for message in websocket:
			
 
				+            # 브라우저에서 받은 오디오 데이터 (webm 형식)
			
 
				+            audio_data = message
			
 
				+
			
 
				+            # webm 데이터를 메모리에서 처리
			
 
				+            with BytesIO(audio_data) as audio_file:
			
 
				+                # soundfile로 webm을 읽고 PCM 데이터로 변환
			
 
				+                audio, sample_rate = sf.read(audio_file, dtype='float32')
			
 
				+
			
 
				+                # Whisper로 텍스트 변환
			
 
				+                result = model.transcribe(audio, language="ko", fp16=torch.cuda.is_available())
			
 
				+                text = result['text'].strip()
			
 
				+
			
 
				+                # 변환된 텍스트를 리스트에 추가
			
 
				+                transcription.append(text)
			
 
				+                print(text)  # 콘솔에 출력
			
 
				+
			
 
				+                # 클라이언트로 텍스트 전송
			
 
				+                await websocket.send(text)
			
 
				+
			
 
				+    except websockets.ConnectionClosed:
			
 
				+        print("클라이언트 연결이 종료되었습니다.")
			
 
				+    except Exception as e:
			
 
				+        print(f"오류 발생: {e}")
			
 
				+    finally:
			
 
				+        print("WebSocket 연결 정리 완료")
			
 
				+
			
 
				+async def main():
			
 
				+    # WebSocket 서버 시작
			
 
				+    server = await websockets.serve(
			
 
				+        process_audio,
			
 
				+        "localhost",
			
 
				+        8765,
			
 
				+        ping_interval=20,  # 클라이언트와의 연결 상태 확인
			
 
				+        ping_timeout=10    # 타임아웃 설정
			
 
				+    )
			
 
				+    print("WebSocket 서버가 localhost:8765에서 실행 중...")
			
 
				+    await server.wait_closed()
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    asyncio.run(main())
			
--- a/model/.DS_Store
+++ b/model/.DS_Store
--- a/model/vosk-model-small-ko-0.22.zip
+++ b/model/vosk-model-small-ko-0.22.zip
--- a/model/vosk-model-small-ko-0.22/README
+++ b/model/vosk-model-small-ko-0.22/README
@@ -0,0 +1 @@
 
				+Small Korean for Vosk (Android, RPi, other small devices)
			
--- a/model/vosk-model-small-ko-0.22/am/final.mdl
+++ b/model/vosk-model-small-ko-0.22/am/final.mdl
--- a/model/vosk-model-small-ko-0.22/conf/mfcc.conf
+++ b/model/vosk-model-small-ko-0.22/conf/mfcc.conf
@@ -0,0 +1,7 @@
 
				+--use-energy=false
			
 
				+--num-mel-bins=40
			
 
				+--num-ceps=40
			
 
				+--low-freq=20
			
 
				+--high-freq=7600
			
 
				+--allow-upsample=true
			
 
				+--allow-downsample=true
			
--- a/model/vosk-model-small-ko-0.22/conf/model.conf
+++ b/model/vosk-model-small-ko-0.22/conf/model.conf
@@ -0,0 +1,10 @@
 
				+--min-active=200
			
 
				+--max-active=3000
			
 
				+--beam=10.0
			
 
				+--lattice-beam=2.0
			
 
				+--acoustic-scale=1.0
			
 
				+--frame-subsampling-factor=3
			
 
				+--endpoint.silence-phones=1:2:3:4:5:6:7:8:9:10
			
 
				+--endpoint.rule2.min-trailing-silence=0.5
			
 
				+--endpoint.rule3.min-trailing-silence=1.0
			
 
				+--endpoint.rule4.min-trailing-silence=2.0
			
--- a/model/vosk-model-small-ko-0.22/graph/Gr.fst
+++ b/model/vosk-model-small-ko-0.22/graph/Gr.fst
--- a/model/vosk-model-small-ko-0.22/graph/HCLr.fst
+++ b/model/vosk-model-small-ko-0.22/graph/HCLr.fst
--- a/model/vosk-model-small-ko-0.22/graph/disambig_tid.int
+++ b/model/vosk-model-small-ko-0.22/graph/disambig_tid.int
@@ -0,0 +1,22 @@
 
				+11887
			
 
				+11888
			
 
				+11889
			
 
				+11890
			
 
				+11891
			
 
				+11892
			
 
				+11893
			
 
				+11894
			
 
				+11895
			
 
				+11896
			
 
				+11897
			
 
				+11898
			
 
				+11899
			
 
				+11900
			
 
				+11901
			
 
				+11902
			
 
				+11903
			
 
				+11904
			
 
				+11905
			
 
				+11906
			
 
				+11907
			
 
				+11908
			
--- a/model/vosk-model-small-ko-0.22/graph/phones.txt
+++ b/model/vosk-model-small-ko-0.22/graph/phones.txt
@@ -0,0 +1,245 @@
 
				+<eps> 0
			
 
				+sp 1
			
 
				+sp_B 2
			
 
				+sp_E 3
			
 
				+sp_I 4
			
 
				+sp_S 5
			
 
				+spn 6
			
 
				+spn_B 7
			
 
				+spn_E 8
			
 
				+spn_I 9
			
 
				+spn_S 10
			
 
				+o:_B 11
			
 
				+o:_E 12
			
 
				+o:_I 13
			
 
				+o:_S 14
			
 
				+py_B 15
			
 
				+py_E 16
			
 
				+py_I 17
			
 
				+py_S 18
			
 
				+ch_B 19
			
 
				+ch_E 20
			
 
				+ch_I 21
			
 
				+ch_S 22
			
 
				+ry_B 23
			
 
				+ry_E 24
			
 
				+ry_I 25
			
 
				+ry_S 26
			
 
				+ts_B 27
			
 
				+ts_E 28
			
 
				+ts_I 29
			
 
				+ts_S 30
			
 
				+u:_B 31
			
 
				+u:_E 32
			
 
				+u:_I 33
			
 
				+u:_S 34
			
 
				+gy_B 35
			
 
				+gy_E 36
			
 
				+gy_I 37
			
 
				+gy_S 38
			
 
				+ky_B 39
			
 
				+ky_E 40
			
 
				+ky_I 41
			
 
				+ky_S 42
			
 
				+my_B 43
			
 
				+my_E 44
			
 
				+my_I 45
			
 
				+my_S 46
			
 
				+sh_B 47
			
 
				+sh_E 48
			
 
				+sh_I 49
			
 
				+sh_S 50
			
 
				+N_B 51
			
 
				+N_E 52
			
 
				+N_I 53
			
 
				+N_S 54
			
 
				+a:_B 55
			
 
				+a:_E 56
			
 
				+a:_I 57
			
 
				+a:_S 58
			
 
				+by_B 59
			
 
				+by_E 60
			
 
				+by_I 61
			
 
				+by_S 62
			
 
				+e:_B 63
			
 
				+e:_E 64
			
 
				+e:_I 65
			
 
				+e:_S 66
			
 
				+a_B 67
			
 
				+a_E 68
			
 
				+a_I 69
			
 
				+a_S 70
			
 
				+b_B 71
			
 
				+b_E 72
			
 
				+b_I 73
			
 
				+b_S 74
			
 
				+d_B 75
			
 
				+d_E 76
			
 
				+d_I 77
			
 
				+d_S 78
			
 
				+e_B 79
			
 
				+e_E 80
			
 
				+e_I 81
			
 
				+e_S 82
			
 
				+f_B 83
			
 
				+f_E 84
			
 
				+f_I 85
			
 
				+f_S 86
			
 
				+g_B 87
			
 
				+g_E 88
			
 
				+g_I 89
			
 
				+g_S 90
			
 
				+hy_B 91
			
 
				+hy_E 92
			
 
				+hy_I 93
			
 
				+hy_S 94
			
 
				+h_B 95
			
 
				+h_E 96
			
 
				+h_I 97
			
 
				+h_S 98
			
 
				+i_B 99
			
 
				+i_E 100
			
 
				+i_I 101
			
 
				+i_S 102
			
 
				+j_B 103
			
 
				+j_E 104
			
 
				+j_I 105
			
 
				+j_S 106
			
 
				+k_B 107
			
 
				+k_E 108
			
 
				+k_I 109
			
 
				+k_S 110
			
 
				+i:_B 111
			
 
				+i:_E 112
			
 
				+i:_I 113
			
 
				+i:_S 114
			
 
				+m_B 115
			
 
				+m_E 116
			
 
				+m_I 117
			
 
				+m_S 118
			
 
				+n_B 119
			
 
				+n_E 120
			
 
				+n_I 121
			
 
				+n_S 122
			
 
				+o_B 123
			
 
				+o_E 124
			
 
				+o_I 125
			
 
				+o_S 126
			
 
				+p_B 127
			
 
				+p_E 128
			
 
				+p_I 129
			
 
				+p_S 130
			
 
				+q_B 131
			
 
				+q_E 132
			
 
				+q_I 133
			
 
				+q_S 134
			
 
				+r_B 135
			
 
				+r_E 136
			
 
				+r_I 137
			
 
				+r_S 138
			
 
				+s_B 139
			
 
				+s_E 140
			
 
				+s_I 141
			
 
				+s_S 142
			
 
				+t_B 143
			
 
				+t_E 144
			
 
				+t_I 145
			
 
				+t_S 146
			
 
				+u_B 147
			
 
				+u_E 148
			
 
				+u_I 149
			
 
				+u_S 150
			
 
				+w_B 151
			
 
				+w_E 152
			
 
				+w_I 153
			
 
				+w_S 154
			
 
				+y_B 155
			
 
				+y_E 156
			
 
				+y_I 157
			
 
				+y_S 158
			
 
				+z_B 159
			
 
				+z_E 160
			
 
				+z_I 161
			
 
				+z_S 162
			
 
				+ny_B 163
			
 
				+ny_E 164
			
 
				+ny_I 165
			
 
				+ny_S 166
			
 
				+#0 167
			
 
				+#1 168
			
 
				+#2 169
			
 
				+#3 170
			
 
				+#4 171
			
 
				+#5 172
			
 
				+#6 173
			
 
				+#7 174
			
 
				+#8 175
			
 
				+#9 176
			
 
				+#10 177
			
 
				+#11 178
			
 
				+#12 179
			
 
				+#13 180
			
 
				+#14 181
			
 
				+#15 182
			
 
				+#16 183
			
 
				+#17 184
			
 
				+#18 185
			
 
				+#19 186
			
 
				+#20 187
			
 
				+#21 188
			
 
				+#22 189
			
 
				+#23 190
			
 
				+#24 191
			
 
				+#25 192
			
 
				+#26 193
			
 
				+#27 194
			
 
				+#28 195
			
 
				+#29 196
			
 
				+#30 197
			
 
				+#31 198
			
 
				+#32 199
			
 
				+#33 200
			
 
				+#34 201
			
 
				+#35 202
			
 
				+#36 203
			
 
				+#37 204
			
 
				+#38 205
			
 
				+#39 206
			
 
				+#40 207
			
 
				+#41 208
			
 
				+#42 209
			
 
				+#43 210
			
 
				+#44 211
			
 
				+#45 212
			
 
				+#46 213
			
 
				+#47 214
			
 
				+#48 215
			
 
				+#49 216
			
 
				+#50 217
			
 
				+#51 218
			
 
				+#52 219
			
 
				+#53 220
			
 
				+#54 221
			
 
				+#55 222
			
 
				+#56 223
			
 
				+#57 224
			
 
				+#58 225
			
 
				+#59 226
			
 
				+#60 227
			
 
				+#61 228
			
 
				+#62 229
			
 
				+#63 230
			
 
				+#64 231
			
 
				+#65 232
			
 
				+#66 233
			
 
				+#67 234
			
 
				+#68 235
			
 
				+#69 236
			
 
				+#70 237
			
 
				+#71 238
			
 
				+#72 239
			
 
				+#73 240
			
 
				+#74 241
			
 
				+#75 242
			
 
				+#76 243
			
 
				+#77 244
			
--- a/model/vosk-model-small-ko-0.22/graph/phones/word_boundary.int
+++ b/model/vosk-model-small-ko-0.22/graph/phones/word_boundary.int
@@ -0,0 +1,194 @@
 
				+1 nonword
			
 
				+2 begin
			
 
				+3 end
			
 
				+4 internal
			
 
				+5 singleton
			
 
				+6 nonword
			
 
				+7 begin
			
 
				+8 end
			
 
				+9 internal
			
 
				+10 singleton
			
 
				+11 begin
			
 
				+12 end
			
 
				+13 internal
			
 
				+14 singleton
			
 
				+15 begin
			
 
				+16 end
			
 
				+17 internal
			
 
				+18 singleton
			
 
				+19 begin
			
 
				+20 end
			
 
				+21 internal
			
 
				+22 singleton
			
 
				+23 begin
			
 
				+24 end
			
 
				+25 internal
			
 
				+26 singleton
			
 
				+27 begin
			
 
				+28 end
			
 
				+29 internal
			
 
				+30 singleton
			
 
				+31 begin
			
 
				+32 end
			
 
				+33 internal
			
 
				+34 singleton
			
 
				+35 begin
			
 
				+36 end
			
 
				+37 internal
			
 
				+38 singleton
			
 
				+39 begin
			
 
				+40 end
			
 
				+41 internal
			
 
				+42 singleton
			
 
				+43 begin
			
 
				+44 end
			
 
				+45 internal
			
 
				+46 singleton
			
 
				+47 begin
			
 
				+48 end
			
 
				+49 internal
			
 
				+50 singleton
			
 
				+51 begin
			
 
				+52 end
			
 
				+53 internal
			
 
				+54 singleton
			
 
				+55 begin
			
 
				+56 end
			
 
				+57 internal
			
 
				+58 singleton
			
 
				+59 begin
			
 
				+60 end
			
 
				+61 internal
			
 
				+62 singleton
			
 
				+63 begin
			
 
				+64 end
			
 
				+65 internal
			
 
				+66 singleton
			
 
				+67 begin
			
 
				+68 end
			
 
				+69 internal
			
 
				+70 singleton
			
 
				+71 begin
			
 
				+72 end
			
 
				+73 internal
			
 
				+74 singleton
			
 
				+75 begin
			
 
				+76 end
			
 
				+77 internal
			
 
				+78 singleton
			
 
				+79 begin
			
 
				+80 end
			
 
				+81 internal
			
 
				+82 singleton
			
 
				+83 begin
			
 
				+84 end
			
 
				+85 internal
			
 
				+86 singleton
			
 
				+87 begin
			
 
				+88 end
			
 
				+89 internal
			
 
				+90 singleton
			
 
				+91 begin
			
 
				+92 end
			
 
				+93 internal
			
 
				+94 singleton
			
 
				+95 begin
			
 
				+96 end
			
 
				+97 internal
			
 
				+98 singleton
			
 
				+99 begin
			
 
				+100 end
			
 
				+101 internal
			
 
				+102 singleton
			
 
				+103 begin
			
 
				+104 end
			
 
				+105 internal
			
 
				+106 singleton
			
 
				+107 begin
			
 
				+108 end
			
 
				+109 internal
			
 
				+110 singleton
			
 
				+111 begin
			
 
				+112 end
			
 
				+113 internal
			
 
				+114 singleton
			
 
				+115 begin
			
 
				+116 end
			
 
				+117 internal
			
 
				+118 singleton
			
 
				+119 begin
			
 
				+120 end
			
 
				+121 internal
			
 
				+122 singleton
			
 
				+123 begin
			
 
				+124 end
			
 
				+125 internal
			
 
				+126 singleton
			
 
				+127 begin
			
 
				+128 end
			
 
				+129 internal
			
 
				+130 singleton
			
 
				+131 begin
			
 
				+132 end
			
 
				+133 internal
			
 
				+134 singleton
			
 
				+135 begin
			
 
				+136 end
			
 
				+137 internal
			
 
				+138 singleton
			
 
				+139 begin
			
 
				+140 end
			
 
				+141 internal
			
 
				+142 singleton
			
 
				+143 begin
			
 
				+144 end
			
 
				+145 internal
			
 
				+146 singleton
			
 
				+147 begin
			
 
				+148 end
			
 
				+149 internal
			
 
				+150 singleton
			
 
				+151 begin
			
 
				+152 end
			
 
				+153 internal
			
 
				+154 singleton
			
 
				+155 begin
			
 
				+156 end
			
 
				+157 internal
			
 
				+158 singleton
			
 
				+159 begin
			
 
				+160 end
			
 
				+161 internal
			
 
				+162 singleton
			
 
				+163 begin
			
 
				+164 end
			
 
				+165 internal
			
 
				+166 singleton
			
 
				+167 begin
			
 
				+168 end
			
 
				+169 internal
			
 
				+170 singleton
			
 
				+171 begin
			
 
				+172 end
			
 
				+173 internal
			
 
				+174 singleton
			
 
				+175 begin
			
 
				+176 end
			
 
				+177 internal
			
 
				+178 singleton
			
 
				+179 begin
			
 
				+180 end
			
 
				+181 internal
			
 
				+182 singleton
			
 
				+183 begin
			
 
				+184 end
			
 
				+185 internal
			
 
				+186 singleton
			
 
				+187 begin
			
 
				+188 end
			
 
				+189 internal
			
 
				+190 singleton
			
 
				+191 begin
			
 
				+192 end
			
 
				+193 internal
			
 
				+194 singleton
			
--- a/model/vosk-model-small-ko-0.22/ivector/final.dubm
+++ b/model/vosk-model-small-ko-0.22/ivector/final.dubm
--- a/model/vosk-model-small-ko-0.22/ivector/final.ie
+++ b/model/vosk-model-small-ko-0.22/ivector/final.ie
--- a/model/vosk-model-small-ko-0.22/ivector/final.mat
+++ b/model/vosk-model-small-ko-0.22/ivector/final.mat
--- a/model/vosk-model-small-ko-0.22/ivector/global_cmvn.stats
+++ b/model/vosk-model-small-ko-0.22/ivector/global_cmvn.stats
@@ -0,0 +1,3 @@
 
				+ [
			
 
				+  4.89787e+09 -2.547588e+08 -9.749381e+08 -1.499252e+08 -1.627682e+09 -8.685257e+08 -1.170825e+09 -8.504977e+08 -8.113551e+08 -5.766986e+08 -8.823378e+08 -6.37442e+08 -7.082468e+08 -4.637755e+08 -6.869537e+08 -3.207017e+08 -3.676429e+08 -1.143865e+08 -1.631492e+08 -2.88985e+07 -6.519752e+07 -3287053 -1.012396e+07 -539363.5 1.391781e+07 -945243.2 3.484148e+07 -1.114072e+07 2.851438e+07 -5740019 4.085751e+07 3709235 6.381602e+07 3.321586e+07 6.355679e+07 2.838408e+07 2.541039e+07 -1.701099e+07 -1.931351e+07 -1.584893e+07 5.605658e+07 
			
 
				+  4.5304e+11 3.012389e+10 4.317664e+10 3.360795e+10 8.383422e+10 4.58789e+10 5.631164e+10 3.986924e+10 3.850274e+10 3.298692e+10 4.160903e+10 3.546597e+10 2.81605e+10 2.45737e+10 2.473956e+10 1.377452e+10 1.289762e+10 8.188576e+09 5.866402e+09 3.118518e+09 1.717255e+09 5.550795e+08 7.613682e+07 1.925686e+07 2.376889e+08 5.816168e+08 9.89487e+08 1.310478e+09 1.596247e+09 1.774042e+09 1.895401e+09 1.78194e+09 1.83701e+09 1.997663e+09 1.660117e+09 1.198168e+09 1.378164e+09 1.115802e+09 7.841207e+08 5.696465e+08 0 ]
			
--- a/model/vosk-model-small-ko-0.22/ivector/online_cmvn.conf
+++ b/model/vosk-model-small-ko-0.22/ivector/online_cmvn.conf
@@ -0,0 +1 @@
 
				+# configuration file for apply-cmvn-online, used when invoking online2-wav-nnet3-latgen-faster.
			
--- a/model/vosk-model-small-ko-0.22/ivector/splice.conf
+++ b/model/vosk-model-small-ko-0.22/ivector/splice.conf
@@ -0,0 +1,2 @@
 
				+--left-context=3
			
 
				+--right-context=3
			
--- a/recorded_audio.wav
+++ b/recorded_audio.wav
--- a/stt_vosk.py
+++ b/stt_vosk.py
@@ -0,0 +1,49 @@
 
				+# START : uvicorn stt_vosk:app --host 0.0.0.0 --port 8000
			
 
				+
			
 
				+from fastapi import FastAPI, WebSocket
			
 
				+from vosk import Model, KaldiRecognizer
			
 
				+import json
			
 
				+import asyncio
			
 
				+
			
 
# ASGI application; the websocket route below is registered on it.
app = FastAPI()

# Load the Vosk Korean model once at import time.
model = Model("./model/vosk-model-small-ko-0.22")
# Sample rate handed to every KaldiRecognizer.
# NOTE(review): presumably clients must send 16 kHz mono 16-bit PCM - confirm against the client.
SAMPLE_RATE = 16000
			
 
				+
			
 
				+@app.websocket("/audio-stream")
			
 
				+async def websocket_endpoint(websocket: WebSocket):
			
 
				+    await websocket.accept()
			
 
				+    recognizer = KaldiRecognizer(model, SAMPLE_RATE)
			
 
				+    recognizer.SetWords(True)  # 단어 정보 포함
			
 
				+
			
 
				+    print("🎤 클라이언트 연결됨")
			
 
				+    is_speaking = False  # 발화 상태 추적
			
 
				+
			
 
				+    try:
			
 
				+        while True:
			
 
				+            audio_chunk = await websocket.receive_bytes()
			
 
				+
			
 
				+            if recognizer.AcceptWaveform(audio_chunk):
			
 
				+                # 최종 결과 (발화 종료 시 전송)
			
 
				+                final_result = json.loads(recognizer.Result())
			
 
				+                if final_result.get("text"):  # 텍스트가 존재할 경우에만 전송
			
 
				+                    await websocket.send_text(json.dumps({
			
 
				+                        "type": "finalTranscript",
			
 
				+                        "text": final_result["text"]
			
 
				+                    }))
			
 
				+                    is_speaking = False  # 발화 종료
			
 
				+
			
 
				+            else:
			
 
				+                # 부분 결과 (발화 중일 때만 전송)
			
 
				+                partial_result = json.loads(recognizer.PartialResult())
			
 
				+                if partial_result.get("partial"):
			
 
				+                    if not is_speaking:  # 새로운 발화 시작 감지
			
 
				+                        is_speaking = True
			
 
				+                    await websocket.send_text(json.dumps({
			
 
				+                        "type": "interimTranscript",
			
 
				+                        "text": partial_result["partial"]
			
 
				+                    }))
			
 
				+
			
 
				+    except Exception as e:
			
 
				+        print(f"❌ 오류 발생: {e}")
			
--- a/stt_whisper.py
+++ b/stt_whisper.py
@@ -0,0 +1,112 @@
 
				+from fastapi import FastAPI, WebSocket, WebSocketDisconnect
			
 
				+import whisper
			
 
				+import webrtcvad
			
 
				+import numpy as np
			
 
				+from pydub import AudioSegment
			
 
				+import scipy.io.wavfile as wavfile
			
 
				+import io
			
 
				+import asyncio
			
 
				+
			
 
# ASGI application; the websocket route below is registered on it.
app = FastAPI()

# Load the Whisper model (the "tiny" checkpoint, chosen to keep latency low).
model = whisper.load_model("tiny")

# Voice-activity-detection setup
vad = webrtcvad.Vad()
vad.set_mode(1)  # 0-3 (3 is the strictest); 1 is a moderate sensitivity

# Connected clients, keyed by the id string assigned in websocket_endpoint.
clients = {}

# Speech-detection settings
SAMPLE_RATE = 16000  # sample rate required by Whisper and the VAD
FRAME_DURATION_MS = 30  # VAD frame length (choose 10, 20 or 30 ms)
SILENCE_DURATION = 1.0  # how long silence must last (seconds)
			
 
				+
			
 
				+def is_speech_vad(audio_chunk):
			
 
				+    """webrtcvad를 사용한 발화 감지"""
			
 
				+    # 16-bit PCM으로 변환
			
 
				+    audio = np.frombuffer(audio_chunk, dtype=np.int16)
			
 
				+    frame_size = (SAMPLE_RATE * FRAME_DURATION_MS / 1000) * 2  # 바이트 단위
			
 
				+    if len(audio) < frame_size:
			
 
				+        return False
			
 
				+    return vad.is_speech(audio[:frame_size], SAMPLE_RATE)
			
 
				+
			
 
				+async def process_audio_in_memory(audio_buffer):
			
 
				+    """메모리에서 오디오 처리 및 텍스트 변환"""
			
 
				+    audio_segment = AudioSegment.from_file(io.BytesIO(audio_buffer), format="webm")
			
 
				+
			
 
				+    # WAV 변환 (Mono, 16-bit PCM, 16kHz 샘플링)
			
 
				+    audio_segment = audio_segment.set_channels(1).set_frame_rate(SAMPLE_RATE).set_sample_width(2)
			
 
				+
			
 
				+    wav_buffer = io.BytesIO()
			
 
				+    audio_segment.export(wav_buffer, format="wav")
			
 
				+    wav_buffer.seek(0)
			
 
				+
			
 
				+    # Whisper로 음성 인식
			
 
				+    result = model.transcribe(wav_buffer, fp16=False)
			
 
				+    return result["text"]
			
 
				+
			
 
				+@app.websocket("/audio-stream")
			
 
				+async def websocket_endpoint(websocket: WebSocket):
			
 
				+    await websocket.accept()
			
 
				+    client_id = str(id(websocket))
			
 
				+    clients[client_id] = websocket
			
 
				+    print(f"Client {client_id} connected")
			
 
				+
			
 
				+    audio_buffer = bytearray()
			
 
				+    last_speech_time = 0
			
 
				+    silence_start = None
			
 
				+
			
 
				+    try:
			
 
				+        while True:
			
 
				+            # 오디오 청크 수신
			
 
				+            audio_chunk = await websocket.receive_bytes()
			
 
				+
			
 
				+            # 오디오 데이터를 새로운 버퍼에 저장 (기존 데이터 누적 방지)
			
 
				+            audio_buffer = bytearray(audio_chunk)  # 🔥 새로운 데이터로 덮어쓰기
			
 
				+
			
 
				+            # 수신 크기 확인
			
 
				+            print(f"Received data size: {len(audio_chunk)} bytes") 
			
 
				+            
			
 
				+            # 오디오 바이너리 데이터 => 숫자배열(numpy)로 해석
			
 
				+            audio_np = np.frombuffer(audio_buffer, dtype=np.int16).copy()
			
 
				+
			
 
				+            # WAV 파일로 저장 (덮어쓰기)
			
 
				+            output_file = "recorded_audio.wav"
			
 
				+            wavfile.write(output_file, 16000, audio_np)
			
 
				+
			
 
				+            # STT 처리
			
 
				+            stt_result = model.transcribe(output_file, language="ko")
			
 
				+            transcription = stt_result["text"]
			
 
				+            
			
 
				+            # 클라이언트에 데이터 전송
			
 
				+            await websocket.send_text(transcription)
			
 
				+            
			
 
				+            # VAD로 발화 감지
			
 
				+            # if is_speech_vad(audio_chunk):
			
 
				+            #     last_speech_time = asyncio.get_event_loop().time()
			
 
				+            #     silence_start = None
			
 
				+            #     await websocket.send_text("Speech detected...")
			
 
				+            # else:
			
 
				+            #     if silence_start is None:
			
 
				+            #         silence_start = asyncio.get_event_loop().time()
			
 
				+            #     elif (asyncio.get_event_loop().time() - silence_start) > SILENCE_DURATION and last_speech_time > 0:
			
 
				+            #         # 침묵이 지속되면 음성 인식 수행
			
 
				+            #         transcription = await process_audio_in_memory(bytes(audio_buffer))
			
 
				+            #         await websocket.send_text(transcription)
			
 
				+            #         audio_buffer = bytearray()  # 버퍼 초기화
			
 
				+            #         silence_start = None
			
 
				+            #         last_speech_time = 0
			
 
				+
			
 
				+    except WebSocketDisconnect:
			
 
				+        print(f"Client {client_id} disconnected")
			
 
				+        del clients[client_id]
			
 
				+    except Exception as e:
			
 
				+        print(f"Error: {e}")
			
 
				+        await websocket.send_text(f"Error: {str(e)}")
			
 
				+
			
 
if __name__ == "__main__":
    # Allows running this module directly instead of through the uvicorn CLI.
    import uvicorn
    # Serve the app on all interfaces, port 8000.
    uvicorn.run(app, host="0.0.0.0", port=8000)
		`@@ -0,0 +1 @@`
		`+Small Korean for Vosk (Android, RPi, other small devices)`
		`@@ -0,0 +1 @@`
		`+# configuration file for apply-cmvn-online, used when invoking online2-wav-nnet3-latgen-faster.`