# duzon/whisper-back
#
# START : uvicorn stt_vosk:app --host 0.0.0.0 --port 8000

import asyncio
import json

from fastapi import FastAPI, WebSocket, WebSocketDisconnect
from vosk import Model, KaldiRecognizer

# Sample rate passed to every KaldiRecognizer (presumably Hz; incoming audio
# is assumed to match -- confirm against the client).
SAMPLE_RATE = 16000

# Load the Vosk Korean model once at import time; each WebSocket connection
# builds its own recognizer on top of this single model instance.
model = Model("./model/vosk-model-small-ko-0.22")

# ASGI application object served by uvicorn.
app = FastAPI()

def _transcribe_chunk(recognizer, audio_chunk):
    """Feed one audio chunk to ``recognizer`` and build the client message.

    Args:
        recognizer: The per-connection ``KaldiRecognizer``.
        audio_chunk: Raw bytes received from the client.

    Returns:
        A dict with ``"type"`` (``"finalTranscript"`` or
        ``"interimTranscript"``) and ``"text"`` keys, or ``None`` when the
        recognizer produced no text for this chunk.
    """
    if recognizer.AcceptWaveform(audio_chunk):
        # Final result: reported once the recognizer ends the utterance.
        message_type = "finalTranscript"
        text = json.loads(recognizer.Result()).get("text")
    else:
        # Partial result: reported while the utterance is still in progress.
        message_type = "interimTranscript"
        text = json.loads(recognizer.PartialResult()).get("partial")

    # Only forward non-empty transcripts.
    if not text:
        return None
    return {"type": message_type, "text": text}


@app.websocket("/audio-stream")
async def websocket_endpoint(websocket: WebSocket):
    """Stream speech recognition results for audio sent over a WebSocket.

    The client sends binary audio frames; for each frame that yields text the
    server replies with a JSON text frame of the form
    ``{"type": "interimTranscript" | "finalTranscript", "text": ...}``.

    The handler returns when the client disconnects or when any other error
    occurs; other errors are printed and not re-raised.

    Args:
        websocket: The incoming WebSocket connection.
    """
    await websocket.accept()
    recognizer = KaldiRecognizer(model, SAMPLE_RATE)
    recognizer.SetWords(True)  # include word-level information

    print("🎤 클라이언트 연결됨")
    loop = asyncio.get_running_loop()

    try:
        while True:
            audio_chunk = await websocket.receive_bytes()

            # Recognition is a synchronous call; run it in the default
            # executor so it does not stall the event loop (and with it every
            # other connected client) while a chunk is being decoded.
            message = await loop.run_in_executor(
                None, _transcribe_chunk, recognizer, audio_chunk
            )
            if message is not None:
                await websocket.send_text(json.dumps(message))

    except WebSocketDisconnect:
        # A client hanging up is the normal way for this loop to end,
        # so it is reported separately from real failures.
        print("👋 클라이언트 연결 종료")
    except Exception as e:
        # Top-level boundary for this connection: report and drop it.
        print(f"❌ 오류 발생: {e}")