```python
from fastapi import FastAPI, WebSocket
import whisper
import numpy as np
import scipy.io.wavfile as wavfile
import base64
import logging
import os

app = FastAPI()
model = whisper.load_model("large")  # load the "large" model for higher accuracy
logging.basicConfig(level=logging.INFO)

@app.websocket("/audio-stream")
async def transcribe_audio(websocket: WebSocket):
    await websocket.accept()
    buffer = bytearray()  # accumulates the full audio stream
    sample_rate = 16000
    try:
        while True:
            data = await websocket.receive_text()
            if data == "STOP":  # client signals end of recording
                break
            audio_data = base64.b64decode(data)
            buffer.extend(audio_data)
            logging.info(f"Received audio chunk of length: {len(audio_data)}")

        # Recording finished: save the buffered audio to a file and process it
        if len(buffer) > 0:
            audio_np = np.frombuffer(buffer, dtype=np.int16)
            output_file = "recorded_audio.wav"
            wavfile.write(output_file, sample_rate, audio_np)
            logging.info(f"Saved audio file: {output_file}, size: {os.path.getsize(output_file)} bytes")

            # STT: Korean speech recognition
            stt_result = model.transcribe(output_file, language="ko")
            transcription = stt_result["text"]
            language = stt_result["language"]

            if transcription.strip():
                logging.info(f"Transcription: {transcription} [lang: {language}]")
                await websocket.send_json({
                    "text": transcription,
                    "language": language,
                    # "translated_text": translated_text,
                    "audio_url": "/audio"
                })
            else:
                await websocket.send_json({"text": "No transcription available", "language": "unknown"})
        else:
            await websocket.send_json({"text": "No audio data received", "language": "unknown"})
    except Exception as e:
        logging.error(f"Error: {e}")
        await websocket.send_json({"text": f"Error: {str(e)}", "language": "unknown"})
    finally:
        await websocket.close()

@app.get("/health")
async def health_check():
    return {"status": "healthy"}
```