from fastapi import FastAPI, WebSocket, WebSocketDisconnect
import whisper
import numpy as np
import scipy.io.wavfile as wavfile
import base64
import logging
import os

app = FastAPI()

# Whisper "large" model, loaded once at import time and shared by all connections.
model = whisper.load_model("large")
logging.basicConfig(level=logging.INFO)


@app.websocket("/audio-stream")
async def transcribe_audio(websocket: WebSocket):
    """Stream-record audio over a WebSocket and transcribe it with Whisper.

    Protocol: the client sends base64-encoded chunks of raw int16 PCM
    (assumed 16 kHz mono -- TODO confirm with the client), then the literal
    text "STOP".  The accumulated audio is written to ``recorded_audio.wav``,
    transcribed with Korean forced as the spoken language, and the result is
    sent back to the client as a JSON message.
    """
    await websocket.accept()
    buffer = bytearray()   # full recording, accumulated across chunks
    sample_rate = 16000    # NOTE(review): assumed client sample rate -- confirm with sender
    disconnected = False   # set when the peer is known to be gone; skip send/close then
    try:
        while True:
            data = await websocket.receive_text()
            if data == "STOP":  # end-of-recording signal from the client
                break
            audio_data = base64.b64decode(data)
            buffer.extend(audio_data)
            logging.info("Received audio chunk of length: %d", len(audio_data))

        # Recording finished: persist to a WAV file and run STT.
        if len(buffer) > 0:
            # int16 samples are 2 bytes each; drop a trailing odd byte so
            # np.frombuffer does not raise on a truncated final chunk.
            usable = len(buffer) - (len(buffer) % 2)
            audio_np = np.frombuffer(bytes(buffer[:usable]), dtype=np.int16)
            output_file = "recorded_audio.wav"
            wavfile.write(output_file, sample_rate, audio_np)
            logging.info(
                "Saved audio file: %s, size: %d bytes",
                output_file,
                os.path.getsize(output_file),
            )

            # Speech-to-text with Korean as the target language.
            stt_result = model.transcribe(output_file, language="ko")
            transcription = stt_result["text"]
            language = stt_result["language"]

            if transcription.strip():
                logging.info("Transcription: %s [lang: %s]", transcription, language)
                await websocket.send_json({
                    "text": transcription,
                    "language": language,
                    "audio_url": "/audio",
                })
            else:
                await websocket.send_json(
                    {"text": "No transcription available", "language": "unknown"}
                )
        else:
            await websocket.send_json(
                {"text": "No audio data received", "language": "unknown"}
            )
    except WebSocketDisconnect:
        # Client went away mid-stream; the socket is unusable, nothing to send.
        disconnected = True
        logging.info("Client disconnected before transcription finished")
    except Exception as e:
        logging.exception("Error while handling audio stream")
        # Best effort: the socket may already be broken, so guard the reply.
        try:
            await websocket.send_json({"text": f"Error: {str(e)}", "language": "unknown"})
        except Exception:
            disconnected = True
    finally:
        if not disconnected:
            # close() on an already-closed socket raises; suppress on cleanup.
            try:
                await websocket.close()
            except Exception:
                pass


@app.get("/health")
async def health_check():
    """Liveness probe: report that the service process is up."""
    return {"status": "healthy"}