# duzon/whisper-back
							import asyncio
import numpy as np
import soundfile as sf
import torch
import websockets
import whisper
from io import BytesIO

# Load the Whisper model once at import time; the module-level `model` is
# used by process_audio for every incoming audio message.
model = whisper.load_model("large")  # pick the desired model size here

# Sample rate (Hz) of the waveform that Whisper's feature extraction assumes.
_WHISPER_SAMPLE_RATE = 16000

# Created lazily inside the running event loop (see process_audio). It keeps
# use of the shared module-level model serialized across connections, which
# the original code got implicitly by blocking the event loop.
_model_lock = None


def _prepare_waveform(audio, sample_rate):
    """Return *audio* as a mono float32 waveform at 16 kHz."""
    if audio.ndim > 1:
        # soundfile returns (frames, channels) for multi-channel input.
        audio = audio.mean(axis=1)
    if sample_rate != _WHISPER_SAMPLE_RATE and audio.size:
        frame_count = audio.shape[0]
        target_len = max(
            1, int(round(frame_count * _WHISPER_SAMPLE_RATE / sample_rate))
        )
        src_times = np.arange(frame_count) / sample_rate
        dst_times = np.arange(target_len) / _WHISPER_SAMPLE_RATE
        # Plain linear interpolation (no anti-alias filter): crude, but it
        # keeps the timing correct instead of feeding e.g. 48 kHz samples to
        # a model that interprets them as 16 kHz.
        audio = np.interp(dst_times, src_times, audio)
    return np.ascontiguousarray(audio, dtype=np.float32)


def _transcribe_chunk(audio_data):
    """Decode one binary audio message and return its stripped transcript.

    Synchronous and potentially slow; meant to be run in a worker thread.
    """
    # NOTE(review): the original comments describe the payload as WebM, but
    # soundfile decodes through libsndfile, whose documented formats do not
    # appear to include WebM -- confirm what the client actually sends.
    with BytesIO(audio_data) as audio_file:
        audio, sample_rate = sf.read(audio_file, dtype='float32')

    audio = _prepare_waveform(audio, sample_rate)
    result = model.transcribe(audio, language="ko", fp16=torch.cuda.is_available())
    return result['text'].strip()


async def process_audio(websocket):
    """Handle one WebSocket client: transcribe each message and send the text back.

    Every message received on *websocket* is decoded and transcribed (Korean)
    in a worker thread so the event loop stays free to answer keepalive pings
    and serve other clients while the model runs. The transcript is printed
    and sent back on the same connection.

    Any exception (including a closed connection) is reported on stdout and
    ends the handler; nothing is re-raised.
    """
    global _model_lock
    print("클라이언가 연결되었습니다.")

    if _model_lock is None:
        # Instantiate here, inside the running loop, rather than at import time.
        _model_lock = asyncio.Lock()
    loop = asyncio.get_running_loop()

    try:
        async for message in websocket:
            # One transcription at a time on the shared model; waiting for the
            # lock or the worker thread does not block the event loop.
            async with _model_lock:
                text = await loop.run_in_executor(None, _transcribe_chunk, message)

            print(text)  # echo the transcript to the console

            # Send the transcript back to the client.
            await websocket.send(text)

    except websockets.ConnectionClosed:
        print("클라이언트 연결이 종료되었습니다.")
    except Exception as e:
        print(f"오류 발생: {e}")
    finally:
        print("WebSocket 연결 정리 완료")

async def main():
    """Start the WebSocket server on localhost:8765 and wait until it closes."""
    # Keepalive settings: ping interval and how long to wait for the reply.
    keepalive = {"ping_interval": 20, "ping_timeout": 10}

    ws_server = await websockets.serve(process_audio, "localhost", 8765, **keepalive)
    print("WebSocket 서버가 localhost:8765에서 실행 중...")

    # Stay alive until the server has been closed.
    await ws_server.wait_closed()

# Start the server only when this file is executed as a script, not on import.
if __name__ == "__main__":
    asyncio.run(main())