Spaces:

legusxyz
/

whisper

Sleeping

File size: 1,582 Bytes

d248be2
b9cd341
 
 
3619068
 
8d7f55f
e32fd5c
edf3685
16d2214
b9cd341
 
 
 
edf3685
 
 
 
e32fd5c
b9cd341
4652073
 
 
 
 
 
 
b9cd341
4652073
 
 
b9cd341
4652073
 
 
 
 
 
 
 
 
 
 
b9cd341
e32fd5c

from fastapi import FastAPI, UploadFile, File
from transformers import pipeline
import torch
import tempfile
import os
import time

# Create the FastAPI application instance that the route decorators attach to
app = FastAPI()

# Build the Whisper pipeline a single time at import so every request reuses it.
# Device index 0 is passed when CUDA is available; otherwise -1 is passed.
if torch.cuda.is_available():
    device = 0
else:
    device = -1
asr_pipeline = pipeline(model="openai/whisper-small", device=device)

# Root GET route
@app.get("/")
def read_root():
    """Return a static welcome message under the ``"message"`` key."""
    greeting = "Welcome to the FastAPI app on Hugging Face Spaces!"
    return {"message": greeting}

# POST endpoint to transcribe audio
@app.post("/transcribe/")
async def transcribe_audio(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file with the module-level ASR pipeline.

    The upload is written to a temporary file on disk, that file's path is
    passed to ``asr_pipeline``, and the temporary file is removed afterwards.
    Removal happens in a ``finally`` block so a failed read or a failed
    transcription does not leave the file behind.

    Args:
        file: The uploaded file from the required form field ``file``.

    Returns:
        A dict ``{"transcription": <text>}`` where ``<text>`` is the ``'text'``
        entry of the value returned by ``asr_pipeline``.

    Raises:
        Any exception raised while reading the upload or by ``asr_pipeline``
        propagates to the caller after the temporary file has been cleaned up.
    """
    # perf_counter is monotonic, so the measured durations cannot go negative
    # or jump if the system clock is adjusted mid-request.
    start_time = time.perf_counter()

    temp_file_path = None
    try:
        # delete=False: the file must outlive this `with` block because the
        # pipeline is given the path after the handle is closed. The upload is
        # stored with a ".wav" suffix regardless of its original filename.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
            temp_file_path = temp_audio_file.name
            temp_audio_file.write(await file.read())

        # NOTE(review): this is a synchronous call inside an async handler, so
        # the event loop is blocked while it runs. Consider offloading it to a
        # worker thread once the pipeline's thread-safety has been confirmed.
        transcription_start = time.perf_counter()
        transcription = asr_pipeline(temp_file_path)
        transcription_end = time.perf_counter()
    finally:
        # Always remove the temporary file, even when an exception is in flight.
        if temp_file_path is not None:
            try:
                os.remove(temp_file_path)
            except FileNotFoundError:
                # Already gone; nothing left to clean up.
                pass

    # Log time durations
    end_time = time.perf_counter()
    print(f"Time to transcribe audio: {transcription_end - transcription_start:.4f} seconds")
    print(f"Total execution time: {end_time - start_time:.4f} seconds")

    return {"transcription": transcription['text']}

# Launch Uvicorn directly when this file is executed as a script
if __name__ == "__main__":
    from uvicorn import run as serve

    # Listen on all interfaces, port 7860
    serve(app, port=7860, host="0.0.0.0")