from fastapi import FastAPI, UploadFile, File
from transformers import pipeline
import torch
import tempfile
import os
import time
from fastapi.responses import HTMLResponse
# Define FastAPI app
app = FastAPI()
# Load the Whisper model once during startup
device = 0 if torch.cuda.is_available() else -1 # Use GPU if available, otherwise CPU
asr_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-small", device=device)  # Initialize Whisper model
# To pin the output language, pass it via generate_kwargs rather than as a
# pipeline argument (the pipeline constructor has no `language` parameter):
# asr_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-small",
#                         device=device, generate_kwargs={"language": "portuguese"})
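# A hedged sketch, not tuned settings: for clips longer than Whisper's 30 s
# window, the transformers ASR pipeline also accepts chunked-decoding options.
# asr_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-small",
#                         device=device, chunk_length_s=30, batch_size=8)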
# Basic GET endpoint
@app.get("/")
def read_root():
return {"message": "Welcome to the FastAPI app on Hugging Face Spaces!"}
# POST endpoint to transcribe audio
@app.post("/transcribe/")
async def transcribe_audio(file: UploadFile = File(...)):
    start_time = time.time()

    # Save the uploaded file using a temporary file manager
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
        temp_audio_file.write(await file.read())
        temp_file_path = temp_audio_file.name

    try:
        # Transcribe the audio; return_timestamps=True enables long-form generation
        transcription_start = time.time()
        transcription = asr_pipeline(temp_file_path, return_timestamps=True)
        transcription_end = time.time()
    finally:
        # Clean up the temporary file even if transcription fails
        os.remove(temp_file_path)

    # Log time durations
    end_time = time.time()
    print(f"Time to transcribe audio: {transcription_end - transcription_start:.4f} seconds")
    print(f"Total execution time: {end_time - start_time:.4f} seconds")

    return {"transcription": transcription["text"]}
@app.get("/playground/", response_class=HTMLResponse)
def playground():
html_content = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Voice Recorder</title>
</head>
<body>
<h1>Record your voice</h1>
<button id="startBtn">Start Recording</button>
<button id="stopBtn" disabled>Stop Recording</button>
<p id="status">Press start to record your voice...</p>
<audio id="audioPlayback" controls style="display:none;"></audio>
<script>
let mediaRecorder;
let audioChunks = [];
const startBtn = document.getElementById('startBtn');
const stopBtn = document.getElementById('stopBtn');
const status = document.getElementById('status');
const audioPlayback = document.getElementById('audioPlayback');
// Start Recording
startBtn.addEventListener('click', async () => {
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
mediaRecorder = new MediaRecorder(stream);
mediaRecorder.start();
status.textContent = 'Recording...';
startBtn.disabled = true;
stopBtn.disabled = false;
mediaRecorder.ondataavailable = event => {
audioChunks.push(event.data);
};
});
// Stop Recording
stopBtn.addEventListener('click', () => {
mediaRecorder.stop();
mediaRecorder.onstop = async () => {
status.textContent = 'Recording stopped. Preparing to send...';
const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
const audioUrl = URL.createObjectURL(audioBlob);
audioPlayback.src = audioUrl;
audioPlayback.style.display = 'block';
audioChunks = [];
// Send audio blob to FastAPI endpoint
const formData = new FormData();
formData.append('file', audioBlob, 'recording.wav');
const response = await fetch('/transcribe/', {
method: 'POST',
body: formData,
});
const result = await response.json();
status.textContent = 'Transcription: ' + result.transcription;
};
startBtn.disabled = false;
stopBtn.disabled = true;
});
</script>
</body>
</html>
"""
return HTMLResponse(content=html_content)
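
# The playground UI is served at /playground/ (e.g. http://localhost:7860/playground/
# when running locally) and posts recordings to the /transcribe/ endpoint above.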
# If running as the main module, start Uvicorn
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
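
# A minimal client sketch, assuming the third-party `requests` package and a
# placeholder file sample.wav:
#
#   import requests
#   with open("sample.wav", "rb") as f:
#       resp = requests.post("http://localhost:7860/transcribe/", files={"file": f})
#   print(resp.json()["transcription"])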