import os
import tempfile
import time

import torch
from fastapi import FastAPI, File, UploadFile
from fastapi.responses import HTMLResponse
from transformers import pipeline

app = FastAPI()

# Use the first GPU if one is available, otherwise run on CPU.
device = 0 if torch.cuda.is_available() else -1

# Load the Whisper pipeline once at startup so requests don't pay the
# model-loading cost. Passing the task explicitly keeps the intent clear.
asr_pipeline = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-small",
    device=device,
)
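
# For audio longer than Whisper's ~30 s window, the pipeline supports chunked
# inference. A sketch of an alternative configuration (the chunk_length_s
# value of 30 is an assumption, not a tuned setting):
#
#     asr_pipeline = pipeline(
#         "automatic-speech-recognition",
#         model="openai/whisper-small",
#         device=device,
#         chunk_length_s=30,
#     )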


@app.get("/")
def read_root():
    return {"message": "Welcome to the FastAPI app on Hugging Face Spaces!"}


@app.post("/transcribe/")
async def transcribe_audio(file: UploadFile = File(...)):
    """Accept an uploaded audio file and return its Whisper transcription."""
    start_time = time.time()

    # Persist the upload to a temporary file; the pipeline expects a path.
    # ffmpeg sniffs the actual container format, so the suffix is only a hint.
    suffix = os.path.splitext(file.filename or "")[1] or ".wav"
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_audio_file:
        temp_audio_file.write(await file.read())
        temp_file_path = temp_audio_file.name

    try:
        transcription_start = time.time()
        transcription = asr_pipeline(temp_file_path, return_timestamps=True)
        transcription_end = time.time()
    finally:
        # Clean up the temporary file even if transcription fails.
        os.remove(temp_file_path)

    end_time = time.time()
    print(f"Time to transcribe audio: {transcription_end - transcription_start:.4f} seconds")
    print(f"Total execution time: {end_time - start_time:.4f} seconds")

    return {"transcription": transcription["text"]}
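
# Example request against this endpoint (sample.wav is a placeholder path; the
# port matches the uvicorn.run() call at the bottom of this file):
#
#     curl -X POST -F "file=@sample.wav" http://localhost:7860/transcribe/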


@app.get("/playground/", response_class=HTMLResponse)
def playground():
    """Serve a minimal in-browser recorder that posts audio to /transcribe/."""
    html_content = """
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>Voice Recorder</title>
    </head>
    <body>
        <h1>Record your voice</h1>
        <button id="startBtn">Start Recording</button>
        <button id="stopBtn" disabled>Stop Recording</button>
        <p id="status">Press start to record your voice...</p>

        <audio id="audioPlayback" controls style="display:none;"></audio>
        <script>
            let mediaRecorder;
            let audioChunks = [];

            const startBtn = document.getElementById('startBtn');
            const stopBtn = document.getElementById('stopBtn');
            const status = document.getElementById('status');
            const audioPlayback = document.getElementById('audioPlayback');

            // Start recording
            startBtn.addEventListener('click', async () => {
                const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
                mediaRecorder = new MediaRecorder(stream);
                audioChunks = [];

                // Attach the data handler before starting so no chunk is missed.
                mediaRecorder.ondataavailable = event => {
                    audioChunks.push(event.data);
                };
                mediaRecorder.start();

                status.textContent = 'Recording...';
                startBtn.disabled = true;
                stopBtn.disabled = false;
            });

            // Stop recording and upload
            stopBtn.addEventListener('click', () => {
                // Attach the handler before calling stop() so the event is not missed.
                mediaRecorder.onstop = async () => {
                    status.textContent = 'Recording stopped. Preparing to send...';
                    // MediaRecorder produces a browser-native container (usually
                    // audio/webm), not WAV; ffmpeg on the server sniffs the format.
                    const mimeType = mediaRecorder.mimeType || 'audio/webm';
                    const audioBlob = new Blob(audioChunks, { type: mimeType });
                    const audioUrl = URL.createObjectURL(audioBlob);
                    audioPlayback.src = audioUrl;
                    audioPlayback.style.display = 'block';
                    audioChunks = [];

                    // Send the recording to the FastAPI endpoint
                    const formData = new FormData();
                    formData.append('file', audioBlob, 'recording.webm');

                    const response = await fetch('/transcribe/', {
                        method: 'POST',
                        body: formData,
                    });

                    if (!response.ok) {
                        status.textContent = 'Transcription failed: HTTP ' + response.status;
                        return;
                    }

                    const result = await response.json();
                    status.textContent = 'Transcription: ' + result.transcription;
                };
                mediaRecorder.stop();

                startBtn.disabled = false;
                stopBtn.disabled = true;
            });
        </script>
    </body>
    </html>
    """
    return HTMLResponse(content=html_content)
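
# If the playground outgrows an inline string, the page could instead live in
# a static directory mounted on the app; a sketch (the "static" directory name
# is an assumption):
#
#     from fastapi.staticfiles import StaticFiles
#     app.mount("/static", StaticFiles(directory="static"), name="static")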


if __name__ == "__main__":
    import uvicorn

    # Hugging Face Spaces expects the server to listen on port 7860.
    uvicorn.run(app, host="0.0.0.0", port=7860)
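
# For local development the server can also be started from the CLI with
# auto-reload (the "app:app" module path is an assumption; match this file's
# actual name):
#
#     uvicorn app:app --host 0.0.0.0 --port 7860 --reload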