Spaces:

legusxyz
/

whisper

Sleeping

App Files Files Community

whisper / app.py

legusxyz

Update app.py

d520218 verified 5 months ago

raw

history blame

5.27 kB

	import os
	import tempfile
	import time

	import torch
	from fastapi import FastAPI, File, HTTPException, UploadFile
	from fastapi.responses import HTMLResponse
	from fastapi.staticfiles import StaticFiles
	from transformers import pipeline, WhisperForConditionalGeneration, WhisperProcessor

	# Application object that the route decorators in this module register against
	app = FastAPI()

	# Device index handed to the pipeline: first CUDA device when one is
	# available, otherwise -1 (the pipeline's CPU setting)
	if torch.cuda.is_available():
	    device = 0
	else:
	    device = -1

	# Whisper checkpoint to serve; other variants such as "openai/whisper-small"
	# can be substituted here
	model_name = "openai/whisper-large-v2"
	model = WhisperForConditionalGeneration.from_pretrained(model_name)
	processor = WhisperProcessor.from_pretrained(model_name)

	# Pin decoding to Portuguese transcription by storing the matching decoder
	# prompt ids on the model config.
	# NOTE(review): newer transformers releases prefer passing language/task at
	# generation time instead of via config — confirm against the pinned version.
	forced_decoder_ids = processor.get_decoder_prompt_ids(
	    language="portuguese",
	    task="transcribe",
	)
	model.config.forced_decoder_ids = forced_decoder_ids

	# Build the speech-recognition pipeline around the configured model, reusing
	# the tokenizer and feature extractor that belong to the same processor
	asr_pipeline = pipeline(
	    task="automatic-speech-recognition",
	    model=model,
	    tokenizer=processor.tokenizer,
	    feature_extractor=processor.feature_extractor,
	    device=device,
	)



	# Root route: answers GET / with a fixed JSON greeting
	@app.get("/")
	def read_root():
	    greeting = "Welcome to the FastAPI app on Hugging Face Spaces!"
	    return {"message": greeting}

	# POST endpoint to transcribe audio
	@app.post("/transcribe/")
	async def transcribe_audio(file: UploadFile = File(...)):
	    """Transcribe an uploaded audio file and return the recognised text.

	    The upload is written to a temporary file, handed to the module-level
	    ``asr_pipeline`` by path, and the temporary file is removed afterwards
	    whether or not transcription succeeded.

	    Args:
	        file: Audio upload sent as the multipart form field ``file``.

	    Returns:
	        A dict of the form ``{"transcription": <text>}``.

	    Raises:
	        HTTPException: With status 400 when the uploaded file is empty.
	    """
	    # perf_counter is monotonic, so the logged durations cannot be skewed by
	    # system clock adjustments.
	    start_time = time.perf_counter()

	    audio_bytes = await file.read()
	    if not audio_bytes:
	        # Reject early instead of letting the model fail on an empty payload.
	        raise HTTPException(status_code=400, detail="Uploaded file is empty.")

	    temp_file_path = None
	    try:
	        # delete=False: the file is closed before the pipeline opens it by
	        # path, so removal is handled explicitly in the finally clause.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
	            temp_file_path = temp_audio_file.name
	            temp_audio_file.write(audio_bytes)

	        # Transcribe the audio; timestamps are requested to enable long-form
	        # generation for long audio files.
	        # NOTE(review): this call is synchronous and runs on the event loop,
	        # so other requests wait until it returns. Offloading it to a worker
	        # thread would allow concurrent access to the shared model — decide
	        # that deliberately rather than as a side effect.
	        transcription_start = time.perf_counter()
	        transcription = asr_pipeline(temp_file_path, return_timestamps=True)
	        transcription_end = time.perf_counter()
	    finally:
	        # Always clean up, including when writing or transcription raised.
	        if temp_file_path is not None:
	            os.remove(temp_file_path)

	    # Log time durations
	    end_time = time.perf_counter()
	    print(f"Time to transcribe audio: {transcription_end - transcription_start:.4f} seconds")
	    print(f"Total execution time: {end_time - start_time:.4f} seconds")

	    return {"transcription": transcription['text']}

	# Browser playground: GET /playground/ returns a self-contained HTML page
	# for recording audio and sending it to the transcription endpoint.
	#
	# What the embedded script does:
	#   * "Start Recording" requests microphone access via getUserMedia and
	#     starts a MediaRecorder, collecting chunks in `audioChunks`.
	#   * "Stop Recording" stops the recorder; the onstop handler joins the
	#     chunks into a Blob, exposes it in the <audio> element for playback,
	#     POSTs it to '/transcribe/' as form field 'file' (named
	#     'recording.wav'), and writes `result.transcription` into the status
	#     paragraph.
	#
	# NOTE(review): the Blob is labelled 'audio/wav', but the script never
	# converts the recording — the actual container is whatever MediaRecorder
	# produced in that browser; confirm the server side copes with that.
	# NOTE(review): the script has no error handling around getUserMedia or
	# fetch, and never stops the MediaStream's tracks after recording.
	@app.get("/playground/", response_class=HTMLResponse)
	def playground():
	# The whole page (markup + script) is a single string literal; everything
	# inside it is sent to the browser verbatim.
	html_content = """
	<!DOCTYPE html>
	<html lang="en">
	<head>
	<meta charset="UTF-8">
	<meta name="viewport" content="width=device-width, initial-scale=1.0">
	<title>Voice Recorder</title>
	</head>
	<body>
	<h1>Record your voice</h1>
	<button id="startBtn">Start Recording</button>
	<button id="stopBtn" disabled>Stop Recording</button>
	<p id="status">Press start to record your voice...</p>

	<audio id="audioPlayback" controls style="display:none;"></audio>
	<script>
	let mediaRecorder;
	let audioChunks = [];

	const startBtn = document.getElementById('startBtn');
	const stopBtn = document.getElementById('stopBtn');
	const status = document.getElementById('status');
	const audioPlayback = document.getElementById('audioPlayback');

	// Start Recording
	startBtn.addEventListener('click', async () => {
	const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
	mediaRecorder = new MediaRecorder(stream);
	mediaRecorder.start();

	status.textContent = 'Recording...';
	startBtn.disabled = true;
	stopBtn.disabled = false;

	mediaRecorder.ondataavailable = event => {
	audioChunks.push(event.data);
	};
	});

	// Stop Recording
	stopBtn.addEventListener('click', () => {
	mediaRecorder.stop();
	mediaRecorder.onstop = async () => {
	status.textContent = 'Recording stopped. Preparing to send...';
	const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
	const audioUrl = URL.createObjectURL(audioBlob);
	audioPlayback.src = audioUrl;
	audioPlayback.style.display = 'block';
	audioChunks = [];

	// Send audio blob to FastAPI endpoint
	const formData = new FormData();
	formData.append('file', audioBlob, 'recording.wav');

	const response = await fetch('/transcribe/', {
	method: 'POST',
	body: formData,
	});

	const result = await response.json();
	status.textContent = 'Transcription: ' + result.transcription;
	};

	startBtn.disabled = false;
	stopBtn.disabled = true;
	});
	</script>
	</body>
	</html>
	"""
	# Wrap the page in an explicit HTMLResponse, matching the route's
	# declared response_class.
	return HTMLResponse(content=html_content)
	# Script entry point: when this file is executed directly (rather than
	# imported), serve `app` with Uvicorn on all interfaces.
	if __name__ == "__main__":
	    import uvicorn

	    # Port 7860 is presumably what the hosting Space expects — confirm
	    # before changing it.
	    server_options = {"host": "0.0.0.0", "port": 7860}
	    uvicorn.run(app, **server_options)