# app.py — Free Video Transcriber (Hugging Face Space; source commit 473689e)
import gradio as gr
from transformers import pipeline
import tempfile
import os
import time
import ffmpeg
import numpy as np
def load_model():
    """Build the Whisper-tiny speech-recognition pipeline, pinned to CPU."""
    asr = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-tiny",
        device="cpu",  # force CPU so the app runs on free-tier hardware
    )
    return asr


# Instantiate once at import time so every request reuses the same model.
model = load_model()
def extract_audio(video_path):
    """Extract a mono 16 kHz WAV track from *video_path*.

    Returns the path of a temporary ``.wav`` file; the caller is
    responsible for deleting it.
    """
    # tempfile.mktemp() is deprecated and race-prone: another process can
    # claim the name before we use it. Create the file atomically instead
    # and let ffmpeg overwrite it.
    fd, audio_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    (
        ffmpeg
        .input(video_path)
        # Mono, 16 kHz, 16-bit PCM — the input format Whisper expects.
        .output(audio_path, ac=1, ar=16000, acodec='pcm_s16le')
        .overwrite_output()
        .run(quiet=True)
    )
    return audio_path
def transcribe_video(video_file):
    """Transcribe a video given as raw bytes.

    Parameters:
        video_file: the video's raw bytes (e.g. an uploaded file's content).

    Returns:
        (transcript, status) — the transcribed text and a human-readable
        summary of file size and processing time.
    """
    start_time = time.time()

    # Persist the upload so ffmpeg can read it from disk.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp_video:
        tmp_video.write(video_file)
        video_path = tmp_video.name

    file_size = os.path.getsize(video_path) / (1024 * 1024)  # size in MB

    audio_path = None
    try:
        audio_path = extract_audio(video_path)
        result = model(audio_path)
        transcript = result["text"]
    finally:
        # Always remove the temp files, even when extraction or
        # transcription raises — the original leaked them on error.
        for path in (video_path, audio_path):
            if path and os.path.exists(path):
                os.unlink(path)

    process_time = time.time() - start_time
    return transcript, f"✅ Processed {file_size:.1f}MB video in {process_time:.1f} seconds"
# Gradio interface
with gr.Blocks(title="Free Video Transcriber", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎥 Free Video Transcriber")
    gr.Markdown("Upload any video to transcribe using Whisper Tiny (optimized for CPU)")

    with gr.Row():
        with gr.Column():
            video_input = gr.File(label="Upload Video", file_types=["video"])
            transcribe_btn = gr.Button("Transcribe Video", variant="primary")
        with gr.Column():
            transcript_output = gr.Textbox(label="Transcript", lines=10, interactive=True)
            status_output = gr.Textbox(label="Status", interactive=False)
            download_btn = gr.DownloadButton(label="Download Transcript")

    def process_video(video_file):
        """Read the uploaded video, transcribe it, and prepare the download file.

        Returns (transcript, status, path-for-DownloadButton).
        """
        if video_file is None:
            return "", "Please upload a video file first", None
        # gr.File may hand us a plain path string (Gradio 4 default,
        # type="filepath") or a tempfile-like object exposing .name.
        video_path = video_file if isinstance(video_file, str) else video_file.name
        with open(video_path, "rb") as f:
            video_bytes = f.read()
        transcript, status = transcribe_video(video_bytes)
        # DownloadButton expects a file *path*, not raw text — the original
        # returned the transcript string, which produced a broken download.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".txt", delete=False, encoding="utf-8"
        ) as tmp_txt:
            tmp_txt.write(transcript)
            transcript_path = tmp_txt.name
        return transcript, status, transcript_path

    # Wire the button: one click fills transcript, status, and the download.
    transcribe_btn.click(
        fn=process_video,
        inputs=video_input,
        outputs=[transcript_output, status_output, download_btn],
    )

    # Info section
    with gr.Accordion("ℹ️ About this app", open=False):
        gr.Markdown("""
**How it works:**
- Uses OpenAI's Whisper Tiny model optimized for CPU
- Extracts audio from video using FFmpeg
- Transcribes audio to text
- Works with MP4, MOV, AVI, MKV, WEBM formats
**Performance notes:**
- 1 min video: ~10-20 seconds
- 5 min video: ~1-2 minutes
- 10 min video: ~2-4 minutes
**Optimized for:** Hugging Face Spaces free tier (CPU only)
""")
# Launch the app when run as a script (Spaces can also import `demo` directly).
if __name__ == "__main__":
    demo.launch()