Spaces:
Running
Running
| import gradio as gr | |
| from transformers import pipeline | |
| import tempfile | |
| import os | |
| import time | |
| import ffmpeg | |
| import numpy as np | |
def load_model():
    """Build the Whisper Tiny speech-recognition pipeline on the CPU.

    Returns:
        A ``transformers`` automatic-speech-recognition pipeline that accepts
        an audio file path and returns a dict containing a ``"text"`` entry.
    """
    return pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-tiny",
        # Whisper processes at most a 30-second window at a time; chunking
        # lets the pipeline transcribe recordings of arbitrary length instead
        # of only the first window.
        chunk_length_s=30,
        device="cpu",  # Force CPU usage
    )


# Load the model once at import time so every request reuses the same pipeline.
model = load_model()
def extract_audio(video_path):
    """Extract the audio track of a video into a temporary WAV file.

    The audio is converted to mono, 16 kHz, 16-bit PCM.

    Args:
        video_path: Path of the video file to read.

    Returns:
        Path of the newly created ``.wav`` file. The caller owns the file and
        is responsible for deleting it.

    Raises:
        Whatever ``ffmpeg`` raises when the conversion fails; the temporary
        WAV file is removed before the exception propagates.
    """
    # mkstemp creates the file atomically, unlike the deprecated mktemp, which
    # only returns a name that another process could claim first.
    fd, audio_path = tempfile.mkstemp(suffix=".wav")
    # ffmpeg writes to the file by path, so our own descriptor is not needed.
    os.close(fd)
    try:
        (
            ffmpeg
            .input(video_path)
            .output(audio_path, ac=1, ar=16000, acodec="pcm_s16le")
            .overwrite_output()  # the placeholder file already exists
            .run(quiet=True)
        )
    except Exception:
        # Do not leave a stray temp file behind when the conversion fails.
        if os.path.exists(audio_path):
            os.unlink(audio_path)
        raise
    return audio_path
def transcribe_video(video_file):
    """Transcribe the speech in a video.

    Args:
        video_file: Raw bytes of the uploaded video.

    Returns:
        A ``(transcript, status)`` tuple: the recognized text and a
        human-readable summary of the input size and processing time.

    Raises:
        Any exception raised while extracting audio or running the model.
        Temporary files are removed in every case.
    """
    start_time = time.time()
    video_path = None
    audio_path = None
    try:
        # Save the upload to disk so ffmpeg can read it by path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp_video:
            video_path = tmp_video.name
            tmp_video.write(video_file)

        file_size = os.path.getsize(video_path) / (1024 * 1024)  # in MB

        audio_path = extract_audio(video_path)

        # The video is no longer needed once the audio exists; free the disk
        # space before the (slow) transcription step.
        os.unlink(video_path)
        video_path = None

        result = model(audio_path)
        transcript = result["text"]
    finally:
        # Runs on success and on failure, so no temp file outlives the call.
        for leftover in (video_path, audio_path):
            if leftover is not None and os.path.exists(leftover):
                os.unlink(leftover)

    process_time = time.time() - start_time
    return transcript, f"✅ Processed {file_size:.1f}MB video in {process_time:.1f} seconds"
# Gradio interface: upload on the left, transcript/status/download on the right.
with gr.Blocks(title="Free Video Transcriber", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎥 Free Video Transcriber")
    gr.Markdown("Upload any video to transcribe using Whisper Tiny (optimized for CPU)")

    with gr.Row():
        with gr.Column():
            video_input = gr.File(label="Upload Video", file_types=["video"])
            transcribe_btn = gr.Button("Transcribe Video", variant="primary")
        with gr.Column():
            transcript_output = gr.Textbox(label="Transcript", lines=10, interactive=True)
            status_output = gr.Textbox(label="Status", interactive=False)
            download_btn = gr.DownloadButton(label="Download Transcript")

    def process_video(video_file):
        """Handle a click on the transcribe button.

        Args:
            video_file: Value of the upload component, or ``None`` when
                nothing has been uploaded.

        Returns:
            A ``(transcript, status, download_path)`` tuple matching the
            transcript box, the status box and the download button.
        """
        if video_file is None:
            return "", "Please upload a video file first", None

        # Accept either a plain path string or an object exposing the path
        # through ``.name``, so both upload value shapes work.
        source_path = getattr(video_file, "name", video_file)
        with open(source_path, "rb") as f:
            video_bytes = f.read()

        transcript, status = transcribe_video(video_bytes)

        # The download button serves a file, so it needs a path rather than
        # the transcript text itself. The file must outlive this call to be
        # downloadable, hence delete=False.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".txt", delete=False, encoding="utf-8"
        ) as transcript_file:
            transcript_file.write(transcript)

        return transcript, status, transcript_file.name

    # Set up button actions
    transcribe_btn.click(
        fn=process_video,
        inputs=video_input,
        outputs=[transcript_output, status_output, download_btn],
    )

    # Info section
    with gr.Accordion("ℹ️ About this app", open=False):
        gr.Markdown("""
        **How it works:**

        - Uses OpenAI's Whisper Tiny model optimized for CPU
        - Extracts audio from video using FFmpeg
        - Transcribes audio to text
        - Works with MP4, MOV, AVI, MKV, WEBM formats

        **Performance notes:**

        - 1 min video: ~10-20 seconds
        - 5 min video: ~1-2 minutes
        - 10 min video: ~2-4 minutes

        **Optimized for:** Hugging Face Spaces free tier (CPU only)
        """)

# Launch the app only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()