File size: 3,470 Bytes
f874e3c
 
 
 
 
 
473689e
f874e3c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5a9205d
f874e3c
 
 
 
 
5a9205d
 
f874e3c
 
473689e
f874e3c
 
 
473689e
 
 
 
 
5a9205d
473689e
f874e3c
 
473689e
 
 
 
f874e3c
 
 
 
 
473689e
f874e3c
473689e
f874e3c
 
 
 
 
 
 
 
 
 
 
473689e
f874e3c
 
 
 
 
 
 
 
473689e
 
5a9205d
f874e3c
473689e
 
 
 
 
5a9205d
f874e3c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import gradio as gr
from transformers import pipeline
import tempfile
import os
import time
import ffmpeg
import numpy as np

# Build the Whisper speech-recognition pipeline (CPU only)
def load_model():
    """Create the Whisper Tiny automatic-speech-recognition pipeline.

    Returns:
        A ``transformers`` ASR pipeline for ``openai/whisper-tiny`` pinned
        to the CPU. ``chunk_length_s=30`` makes the pipeline split long
        recordings into 30-second chunks so that audio longer than a single
        Whisper window is transcribed in full.
    """
    return pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-tiny",
        chunk_length_s=30,  # chunked long-form transcription
        device="cpu",  # Force CPU usage
    )

# Load model at startup: the pipeline is built once at import time and the
# resulting module-level object is reused by transcribe_video for every call.
model = load_model()

def extract_audio(video_path):
    """Extract the audio track of a video into a temporary WAV file.

    The audio is converted to mono (``ac=1``), 16 kHz (``ar=16000``),
    signed 16-bit PCM (``pcm_s16le``).

    Args:
        video_path: Path of the video file to read.

    Returns:
        Path of the newly created ``.wav`` file. The caller is responsible
        for deleting it.

    Raises:
        Exception: Any error raised by the ffmpeg run is propagated after
            the temporary WAV file has been removed.
    """
    # mkstemp creates the file atomically; the deprecated tempfile.mktemp only
    # returns an unused *name*, which another process could claim first.
    fd, audio_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)  # ffmpeg opens the path itself; only the name is needed

    try:
        (
            ffmpeg
            .input(video_path)
            .output(audio_path, ac=1, ar=16000, acodec='pcm_s16le')
            .overwrite_output()
            .run(quiet=True)
        )
    except Exception:
        # Do not leave an orphaned temp file behind when extraction fails.
        if os.path.exists(audio_path):
            os.unlink(audio_path)
        raise

    return audio_path

def transcribe_video(video_file):
    """Transcribe the speech in a video and report processing statistics.

    Args:
        video_file: Raw bytes of the video to transcribe.

    Returns:
        A ``(transcript, status)`` tuple: the recognized text, and a
        human-readable message with the video size in MB and the elapsed
        processing time in seconds.

    Raises:
        Exception: Errors from writing the video, extracting the audio, or
            running the model are propagated; the temporary files created
            so far are removed first.
    """
    start_time = time.time()

    # Save video to a temp file so ffmpeg can read it from disk
    tmp_video = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
    video_path = tmp_video.name
    try:
        with tmp_video:
            tmp_video.write(video_file)

        # Get file size
        file_size = os.path.getsize(video_path) / (1024 * 1024)  # in MB

        # Extract audio
        audio_path = extract_audio(video_path)
    finally:
        # The video is no longer needed once the audio exists (or extraction
        # failed); remove it in both cases.
        os.unlink(video_path)

    # Transcribe
    try:
        result = model(audio_path)
        transcript = result["text"]
    finally:
        # Remove the audio file even when transcription raises
        os.unlink(audio_path)

    process_time = time.time() - start_time

    return transcript, f"✅ Processed {file_size:.1f}MB video in {process_time:.1f} seconds"

# Gradio interface: upload column on the left, results column on the right
with gr.Blocks(title="Free Video Transcriber", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎥 Free Video Transcriber")
    gr.Markdown("Upload any video to transcribe using Whisper Tiny (optimized for CPU)")
    
    with gr.Row():
        with gr.Column():
            video_input = gr.File(label="Upload Video", file_types=["video"])
            transcribe_btn = gr.Button("Transcribe Video", variant="primary")
            
        with gr.Column():
            transcript_output = gr.Textbox(label="Transcript", lines=10, interactive=True)
            status_output = gr.Textbox(label="Status", interactive=False)
            download_btn = gr.DownloadButton(label="Download Transcript")
    
    # Processing function
    def process_video(video_file):
        """Handle a click on the transcribe button.

        Args:
            video_file: Value of the ``gr.File`` input, or ``None`` when
                nothing has been uploaded.

        Returns:
            A ``(transcript, status, download_path)`` tuple feeding the
            transcript textbox, the status textbox and the download button.
            ``download_path`` is the path of a text file containing the
            transcript, or ``None`` when no video was provided.
        """
        if video_file is None:
            return "", "Please upload a video file first", None
        
        # The upload may arrive as a plain path string or as an object
        # exposing the path through .name; accept both.
        video_path = getattr(video_file, "name", video_file)
        
        # Read file content
        with open(video_path, "rb") as f:
            video_bytes = f.read()
        
        transcript, status = transcribe_video(video_bytes)
        
        # DownloadButton takes a file path as its value, not the text itself,
        # so persist the transcript to a file that Gradio can serve.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".txt", delete=False, encoding="utf-8"
        ) as transcript_file:
            transcript_file.write(transcript)
        
        return transcript, status, transcript_file.name
    
    # Set up button actions
    transcribe_btn.click(
        fn=process_video,
        inputs=video_input,
        outputs=[transcript_output, status_output, download_btn]
    )
    
    # Info section
    with gr.Accordion("ℹ️ About this app", open=False):
        gr.Markdown("""
        **How it works:**
        - Uses OpenAI's Whisper Tiny model optimized for CPU
        - Extracts audio from video using FFmpeg
        - Transcribes audio to text
        - Works with MP4, MOV, AVI, MKV, WEBM formats
        
        **Performance notes:**
        - 1 min video: ~10-20 seconds
        - 5 min video: ~1-2 minutes
        - 10 min video: ~2-4 minutes
        
        **Optimized for:** Hugging Face Spaces free tier (CPU only)
        """)

# Launch the app only when this file is run as a script, not when imported
if __name__ == "__main__":
    demo.launch()