# app.py — Free Video Transcriber (Hugging Face Space; source commit 473689e)
import gradio as gr
from transformers import pipeline
import tempfile
import os
import time
import ffmpeg
import numpy as np
def load_model():
    """Build the Whisper-tiny speech-recognition pipeline, pinned to CPU."""
    asr = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-tiny",
        device="cpu",  # force CPU so the app runs on free-tier hardware
    )
    return asr


# Instantiate once at import time so every request reuses the same model.
model = load_model()
def extract_audio(video_path):
    """Extract a mono 16 kHz WAV track from *video_path*.

    Returns the path of a temporary ``.wav`` file; the caller is
    responsible for deleting it.
    """
    # tempfile.mktemp() is deprecated and race-prone: another process can
    # claim the name before we use it. Create the file atomically instead
    # and let ffmpeg overwrite it.
    fd, audio_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    (
        ffmpeg
        .input(video_path)
        # Mono, 16 kHz, 16-bit PCM — the input format Whisper expects.
        .output(audio_path, ac=1, ar=16000, acodec='pcm_s16le')
        .overwrite_output()
        .run(quiet=True)
    )
    return audio_path
def transcribe_video(video_file):
    """Transcribe a video given as raw bytes.

    Parameters:
        video_file: the video's raw bytes (e.g. an uploaded file's content).

    Returns:
        (transcript, status) — the transcribed text and a human-readable
        summary of file size and processing time.
    """
    start_time = time.time()

    # Persist the upload so ffmpeg can read it from disk.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp_video:
        tmp_video.write(video_file)
        video_path = tmp_video.name

    file_size = os.path.getsize(video_path) / (1024 * 1024)  # size in MB

    audio_path = None
    try:
        audio_path = extract_audio(video_path)
        result = model(audio_path)
        transcript = result["text"]
    finally:
        # Always remove the temp files, even when extraction or
        # transcription raises — the original leaked them on error.
        for path in (video_path, audio_path):
            if path and os.path.exists(path):
                os.unlink(path)

    process_time = time.time() - start_time
    return transcript, f"✅ Processed {file_size:.1f}MB video in {process_time:.1f} seconds"
# Gradio interface
with gr.Blocks(title="Free Video Transcriber", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎥 Free Video Transcriber")
    gr.Markdown("Upload any video to transcribe using Whisper Tiny (optimized for CPU)")

    with gr.Row():
        with gr.Column():
            video_input = gr.File(label="Upload Video", file_types=["video"])
            transcribe_btn = gr.Button("Transcribe Video", variant="primary")
        with gr.Column():
            transcript_output = gr.Textbox(label="Transcript", lines=10, interactive=True)
            status_output = gr.Textbox(label="Status", interactive=False)
            download_btn = gr.DownloadButton(label="Download Transcript")

    def process_video(video_file):
        """Read the uploaded video, transcribe it, and prepare the download file.

        Returns (transcript, status, path-for-DownloadButton).
        """
        if video_file is None:
            return "", "Please upload a video file first", None
        # gr.File may hand us a plain path string (Gradio 4 default,
        # type="filepath") or a tempfile-like object exposing .name.
        video_path = video_file if isinstance(video_file, str) else video_file.name
        with open(video_path, "rb") as f:
            video_bytes = f.read()
        transcript, status = transcribe_video(video_bytes)
        # DownloadButton expects a file *path*, not raw text — the original
        # returned the transcript string, which produced a broken download.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".txt", delete=False, encoding="utf-8"
        ) as tmp_txt:
            tmp_txt.write(transcript)
            transcript_path = tmp_txt.name
        return transcript, status, transcript_path

    # Wire the button: one click fills transcript, status, and the download.
    transcribe_btn.click(
        fn=process_video,
        inputs=video_input,
        outputs=[transcript_output, status_output, download_btn],
    )

    # Info section
    with gr.Accordion("ℹ️ About this app", open=False):
        gr.Markdown("""
**How it works:**
- Uses OpenAI's Whisper Tiny model optimized for CPU
- Extracts audio from video using FFmpeg
- Transcribes audio to text
- Works with MP4, MOV, AVI, MKV, WEBM formats
**Performance notes:**
- 1 min video: ~10-20 seconds
- 5 min video: ~1-2 minutes
- 10 min video: ~2-4 minutes
**Optimized for:** Hugging Face Spaces free tier (CPU only)
""")
# Launch the app when run as a script (Spaces can also import `demo` directly).
if __name__ == "__main__":
    demo.launch()