import whisper
from transformers import MarianMTModel, MarianTokenizer, AutoTokenizer, AutoModelForSeq2SeqLM
import os
import tempfile
import subprocess

# Load Whisper model
model = whisper.load_model("base")


def format_timestamp(seconds):
    # Convert seconds (float) to the SRT timestamp format HH:MM:SS,mmm
    milliseconds = int(round(seconds * 1000))
    hours, milliseconds = divmod(milliseconds, 3_600_000)
    minutes, milliseconds = divmod(milliseconds, 60_000)
    secs, milliseconds = divmod(milliseconds, 1_000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{milliseconds:03d}"


def process_video(video_file, language):
    # Save uploaded video to a temporary file
    temp_dir = tempfile.gettempdir()
    video_path = os.path.join(temp_dir, "input_video")  # No extension; ffmpeg probes the container
    output_video_path = os.path.join(temp_dir, "converted_video.mp4")  # Convert to MP4 for compatibility

    try:
        # Save the uploaded file
        print("Saving uploaded video...")
        with open(video_path, "wb") as f:
            f.write(video_file.read())
        print(f"Video saved to {video_path}")

        # Convert the video to MP4 using ffmpeg (-y overwrites any previous output)
        print("Converting video to MP4...")
        subprocess.run(
            ["ffmpeg", "-y", "-i", video_path, "-c:v", "libx264", "-preset", "fast", output_video_path],
            check=True,
        )
        print(f"Video converted and saved to {output_video_path}")

        # Transcribe the audio; language="en" tells Whisper the speech is English
        print("Transcribing video to English...")
        result = model.transcribe(output_video_path, language="en")
        print("Transcription completed!")

        # Translation logic
        segments = []
        if language == "English":
            segments = result["segments"]
        else:
            # Define translation models
            model_map = {
                "Hindi": "Helsinki-NLP/opus-mt-en-hi",
                "Spanish": "Helsinki-NLP/opus-mt-en-es",
                "French": "Helsinki-NLP/opus-mt-en-fr",
                "German": "Helsinki-NLP/opus-mt-en-de",
                "Telugu": "facebook/nllb-200-distilled-600M",
                "Portuguese": "Helsinki-NLP/opus-mt-en-pt",
                "Russian": "Helsinki-NLP/opus-mt-en-ru",
                "Chinese": "Helsinki-NLP/opus-mt-en-zh",
                "Arabic": "Helsinki-NLP/opus-mt-en-ar",
                "Japanese": "Helsinki-NLP/opus-mt-en-jap",
            }
            model_name = model_map.get(language)
            if not model_name:
                return f"Unsupported language: {language}"

            print(f"Loading translation model for {language}: {model_name}")
            if language == "Telugu":
                # NLLB needs explicit source/target language codes
                tokenizer = AutoTokenizer.from_pretrained(model_name, src_lang="eng_Latn")
                translation_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
                tgt_lang = "tel_Telu"
                print("Translating to Telugu using NLLB-200 Distilled...")
                for segment in result["segments"]:
                    inputs = tokenizer(segment["text"], return_tensors="pt", padding=True)
                    translated_tokens = translation_model.generate(
                        **inputs,
                        forced_bos_token_id=tokenizer.convert_tokens_to_ids(tgt_lang),
                    )
                    translated_text = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
                    segments.append({"text": translated_text, "start": segment["start"], "end": segment["end"]})
            else:
                tokenizer = MarianTokenizer.from_pretrained(model_name)
                translation_model = MarianMTModel.from_pretrained(model_name)
                print(f"Translating to {language}...")
                for segment in result["segments"]:
                    inputs = tokenizer(segment["text"], return_tensors="pt", padding=True)
                    translated = translation_model.generate(**inputs)
                    translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
                    segments.append({"text": translated_text, "start": segment["start"], "end": segment["end"]})

        # Create SRT file
        srt_path = os.path.join(tempfile.gettempdir(), "subtitles.srt")
        print(f"Creating SRT file at {srt_path}")
        with open(srt_path, "w", encoding="utf-8") as f:
            for i, segment in enumerate(segments, 1):
                start = format_timestamp(segment["start"])
                end = format_timestamp(segment["end"])
                text = segment["text"].strip()
                f.write(f"{i}\n{start} --> {end}\n{text}\n\n")
        print("SRT file created successfully!")

        return srt_path

    except subprocess.CalledProcessError as e:
        return f"FFmpeg Error: {str(e)}"
    except Exception as e:
        return f"Unexpected Error: {str(e)}"
    finally:
        # Clean up temporary files
        print("Cleaning up temporary files...")
        if os.path.exists(video_path):
            os.remove(video_path)
        if os.path.exists(output_video_path):
            os.remove(output_video_path)
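# Example usage -- a minimal sketch, assuming the video arrives as a binary
# file-like object (e.g. an upload widget from a web UI). The file name
# "sample.mp4" and the target language "Hindi" are illustrative only and not
# part of the original script.
if __name__ == "__main__":
    with open("sample.mp4", "rb") as video_file:
        srt_or_error = process_video(video_file, "Hindi")
    print(f"Result: {srt_or_error}")  # Path to the generated .srt file, or an error message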