|
|
|
|
|
|
|
|
|
|
|
|
|
import httpcore |
|
|
|
import pathlib |
|
import sys, os |
|
from gtts import gTTS |
|
import gradio as gr |
|
import os |
|
import speech_recognition as sr |
|
from googletrans import Translator, constants |
|
from pprint import pprint |
|
from moviepy.editor import * |
|
from pytube import YouTube |
|
from youtube_transcript_api import YouTubeTranscriptApi |
|
from utils import * |
|
import json |
|
import re |
|
from pytube import YouTube |
|
from yt_dlp import YoutubeDL |
|
from yt_dlp import YoutubeDL |
|
import os |
|
|
|
import yt_dlp |
|
|
|
def download_video(url):
    """
    Downloads a video from YouTube using yt-dlp and returns the local file path.

    The best video and audio streams are merged into an MP4 container. When a
    ``youtube_cookies.txt`` file exists in the working directory it is handed
    to yt-dlp so that authenticated requests can be made.

    :param url: str - YouTube video URL
    :return: str or None - path of the downloaded file, or None if the download failed
    """
    print("Starting download...")

    ydl_opts = {
        'format': 'bestvideo+bestaudio/best',
        'merge_output_format': 'mp4',
        'outtmpl': '%(title)s.%(ext)s',
        'quiet': False,
        # The Python API takes custom headers through 'http_headers'; a
        # top-level 'user-agent' key is not a YoutubeDL option and is ignored.
        'http_headers': {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
        },
    }

    # The YoutubeDL option is named 'cookiefile' ('cookies' is only the CLI
    # flag). Only pass it when the file is really there.
    cookie_file = 'youtube_cookies.txt'
    if os.path.isfile(cookie_file):
        ydl_opts['cookiefile'] = cookie_file

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=True)
            local_file = ydl.prepare_filename(info)
    except Exception as e:
        # yt-dlp raises many different error types; any of them means "no file".
        print(f"❌ Download failed: {str(e)}")
        return None

    # prepare_filename() is built from the extension of the selected format,
    # which can differ from the merged output container requested above.
    if not os.path.exists(local_file):
        merged_file = os.path.splitext(local_file)[0] + '.mp4'
        if os.path.exists(merged_file):
            local_file = merged_file

    print(f"✅ Downloaded: {local_file}")
    return local_file
|
|
|
|
|
# NOTE(review): these two statements run when the module is loaded, so a video
# is downloaded into the working directory on every start-up, before the UI is
# built. This looks like a leftover manual test -- confirm before removing.
# The name `url` is rebound to a Gradio textbox further down in this file.
url = "https://www.youtube.com/watch?v=uLVRZE8OAI4"

download_video(url)
|
|
|
|
|
|
|
|
|
|
|
def validate_youtube(url):
    """
    Checks a YouTube URL with yt-dlp and reports whether it must be rejected.

    The video metadata is fetched without downloading the media. The URL is
    rejected when the metadata cannot be retrieved, when no duration is
    reported, or when the duration exceeds 600 seconds.

    :param url: str - YouTube video URL
    :return: bool - True if the URL is rejected (invalid, unknown length or
             longer than 10 minutes), otherwise False
    """
    probe_options = {'quiet': True, 'no_warnings': True}
    try:
        with YoutubeDL(probe_options) as downloader:
            metadata = downloader.extract_info(url, download=False)
            duration_seconds = metadata.get('duration')

        if duration_seconds is None:
            print("Could not determine video length.")
            return True

        if duration_seconds > 600:
            print("Your video is longer than 10 minutes.")
            return True

        print("Your video is 10 minutes or shorter.")
        return False
    except Exception as e:
        print(f"Error: The provided URL is invalid or not accessible. ({e})")
        return True
|
|
|
def validate_url(url):
    """
    Reports whether *url* fails the ``validators.url`` syntax check.

    :param url: str - URL to check
    :return: bool - True if the URL is rejected, False if it is accepted
    """
    import validators

    if validators.url(url):
        return False

    print("Hi there URL seems invalid ")
    return True
|
def cleanup():
    """
    Deletes every ``*.mp4`` and ``*.wav`` file in the current working directory.

    Deletion is best-effort: a file that cannot be removed is reported and
    skipped, and the remaining files are still processed.
    """
    import glob
    import pathlib

    patterns = ('*.mp4', '*.wav')
    junks = [name for pattern in patterns for name in glob.glob(pattern)]

    for junk in junks:
        print("Deleting", junk)
        try:
            pathlib.Path(junk).unlink()
        except OSError as error:
            # Handle each file on its own so one failure (file in use,
            # permissions, a directory matching the pattern, ...) does not
            # stop the remaining deletions.
            print(f"I cannot delete the file {junk}: {error}")
|
|
|
def getSize(filename):
    """
    Returns the size of a file in bytes.

    :param filename: str - path of the file to inspect
    :return: int - file size in bytes
    """
    return os.path.getsize(filename)
|
|
|
|
|
def clean_transcript(transcript_list):
    """
    Joins the ``"text"`` field of every transcript entry into one string.

    Entries whose text is exactly one of the known "[music]" placeholders are
    dropped. Every kept text is followed by a single space, so a non-empty
    result ends with a trailing space.

    :param transcript_list: iterable of mappings that each have a "text" key
    :return: str - the concatenated transcript text
    """
    music_placeholders = (
        '[music]', '[Music]',
        '[музыка]', '[Музыка]',
        '[musik]', '[Musik]',
        '[musica]', '[Musica]',
        '[música]', '[Música]',
        '[音楽]',
        '[音乐]',
    )
    spoken_lines = (entry["text"] for entry in transcript_list)
    return "".join(
        line + " " for line in spoken_lines if line not in music_placeholders
    )
|
|
|
|
|
def _extract_video_id(url):
    """Returns the YouTube video id found in *url* (watch, youtu.be, embed or shorts form)."""
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse(url.strip())
    host = (parsed.hostname or "").lower()
    path_parts = [part for part in parsed.path.split("/") if part]

    # Short links carry the id as the first path segment: youtu.be/<id>?si=...
    if host in ("youtu.be", "www.youtu.be") and path_parts:
        return path_parts[0]

    # Regular links carry it in the "v" query parameter: watch?v=<id>&t=...
    query_ids = parse_qs(parsed.query).get("v")
    if query_ids:
        return query_ids[0]

    if len(path_parts) >= 2 and path_parts[0] in ("embed", "shorts", "live", "v"):
        return path_parts[1]

    # Unknown shape: fall back to "everything after the first '='", or the
    # string itself when it contains no '=' at all.
    _, separator, tail = url.partition("=")
    return tail if separator else url


def _translation_codes(transcript):
    """Returns the set of language codes *transcript* can be translated to."""
    codes = set()
    for entry in transcript.translation_languages:
        # Entries are read as mappings; attribute access is accepted as well
        # in case the installed library version exposes objects instead.
        if isinstance(entry, dict):
            codes.add(entry.get('language_code'))
        else:
            codes.add(getattr(entry, 'language_code', None))
    return codes


def get_transcript(url, desired_language):
    """
    Fetches the transcript of a YouTube video and, if offered, its translation.

    A manually created translatable transcript is preferred, then an
    automatically generated translatable one, then the first transcript listed.
    The translation is requested from the transcript that was actually
    selected.

    :param url: str - YouTube video URL
    :param desired_language: str - language code of the wanted translation (e.g. "es")
    :return: tuple (script, script_translated, is_translated); when no
             transcript can be obtained the tuple is (" ", " ", False)
    """
    no_transcript = (" ", " ", False)
    video_id = _extract_video_id(url)

    try:
        transcripts = list(YouTubeTranscriptApi.list_transcripts(video_id))
    except Exception:
        # The library raises several error types (disabled transcripts,
        # unavailable video, network errors); all of them mean "no transcript".
        print('TranscriptsDisabled:')
        return no_transcript

    if not transcripts:
        print('TranscriptsDisabled:')
        return no_transcript

    print([(t.language_code, t.is_generated, t.is_translatable) for t in transcripts])
    print("There are {} avialable scripts".format(len(transcripts)))

    manual = [t for t in transcripts if not t.is_generated and t.is_translatable]
    automatic = [t for t in transcripts if t.is_generated and t.is_translatable]

    if manual:
        print("Script found manually generated")
        print('We extract manually created transcripts')
        transcript = manual[0]
    elif automatic:
        print("Script found automatic generated")
        print('We extract generated transcript')
        transcript = automatic[0]
    else:
        print('We try find the transcript')
        transcript = transcripts[0]

    try:
        script = clean_transcript(transcript.fetch())
    except Exception as error:
        print(f"The transcript could not be fetched: {error}")
        return no_transcript

    script_translated = ""
    is_translated = False
    if transcript.is_translatable and desired_language in _translation_codes(transcript):
        print("It was found the translation for lang:", desired_language)
        print('We translate directly the transcript')
        try:
            translated_entries = transcript.translate(desired_language).fetch()
            script_translated = clean_transcript(translated_entries)
            is_translated = True
        except Exception as error:
            # Keep the original script; the caller falls back to its own
            # translation path when is_translated is False.
            print(f"The transcript could not be translated: {error}")

    return script, script_translated, is_translated
|
|
|
|
|
# Remember the directory the app was started from, make sure a "temp"
# sub-directory exists next to it, and publish both paths through the
# process environment.
home_dir = os.getcwd()
temp_dir = os.path.join(home_dir, "temp")

os.makedirs(temp_dir, exist_ok=True)

os.environ.update({'home_dir': home_dir, 'temp_dir': temp_dir})
|
|
|
def video_to_translate(url, initial_language, final_language):
    """
    Downloads a YouTube video and returns a copy dubbed in another language.

    Steps: validate the URL, download the video, obtain the translated text
    (from the YouTube transcript when a direct translation exists, otherwise
    through speech recognition followed by machine translation), synthesise
    the translated text with gTTS and write a new video that uses that audio.

    :param url: str - YouTube video URL
    :param initial_language: str - spoken language of the video (e.g. "English")
    :param final_language: str - language of the new audio track (e.g. "Spanish")
    :return: str - path of the translated video, or the path of a "try again"
             demo clip when any step fails
    """
    print('Checking the url')
    check = validate_youtube(url)
    if check is True:
        return "./demo/tryagain2.mp4"

    # Locale passed to the speech recogniser for each supported source language.
    recognition_locales = {
        "English": 'en-US',
        "Italian": 'it-IT',
        "Chinese": 'zh-CN',
        "Spanish": 'es-MX',
        "Russian": 'ru-RU',
        "German": 'de-DE',
        "Japanese": 'ja-JP',
    }
    # Language code used for transcript lookup, translation and text-to-speech.
    target_codes = {
        "English": 'en',
        "Italian": 'it',
        "Spanish": 'es',
        "Russian": 'ru',
        "German": 'de',
        "Vietnamese": 'vi',
        "Japanese": 'ja',
    }
    lang_in = recognition_locales.get(initial_language)
    lang = target_codes.get(final_language)
    if lang is None:
        # The target language is needed by every later step.
        print("Unsupported final language:", final_language)
        return "./demo/tryagain.mp4"

    home_dir = os.getenv('home_dir') or os.getcwd()
    print('Initial directory:', home_dir)

    cleanup()
    file_obj = download_video(url)
    print(file_obj)
    if file_obj is None:
        # download_video() signals failure with None.
        return "./demo/tryagain.mp4"

    videoclip = VideoFileClip(file_obj)

    def _abort():
        # Close the clip before deleting the media files it may still hold open.
        videoclip.close()
        cleanup()
        return "./demo/tryagain.mp4"

    text, trans, is_traduc = get_transcript(url, desired_language=lang)

    if is_traduc:
        print("Transcript Found")
    else:
        print("No Transcript Found")

        if lang_in is None:
            # Speech recognition needs a known source locale.
            print("Unsupported initial language:", initial_language)
            return _abort()
        if videoclip.audio is None:
            print("This video cannot be recognized")
            return _abort()

        videoclip.audio.write_audiofile("audio.wav", codec='pcm_s16le')

        r = sr.Recognizer()
        with sr.AudioFile("audio.wav") as source:
            audio_data = r.record(source)

        print("Recognize from ", lang_in)

        size_wav = getSize("audio.wav")
        if size_wav > 50000000:
            print("The wav is too large")
            audio_chunks = split_audio_wav("audio.wav")
            recognised_parts = []
            for chunk in audio_chunks:
                print("Converting audio to text", chunk)
                try:
                    # NOTE(review): every iteration recognises the complete
                    # recording (audio_data), not `chunk`. split_audio_wav() is
                    # defined outside this file, so the chunk type is unknown
                    # here -- confirm it and recognise each chunk instead.
                    text_chunk = r.recognize_google(audio_data, language=lang_in)
                except Exception:
                    print("This video cannot be recognized")
                    return _abort()
                recognised_parts.append(text_chunk)
            text = "".join(part + " " for part in recognised_parts)
        else:
            try:
                text = r.recognize_google(audio_data, language=lang_in)
            except Exception:
                print("This video cannot be recognized")
                return _abort()

        print("Destination language ", lang)

        translator = Translator()
        try:
            translation = translator.translate(text, dest=lang)
        except Exception:
            print("This text cannot be translated")
            return _abort()

        trans = translation.text

    try:
        myobj = gTTS(text=trans, lang=lang, slow=False)
        myobj.save("audio.wav")
    except Exception:
        # Treat a failed speech synthesis like the other failed steps.
        print("This text cannot be converted to speech")
        return _abort()

    audioclip = AudioFileClip("audio.wav")
    new_audioclip = CompositeAudioClip([audioclip])
    videoclip.audio = new_audioclip
    new_video = "video_translated_" + lang + ".mp4"

    os.chdir(home_dir)
    print('Final directory', os.getcwd())

    try:
        videoclip.write_videofile(new_video)
    finally:
        # Release the media readers even when writing fails.
        audioclip.close()
        videoclip.close()

    return new_video
|
|
|
# Input widgets of the web form: source language, target language and video URL.
initial_language = gr.Dropdown(
    label="Initial Language",
    choices=["English", "Italian", "Japanese", "Russian", "Spanish", "German"],
)
final_language = gr.Dropdown(
    label="Final Language",
    choices=["Russian", "Italian", "Spanish", "German", "English", "Japanese"],
)
url = gr.Textbox(label="Enter the YouTube URL below:")

# Build the interface around video_to_translate(), then start the server.
demo = gr.Interface(
    fn=video_to_translate,
    inputs=[url, initial_language, final_language],
    outputs="video",
    title="Video YouTube Translator",
    description="A simple application that translates YouTube small videos from English, Italian, Japanese, Russian, Spanish, and German to Italian, Spanish, Russian, English, and Japanese. Wait one minute to process.",
    article="""<div>
<p style="text-align: center"> All you need to do is to paste the YouTube link and hit submit, then wait for compiling. After that, click on Play/Pause to listen to the video. The video is saved in an MP4 format.
The length video limit is 10 minutes. For more information visit <a href="https://ruslanmv.com/">ruslanmv.com</a>.
</p>
</div>""",
    examples=[
        ["https://youtu.be/uLVRZE8OAI4?si=LA08t9hUJHLYg8K_", "English", "Spanish"],
    ],
)
demo.launch()
|
|