tomaseo2022 committed on
Commit
2ba55d9
·
1 Parent(s): 6a92a82

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +342 -0
app.py ADDED
@@ -0,0 +1,342 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf8
2
+ # Youtube Video Translator
3
+ # Developed by Ruslan Magana Vsevolodovna
4
+ # https://ruslanmv.com/
5
+
6
+ # importing all necessary libraries
7
+ import pathlib
8
+ import sys, os
9
+ from gtts import gTTS
10
+ import gradio as gr
11
+ import os
12
+ import speech_recognition as sr
13
+ from googletrans import Translator, constants
14
+ from pprint import pprint
15
+ from moviepy.editor import *
16
+ from pytube import YouTube
17
+ from youtube_transcript_api import YouTubeTranscriptApi
18
+ from utils import *
19
+
20
def download_video(url):
    """Download the first progressive MP4 stream of a YouTube video.

    Args:
        url: Address of the YouTube video.

    Returns:
        The value returned by the stream's ``download()`` call, which the
        caller uses as the path of the local file.
    """
    print("Downloading...")
    # Only progressive streams in an mp4 container are considered.
    progressive_mp4 = YouTube(url).streams.filter(
        progressive=True, file_extension="mp4"
    )
    stream = progressive_mp4.first()
    saved_path = stream.download()
    print("Downloaded")
    return saved_path
30
+
31
def validate_youtube(url):
    """Tell whether a YouTube URL has to be rejected.

    Note the inverted sense of the result: ``True`` means the URL is NOT
    acceptable.

    Args:
        url: Address of the YouTube video.

    Returns:
        True when the video object cannot be created, when its length cannot
        be read, or when the video lasts more than 600 seconds; False
        otherwise.
    """
    try:
        # Reading the length is guarded together with the construction of the
        # video object: either step can fail for an unusable URL, and both
        # failures must be reported as "invalid" instead of crashing.
        video_length = YouTube(url).length
    except Exception:
        print("Hi there URL seems invalid")
        return True
    # The length is expressed in seconds; 600 s is the 10 minute limit.
    if video_length > 600:
        print("Your video is larger than 10 minutes")
        return True
    print("Your video is less than 10 minutes")
    return False
46
+
47
def validate_url(url):
    """Tell whether a URL has to be rejected.

    Note the inverted sense of the result: ``True`` means the URL is NOT
    well formed.

    Args:
        url: The address to check.

    Returns:
        True when ``validators.url`` reports the address as invalid, False
        otherwise.
    """
    import validators

    looks_valid = validators.url(url)
    if looks_valid:
        return False
    print("Hi there URL seems invalid ")
    return True
54
+
55
+
56
def cleanup():
    """Delete every ``*.mp4`` and ``*.wav`` file of the current directory.

    Removal is best effort: an entry that cannot be deleted is reported and
    skipped, so the remaining files are still removed.
    """
    import glob
    import pathlib

    patterns = ('*.mp4', '*.wav')  # the tuple of file types
    # Finding mp4 and wave files
    junks = [name for pattern in patterns for name in glob.glob(pattern)]
    for junk in junks:
        print("Deleting", junk)
        try:
            pathlib.Path(junk).unlink()
        except OSError:
            # Handled per file so one failure does not abort the whole sweep.
            print("I cannot delete the file because it is being used by another process")
74
+
75
def getSize(filename):
    """Return the size in bytes of the file at *filename*."""
    return os.path.getsize(filename)
78
+
79
+
80
def clean_transcript(transcript_list):
    """Join the text of transcript entries, leaving out music markers.

    Args:
        transcript_list: Iterable of entries exposing their text under the
            ``"text"`` key.

    Returns:
        One string in which every kept text is followed by a single space
        (so a non-empty result ends with a space).
    """
    # Entries consisting only of one of these markers carry no speech.
    music_markers = {
        '[music]', '[Music]',
        '[музыка]', '[Музыка]',
        '[musik]', '[Musik]',
        '[musica]', '[Musica]',
        '[música]', '[Música]',
        '[音楽]',
        '[音乐]',
    }
    pieces = []
    for entry in transcript_list:
        line = entry["text"]
        if line in music_markers:
            continue
        pieces.append(line + " ")
    return "".join(pieces)
99
+
100
+
101
def _extract_video_id(url):
    """Return the video id embedded in a YouTube address."""
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse(url)
    # Regular watch URLs carry the id in the "v" query parameter; reading it
    # this way ignores any extra parameter such as "&t=10s".
    from_query = parse_qs(parsed.query).get("v")
    if from_query:
        return from_query[0]
    host = (parsed.hostname or "").lower()
    segments = [part for part in parsed.path.split("/") if part]
    # Short links: https://youtu.be/<id>
    if host.endswith("youtu.be") and segments:
        return segments[0]
    # Path based links: /embed/<id>, /shorts/<id>, /v/<id>, /live/<id>
    if len(segments) >= 2 and segments[0] in ("embed", "shorts", "v", "live"):
        return segments[1]
    # Historical behaviour as a last resort: everything after the first "=".
    _, separator, tail = url.partition("=")
    return tail if separator else url


def get_transcript(url, desired_language):
    """Fetch the transcript of a video and, when offered, its translation.

    A manually created translatable transcript is preferred, then an
    automatically generated translatable one, then whatever transcript is
    found for the language of the last listed transcript.

    Args:
        url: Address of the YouTube video.
        desired_language: Language code of the wanted translation.

    Returns:
        A ``(script, script_translated, is_translated)`` tuple: the cleaned
        text of the selected transcript, the cleaned text of its translation
        (empty string when not translated) and a flag telling whether the
        translation was obtained. ``(" ", " ", False)`` is returned when the
        transcripts cannot be listed or when none is available.
    """
    video_id = _extract_video_id(url)
    try:
        # retrieve the available transcripts
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
    except Exception:
        print('TranscriptsDisabled:')
        return " ", " ", False

    available = list(transcript_list)
    print([
        (item.language_code, item.is_generated, item.is_translatable)
        for item in available
    ])
    print("There are {} avialable scripts".format(len(available)))
    if not available:
        # Nothing to select from.
        return " ", " ", False

    manual = None
    automatic = None
    # When several candidates qualify, the last one listed wins.
    for candidate in available:
        if not candidate.is_translatable:
            continue
        if candidate.is_generated:
            print("Script found automatic generated")
            automatic = candidate
        else:
            print("Script found manually generated")
            manual = candidate

    if manual is not None:
        print('We extract manually created transcripts')
        transcript = manual
    elif automatic is not None:
        print('We extract generated transcript')
        transcript = automatic
    else:
        print('We try find the transcript')
        # No translatable transcript: look one up by the language of the
        # last listed transcript.
        transcript = transcript_list.find_transcript([available[-1].language_code])

    script_translated = ""
    is_translated = False
    if transcript.is_translatable:
        # Use the languages offered by the transcript actually selected.
        offered = {
            entry['language_code'] for entry in transcript.translation_languages
        }
        if desired_language in offered:
            print("It was found the translation for lang:", desired_language)
            print('We translate directly the transcript')
            translated_entries = transcript.translate(desired_language).fetch()
            script_translated = clean_transcript(translated_entries)
            is_translated = True

    script = clean_transcript(transcript.fetch())

    return script, script_translated, is_translated
178
+
179
# Working directories: the start-up directory and its "temp" sub-directory.
home_dir = os.getcwd()
temp_dir = os.path.join(home_dir, "temp")
# Make sure the temp directory exists (no error if it is already there).
os.makedirs(temp_dir, exist_ok=True)
# Publish both locations through the process environment.
os.environ.update(home_dir=home_dir, temp_dir=temp_dir)
186
+
187
# Speech recognition locale for each supported source language.
_RECOGNIZER_LOCALES = {
    "English": "en-US",
    "Italian": "it-IT",
    "Spanish": "es-MX",
    "Russian": "ru-RU",
    "German": "de-DE",
    "Japanese": "ja-JP",
}

# Translation / text-to-speech code for each supported target language.
_TARGET_CODES = {
    "English": "en",
    "Italian": "it",
    "Spanish": "es",
    "Russian": "ru",
    "German": "de",
    "Japanese": "ja",
}


def _speech_to_text(videoclip, lang_in):
    """Transcribe the audio track of the clip; return None when it fails."""
    if lang_in is None:
        print("Unsupported source language")
        return None
    # Insert Local Audio File Path
    videoclip.audio.write_audiofile("audio.wav", codec='pcm_s16le')
    # initialize the recognizer
    r = sr.Recognizer()
    with sr.AudioFile("audio.wav") as source:
        # listen for the data (load audio to memory)
        audio_data = r.record(source)
    print("Recognize from ", lang_in)
    size_wav = getSize("audio.wav")
    if size_wav <= 50000000:
        try:
            return r.recognize_google(audio_data, language=lang_in)
        except Exception:
            print("This video cannot be recognized")
            return None

    print("The wav is too large")
    # split_audio_wav is not defined in this file; presumably it comes from
    # the ``utils`` star import - TODO confirm.
    audio_chunks = split_audio_wav("audio.wav")
    text = ""
    for chunk in audio_chunks:
        print("Converting audio to text", chunk)
        try:
            # NOTE(review): the whole recording is recognised on every pass,
            # not the current chunk - looks unintended, but what
            # split_audio_wav yields is not visible here; confirm and fix.
            text_chunk = r.recognize_google(audio_data, language=lang_in)
        except Exception:
            print("This video cannot be recognized")
            return None
        text = text + text_chunk + " "
    text = str(text)
    print(type(text))
    return text


def _text_in_target_language(videoclip, url, lang_in, lang):
    """Return the speech of the video in language *lang*, or None on failure."""
    text, trans, is_traduc = get_transcript(url, desired_language=lang)
    if is_traduc:
        print("Transcript Found")
    else:
        print("No Transcript Found")
        # Trying to recognize audio
        text = _speech_to_text(videoclip, lang_in)
        if text is None:
            return None

    print("Destination language ", lang)
    if is_traduc:
        # The transcript was already translated; no second translation needed.
        return trans

    # init the Google API translator
    translator = Translator()
    try:
        translation = translator.translate(text, dest=lang)
    except Exception:
        print("This text cannot be translated")
        return None
    return translation.text


def video_to_translate(url, initial_language, final_language):
    """Produce a copy of a YouTube video dubbed in another language.

    The speech is taken from a translated transcript when get_transcript
    provides one; otherwise the audio track is recognised and the resulting
    text translated. The translated text is synthesised with gTTS and set as
    the audio of the video.

    Args:
        url: Address of the YouTube video.
        initial_language: Name of the spoken language ("English", "Italian",
            "Spanish", "Russian", "German" or "Japanese"); only needed when
            the audio has to be recognised.
        final_language: Name of the target language (same set of names).

    Returns:
        The file name of the dubbed video, "./demo/tryagain2.mp4" when the
        URL is rejected, or "./demo/tryagain.mp4" when the target language is
        unsupported or the speech cannot be recognised or translated.
    """
    print('Checking the url')
    check = validate_youtube(url)
    if check is True:
        return "./demo/tryagain2.mp4"

    lang = _TARGET_CODES.get(final_language)
    if lang is None:
        # Fail before downloading anything.
        print("Unsupported destination language", final_language)
        return "./demo/tryagain.mp4"
    # May be None; that only matters if speech recognition is required.
    lang_in = _RECOGNIZER_LOCALES.get(initial_language)

    # Initial directory
    home_dir = os.getenv('home_dir')
    print('Initial directory:', home_dir)
    # Cleaning previous files
    cleanup()
    file_obj = download_video(url)
    print(file_obj)
    # Insert Local Video File Path
    videoclip = VideoFileClip(file_obj)
    new_video = None
    try:
        trans = _text_in_target_language(videoclip, url, lang_in, lang)
        if trans is not None:
            myobj = gTTS(text=trans, lang=lang, slow=False)
            myobj.save("audio.wav")
            # loading audio file
            audioclip = AudioFileClip("audio.wav")
            try:
                # adding audio to the video clip
                videoclip.audio = CompositeAudioClip([audioclip])
                new_video = "video_translated_" + lang + ".mp4"
                # Return back to main directory
                os.chdir(home_dir)
                print('Final directory', os.getcwd())
                videoclip.write_videofile(new_video)
            finally:
                audioclip.close()
    finally:
        # Release the clip on every exit path, before any file is deleted.
        videoclip.close()

    if new_video is None:
        cleanup()
        return "./demo/tryagain.mp4"
    return new_video
314
+
315
# Input widgets, listed in the order video_to_translate takes its arguments.
url = gr.inputs.Textbox(label="Enter the YouTube URL below:")
initial_language = gr.inputs.Dropdown(
    ["English", "Italian", "Japanese", "Russian", "Spanish", "German"]
)
final_language = gr.inputs.Dropdown(
    ["Russian", "Italian", "Spanish", "German", "English", "Japanese"]
)

# HTML shown below the interface.
_ARTICLE_HTML = '''<div>
<p style="text-align: center"> All you need to do is to paste the Youtube link and hit submit,, then wait for compiling. After that click on Play/Pause for listing to the video. The video is saved in an mp4 format.
The lenght video limit is 10 minutes. For more information visit <a href="https://ruslanmv.com/">ruslanmv.com</a>
</p>
</div>'''

# Ready-made (url, source language, target language) examples.
_EXAMPLES = [
    ["https://www.youtube.com/watch?v=uLVRZE8OAI4", "English", "Spanish"],
    ["https://www.youtube.com/watch?v=fkGCLIQx1MI", "English", "Russian"],
    ["https://www.youtube.com/watch?v=6Q6hFtitthQ", "Italian", "English"],
    ["https://www.youtube.com/watch?v=s5XvjAC7ai8", "Russian", "English"],
    ["https://www.youtube.com/watch?v=qzzweIQoIOU", "Japanese", "English"],
    ["https://www.youtube.com/watch?v=nOGZvu6tJFE", "German", "Spanish"],
]

# Build the web interface around video_to_translate and start serving it.
gr.Interface(
    fn=video_to_translate,
    inputs=[url, initial_language, final_language],
    outputs='video',
    verbose=True,
    title='Video Youtube Translator',
    description='A simple application that translates Youtube small videos from English, Italian, Japanese, Russian, Spanish, and German to Italian, Spanish, Russian, English and Japanese. Wait one minute to process.',
    article=_ARTICLE_HTML,
    examples=_EXAMPLES,
).launch()