whisper-webui-translate

Running

App Files Files Community

aadnk committed on Sep 27, 2022

Commit

6a308c6

1 Parent(s): 1a68fc3

Let max line width depend on the language

Browse files

Files changed (2) hide show

app.py +24 -9
utils.py +9 -6

app.py CHANGED Viewed

@@ -1,3 +1,4 @@
 from typing import Iterator
 from io import StringIO
@@ -15,15 +16,15 @@ from download import downloadUrl
 from utils import slugify, write_srt, write_vtt
-#import os
-#os.system("pip install git+https://github.com/openai/whisper.git")
 # Limitations (set to -1 to disable)
 DEFAULT_INPUT_AUDIO_MAX_DURATION = 600 # seconds
 # Whether or not to automatically delete all uploaded files, to save disk space
 DELETE_UPLOADED_FILES = True
 LANGUAGES = [
  "English", "Chinese", "German", "Spanish", "Russian", "Korean",
  "French", "Japanese", "Portuguese", "Turkish", "Polish", "Catalan",
@@ -74,8 +75,13 @@ class UI:
             result = model.transcribe(source, language=selectedLanguage, task=task)
             text = result["text"]
-            vtt = getSubs(result["segments"], "vtt")
-            srt = getSubs(result["segments"], "srt")
             # Files that can be downloaded
             downloadDirectory = tempfile.mkdtemp()
@@ -95,6 +101,15 @@ class UI:
                 os.remove(source)
 def getSource(urlData, uploadFile, microphoneData):
     if urlData:
         # Download from YouTube
@@ -104,7 +119,7 @@ def getSource(urlData, uploadFile, microphoneData):
         source = uploadFile if uploadFile is not None else microphoneData
     file_path = pathlib.Path(source)
-    sourceName = file_path.stem[:18] + file_path.suffix
     return source, sourceName
@@ -115,13 +130,13 @@ def createFile(text: str, directory: str, fileName: str) -> str:
     return file.name
-def getSubs(segments: Iterator[dict], format: str) -> str:
     segmentStream = StringIO()
     if format == 'vtt':
-        write_vtt(segments, file=segmentStream)
     elif format == 'srt':
-        write_srt(segments, file=segmentStream)
     else:
         raise Exception("Unknown format " + format)

+import re
 from typing import Iterator
 from io import StringIO
 from utils import slugify, write_srt, write_vtt
 # Limitations (set to -1 to disable)
 DEFAULT_INPUT_AUDIO_MAX_DURATION = 600 # seconds
 # Whether or not to automatically delete all uploaded files, to save disk space
 DELETE_UPLOADED_FILES = True
+# Gradio seems to truncate files without keeping the extension, so we need to truncate the file prefix ourselves
+MAX_FILE_PREFIX_LENGTH = 17
 LANGUAGES = [
  "English", "Chinese", "German", "Spanish", "Russian", "Korean",
  "French", "Japanese", "Portuguese", "Turkish", "Polish", "Catalan",
             result = model.transcribe(source, language=selectedLanguage, task=task)
             text = result["text"]
+            language = result["language"]
+            languageMaxLineWidth = getMaxLineWidth(language)
+            print("Max line width " + str(languageMaxLineWidth))
+            vtt = getSubs(result["segments"], "vtt", languageMaxLineWidth)
+            srt = getSubs(result["segments"], "srt", languageMaxLineWidth)
             # Files that can be downloaded
             downloadDirectory = tempfile.mkdtemp()
                 os.remove(source)
+def getMaxLineWidth(language: str) -> int:
+    if (language == "ja" or language == "zh"):
+        # Chinese characters and kana are wider, so limit line length to 40 characters
+        return 40
+    else:
+        # TODO: Add more languages
+        # 80 Latin characters should fit on a 1080p/720p screen
+        return 80
 def getSource(urlData, uploadFile, microphoneData):
     if urlData:
         # Download from YouTube
         source = uploadFile if uploadFile is not None else microphoneData
     file_path = pathlib.Path(source)
+    sourceName = file_path.stem[:MAX_FILE_PREFIX_LENGTH] + file_path.suffix
     return source, sourceName
     return file.name
+def getSubs(segments: Iterator[dict], format: str, maxLineWidth: int) -> str:
     segmentStream = StringIO()
     if format == 'vtt':
+        write_vtt(segments, file=segmentStream, maxLineWidth=maxLineWidth)
     elif format == 'srt':
+        write_srt(segments, file=segmentStream, maxLineWidth=maxLineWidth)
     else:
         raise Exception("Unknown format " + format)

utils.py CHANGED Viewed

@@ -53,10 +53,10 @@ def write_txt(transcript: Iterator[dict], file: TextIO):
         print(segment['text'].strip(), file=file, flush=True)
-def write_vtt(transcript: Iterator[dict], file: TextIO):
     print("WEBVTT\n", file=file)
     for segment in transcript:
-        text = processText(segment['text']).replace('-->', '->')
         print(
             f"{format_timestamp(segment['start'])} --> {format_timestamp(segment['end'])}\n"
@@ -66,7 +66,7 @@ def write_vtt(transcript: Iterator[dict], file: TextIO):
         )
-def write_srt(transcript: Iterator[dict], file: TextIO):
     """
     Write a transcript to a file in SRT format.
     Example usage:
@@ -79,7 +79,7 @@ def write_srt(transcript: Iterator[dict], file: TextIO):
             write_srt(result["segments"], file=srt)
     """
     for i, segment in enumerate(transcript, start=1):
-        text = processText(segment['text'].strip()).replace('-->', '->')
         # write srt lines
         print(
@@ -91,8 +91,11 @@ def write_srt(transcript: Iterator[dict], file: TextIO):
             flush=True,
         )
-def processText(text: str):
-    lines = textwrap.wrap(text, width=47, tabsize=4)
     return '\n'.join(lines)
 def slugify(value, allow_unicode=False):

         print(segment['text'].strip(), file=file, flush=True)
+def write_vtt(transcript: Iterator[dict], file: TextIO, maxLineWidth=None):
     print("WEBVTT\n", file=file)
     for segment in transcript:
+        text = processText(segment['text'], maxLineWidth).replace('-->', '->')
         print(
             f"{format_timestamp(segment['start'])} --> {format_timestamp(segment['end'])}\n"
         )
+def write_srt(transcript: Iterator[dict], file: TextIO, maxLineWidth=None):
     """
     Write a transcript to a file in SRT format.
     Example usage:
             write_srt(result["segments"], file=srt)
     """
     for i, segment in enumerate(transcript, start=1):
+        text = processText(segment['text'].strip(), maxLineWidth).replace('-->', '->')
         # write srt lines
         print(
             flush=True,
         )
+def processText(text: str, maxLineWidth=None):
+    if (maxLineWidth is None or maxLineWidth < 0):
+        return text
+    lines = textwrap.wrap(text, width=maxLineWidth, tabsize=4)
     return '\n'.join(lines)
 def slugify(value, allow_unicode=False):