marquesafonso committed on
Commit 6d792ca · 1 Parent(s): c8a9e77

add model_version arg. bump gradio_client dep. improve html and dockerfile.

Dockerfile CHANGED
@@ -1,9 +1,7 @@
  # Use an official Python runtime as a parent image
  FROM python:3.11.7-slim-bullseye

- RUN useradd -m -u 1000 user
- USER user
- ENV PATH="/home/user/.local/bin:$PATH"
+ USER root

  # Set the working directory in the container to /app
  WORKDIR /app
@@ -21,3 +19,5 @@ EXPOSE 8000

  # Run main.py when the container launches
  CMD ["python", "main.py"]
+
+ USER 1001
main.py CHANGED
@@ -75,6 +75,7 @@ async def get_temp_dir():
  async def process_video_api(video_file: MP4Video = Depends(),
              srt_file: SRTFile = Depends(),
              task: Optional[str] = Form("transcribe"),
+             model_version: Optional[str] = Form("deepdml/faster-whisper-large-v3-turbo-ct2"),
              max_words_per_line: Optional[int] = Form(6),
              fontsize: Optional[int] = Form(42),
              font: Optional[str] = Form("FuturaPTHeavy"),
@@ -99,14 +100,14 @@ async def process_video_api(video_file: MP4Video = Depends(),
      finally:
          srt_file.file.close()
      logging.info("Processing the video...")
-     output_path, _ = process_video(temp_file.name, temp_srt_file.name, task, max_words_per_line, fontsize, font, bg_color, text_color, caption_mode)
+     output_path, _ = process_video(temp_file.name, temp_srt_file.name, task, model_version, max_words_per_line, fontsize, font, bg_color, text_color, caption_mode)
      logging.info("Zipping response...")
      with open(os.path.join(temp_dir, f"{video_file.filename.split('.')[0]}.zip"), 'w+b') as temp_zip_file:
          zip_file = zip_response(temp_zip_file.name, [output_path, srt_path])
          return Response(content = zip_file)
  with open(os.path.join(temp_dir, f"{video_file.filename.split('.')[0]}.srt"), 'w+b') as temp_srt_file:
      logging.info("Processing the video...")
-     output_path, srt_path = process_video(temp_file.name, None, task, max_words_per_line, fontsize, font, bg_color, text_color, caption_mode, api_configs_file)
+     output_path, srt_path = process_video(temp_file.name, None, task, model_version, max_words_per_line, fontsize, font, bg_color, text_color, caption_mode, api_configs_file)
      logging.info("Zipping response...")
      with open(os.path.join(temp_dir, f"{video_file.filename.split('.')[0]}.zip"), 'w+b') as temp_zip_file:
          zip_file = zip_response(temp_zip_file.name, [output_path, srt_path])
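
For reference, a minimal client sketch exercising the new field with Python's requests library. The route path, host, and the multipart field name for the video upload are assumptions not shown in this diff (the port comes from the EXPOSE 8000 line in the Dockerfile); only the Form field names and the zip response are grounded in the code above.

import requests

# Hypothetical client call; the route path and upload field name are assumptions.
resp = requests.post(
    "http://localhost:8000/process_video",          # port from EXPOSE 8000; path is assumed
    files={"video_file": open("clip.mp4", "rb")},   # field name assumed from the parameter name
    data={
        "task": "transcribe",
        "model_version": "deepdml/faster-whisper-large-v3-turbo-ct2",  # new form field added in this commit
        "max_words_per_line": 6,
        "fontsize": 42,
        "font": "FuturaPTHeavy",
    },
)
with open("clip.zip", "wb") as f:
    f.write(resp.content)  # response body is a zip containing the captioned video and the .srt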
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
 
static/landing_page.html CHANGED
@@ -142,8 +142,8 @@
  <body>
      <div class="container">
          <h1>Multilang-ASR-Captioner</h1>
-         <p>A multilingual automatic speech recognition and video captioning tool using faster whisper.</p>
-         <p>Supports real-time translation to english. Runs on consumer grade cpu.</p>
+         <p>A multilingual automatic speech recognition and video captioning tool using faster whisper.<br>
+         Supports real-time translation to english. Runs on consumer grade cpu.</p>
          <a href="/submit_video" class="button submit">Submit Video</a>
          <a href="/docs" class="button docs">Documentation</a>
      </div>
static/submit_video.html CHANGED
@@ -164,6 +164,11 @@
      <option value="transcribe">Transcribe</option>
      <option value="translate">Translate</option>
  </select>
+ <label for="model_version">Model Version</label>
+ <select id="model_version" name="model_version">
+     <option value="deepdml/faster-whisper-large-v3-turbo-ct2">faster-whisper-large-v3-turbo</option>
+     <option value="large-v3">large-v3</option>
+ </select>
  </div>
  <div class="form-group">
      <h3>Visual Parameters</h3>
utils/process_video.py CHANGED
@@ -5,6 +5,7 @@ from utils.subtitler import subtitler
  def process_video(invideo_file: str,
                    srt_file: str | None,
                    task: str,
+                   model_version: str,
                    max_words_per_line:int,
                    fontsize:str,
                    font:str,
@@ -21,7 +22,7 @@ def process_video(invideo_file: str,
          subtitler(invideo_file, srt_file, OUTVIDEO_PATH, fontsize, font, bg_color, text_color, caption_mode)
      else:
          srt_file = os.path.normpath(f"{invideo_file.split('.')[0]}.srt")
-         transcriber(invideo_file, srt_file, max_words_per_line, task, config_file)
+         transcriber(invideo_file, srt_file, max_words_per_line, task, model_version, config_file)
      logging.info("Subtitling...")
      subtitler(invideo_file, srt_file, OUTVIDEO_PATH, fontsize, font, bg_color, text_color, caption_mode)
      return OUTVIDEO_PATH, srt_file
utils/transcriber.py CHANGED
@@ -2,7 +2,7 @@ from gradio_client import Client, handle_file
  from utils.api_configs import api_configs

  def transcriber(invideo_file:str, srt_file:str,
-                 max_words_per_line:int, task:str,
+                 max_words_per_line:int, task:str, model_version:str,
                  config_file:str):
      HF_TOKEN = api_configs(config_file)["secrets"]["hf-token"]
      HF_SPACE = api_configs(config_file)["secrets"]["hf-space"]
@@ -11,6 +11,7 @@ def transcriber(invideo_file:str, srt_file:str,
          video_input=handle_file(invideo_file),
          max_words_per_line=max_words_per_line,
          task=task,
+         model_version=model_version,
          api_name="/predict"
      )
      with open(srt_file, "w", encoding='utf-8') as file:
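
A short usage sketch of the updated helper signature. The file paths and the config file name are placeholders; the config is only assumed to expose the hf-token and hf-space secrets read above. Forwarding model_version to the Space's /predict call is likely what required the gradio_client bump mentioned in the commit message.

# Hypothetical direct call to the updated transcriber; paths and config file name are placeholders.
from utils.transcriber import transcriber

transcriber(
    invideo_file="clip.mp4",
    srt_file="clip.srt",                # transcript is written here
    max_words_per_line=6,
    task="transcribe",
    model_version="deepdml/faster-whisper-large-v3-turbo-ct2",  # new argument forwarded to /predict
    config_file="api_configs.yaml",     # assumed name; must hold secrets -> hf-token / hf-space
)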