marquesafonso committed on
Commit 6d792ca · 1 Parent(s): c8a9e77

add model_version arg. bump gradio_client dep. improve html and dockerfile.

Dockerfile CHANGED
@@ -1,9 +1,7 @@
  # Use an official Python runtime as a parent image
  FROM python:3.11.7-slim-bullseye

- RUN useradd -m -u 1000 user
- USER user
- ENV PATH="/home/user/.local/bin:$PATH"
+ USER root

  # Set the working directory in the container to /app
  WORKDIR /app
@@ -21,3 +19,5 @@ EXPOSE 8000

  # Run main.py when the container launches
  CMD ["python", "main.py"]
+
+ USER 1001
main.py CHANGED
@@ -75,6 +75,7 @@ async def get_temp_dir():
  async def process_video_api(video_file: MP4Video = Depends(),
              srt_file: SRTFile = Depends(),
              task: Optional[str] = Form("transcribe"),
+             model_version: Optional[str] = Form("deepdml/faster-whisper-large-v3-turbo-ct2"),
              max_words_per_line: Optional[int] = Form(6),
              fontsize: Optional[int] = Form(42),
              font: Optional[str] = Form("FuturaPTHeavy"),
@@ -99,14 +100,14 @@ async def process_video_api(video_file: MP4Video = Depends(),
      finally:
          srt_file.file.close()
      logging.info("Processing the video...")
-     output_path, _ = process_video(temp_file.name, temp_srt_file.name, task, max_words_per_line, fontsize, font, bg_color, text_color, caption_mode)
+     output_path, _ = process_video(temp_file.name, temp_srt_file.name, task, model_version, max_words_per_line, fontsize, font, bg_color, text_color, caption_mode)
      logging.info("Zipping response...")
      with open(os.path.join(temp_dir, f"{video_file.filename.split('.')[0]}.zip"), 'w+b') as temp_zip_file:
          zip_file = zip_response(temp_zip_file.name, [output_path, srt_path])
          return Response(content = zip_file)
  with open(os.path.join(temp_dir, f"{video_file.filename.split('.')[0]}.srt"), 'w+b') as temp_srt_file:
      logging.info("Processing the video...")
-     output_path, srt_path = process_video(temp_file.name, None, task, max_words_per_line, fontsize, font, bg_color, text_color, caption_mode, api_configs_file)
+     output_path, srt_path = process_video(temp_file.name, None, task, model_version, max_words_per_line, fontsize, font, bg_color, text_color, caption_mode, api_configs_file)
      logging.info("Zipping response...")
      with open(os.path.join(temp_dir, f"{video_file.filename.split('.')[0]}.zip"), 'w+b') as temp_zip_file:
          zip_file = zip_response(temp_zip_file.name, [output_path, srt_path])
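
For reference, a minimal client sketch exercising the new field with Python's requests library. The route path, host, and the multipart field name for the video upload are assumptions not shown in this diff (the port comes from the EXPOSE 8000 line in the Dockerfile); only the Form field names and the zip response are grounded in the code above.

import requests

# Hypothetical client call; the route path and upload field name are assumptions.
resp = requests.post(
    "http://localhost:8000/process_video",          # port from EXPOSE 8000; path is assumed
    files={"video_file": open("clip.mp4", "rb")},   # field name assumed from the parameter name
    data={
        "task": "transcribe",
        "model_version": "deepdml/faster-whisper-large-v3-turbo-ct2",  # new form field added in this commit
        "max_words_per_line": 6,
        "fontsize": 42,
        "font": "FuturaPTHeavy",
    },
)
with open("clip.zip", "wb") as f:
    f.write(resp.content)  # response body is a zip containing the captioned video and the .srt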
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
 
static/landing_page.html CHANGED
@@ -142,8 +142,8 @@
  <body>
      <div class="container">
          <h1>Multilang-ASR-Captioner</h1>
-         <p>A multilingual automatic speech recognition and video captioning tool using faster whisper.</p>
-         <p>Supports real-time translation to english. Runs on consumer grade cpu.</p>
+         <p>A multilingual automatic speech recognition and video captioning tool using faster whisper.<br>
+         Supports real-time translation to english. Runs on consumer grade cpu.</p>
          <a href="/submit_video" class="button submit">Submit Video</a>
          <a href="/docs" class="button docs">Documentation</a>
      </div>
static/submit_video.html CHANGED
@@ -164,6 +164,11 @@
      <option value="transcribe">Transcribe</option>
      <option value="translate">Translate</option>
  </select>
+ <label for="model_version">Model Version</label>
+ <select id="model_version" name="model_version">
+     <option value="deepdml/faster-whisper-large-v3-turbo-ct2">faster-whisper-large-v3-turbo</option>
+     <option value="large-v3">large-v3</option>
+ </select>
  </div>
  <div class="form-group">
      <h3>Visual Parameters</h3>
utils/process_video.py CHANGED
@@ -5,6 +5,7 @@ from utils.subtitler import subtitler
  def process_video(invideo_file: str,
                    srt_file: str | None,
                    task: str,
+                   model_version: str,
                    max_words_per_line:int,
                    fontsize:str,
                    font:str,
@@ -21,7 +22,7 @@ def process_video(invideo_file: str,
          subtitler(invideo_file, srt_file, OUTVIDEO_PATH, fontsize, font, bg_color, text_color, caption_mode)
      else:
          srt_file = os.path.normpath(f"{invideo_file.split('.')[0]}.srt")
-         transcriber(invideo_file, srt_file, max_words_per_line, task, config_file)
+         transcriber(invideo_file, srt_file, max_words_per_line, task, model_version, config_file)
      logging.info("Subtitling...")
      subtitler(invideo_file, srt_file, OUTVIDEO_PATH, fontsize, font, bg_color, text_color, caption_mode)
      return OUTVIDEO_PATH, srt_file
utils/transcriber.py CHANGED
@@ -2,7 +2,7 @@ from gradio_client import Client, handle_file
  from utils.api_configs import api_configs

  def transcriber(invideo_file:str, srt_file:str,
-                 max_words_per_line:int, task:str,
+                 max_words_per_line:int, task:str, model_version:str,
                  config_file:str):
      HF_TOKEN = api_configs(config_file)["secrets"]["hf-token"]
      HF_SPACE = api_configs(config_file)["secrets"]["hf-space"]
@@ -11,6 +11,7 @@ def transcriber(invideo_file:str, srt_file:str,
          video_input=handle_file(invideo_file),
          max_words_per_line=max_words_per_line,
          task=task,
+         model_version=model_version,
          api_name="/predict"
      )
      with open(srt_file, "w", encoding='utf-8') as file:
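
A short usage sketch of the updated helper signature. The file paths and the config file name are placeholders; the config is only assumed to expose the hf-token and hf-space secrets read above. Forwarding model_version to the Space's /predict call is likely what required the gradio_client bump mentioned in the commit message.

# Hypothetical direct call to the updated transcriber; paths and config file name are placeholders.
from utils.transcriber import transcriber

transcriber(
    invideo_file="clip.mp4",
    srt_file="clip.srt",                # transcript is written here
    max_words_per_line=6,
    task="transcribe",
    model_version="deepdml/faster-whisper-large-v3-turbo-ct2",  # new argument forwarded to /predict
    config_file="api_configs.yaml",     # assumed name; must hold secrets -> hf-token / hf-space
)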