Commit
·
6d792ca
1
Parent(s):
c8a9e77
add model_version arg. bump gradio_client dep. improve html and dockerfile.
Browse files
- Dockerfile +3 -3
- main.py +3 -2
- requirements.txt +0 -0
- static/landing_page.html +2 -2
- static/submit_video.html +5 -0
- utils/process_video.py +2 -1
- utils/transcriber.py +2 -1
Dockerfile
CHANGED
@@ -1,9 +1,7 @@
|
|
1 |
# Use an official Python runtime as a parent image
|
2 |
FROM python:3.11.7-slim-bullseye
|
3 |
|
4 |
-
|
5 |
-
USER user
|
6 |
-
ENV PATH="/home/user/.local/bin:$PATH"
|
7 |
|
8 |
# Set the working directory in the container to /app
|
9 |
WORKDIR /app
|
@@ -21,3 +19,5 @@ EXPOSE 8000
|
|
21 |
|
22 |
# Run main.py when the container launches
|
23 |
CMD ["python", "main.py"]
|
|
|
|
|
|
1 |
# Use an official Python runtime as a parent image
|
2 |
FROM python:3.11.7-slim-bullseye
|
3 |
|
4 |
+
USER root
|
|
|
|
|
5 |
|
6 |
# Set the working directory in the container to /app
|
7 |
WORKDIR /app
|
|
|
19 |
|
20 |
# Run main.py when the container launches
|
21 |
CMD ["python", "main.py"]
|
22 |
+
|
23 |
+
USER 1001
|
main.py
CHANGED
@@ -75,6 +75,7 @@ async def get_temp_dir():
|
|
75 |
async def process_video_api(video_file: MP4Video = Depends(),
|
76 |
srt_file: SRTFile = Depends(),
|
77 |
task: Optional[str] = Form("transcribe"),
|
|
|
78 |
max_words_per_line: Optional[int] = Form(6),
|
79 |
fontsize: Optional[int] = Form(42),
|
80 |
font: Optional[str] = Form("FuturaPTHeavy"),
|
@@ -99,14 +100,14 @@ async def process_video_api(video_file: MP4Video = Depends(),
|
|
99 |
finally:
|
100 |
srt_file.file.close()
|
101 |
logging.info("Processing the video...")
|
102 |
-
output_path, _ = process_video(temp_file.name, temp_srt_file.name, task, max_words_per_line, fontsize, font, bg_color, text_color, caption_mode)
|
103 |
logging.info("Zipping response...")
|
104 |
with open(os.path.join(temp_dir, f"{video_file.filename.split('.')[0]}.zip"), 'w+b') as temp_zip_file:
|
105 |
zip_file = zip_response(temp_zip_file.name, [output_path, srt_path])
|
106 |
return Response(content = zip_file)
|
107 |
with open(os.path.join(temp_dir, f"{video_file.filename.split('.')[0]}.srt"), 'w+b') as temp_srt_file:
|
108 |
logging.info("Processing the video...")
|
109 |
-
output_path, srt_path = process_video(temp_file.name, None, task, max_words_per_line, fontsize, font, bg_color, text_color, caption_mode, api_configs_file)
|
110 |
logging.info("Zipping response...")
|
111 |
with open(os.path.join(temp_dir, f"{video_file.filename.split('.')[0]}.zip"), 'w+b') as temp_zip_file:
|
112 |
zip_file = zip_response(temp_zip_file.name, [output_path, srt_path])
|
|
|
75 |
async def process_video_api(video_file: MP4Video = Depends(),
|
76 |
srt_file: SRTFile = Depends(),
|
77 |
task: Optional[str] = Form("transcribe"),
|
78 |
+
model_version: Optional[str] = Form("deepdml/faster-whisper-large-v3-turbo-ct2"),
|
79 |
max_words_per_line: Optional[int] = Form(6),
|
80 |
fontsize: Optional[int] = Form(42),
|
81 |
font: Optional[str] = Form("FuturaPTHeavy"),
|
|
|
100 |
finally:
|
101 |
srt_file.file.close()
|
102 |
logging.info("Processing the video...")
|
103 |
+
output_path, _ = process_video(temp_file.name, temp_srt_file.name, task, model_version, max_words_per_line, fontsize, font, bg_color, text_color, caption_mode)
|
104 |
logging.info("Zipping response...")
|
105 |
with open(os.path.join(temp_dir, f"{video_file.filename.split('.')[0]}.zip"), 'w+b') as temp_zip_file:
|
106 |
zip_file = zip_response(temp_zip_file.name, [output_path, srt_path])
|
107 |
return Response(content = zip_file)
|
108 |
with open(os.path.join(temp_dir, f"{video_file.filename.split('.')[0]}.srt"), 'w+b') as temp_srt_file:
|
109 |
logging.info("Processing the video...")
|
110 |
+
output_path, srt_path = process_video(temp_file.name, None, task, model_version, max_words_per_line, fontsize, font, bg_color, text_color, caption_mode, api_configs_file)
|
111 |
logging.info("Zipping response...")
|
112 |
with open(os.path.join(temp_dir, f"{video_file.filename.split('.')[0]}.zip"), 'w+b') as temp_zip_file:
|
113 |
zip_file = zip_response(temp_zip_file.name, [output_path, srt_path])
|
requirements.txt
CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
|
|
static/landing_page.html
CHANGED
@@ -142,8 +142,8 @@
|
|
142 |
<body>
|
143 |
<div class="container">
|
144 |
<h1>Multilang-ASR-Captioner</h1>
|
145 |
-
<p>A multilingual automatic speech recognition and video captioning tool using faster whisper
|
146 |
-
|
147 |
<a href="/submit_video" class="button submit">Submit Video</a>
|
148 |
<a href="/docs" class="button docs">Documentation</a>
|
149 |
</div>
|
|
|
142 |
<body>
|
143 |
<div class="container">
|
144 |
<h1>Multilang-ASR-Captioner</h1>
|
145 |
+
<p>A multilingual automatic speech recognition and video captioning tool using faster whisper.<br>
|
146 |
+
Supports real-time translation to english. Runs on consumer grade cpu.</p>
|
147 |
<a href="/submit_video" class="button submit">Submit Video</a>
|
148 |
<a href="/docs" class="button docs">Documentation</a>
|
149 |
</div>
|
static/submit_video.html
CHANGED
@@ -164,6 +164,11 @@
|
|
164 |
<option value="transcribe">Transcribe</option>
|
165 |
<option value="translate">Translate</option>
|
166 |
</select>
|
|
|
|
|
|
|
|
|
|
|
167 |
</div>
|
168 |
<div class="form-group">
|
169 |
<h3>Visual Parameters</h3>
|
|
|
164 |
<option value="transcribe">Transcribe</option>
|
165 |
<option value="translate">Translate</option>
|
166 |
</select>
|
167 |
+
<label for="model_version">Model Version</label>
|
168 |
+
<select id="model_version" name="model_version">
|
169 |
+
<option value="deepdml/faster-whisper-large-v3-turbo-ct2">faster-whisper-large-v3-turbo</option>
|
170 |
+
<option value="large-v3">large-v3</option>
|
171 |
+
</select>
|
172 |
</div>
|
173 |
<div class="form-group">
|
174 |
<h3>Visual Parameters</h3>
|
utils/process_video.py
CHANGED
@@ -5,6 +5,7 @@ from utils.subtitler import subtitler
|
|
5 |
def process_video(invideo_file: str,
|
6 |
srt_file: str | None,
|
7 |
task: str,
|
|
|
8 |
max_words_per_line:int,
|
9 |
fontsize:str,
|
10 |
font:str,
|
@@ -21,7 +22,7 @@ def process_video(invideo_file: str,
|
|
21 |
subtitler(invideo_file, srt_file, OUTVIDEO_PATH, fontsize, font, bg_color, text_color, caption_mode)
|
22 |
else:
|
23 |
srt_file = os.path.normpath(f"{invideo_file.split('.')[0]}.srt")
|
24 |
-
transcriber(invideo_file, srt_file, max_words_per_line, task, config_file)
|
25 |
logging.info("Subtitling...")
|
26 |
subtitler(invideo_file, srt_file, OUTVIDEO_PATH, fontsize, font, bg_color, text_color, caption_mode)
|
27 |
return OUTVIDEO_PATH, srt_file
|
|
|
5 |
def process_video(invideo_file: str,
|
6 |
srt_file: str | None,
|
7 |
task: str,
|
8 |
+
model_version: str,
|
9 |
max_words_per_line:int,
|
10 |
fontsize:str,
|
11 |
font:str,
|
|
|
22 |
subtitler(invideo_file, srt_file, OUTVIDEO_PATH, fontsize, font, bg_color, text_color, caption_mode)
|
23 |
else:
|
24 |
srt_file = os.path.normpath(f"{invideo_file.split('.')[0]}.srt")
|
25 |
+
transcriber(invideo_file, srt_file, max_words_per_line, task, model_version, config_file)
|
26 |
logging.info("Subtitling...")
|
27 |
subtitler(invideo_file, srt_file, OUTVIDEO_PATH, fontsize, font, bg_color, text_color, caption_mode)
|
28 |
return OUTVIDEO_PATH, srt_file
|
utils/transcriber.py
CHANGED
@@ -2,7 +2,7 @@ from gradio_client import Client, handle_file
|
|
2 |
from utils.api_configs import api_configs
|
3 |
|
4 |
def transcriber(invideo_file:str, srt_file:str,
|
5 |
-
max_words_per_line:int, task:str,
|
6 |
config_file:str):
|
7 |
HF_TOKEN = api_configs(config_file)["secrets"]["hf-token"]
|
8 |
HF_SPACE = api_configs(config_file)["secrets"]["hf-space"]
|
@@ -11,6 +11,7 @@ def transcriber(invideo_file:str, srt_file:str,
|
|
11 |
video_input=handle_file(invideo_file),
|
12 |
max_words_per_line=max_words_per_line,
|
13 |
task=task,
|
|
|
14 |
api_name="/predict"
|
15 |
)
|
16 |
with open(srt_file, "w", encoding='utf-8') as file:
|
|
|
2 |
from utils.api_configs import api_configs
|
3 |
|
4 |
def transcriber(invideo_file:str, srt_file:str,
|
5 |
+
max_words_per_line:int, task:str, model_version:str,
|
6 |
config_file:str):
|
7 |
HF_TOKEN = api_configs(config_file)["secrets"]["hf-token"]
|
8 |
HF_SPACE = api_configs(config_file)["secrets"]["hf-space"]
|
|
|
11 |
video_input=handle_file(invideo_file),
|
12 |
max_words_per_line=max_words_per_line,
|
13 |
task=task,
|
14 |
+
model_version=model_version,
|
15 |
api_name="/predict"
|
16 |
)
|
17 |
with open(srt_file, "w", encoding='utf-8') as file:
|