Spaces:
Running
on
Zero
Running
on
Zero
liuyang
committed on
Commit
·
c59adf8
1
Parent(s):
2861a47
add prompt
Browse files
app.py
CHANGED
@@ -15,6 +15,14 @@ os.environ.update(
|
|
15 |
MPLCONFIGDIR = f"{CACHE_ROOT}/mpl",
|
16 |
)
|
17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
# make sure the directories exist
|
19 |
for path in os.environ.values():
|
20 |
pathlib.Path(path).mkdir(parents=True, exist_ok=True)
|
@@ -384,9 +392,11 @@ class WhisperTranscriber:
|
|
384 |
language_detection_segments=1,
|
385 |
task="translate" if translate else "transcribe",
|
386 |
)
|
387 |
-
|
|
|
|
|
388 |
# Use batched inference for better performance
|
389 |
-
segments, transcript_info =
|
390 |
audio_path,
|
391 |
#batch_size=batch_size,
|
392 |
**options
|
|
|
15 |
MPLCONFIGDIR = f"{CACHE_ROOT}/mpl",
|
16 |
)
|
17 |
|
18 |
+
INITIAL_PROMPT = '''
|
19 |
+
Transcribe the audio verbatim in the original language(s). Do NOT translate or summarize.
|
20 |
+
Use the standard punctuation of each language to avoid run-on sentences.
|
21 |
+
• End every sentence with a sentence-final mark (., ?, ! or the local equivalent such as 。!?).
|
22 |
+
• Insert commas/pauses where they naturally occur; prefer more punctuation over less.
|
23 |
+
• For CJK languages, use native full-width marks (,、。!?:;「」『』( )); for others, use their customary marks.
|
24 |
+
'''
|
25 |
+
|
26 |
# make sure the directories exist
|
27 |
for path in os.environ.values():
|
28 |
pathlib.Path(path).mkdir(parents=True, exist_ok=True)
|
|
|
392 |
language_detection_segments=1,
|
393 |
task="translate" if translate else "transcribe",
|
394 |
)
|
395 |
+
transcribe_model = whisper
|
396 |
+
if batch_size > 1:
|
397 |
+
transcribe_model = batched_whisper
|
398 |
# Use batched inference for better performance
|
399 |
+
segments, transcript_info = transcribe_model.transcribe(
|
400 |
audio_path,
|
401 |
#batch_size=batch_size,
|
402 |
**options
|