liuyang committed on
Commit
c59adf8
·
1 Parent(s): 2861a47

add prompt

Browse files
Files changed (1) hide show
  1. app.py +12 -2
app.py CHANGED
@@ -15,6 +15,14 @@ os.environ.update(
15
  MPLCONFIGDIR = f"{CACHE_ROOT}/mpl",
16
  )
17
 
 
 
 
 
 
 
 
 
18
  # make sure the directories exist
19
  for path in os.environ.values():
20
  pathlib.Path(path).mkdir(parents=True, exist_ok=True)
@@ -384,9 +392,11 @@ class WhisperTranscriber:
384
  language_detection_segments=1,
385
  task="translate" if translate else "transcribe",
386
  )
387
-
 
 
388
  # Use batched inference for better performance
389
- segments, transcript_info = whisper.transcribe(
390
  audio_path,
391
  #batch_size=batch_size,
392
  **options
 
15
  MPLCONFIGDIR = f"{CACHE_ROOT}/mpl",
16
  )
17
 
18
+ INITIAL_PROMPT = '''
19
+ Transcribe the audio verbatim in the original language(s). Do NOT translate or summarize.
20
+ Use the standard punctuation of each language to avoid run-on sentences.
21
+ • End every sentence with a sentence-final mark (., ?, ! or the local equivalent such as 。!?).
22
+ • Insert commas/pauses where they naturally occur; prefer more punctuation over less.
23
+ • For CJK languages, use native full-width marks (,、。!?:;「」『』( )); for others, use their customary marks.
24
+ '''
25
+
26
  # make sure the directories exist
27
  for path in os.environ.values():
28
  pathlib.Path(path).mkdir(parents=True, exist_ok=True)
 
392
  language_detection_segments=1,
393
  task="translate" if translate else "transcribe",
394
  )
395
+ transcribe_model = whisper
396
+ if batch_size > 1:
397
+ transcribe_model = batched_whisper
398
  # Use batched inference for better performance
399
+ segments, transcript_info = transcribe_model.transcribe(
400
  audio_path,
401
  #batch_size=batch_size,
402
  **options