Fedir Zadniprovskyi committed on
Commit
48ce933
·
1 Parent(s): e41bc7f

feat: add translation endpoint

Browse files
Files changed (1) hide show
  1. speaches/main.py +36 -1
speaches/main.py CHANGED
@@ -49,6 +49,40 @@ def health() -> Response:
49
  return Response(status_code=200, content="Everything is peachy!")
50
 
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  # https://platform.openai.com/docs/api-reference/audio/createTranscription
53
  # https://github.com/openai/openai-openapi/blob/master/openapi.yaml#L8915
54
  @app.post("/v1/audio/transcriptions")
@@ -70,6 +104,7 @@ async def transcribe_file(
70
  start = time.perf_counter()
71
  segments, transcription_info = whisper.transcribe(
72
  file.file,
 
73
  language=language,
74
  initial_prompt=prompt,
75
  word_timestamps="words" in timestamp_granularities,
@@ -79,7 +114,7 @@ async def transcribe_file(
79
  segments = list(segments)
80
  end = time.perf_counter()
81
  logger.info(
82
- f"Transcribed {transcription_info.duration}({transcription_info.duration_after_vad}) in {end - start:.2f} seconds"
83
  )
84
  if response_format == ResponseFormat.TEXT:
85
  return utils.segments_text(segments)
 
49
  return Response(status_code=200, content="Everything is peachy!")
50
 
51
 
52
+ @app.post("/v1/audio/translations")
53
+ async def translate_file(
54
+ file: Annotated[UploadFile, Form()],
55
+ model: Annotated[Model, Form()] = config.whisper.model,
56
+ prompt: Annotated[str | None, Form()] = None,
57
+ response_format: Annotated[ResponseFormat, Form()] = ResponseFormat.JSON,
58
+ temperature: Annotated[float, Form()] = 0.0,
59
+ ):
60
+ assert (
61
+ model == config.whisper.model
62
+ ), "Specifying a model that is different from the default is not supported yet."
63
+ start = time.perf_counter()
64
+ segments, transcription_info = whisper.transcribe(
65
+ file.file,
66
+ task="translate",
67
+ initial_prompt=prompt,
68
+ temperature=temperature,
69
+ vad_filter=True,
70
+ )
71
+ segments = list(segments)
72
+ end = time.perf_counter()
73
+ logger.info(
74
+ f"Translated {transcription_info.duration}({transcription_info.duration_after_vad}) seconds of audio in {end - start:.2f} seconds"
75
+ )
76
+ if response_format == ResponseFormat.TEXT:
77
+ return utils.segments_text(segments)
78
+ elif response_format == ResponseFormat.JSON:
79
+ return TranscriptionJsonResponse.from_segments(segments)
80
+ elif response_format == ResponseFormat.VERBOSE_JSON:
81
+ return TranscriptionVerboseJsonResponse.from_segments(
82
+ segments, transcription_info
83
+ )
84
+
85
+
86
  # https://platform.openai.com/docs/api-reference/audio/createTranscription
87
  # https://github.com/openai/openai-openapi/blob/master/openapi.yaml#L8915
88
  @app.post("/v1/audio/transcriptions")
 
104
  start = time.perf_counter()
105
  segments, transcription_info = whisper.transcribe(
106
  file.file,
107
+ task="transcribe",
108
  language=language,
109
  initial_prompt=prompt,
110
  word_timestamps="words" in timestamp_granularities,
 
114
  segments = list(segments)
115
  end = time.perf_counter()
116
  logger.info(
117
+ f"Transcribed {transcription_info.duration}({transcription_info.duration_after_vad}) seconds of audio in {end - start:.2f} seconds"
118
  )
119
  if response_format == ResponseFormat.TEXT:
120
  return utils.segments_text(segments)