Spaces:
Configuration error
Configuration error
Fedir Zadniprovskyi
committed on
Commit
·
48ce933
1
Parent(s):
e41bc7f
feat: add translation endpoint
Browse files- speaches/main.py +36 -1
speaches/main.py
CHANGED
@@ -49,6 +49,40 @@ def health() -> Response:
|
|
49 |
return Response(status_code=200, content="Everything is peachy!")
|
50 |
|
51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
# https://platform.openai.com/docs/api-reference/audio/createTranscription
|
53 |
# https://github.com/openai/openai-openapi/blob/master/openapi.yaml#L8915
|
54 |
@app.post("/v1/audio/transcriptions")
|
@@ -70,6 +104,7 @@ async def transcribe_file(
|
|
70 |
start = time.perf_counter()
|
71 |
segments, transcription_info = whisper.transcribe(
|
72 |
file.file,
|
|
|
73 |
language=language,
|
74 |
initial_prompt=prompt,
|
75 |
word_timestamps="words" in timestamp_granularities,
|
@@ -79,7 +114,7 @@ async def transcribe_file(
|
|
79 |
segments = list(segments)
|
80 |
end = time.perf_counter()
|
81 |
logger.info(
|
82 |
-
f"Transcribed {transcription_info.duration}({transcription_info.duration_after_vad}) in {end - start:.2f} seconds"
|
83 |
)
|
84 |
if response_format == ResponseFormat.TEXT:
|
85 |
return utils.segments_text(segments)
|
|
|
49 |
return Response(status_code=200, content="Everything is peachy!")
|
50 |
|
51 |
|
52 |
+
# https://platform.openai.com/docs/api-reference/audio/createTranslation
@app.post("/v1/audio/translations")
async def translate_file(
    file: Annotated[UploadFile, Form()],
    model: Annotated[Model, Form()] = config.whisper.model,
    prompt: Annotated[str | None, Form()] = None,
    response_format: Annotated[ResponseFormat, Form()] = ResponseFormat.JSON,
    temperature: Annotated[float, Form()] = 0.0,
):
    """Translate speech in an uploaded audio file into English text.

    Mirrors the OpenAI ``/v1/audio/translations`` endpoint: runs the Whisper
    model with ``task="translate"`` and returns the result in the requested
    ``response_format``.

    Args:
        file: Uploaded audio file to translate.
        model: Must equal the server's configured default model; anything
            else is rejected (per-request model switching is unsupported).
        prompt: Optional text used to prime the decoder.
        response_format: One of TEXT, JSON, or VERBOSE_JSON.
        temperature: Sampling temperature passed to the decoder.

    Raises:
        HTTPException: 400 when a non-default model is requested; 501 when
            the requested response format is not implemented.
    """
    # Local import keeps this change self-contained; fastapi is already a
    # dependency of this module (UploadFile/Form/app come from it).
    from fastapi import HTTPException

    # `assert` is stripped under `python -O` and would surface as a 500;
    # validate the request explicitly and answer with a client error.
    if model != config.whisper.model:
        raise HTTPException(
            status_code=400,
            detail="Specifying a model that is different from the default is not supported yet.",
        )
    start = time.perf_counter()
    segments, transcription_info = whisper.transcribe(
        file.file,
        task="translate",
        initial_prompt=prompt,
        temperature=temperature,
        vad_filter=True,
    )
    # transcribe() yields lazily; materialize so timing reflects real work.
    segments = list(segments)
    end = time.perf_counter()
    logger.info(
        f"Translated {transcription_info.duration}({transcription_info.duration_after_vad}) seconds of audio in {end - start:.2f} seconds"
    )
    if response_format == ResponseFormat.TEXT:
        return utils.segments_text(segments)
    elif response_format == ResponseFormat.JSON:
        return TranscriptionJsonResponse.from_segments(segments)
    elif response_format == ResponseFormat.VERBOSE_JSON:
        return TranscriptionVerboseJsonResponse.from_segments(
            segments, transcription_info
        )
    # Previously an unrecognized format fell through and returned None
    # (an HTTP 200 with a null body); fail loudly instead.
    raise HTTPException(
        status_code=501,
        detail=f"Response format {response_format} is not supported.",
    )
86 |
# https://platform.openai.com/docs/api-reference/audio/createTranscription
|
87 |
# https://github.com/openai/openai-openapi/blob/master/openapi.yaml#L8915
|
88 |
@app.post("/v1/audio/transcriptions")
|
|
|
104 |
start = time.perf_counter()
|
105 |
segments, transcription_info = whisper.transcribe(
|
106 |
file.file,
|
107 |
+
task="transcribe",
|
108 |
language=language,
|
109 |
initial_prompt=prompt,
|
110 |
word_timestamps="words" in timestamp_granularities,
|
|
|
114 |
segments = list(segments)
|
115 |
end = time.perf_counter()
|
116 |
logger.info(
|
117 |
+
f"Transcribed {transcription_info.duration}({transcription_info.duration_after_vad}) seconds of audio in {end - start:.2f} seconds"
|
118 |
)
|
119 |
if response_format == ResponseFormat.TEXT:
|
120 |
return utils.segments_text(segments)
|