Spaces:
Sleeping
Sleeping
| import io | |
| from typing import Any, Optional | |
| from openai import NOT_GIVEN, AsyncOpenAI, OpenAI | |
| from pydub import AudioSegment | |
| from ...base import BotBase | |
| class STT(BotBase): | |
| model_id: str = "whisper-1" | |
| endpoint: Optional[str] = None | |
| api_key: str | |
| language: Optional[str] = None | |
| response_format: str = "verbose_json" | |
| class Config: | |
| """Configuration for this pydantic object.""" | |
| extra = "allow" | |
| protected_namespaces = () | |
| def __init__(self, /, **data: Any) -> None: | |
| super().__init__(**data) | |
| self.client = OpenAI(base_url=self.endpoint, api_key=self.api_key) | |
| self.aclient = AsyncOpenAI(base_url=self.endpoint, api_key=self.api_key) | |
| def _as2bytes(self, audio: AudioSegment) -> io.BytesIO: | |
| audio_bytes = io.BytesIO() | |
| audio.export(audio_bytes, format="mp3") | |
| audio_bytes.seek(0) | |
| audio_bytes.name = "buffer.mp3" | |
| return audio_bytes | |
| def infer(self, audio: AudioSegment) -> dict: | |
| audio_bytes = self._as2bytes(audio) | |
| trans = self.client.audio.transcriptions.create( | |
| model=self.model_id, | |
| file=audio_bytes, | |
| response_format=self.response_format, | |
| language=NOT_GIVEN if self.language is None else self.language, | |
| ) | |
| return trans.to_dict() | |
| async def ainfer(self, audio: AudioSegment) -> dict: | |
| audio_bytes = self._as2bytes(audio) | |
| trans = await self.aclient.audio.transcriptions.create( | |
| model=self.model_id, | |
| file=audio_bytes, | |
| response_format=self.response_format, | |
| language=NOT_GIVEN if self.language is None else self.language, | |
| ) | |
| return trans.to_dict() | |