Spaces:
Runtime error
Runtime error
| import json | |
| import os | |
| from vosk import SetLogLevel, Model, KaldiRecognizer | |
| import ray | |
| SetLogLevel(-1) # mutes vosk verbosity | |
class SpeechToTextVoskActor:
    """Incremental speech-to-text front end built on a Vosk ``KaldiRecognizer``.

    Audio chunks are pushed in with :meth:`add_speech_bytes` and the recognised
    text is drained with :meth:`get_text`.

    Invariant: ``text_queue`` and ``finished_queue`` always have the same
    length; ``finished_queue[i]`` tells whether ``text_queue[i]`` was a final
    result (``True``) or a partial one (``False``).
    """

    def __init__(self, model='small', audio_bit_rate=16000) -> None:
        """Load the Vosk model and create the recogniser.

        Args:
            model: Name of the sub-directory under ``models/vosk`` (next to
                this file) that holds the Vosk model to load.
            audio_bit_rate: Value handed to ``KaldiRecognizer`` as its second
                argument (the sample rate in the Vosk API — NOTE(review): the
                parameter name says "bit rate"; confirm callers pass Hz).
        """
        self.model = model
        self.audio_bit_rate = audio_bit_rate

        # Models are resolved relative to this source file, not the CWD, so
        # the actor works regardless of where the process was started.
        current_directory = os.path.dirname(os.path.abspath(__file__))
        _path = os.path.join(current_directory, 'models', 'vosk', self.model)
        self.model_voice = Model(_path)
        self.vosk = KaldiRecognizer(self.model_voice, self.audio_bit_rate)

        # Parallel queues, always appended to / trimmed together.
        self.text_queue = []
        self.finished_queue = []

    def process_speech(self, data: bytearray) -> tuple[str, bool]:
        """Feed ``data`` to the recogniser and return ``(text, finished)``.

        Public alias of :meth:`_process_speech`; it does not touch the queues.
        """
        return self._process_speech(data)

    def add_speech_bytes(self, data: bytearray):
        """Recognise ``data`` and enqueue the result for :meth:`get_text`.

        A flag is stored for *every* chunk (not only for final ones) so that
        each flag stays paired with the text it belongs to.
        """
        text, speaker_finished = self._process_speech(data)
        self.text_queue.append(text)
        self.finished_queue.append(speaker_finished)

    def _process_speech(self, data: bytearray) -> tuple[str, bool]:
        """Run one audio chunk through the recogniser.

        Returns:
            ``(text, True)`` with the ``'text'`` field of ``Result()`` when
            ``AcceptWaveform`` returns a truthy value, otherwise
            ``(text, False)`` with the ``'partial'`` field of
            ``PartialResult()``.  A missing field yields an empty string.
        """
        if self.vosk.AcceptWaveform(data):
            result_json = json.loads(self.vosk.Result())
            return result_json.get('text', ''), True

        result_json = json.loads(self.vosk.PartialResult())
        return result_json.get('partial', ''), False

    def get_text(self) -> tuple[str, bool]:
        """Drain queued text up to and including the first final result.

        Returns:
            ``(text, speaker_finished)`` where ``text`` is the concatenation
            of the drained entries.  ``speaker_finished`` is ``True`` when the
            last drained entry was a final result; entries queued after it are
            left for the next call.  With nothing queued, returns
            ``('', False)``.
        """
        try:
            # Stop right after the first final result so that the flag is
            # reported together with the text it actually belongs to.
            end = self.finished_queue.index(True) + 1
            speaker_finished = True
        except ValueError:
            # No final result queued yet: hand out everything we have.
            end = len(self.text_queue)
            speaker_finished = False

        text = ''.join(self.text_queue[:end])
        del self.text_queue[:end]
        del self.finished_queue[:end]
        return text, speaker_finished

    def get_audio_bit_rate(self):
        """Return the ``audio_bit_rate`` value given at construction."""
        return self.audio_bit_rate