Spaces:
Runtime error
Runtime error
import torch | |
import torchaudio | |
import torchaudio.functional as AF | |
from transformers import Wav2Vec2ForCTC, AutoProcessor | |
from pydub import AudioSegment | |
from pydub.silence import split_on_silence | |
class Transcribe:
    """Callable speech-to-text wrapper around the ``facebook/mms-1b-fl102`` model.

    The processor and ``Wav2Vec2ForCTC`` weights are loaded once at
    construction. Each call selects a target language, runs the model on
    the given audio tensor, and returns the greedily decoded text.
    """

    def __init__(self, freq: float = 16000.0) -> None:
        """Load the processor and model for ``facebook/mms-1b-fl102``.

        Args:
            freq: Stored as ``self.freq``; this class does not read it
                anywhere else. Presumably the expected sample rate in Hz --
                TODO confirm against callers.
        """
        self.freq = freq
        self.model_id = "facebook/mms-1b-fl102"
        self.processor = AutoProcessor.from_pretrained(self.model_id)
        self.model = Wav2Vec2ForCTC.from_pretrained(self.model_id)
        # Language most recently configured by __call__. None means no
        # language has been selected yet, so the first call always
        # configures the tokenizer and loads the adapter.
        self._active_lang = None

    def __call__(self, audio_tensor: torch.Tensor, lang: str = "amh") -> str:
        """Transcribe ``audio_tensor`` using the adapter for ``lang``.

        Args:
            audio_tensor: Tensor passed unchanged to the model. It is NOT
                run through ``self.processor`` here.
                NOTE(review): assumes a batched, already-preprocessed
                waveform (batch, samples) -- confirm against callers.
            lang: Language identifier handed to
                ``tokenizer.set_target_lang`` and ``model.load_adapter``.

        Returns:
            The decoded text for the first item along the leading (batch)
            dimension of the logits; any further items are ignored.
        """
        print(lang)

        # Switching language reconfigures the tokenizer and reloads the
        # adapter weights, so only do it when the language actually changes.
        if lang != self._active_lang:
            self.processor.tokenizer.set_target_lang(lang)
            self.model.load_adapter(lang)
            # Recorded only after both steps succeed, so a failed switch is
            # retried on the next call.
            self._active_lang = lang

        # Inference only: skip autograd bookkeeping to avoid building a
        # gradient graph and retaining activations.
        with torch.no_grad():
            logits = self.model(audio_tensor).logits

        # Greedy CTC decoding: most likely token per frame, first batch item.
        ids = torch.argmax(logits, dim=-1)[0]
        return self.processor.decode(ids)