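Update app.py: complete the truncated `input_values.to(torch.` call as `input_values.to(torch.float32)` in handle_audio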
Browse files
app.py
CHANGED
|
@@ -40,13 +40,14 @@ def handle_audio(audio):
|
|
| 40 |
audio = audio[1]
|
| 41 |
input_values = wav2vec2_processor(audio, sampling_rate=16_000, return_tensors="pt").input_values
|
| 42 |
# Convert to the expected tensor type
|
| 43 |
-
input_values = input_values.to(torch.
|
| 44 |
logits = wav2vec2_model(input_values).logits
|
| 45 |
predicted_ids = torch.argmax(logits, dim=-1)
|
| 46 |
transcriptions = wav2vec2_processor.decode(predicted_ids[0])
|
| 47 |
return handle_text(transcriptions)
|
| 48 |
|
| 49 |
|
|
|
|
| 50 |
def chatbot(text, img, audio):
|
| 51 |
text_output = handle_text(text) if text is not None else ''
|
| 52 |
img_output = handle_image(img) if img is not None else ''
|
|
|
|
| 40 |
audio = audio[1]
|
| 41 |
input_values = wav2vec2_processor(audio, sampling_rate=16_000, return_tensors="pt").input_values
|
| 42 |
# Convert to the expected tensor type
|
| 43 |
+
input_values = input_values.to(torch.float32)
|
| 44 |
logits = wav2vec2_model(input_values).logits
|
| 45 |
predicted_ids = torch.argmax(logits, dim=-1)
|
| 46 |
transcriptions = wav2vec2_processor.decode(predicted_ids[0])
|
| 47 |
return handle_text(transcriptions)
|
| 48 |
|
| 49 |
|
| 50 |
+
|
| 51 |
def chatbot(text, img, audio):
|
| 52 |
text_output = handle_text(text) if text is not None else ''
|
| 53 |
img_output = handle_image(img) if img is not None else ''
|