HakimHa committed on
Commit
b615dfa
·
1 Parent(s): a9fada1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -1
app.py CHANGED
@@ -5,7 +5,7 @@ import soundfile as sf
5
  import torch
6
  import numpy as np
7
 
8
- model_name_or_path = "microsoft/DialoGPT-large"
9
 
10
  tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, padding_side="left", use_fast=False)
11
  tokenizer.pad_token = tokenizer.eos_token
@@ -39,11 +39,14 @@ def handle_audio(audio):
39
  # we only need the audio data, hence accessing the second element
40
  audio = audio[1]
41
  input_values = wav2vec2_processor(audio, sampling_rate=16_000, return_tensors="pt").input_values
 
 
42
  logits = wav2vec2_model(input_values).logits
43
  predicted_ids = torch.argmax(logits, dim=-1)
44
  transcriptions = wav2vec2_processor.decode(predicted_ids[0])
45
  return handle_text(transcriptions)
46
 
 
47
  def chatbot(text, img, audio):
48
  text_output = handle_text(text) if text is not None else ''
49
  img_output = handle_image(img) if img is not None else ''
 
5
  import torch
6
  import numpy as np
7
 
8
+ model_name_or_path = "tiiuae/falcon-7b-instruct"
9
 
10
  tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, padding_side="left", use_fast=False)
11
  tokenizer.pad_token = tokenizer.eos_token
 
39
  # we only need the audio data, hence accessing the second element
40
  audio = audio[1]
41
  input_values = wav2vec2_processor(audio, sampling_rate=16_000, return_tensors="pt").input_values
42
+ # Convert to the expected tensor type
43
+ input_values = input_values.to(torch.float64)
44
  logits = wav2vec2_model(input_values).logits
45
  predicted_ids = torch.argmax(logits, dim=-1)
46
  transcriptions = wav2vec2_processor.decode(predicted_ids[0])
47
  return handle_text(transcriptions)
48
 
49
+
50
  def chatbot(text, img, audio):
51
  text_output = handle_text(text) if text is not None else ''
52
  img_output = handle_image(img) if img is not None else ''