helvekami committed
Commit f80cfc8 · 1 Parent(s): 0a3e7f6

Updated Gradio App

Files changed (2):
  1. app.py  +11 -18
  2. requirements.txt  +7 -0
app.py CHANGED
@@ -15,33 +15,26 @@ def transcribe_and_respond(audio_file):
             torch_dtype=torch.bfloat16
         )
 
-        # Load the audio file at 16kHz
+        # Load the audio file
         audio, sr = librosa.load(audio_file, sr=16000)
-
-        # Ensure audio is a contiguous floating-point numpy array
-        audio = np.ascontiguousarray(audio, dtype=np.float32)
-
-        # If audio has more than one channel, convert to mono by averaging the channels
-        if audio.ndim > 1:
-            audio = np.mean(audio, axis=-1)
-
-        # Debug: Print audio properties
+
+        # Print audio properties for debugging
         print(f"Audio dtype: {audio.dtype}, Audio shape: {audio.shape}, Sample rate: {sr}")
-
+
         turns = [
-            {'role': 'system', 'content': 'Please transcribe the following audio exactly.'},
+            {'role': 'system', 'content': 'Repeat the following text exactly, without any changes'},
            {'role': 'user', 'content': '<|audio|>'}
         ]
-
+
         # Debug: Print the initial turns
         print(f"Initial turns: {turns}")
-
+
         # Call the model with the audio and prompt
         output = pipe({'audio': audio, 'turns': turns, 'sampling_rate': sr}, max_new_tokens=512)
-
+
         # Debug: Print the final output from the model
         print(f"Model output: {output}")
-
+
         return output
 
     except Exception as e:
@@ -52,9 +45,9 @@ iface = gr.Interface(
     inputs=gr.Audio(sources="microphone", type="filepath"),
     outputs="text",
     title="Live Transcription and Response",
-    description="Speak into your microphone, and the model will transcribe your speech.",
+    description="Speak into your microphone, and the model will respond naturally and informatively.",
     live=True
 )
 
 if __name__ == "__main__":
-    iface.launch()
+    iface.launch()
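
For reference, a minimal sketch of app.py as it stands after this commit. Only the hunks above are confirmed by the diff; the imports, the model ID, and the pipeline construction are assumptions (the audio-plus-turns calling convention matches fixie-ai's ultravox pipeline, and librosa.load already returns mono float32 audio, which is consistent with dropping the old numpy preprocessing).

# Sketch of the full app.py after this commit; model ID and imports are assumed.
import gradio as gr
import librosa
import torch
import transformers

def transcribe_and_respond(audio_file):
    try:
        # Assumption: the diff starts mid-call, so the pipeline is built here;
        # "fixie-ai/ultravox-v0_4" is a placeholder for whatever model the Space uses.
        pipe = transformers.pipeline(
            model="fixie-ai/ultravox-v0_4",
            trust_remote_code=True,
            torch_dtype=torch.bfloat16
        )

        # Load the audio file, resampled to the 16 kHz the model expects
        audio, sr = librosa.load(audio_file, sr=16000)

        # Print audio properties for debugging
        print(f"Audio dtype: {audio.dtype}, Audio shape: {audio.shape}, Sample rate: {sr}")

        turns = [
            {'role': 'system', 'content': 'Repeat the following text exactly, without any changes'},
            {'role': 'user', 'content': '<|audio|>'}
        ]

        # Call the model with the audio and prompt
        output = pipe({'audio': audio, 'turns': turns, 'sampling_rate': sr}, max_new_tokens=512)
        return output

    except Exception as e:
        return f"Error: {e}"

iface = gr.Interface(
    fn=transcribe_and_respond,
    inputs=gr.Audio(sources="microphone", type="filepath"),
    outputs="text",
    title="Live Transcription and Response",
    description="Speak into your microphone, and the model will respond naturally and informatively.",
    live=True
)

if __name__ == "__main__":
    iface.launch()

One caveat: gr.Audio(sources=...) is the Gradio 4.x spelling; under the gradio==3.50.2 pin below, the equivalent parameter is source="microphone", so one of the two presumably has to change for the Space to start.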
requirements.txt ADDED
@@ -0,0 +1,7 @@
+transformers==4.41.2
+peft==0.11.1
+librosa==0.10.2
+gradio==3.50.2
+huggingface_hub
+torch
+spaces
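
As a quick sanity check that an environment actually picked up these pins, a minimal sketch (all four pinned packages expose __version__):

# Verify the pinned versions are what actually got installed.
import gradio, librosa, peft, transformers

print(transformers.__version__)  # expected: 4.41.2
print(peft.__version__)          # expected: 0.11.1
print(librosa.__version__)       # expected: 0.10.2
print(gradio.__version__)        # expected: 3.50.2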