helvekami committed on
Commit
86fab4a
·
1 Parent(s): fbc6758

Updated Gradio App

Browse files
Files changed (1) hide show
  1. app.py +12 -8
app.py CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
2
  import transformers
3
  import librosa
4
  import torch
 
5
 
6
  # Load the Shuka model pipeline.
7
  pipe = transformers.pipeline(
@@ -17,7 +18,7 @@ def process_audio(audio):
17
  """
18
  if audio is None:
19
  return "No audio provided. Please upload or record an audio file."
20
-
21
  try:
22
  # Gradio returns a tuple: (sample_rate, numpy_array)
23
  sample_rate, audio_data = audio
@@ -27,7 +28,11 @@ def process_audio(audio):
27
  if audio_data is None or len(audio_data) == 0:
28
  return "Audio data is empty. Please try again with a valid audio file."
29
 
30
- # Resample to 16000 Hz if necessary
 
 
 
 
31
  if sample_rate != 16000:
32
  try:
33
  audio_data = librosa.resample(audio_data, orig_sr=sample_rate, target_sr=16000)
@@ -35,7 +40,7 @@ def process_audio(audio):
35
  except Exception as e:
36
  return f"Error during resampling: {e}"
37
 
38
- # Define conversation turns for the model
39
  turns = [
40
  {'role': 'system', 'content': 'Respond naturally and informatively.'},
41
  {'role': 'user', 'content': '<|audio|>'}
@@ -46,7 +51,7 @@ def process_audio(audio):
46
  except Exception as e:
47
  return f"Error during model processing: {e}"
48
 
49
- # Extract generated text
50
  if isinstance(result, list) and len(result) > 0:
51
  response = result[0].get('generated_text', '')
52
  else:
@@ -55,15 +60,14 @@ def process_audio(audio):
55
  return response
56
 
57
  # Create the Gradio interface.
58
- # If you wish to record audio directly, you may need to upgrade Gradio to a version that supports "source" for the Audio component.
59
  iface = gr.Interface(
60
  fn=process_audio,
61
- inputs=gr.Audio(type="numpy"), # using file upload input for audio
62
  outputs="text",
63
  title="Sarvam AI Shuka Voice Demo",
64
  description="Upload an audio file and get a response using Sarvam AI's Shuka model."
65
  )
66
 
67
  if __name__ == "__main__":
68
- # If port 7860 is in use, you can specify another port (here we use 7861)
69
- iface.launch(server_port=7861)
 
2
  import transformers
3
  import librosa
4
  import torch
5
+ import numpy as np
6
 
7
  # Load the Shuka model pipeline.
8
  pipe = transformers.pipeline(
 
18
  """
19
  if audio is None:
20
  return "No audio provided. Please upload or record an audio file."
21
+
22
  try:
23
  # Gradio returns a tuple: (sample_rate, numpy_array)
24
  sample_rate, audio_data = audio
 
28
  if audio_data is None or len(audio_data) == 0:
29
  return "Audio data is empty. Please try again with a valid audio file."
30
 
31
+ # Cast non-float audio data to float32 (dtype cast only; sample values are not rescaled).
32
+ if not np.issubdtype(audio_data.dtype, np.floating):
33
+ audio_data = audio_data.astype(np.float32)
34
+
35
+ # Resample to 16000 Hz if necessary.
36
  if sample_rate != 16000:
37
  try:
38
  audio_data = librosa.resample(audio_data, orig_sr=sample_rate, target_sr=16000)
 
40
  except Exception as e:
41
  return f"Error during resampling: {e}"
42
 
43
+ # Define conversation turns for the model.
44
  turns = [
45
  {'role': 'system', 'content': 'Respond naturally and informatively.'},
46
  {'role': 'user', 'content': '<|audio|>'}
 
51
  except Exception as e:
52
  return f"Error during model processing: {e}"
53
 
54
+ # Extract the generated text response.
55
  if isinstance(result, list) and len(result) > 0:
56
  response = result[0].get('generated_text', '')
57
  else:
 
60
  return response
61
 
62
  # Create the Gradio interface.
 
63
  iface = gr.Interface(
64
  fn=process_audio,
65
+ inputs=gr.Audio(type="numpy"), # File upload for audio.
66
  outputs="text",
67
  title="Sarvam AI Shuka Voice Demo",
68
  description="Upload an audio file and get a response using Sarvam AI's Shuka model."
69
  )
70
 
71
  if __name__ == "__main__":
72
+ # Set share=True to create a public link, and specify a server port.
73
+ iface.launch(share=True, server_port=7861)