helvekami committed on
Commit
e2f65f6
·
1 Parent(s): 98333ca

Updated Gradio App

Browse files
Files changed (1) hide show
  1. app.py +52 -4
app.py CHANGED
@@ -1,7 +1,55 @@
1
  import gradio as gr
 
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
 
 
 
 
 
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import transformers
3
+ import librosa
4
+ import torch
5
 
6
# Build the Shuka model pipeline once, at import time.
# When CUDA is available the model goes on GPU index 0 in bfloat16;
# otherwise it runs on CPU (device=-1) and torch_dtype is left unset.
# NOTE: trust_remote_code=True lets the model repository supply its own
# pipeline code, which is executed locally.
_cuda_ready = torch.cuda.is_available()
pipe = transformers.pipeline(
    model="sarvamai/shuka_v1",
    trust_remote_code=True,
    device=0 if _cuda_ready else -1,
    torch_dtype=torch.bfloat16 if _cuda_ready else None,
)
13
 
14
def process_audio(audio):
    """
    Processes the input audio and returns a text response generated by the Shuka model.

    Args:
        audio: ``None`` when nothing was uploaded, otherwise the
            ``(sample_rate, samples)`` tuple produced by ``gr.Audio(type="numpy")``.
            ``samples`` is a NumPy array, 1-D for mono or ``(samples, channels)``
            for multi-channel audio, and may hold integer PCM or float values.

    Returns:
        The model's text response, or "No audio provided." when there is no
        audio to process (``None`` or a zero-length recording).
    """
    if audio is None:
        return "No audio provided."

    # Gradio returns a tuple (sample_rate, numpy_array)
    sample_rate, audio_data = audio
    if audio_data is None or audio_data.size == 0:
        # A zero-length clip carries nothing for the model to respond to.
        return "No audio provided."

    # librosa.resample only accepts floating-point input, while Gradio hands
    # over integer PCM. Rescale integers to float32 in [-1.0, 1.0).
    kind = audio_data.dtype.kind
    if kind in ("i", "u"):
        full_scale = float(2 ** (8 * audio_data.dtype.itemsize - 1))
        audio_data = audio_data.astype("float32")
        if kind == "u":
            # Unsigned PCM is centred on mid-scale rather than on zero.
            audio_data = audio_data - full_scale
        audio_data = audio_data / full_scale
    else:
        audio_data = audio_data.astype("float32")

    # Multi-channel audio arrives as (samples, channels); average the channels
    # so the model (and the resampler, which works along the last axis) sees
    # a single mono waveform.
    if audio_data.ndim > 1:
        audio_data = audio_data.mean(axis=1)

    # Resample to 16000 Hz if necessary
    if sample_rate != 16000:
        audio_data = librosa.resample(audio_data, orig_sr=sample_rate, target_sr=16000)
        sample_rate = 16000

    # Define conversation turns with a system prompt and a user prompt that signals audio input
    turns = [
        {'role': 'system', 'content': 'Respond naturally and informatively.'},
        {'role': 'user', 'content': '<|audio|>'}
    ]

    # Run the pipeline with the audio input and conversation context
    result = pipe(
        {'audio': audio_data, 'turns': turns, 'sampling_rate': sample_rate},
        max_new_tokens=512,
    )

    # Extract the generated text response. The pipeline may hand back a plain
    # string or a list of result dicts; anything else is stringified.
    if isinstance(result, str):
        return result
    if isinstance(result, list) and result:
        first = result[0]
        if isinstance(first, dict):
            return first.get('generated_text', '')
        return str(first)
    return str(result)
44
+
45
# Gradio UI: a single audio input feeding process_audio, with the reply shown
# as plain text. gr.Audio is created with only type="numpy" (no 'source'
# argument), so the callback receives a (sample_rate, numpy_array) tuple.
_audio_input = gr.Audio(type="numpy")

iface = gr.Interface(
    fn=process_audio,
    inputs=_audio_input,
    outputs="text",
    title="Sarvam AI Shuka Voice Demo",
    description="Upload a voice note and get a response using Sarvam AI's Shuka model.",
)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()