Spaces:

helvekami
/

ShukaNote

Running on Zero

App Files Files Community

helvekami committed 16 days ago

Commit

e2f65f6

1 Parent(s): 98333ca

Updated Gradio App

Browse files

Files changed (1) hide show

app.py +52 -4

app.py CHANGED Viewed

@@ -1,7 +1,55 @@
 import gradio as gr
-def greet(name):
-    return "Hello " + name + "!!"
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-demo.launch()

 import gradio as gr
+import transformers
+import librosa
+import torch
+# Build the Shuka model pipeline once at import time.
+# With CUDA available it runs on GPU 0 in bfloat16; otherwise on CPU (-1) with the default dtype.
+_cuda_ready = torch.cuda.is_available()
+if _cuda_ready:
+    _device, _dtype = 0, torch.bfloat16
+else:
+    _device, _dtype = -1, None
+pipe = transformers.pipeline(
+    model="sarvamai/shuka_v1",
+    trust_remote_code=True,
+    device=_device,
+    torch_dtype=_dtype,
+)
+def _to_mono_float32(audio_data):
+    """
+    Converts a Gradio audio array to one-dimensional float32 samples.
+    Integer PCM is scaled by the dtype's full range and multi-channel input is averaged to mono.
+    """
+    import numpy as np  # local import so this block does not depend on the file's import list
+    samples = np.asarray(audio_data)
+    if np.issubdtype(samples.dtype, np.integer):
+        # e.g. int16 is divided by 32768 so the result lies in [-1.0, 1.0)
+        full_scale = float(np.iinfo(samples.dtype).max) + 1.0
+        samples = samples.astype(np.float32) / full_scale
+    else:
+        samples = samples.astype(np.float32)
+    if samples.ndim > 1:
+        # Gradio lays multi-channel audio out as (samples, channels); average the channels.
+        samples = samples.mean(axis=1)
+    return samples
+def process_audio(audio):
+    """
+    Processes the input audio and returns a text response generated by the Shuka model.
+    Args:
+        audio: the (sample_rate, numpy_array) tuple produced by gr.Audio(type="numpy"), or None.
+    Returns:
+        The generated text, or "No audio provided." when there is nothing to process.
+    """
+    if audio is None:
+        return "No audio provided."
+    # Gradio returns a tuple (sample_rate, numpy_array)
+    sample_rate, audio_data = audio
+    # librosa.resample only accepts floating-point buffers and works along the last axis,
+    # so convert integer PCM to float and collapse channels before resampling.
+    audio_data = _to_mono_float32(audio_data)
+    if audio_data.size == 0:
+        return "No audio provided."
+    # Resample to 16000 Hz if necessary
+    target_rate = 16000
+    if sample_rate != target_rate:
+        audio_data = librosa.resample(audio_data, orig_sr=sample_rate, target_sr=target_rate)
+        sample_rate = target_rate
+    # Define conversation turns with a system prompt and a user prompt that signals audio input
+    turns = [
+        {'role': 'system', 'content': 'Respond naturally and informatively.'},
+        {'role': 'user', 'content': '<|audio|>'}
+    ]
+    # Run the pipeline with the audio input and conversation context
+    result = pipe({'audio': audio_data, 'turns': turns, 'sampling_rate': sample_rate}, max_new_tokens=512)
+    # Extract the generated text response; anything that is not a non-empty list is stringified.
+    if isinstance(result, list) and result:
+        first = result[0]
+        if isinstance(first, dict):
+            return first.get('generated_text', '')
+        return str(first)
+    return str(result)
+# Assemble the Gradio UI: a single audio input (no 'source' argument) mapped to a text output.
+_audio_input = gr.Audio(type="numpy")
+iface = gr.Interface(
+    title="Sarvam AI Shuka Voice Demo",
+    description="Upload a voice note and get a response using Sarvam AI's Shuka model.",
+    fn=process_audio,
+    inputs=_audio_input,
+    outputs="text",
+)
+# Launch the app only when this file is executed as a script.
+if __name__ == "__main__":
+    iface.launch()