File size: 2,524 Bytes
e2f65f6 8c679c2 e2f65f6 8c679c2 86fab4a a9b9492 98333ca 8c679c2 a9b9492 fbc6758 8c679c2 66c2b05 8c679c2 a9b9492 8c679c2 a9b9492 8c679c2 7638418 8c679c2 a9b9492 8c679c2 a9b9492 8c679c2 fbc6758 a9b9492 e2f65f6 a0b460e 57740a6 7638418 57740a6 66c2b05 02b1ff9 57740a6 289e5e4 7638418 e2f65f6 a9b9492 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
import transformers
import gradio as gr
import librosa
import torch
import spaces
import numpy as np
import tempfile
@spaces.GPU(duration=60)
def transcribe_and_respond(audio_file, email=None):
    """Run the Shuka model on an audio file and package its output as text.

    Args:
        audio_file: Filesystem path of the audio to process.
        email: Optional e-mail address. When it is non-blank it is prepended
            to the transcript. Defaults to ``None`` so the function can be
            called with the audio path alone.

    Returns:
        A ``(text, path)`` tuple. On success ``text`` is the stringified
        model output (optionally prefixed with the e-mail line) and ``path``
        is the name of a temporary ``.txt`` file containing the same text.
        On failure ``text`` is ``"Error: <message>"`` and ``path`` is
        ``None`` so that no bogus file path is handed to the caller.
    """
    # Fail fast before the (expensive) model load when nothing was submitted.
    if not audio_file:
        return "Error: no audio file provided.", None

    try:
        pipe = transformers.pipeline(
            model='sarvamai/shuka_v1',
            trust_remote_code=True,
            device=0,
            torch_dtype=torch.bfloat16
        )

        # Load the audio resampled to 16 kHz.
        audio, sr = librosa.load(audio_file, sr=16000)

        # Convert the audio to a contiguous float32 array.
        audio = np.ascontiguousarray(audio, dtype=np.float32)

        # Defensive: librosa.load mixes down to mono by default, so this is
        # normally a no-op. If multi-channel data does arrive it is laid out
        # as (channels, samples), so channels are averaged along axis 0.
        if audio.ndim > 1:
            audio = np.mean(audio, axis=0)

        # Debug: print audio properties.
        print(f"Audio dtype: {audio.dtype}, Audio shape: {audio.shape}, Sample rate: {sr}")

        # Prompt asking the model to echo the audio content verbatim.
        turns = [
            {'role': 'system', 'content': 'You are an exact echo assistant. Output the previous text exactly as given, without any modifications.'},
            {'role': 'user', 'content': '<|audio|>'}
        ]
        print(f"Initial turns: {turns}")

        # Run the model inference (this call is synchronous).
        output = pipe({'audio': audio, 'turns': turns, 'sampling_rate': sr}, max_new_tokens=10000)
        print(f"Model output: {output}")

        # The pipeline result is used as-is, converted to a string.
        transcript = str(output)
        if email and email.strip():
            transcript = f"Email provided: {email}\n\n{transcript}"

        # Write the transcript to a temporary file for download. The file is
        # kept on disk (delete=False) so it can be served after this function
        # returns; UTF-8 is forced so non-ASCII transcripts are written
        # correctly regardless of the platform's default encoding.
        with tempfile.NamedTemporaryFile(
            delete=False, mode='w', suffix='.txt', encoding='utf-8'
        ) as tmp:
            tmp.write(transcript)
            transcript_file = tmp.name

        # Return transcript text and file download path.
        return transcript, transcript_file
    except Exception as e:
        # UI boundary: report the failure as text instead of crashing, but
        # leave a trace in the logs so the cause is not lost.
        print(f"Transcription failed: {e!r}")
        return f"Error: {str(e)}", None
# Input widget: audio may be uploaded or recorded from the microphone and is
# passed to the callback as a path on disk.
# NOTE: an optional e-mail Textbox (label "Email") is currently not wired in,
# so the UI supplies only the audio path.
_audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath")

# Output widgets: the transcript text and a downloadable copy of it.
_transcript_box = gr.Textbox(label="Transcript")
_download_box = gr.File(label="Download Transcript")

# Gradio interface binding the widgets above to the transcription callback.
iface = gr.Interface(
    fn=transcribe_and_respond,
    inputs=[_audio_input],
    outputs=[_transcript_box, _download_box],
    title="ShukaNotesApp",
    description="Upload or record your meeting audio, and download the transcript.",
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()
|