Spaces:

kaiku03
/

gemma3n-2b-it-challenge-demo

Sleeping

App Files Community

kaiku03 committed on Aug 2

Commit

a142b41

1 Parent(s): 793575c

update(9)

Browse files

Files changed (1) hide show

app.py +5 -37

app.py CHANGED Viewed

@@ -17,48 +17,15 @@ agent = NewsReporterAgent()
 # --- 2. Define Gradio Logic Handlers ---
 # These functions orchestrate the agent's actions based on UI events.
-def run_initial_generation(audio_data, image_path):
     """Handles the first step: processing inputs and generating the initial report."""
-    temp_audio_path = None
-    TARGET_SR = 16000 # The model's required sample rate
-    if audio_data is not None:
-        # --- Start of New Audio Processing Logic ---
-        # Load audio regardless of source (path or tuple)
-        if isinstance(audio_data, str):
-            waveform, sample_rate = librosa.load(audio_data, sr=None, mono=False)
-        elif isinstance(audio_data, tuple):
-            sample_rate, waveform = audio_data
-            # Ensure waveform is float for librosa
-            waveform = waveform.astype(np.float32)
-        # 1. Convert to Mono if it's stereo
-        if waveform.ndim > 1:
-            waveform = librosa.to_mono(waveform.T)
-        # 2. Resample to the target sample rate (16kHz) if necessary
-        if sample_rate != TARGET_SR:
-            waveform = librosa.resample(y=waveform, orig_sr=sample_rate, target_sr=TARGET_SR)
-        # 3. Normalize and convert to 16-bit integer format for saving
-        waveform_int16 = (waveform * 32767).astype(np.int16)
-        # Save the processed audio to a standardized file
-        os.makedirs("temp_audio", exist_ok=True)
-        temp_audio_path = "temp_audio/processed_audio.wav"
-        wavfile.write(temp_audio_path, rate=TARGET_SR, data=waveform_int16)
-        # --- End of New Audio Processing Logic ---
-    if not temp_audio_path and not image_path:
         return "Please provide an audio or image file.", None, gr.update(visible=False), None, None, None
-    state = AgentState(audio_path=temp_audio_path,
                        image_path=image_path,
                        news_report=[])
     state.update(agent.transcribe_audio(state))
     state.update(agent.describe_image(state))
     state.update(agent.create_report(state))
@@ -71,6 +38,7 @@ def run_initial_generation(audio_data, image_path):
 def run_revision(feedback, current_state):
     """Handles the revision step based on user feedback."""
     if not feedback or not feedback.strip():

 # --- 2. Define Gradio Logic Handlers ---
 # These functions orchestrate the agent's actions based on UI events.
+def run_initial_generation(audio_path, image_path):
     """Handles the first step: processing inputs and generating the initial report."""
+    if not audio_path and not image_path:
         return "Please provide an audio or image file.", None, gr.update(visible=False), None, None, None
+    state = AgentState(audio_path=audio_path,
                        image_path=image_path,
                        news_report=[])
     state.update(agent.transcribe_audio(state))
     state.update(agent.describe_image(state))
     state.update(agent.create_report(state))
 def run_revision(feedback, current_state):
     """Handles the revision step based on user feedback."""
     if not feedback or not feedback.strip():