update(9)
Browse files
app.py
CHANGED
@@ -17,48 +17,15 @@ agent = NewsReporterAgent()
|
|
17 |
|
18 |
# --- 2. Define Gradio Logic Handlers ---
|
19 |
# These functions orchestrate the agent's actions based on UI events.
|
20 |
-
def run_initial_generation(
|
21 |
"""Handles the first step: processing inputs and generating the initial report."""
|
22 |
-
|
23 |
-
temp_audio_path = None
|
24 |
-
TARGET_SR = 16000 # The model's required sample rate
|
25 |
-
|
26 |
-
if audio_data is not None:
|
27 |
-
# --- Start of New Audio Processing Logic ---
|
28 |
-
|
29 |
-
# Load audio regardless of source (path or tuple)
|
30 |
-
if isinstance(audio_data, str):
|
31 |
-
waveform, sample_rate = librosa.load(audio_data, sr=None, mono=False)
|
32 |
-
elif isinstance(audio_data, tuple):
|
33 |
-
sample_rate, waveform = audio_data
|
34 |
-
# Ensure waveform is float for librosa
|
35 |
-
waveform = waveform.astype(np.float32)
|
36 |
-
|
37 |
-
# 1. Convert to Mono if it's stereo
|
38 |
-
if waveform.ndim > 1:
|
39 |
-
waveform = librosa.to_mono(waveform.T)
|
40 |
-
|
41 |
-
# 2. Resample to the target sample rate (16kHz) if necessary
|
42 |
-
if sample_rate != TARGET_SR:
|
43 |
-
waveform = librosa.resample(y=waveform, orig_sr=sample_rate, target_sr=TARGET_SR)
|
44 |
-
|
45 |
-
# 3. Normalize and convert to 16-bit integer format for saving
|
46 |
-
waveform_int16 = (waveform * 32767).astype(np.int16)
|
47 |
-
|
48 |
-
# Save the processed audio to a standardized file
|
49 |
-
os.makedirs("temp_audio", exist_ok=True)
|
50 |
-
temp_audio_path = "temp_audio/processed_audio.wav"
|
51 |
-
wavfile.write(temp_audio_path, rate=TARGET_SR, data=waveform_int16)
|
52 |
-
|
53 |
-
# --- End of New Audio Processing Logic ---
|
54 |
-
|
55 |
-
if not temp_audio_path and not image_path:
|
56 |
return "Please provide an audio or image file.", None, gr.update(visible=False), None, None, None
|
57 |
|
58 |
-
state = AgentState(audio_path=
|
59 |
image_path=image_path,
|
60 |
news_report=[])
|
61 |
-
|
62 |
state.update(agent.transcribe_audio(state))
|
63 |
state.update(agent.describe_image(state))
|
64 |
state.update(agent.create_report(state))
|
@@ -71,6 +38,7 @@ def run_initial_generation(audio_data, image_path):
|
|
71 |
|
72 |
|
73 |
|
|
|
74 |
def run_revision(feedback, current_state):
|
75 |
"""Handles the revision step based on user feedback."""
|
76 |
if not feedback or not feedback.strip():
|
|
|
17 |
|
18 |
# --- 2. Define Gradio Logic Handlers ---
|
19 |
# These functions orchestrate the agent's actions based on UI events.
|
20 |
+
def run_initial_generation(audio_path, image_path):
|
21 |
"""Handles the first step: processing inputs and generating the initial report."""
|
22 |
+
if not audio_path and not image_path:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
return "Please provide an audio or image file.", None, gr.update(visible=False), None, None, None
|
24 |
|
25 |
+
state = AgentState(audio_path=audio_path,
|
26 |
image_path=image_path,
|
27 |
news_report=[])
|
28 |
+
|
29 |
state.update(agent.transcribe_audio(state))
|
30 |
state.update(agent.describe_image(state))
|
31 |
state.update(agent.create_report(state))
|
|
|
38 |
|
39 |
|
40 |
|
41 |
+
|
42 |
def run_revision(feedback, current_state):
|
43 |
"""Handles the revision step based on user feedback."""
|
44 |
if not feedback or not feedback.strip():
|