kaiku03 committed on
Commit
a142b41
·
1 Parent(s): 793575c
Files changed (1) hide show
  1. app.py +5 -37
app.py CHANGED
@@ -17,48 +17,15 @@ agent = NewsReporterAgent()
17
 
18
  # --- 2. Define Gradio Logic Handlers ---
19
  # These functions orchestrate the agent's actions based on UI events.
20
- def run_initial_generation(audio_data, image_path):
21
  """Handles the first step: processing inputs and generating the initial report."""
22
-
23
- temp_audio_path = None
24
- TARGET_SR = 16000 # The model's required sample rate
25
-
26
- if audio_data is not None:
27
- # --- Start of New Audio Processing Logic ---
28
-
29
- # Load audio regardless of source (path or tuple)
30
- if isinstance(audio_data, str):
31
- waveform, sample_rate = librosa.load(audio_data, sr=None, mono=False)
32
- elif isinstance(audio_data, tuple):
33
- sample_rate, waveform = audio_data
34
- # Ensure waveform is float for librosa
35
- waveform = waveform.astype(np.float32)
36
-
37
- # 1. Convert to Mono if it's stereo
38
- if waveform.ndim > 1:
39
- waveform = librosa.to_mono(waveform.T)
40
-
41
- # 2. Resample to the target sample rate (16kHz) if necessary
42
- if sample_rate != TARGET_SR:
43
- waveform = librosa.resample(y=waveform, orig_sr=sample_rate, target_sr=TARGET_SR)
44
-
45
- # 3. Normalize and convert to 16-bit integer format for saving
46
- waveform_int16 = (waveform * 32767).astype(np.int16)
47
-
48
- # Save the processed audio to a standardized file
49
- os.makedirs("temp_audio", exist_ok=True)
50
- temp_audio_path = "temp_audio/processed_audio.wav"
51
- wavfile.write(temp_audio_path, rate=TARGET_SR, data=waveform_int16)
52
-
53
- # --- End of New Audio Processing Logic ---
54
-
55
- if not temp_audio_path and not image_path:
56
  return "Please provide an audio or image file.", None, gr.update(visible=False), None, None, None
57
 
58
- state = AgentState(audio_path=temp_audio_path,
59
  image_path=image_path,
60
  news_report=[])
61
-
62
  state.update(agent.transcribe_audio(state))
63
  state.update(agent.describe_image(state))
64
  state.update(agent.create_report(state))
@@ -71,6 +38,7 @@ def run_initial_generation(audio_data, image_path):
71
 
72
 
73
 
 
74
  def run_revision(feedback, current_state):
75
  """Handles the revision step based on user feedback."""
76
  if not feedback or not feedback.strip():
 
17
 
18
  # --- 2. Define Gradio Logic Handlers ---
19
  # These functions orchestrate the agent's actions based on UI events.
20
+ def run_initial_generation(audio_path, image_path):
21
  """Handles the first step: processing inputs and generating the initial report."""
22
+ if not audio_path and not image_path:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  return "Please provide an audio or image file.", None, gr.update(visible=False), None, None, None
24
 
25
+ state = AgentState(audio_path=audio_path,
26
  image_path=image_path,
27
  news_report=[])
28
+
29
  state.update(agent.transcribe_audio(state))
30
  state.update(agent.describe_image(state))
31
  state.update(agent.create_report(state))
 
38
 
39
 
40
 
41
+
42
  def run_revision(feedback, current_state):
43
  """Handles the revision step based on user feedback."""
44
  if not feedback or not feedback.strip():