kaiku03 commited on
Commit
71773fd
·
verified ·
1 Parent(s): adfa8e6

Upload 11 files

Browse files
Dockerfile ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use an official Python runtime as a parent image
FROM python:3.10-slim

# Set the working directory in the container
WORKDIR /app

# Install system dependencies needed for Ollama.
# --no-install-recommends and removing the apt lists keep the image small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends curl ca-certificates \
    && rm -rf /var/lib/apt/lists/*

# Install Ollama using the official installation script
RUN curl -fsSL https://ollama.com/install.sh | sh

# Copy the requirements file first so the pip layer is cached
# independently of application-code changes.
COPY requirements.txt .

# Install Python packages
RUN pip install --no-cache-dir -r requirements.txt

# Copy all application files into the container
COPY . .

# Make the startup script executable
RUN chmod +x ./startup.sh

# Expose the port Gradio runs on
EXPOSE 7860

# Set the command to run when the container starts
CMD ["./startup.sh"]
agent_state.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
from pydantic import BaseModel, Field
from typing import Optional


class AgentState(BaseModel):
    """Shared state carried between the agent's workflow steps.

    Created with only ``audio_path``; the transcriber, reporter, and
    human-review steps fill in the remaining fields as they run.
    """

    # Path to the input audio file (required at construction time).
    audio_path: str
    # Set by the transcriber tool.
    transcribed_text: Optional[str] = None
    # Set (and overwritten on revision) by the news-reporter tool.
    news_report: Optional[str] = None
    feedback: Optional[str] = Field(default=None, description="Feedback from the human for revision")
    approved: bool = Field(default=False, description="Has the human approved the summary?")
main.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from agent_state import AgentState
3
+ from workflow import build_graph
4
+ from tools.transcriber import transcribe_fast # We need to import the tool directly
5
+ from datetime import datetime
6
+
7
# --- Main Application Logic ---

# Build the graph once when the app starts.
# NOTE(review): the compiled graph is never invoked below — the UI handlers
# call the individual tools directly; confirm whether `app` is still needed.
app = build_graph()
11
+
12
def run_transcription(audio_file):
    """
    Step 1: Run ONLY the transcription tool and prepare the session state.

    Returns a (state, transcript, generate-button-visibility) tuple for
    the Gradio outputs.
    """
    # Guard clause: nothing was uploaded or recorded yet.
    if not audio_file:
        return None, None, gr.update(visible=False)

    print("--- Step 1: Transcribing Audio ---")

    # Build a fresh per-session state and run the transcriber on it directly.
    session_state = transcribe_fast(AgentState(audio_path=audio_file))

    # Hand the transcript to the UI, keep the state for the session,
    # and reveal the "Generate News Report" button.
    return session_state, session_state.transcribed_text, gr.update(visible=True)
34
+
35
def generate_report(current_state):
    """
    Step 2: Take the transcribed state and produce the news report,
    then reveal the human-review controls.

    Returns a (state, report_text, review-group-visibility) tuple for
    the Gradio outputs.
    """
    # Guard clause: Step 1 must have produced a transcript first.
    if not current_state or not current_state.transcribed_text:
        return current_state, "Transcription not found. Please complete Step 1.", gr.update(visible=False)

    print("--- Step 2: Generating News Report ---")

    # Call the reporter tool directly instead of streaming the whole graph:
    # the graph's entry node would re-run transcription from the start.
    from tools.news_reporter import create_news_report
    state_after_report = create_news_report(current_state)

    return state_after_report, state_after_report.news_report, gr.update(visible=True)
62
+
63
+
64
def handle_revision(feedback, current_state):
    """
    Handle the human-feedback revision loop.

    Returns a (state, report_text, status_message) tuple for the Gradio
    outputs.
    """
    # Guard: the user may click "Revise" before any report exists; the
    # original code would raise AttributeError on a None state here.
    if current_state is None:
        return current_state, "", "No report to revise. Please complete Steps 1 and 2 first."

    if not feedback:
        return current_state, current_state.news_report, "Please provide feedback to revise."

    print("Revising with feedback...")
    current_state.feedback = feedback
    current_state.approved = False

    # Re-run the report tool; it reads state.feedback and rewrites the report.
    from tools.news_reporter import create_news_report
    state_after_revision = create_news_report(current_state)

    return state_after_revision, state_after_revision.news_report, "✅ Report revised. Please review again."
80
+
81
+
82
def handle_save(current_state):
    """
    Persist the approved report to disk via the saver tool.

    Returns a status message for the Gradio status box.
    """
    # Guard: saving requires a generated report; the original code would
    # crash (or write "None") when clicked before Step 2.
    if current_state is None or not current_state.news_report:
        return "⚠️ Nothing to save yet. Please generate a report first."

    print("Saving final report...")
    from tools.saver import save_summary
    save_summary(current_state)  # writes news_report.txt

    return "✅ Final report has been saved successfully!"
91
+
92
+
93
+
94
+
95
#####################################
# --- Gradio UI ---
#####################################
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎙️ Audio to News Agent")
    gr.Markdown(f"### Reporting live, {datetime.now().strftime('%B %d, %Y')}.")

    # Per-session agent state; stays None until Step 1 runs.
    agent_state_gr = gr.State(value=None)

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Step 1: Transcribe Audio")
            audio_input = gr.Audio(type="filepath", sources=["upload", "microphone"], label="Upload or Record")
            transcribe_btn = gr.Button("1️⃣ Transcribe Audio", variant="secondary")

            # This button will appear after step 1 is complete
            generate_btn = gr.Button("2️⃣ Generate News Report", variant="primary", visible=False)

        with gr.Column(scale=2):
            transcribed_output = gr.Textbox(label="📝 Transcription Result", lines=5, interactive=False)
            report_output = gr.Textbox(label="📰 Generated News Report", lines=10, interactive=False)

            # Hidden until a report exists; generate_report reveals it.
            with gr.Group(visible=False) as review_group:
                feedback_input = gr.Textbox(label="❌ Provide Feedback for Revision", lines=2)
                with gr.Row():
                    revise_btn = gr.Button("🔁 Revise Report")
                    save_btn = gr.Button("✅ Approve & Save Report", variant="primary")
                status_output = gr.Textbox(label="Status", interactive=False)

    # --- Button Click Logic ---

    # Step 1: transcribe, store the state, reveal the generate button.
    transcribe_btn.click(
        fn=run_transcription,
        inputs=[audio_input],
        outputs=[agent_state_gr, transcribed_output, generate_btn]
    )

    # Step 2: generate the report and reveal the review controls.
    generate_btn.click(
        fn=generate_report,
        inputs=[agent_state_gr],
        outputs=[agent_state_gr, report_output, review_group]
    )

    # Revision loop: feedback in, revised report out.
    revise_btn.click(
        fn=handle_revision,
        inputs=[feedback_input, agent_state_gr],
        outputs=[agent_state_gr, report_output, status_output]
    )

    # Final approval: persist the report to disk.
    save_btn.click(
        fn=handle_save,
        inputs=[agent_state_gr],
        outputs=[status_output]
    )

# NOTE(review): share=True opens a public Gradio tunnel to this app —
# confirm that exposure is intended for the deployment environment.
demo.launch(share=True)
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ faster-whisper==1.1.1
2
+ gradio==5.38.2
3
+ langchain-ollama==0.3.6
4
+ langgraph==0.5.4
5
+ pydantic==2.11.7
6
+ # typing==3.10.0.0  # removed: "typing" is part of the Python 3.10 standard library; the deprecated PyPI backport can break pip installs
startup.sh ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash

# Start the Ollama server in the background
ollama serve &

# Wait until the server actually answers instead of a fixed 5-second sleep,
# which races on slow hosts; give up after ~60 seconds so a broken install
# fails fast instead of hanging on "ollama pull".
for _ in $(seq 1 60); do
    if curl -sf http://127.0.0.1:11434/ > /dev/null; then
        break
    fi
    sleep 1
done

# Pull the model that your application needs
echo "Pulling model: gemma3n:e4b-it-q4_K_M..."
ollama pull gemma3n:e4b-it-q4_K_M

echo "Model pulled. Starting Gradio app..."

# Start the Gradio application (accessible on port 7860).
# exec replaces the shell so the Python process receives container signals
# (clean SIGTERM shutdown).
exec python main.py
tools/__init__.py ADDED
File without changes
tools/feedback.py ADDED
File without changes
tools/news_reporter.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from langchain_ollama.llms import OllamaLLM
from agent_state import AgentState

# Single shared LLM client. Assumes an Ollama server is listening locally
# on the default port (started by startup.sh) and that the model has
# already been pulled — TODO confirm both in the deployment environment.
llm = OllamaLLM(model="gemma3n:e4b-it-q4_K_M",
                base_url="http://127.0.0.1:11434")


def create_news_report(state: AgentState) -> AgentState:
    """Generate (or revise) the news report from the transcription.

    Writes the result to ``state.news_report``, clears ``state.feedback``,
    and resets ``state.approved`` so the human must review the new draft.
    """
    # Build either a revision prompt (when human feedback exists) or a
    # first-draft prompt from the transcription alone.
    if state.feedback:
        prompt = f"""You are revising a news report based on the user's feedback:
        Transcription: "{state.transcribed_text}"
        Old Report: "{state.news_report}"
        Feedback: "{state.feedback}" """
    else:
        prompt = f"""Write a professional news article based on this transcription:
        "{state.transcribed_text}" """

    report = llm.invoke(prompt)
    state.news_report = report
    # Feedback is consumed by this revision pass; approval is reset so the
    # workflow's conditional edge does not skip the human review.
    state.feedback = None
    state.approved = False
    return state
tools/saver.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ from agent_state import AgentState
2
+
3
def save_summary(state: AgentState) -> AgentState:
    """Write the approved news report to news_report.txt and return the state.

    Raises TypeError if ``state.news_report`` is still None.
    """
    # Explicit UTF-8: the report may contain non-ASCII characters (the UI
    # uses emoji throughout) and the platform-default encoding is not
    # guaranteed to handle them.
    with open("news_report.txt", "w", encoding="utf-8") as f:
        f.write(state.news_report)
    print("✅ Report saved to news_report.txt")
    return state
tools/transcriber.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from faster_whisper import WhisperModel
from agent_state import AgentState
import os

# Load the Whisper "base" model once at import time (CPU, int8-quantized)
# so every transcription call reuses the same instance.
# NOTE(review): this makes importing the module slow and memory-heavy —
# confirm that is acceptable everywhere the module is imported.
model = WhisperModel("base", device="cpu", compute_type="int8")
7
def transcribe_fast(state: AgentState) -> AgentState:
    """Transcribe ``state.audio_path`` with Whisper and store the text on the state.

    Raises:
        FileNotFoundError: if the audio file does not exist.
    """
    print("---TRANSCRIBING AUDIO---")
    if not os.path.exists(state.audio_path):
        raise FileNotFoundError(f"File not found: {state.audio_path}")

    # vad_filter=False: transcribe everything, without voice-activity trimming.
    # The returned info (language, duration, ...) is intentionally unused.
    segments, _info = model.transcribe(state.audio_path, vad_filter=False)
    state.transcribed_text = "".join(segment.text for segment in segments)
    return state
15
+
workflow.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langgraph.graph import StateGraph, START, END
2
+ from agent_state import AgentState
3
+ from tools.transcriber import transcribe_fast
4
+ from tools.news_reporter import create_news_report
5
+ from tools.saver import save_summary
6
+
7
+ # In your workflow.py
8
+
9
+ # ... (imports and node definitions for transcribe, create_news_report, save_summary)
10
+
11
def build_graph():
    """Compile the LangGraph workflow: transcribe -> report -> (loop | save)."""
    workflow = StateGraph(AgentState)

    workflow.add_node("transcriber", transcribe_fast)
    workflow.add_node("news_reporter", create_news_report)  # Renamed for clarity
    workflow.add_node("saver", save_summary)

    # This is the conditional logic based on human approval
    def check_approval(state: AgentState):
        return "saver" if state.approved else "news_reporter"

    # Define the graph's structure
    workflow.add_edge(START, "transcriber")
    workflow.add_edge("transcriber", "news_reporter")

    # The conditional edge for the loop/save decision.
    # NOTE(review): create_news_report always resets approved to False, so a
    # plain invocation of this graph can only loop back to "news_reporter"
    # until the recursion limit is hit; main.py sidesteps this by calling the
    # tools directly. Confirm the intended human-in-the-loop mechanism
    # (e.g. a LangGraph interrupt) before running this graph end-to-end.
    workflow.add_conditional_edges(
        "news_reporter",
        check_approval,
        {
            "saver": "saver",
            "news_reporter": "news_reporter"  # This allows looping back if feedback is given
        }
    )
    workflow.add_edge("saver", END)

    return workflow.compile()