kaiku03 commited on
Commit
71773fd
·
verified ·
1 Parent(s): adfa8e6

Upload 11 files

Browse files
Dockerfile ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use an official Python runtime as a parent image
FROM python:3.10-slim

# Set the working directory in the container
WORKDIR /app

# Install system dependencies needed for Ollama.
# --no-install-recommends and removing the apt lists keep the image small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends curl ca-certificates \
    && rm -rf /var/lib/apt/lists/*

# Install Ollama using the official installation script
RUN curl -fsSL https://ollama.com/install.sh | sh

# Copy the requirements file first so the pip layer is cached
# independently of application-code changes.
COPY requirements.txt .

# Install Python packages
RUN pip install --no-cache-dir -r requirements.txt

# Copy all application files into the container
COPY . .

# Make the startup script executable
RUN chmod +x ./startup.sh

# Expose the port Gradio runs on
EXPOSE 7860

# Set the command to run when the container starts
CMD ["./startup.sh"]
agent_state.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
from pydantic import BaseModel, Field
from typing import Optional


class AgentState(BaseModel):
    """Shared state carried between the agent's workflow steps.

    Created with only ``audio_path``; the transcriber, reporter, and
    human-review steps fill in the remaining fields as they run.
    """

    # Path to the input audio file (required at construction time).
    audio_path: str
    # Set by the transcriber tool.
    transcribed_text: Optional[str] = None
    # Set (and overwritten on revision) by the news-reporter tool.
    news_report: Optional[str] = None
    feedback: Optional[str] = Field(default=None, description="Feedback from the human for revision")
    approved: bool = Field(default=False, description="Has the human approved the summary?")
main.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from agent_state import AgentState
3
+ from workflow import build_graph
4
+ from tools.transcriber import transcribe_fast # We need to import the tool directly
5
+ from datetime import datetime
6
+
7
# --- Main Application Logic ---

# Build the graph once when the app starts.
# NOTE(review): the compiled graph is never invoked below — the UI handlers
# call the individual tools directly; confirm whether `app` is still needed.
app = build_graph()
11
+
12
def run_transcription(audio_file):
    """
    Step 1: Run ONLY the transcription tool and prepare the session state.

    Returns a (state, transcript, generate-button-visibility) tuple for
    the Gradio outputs.
    """
    # Guard clause: nothing was uploaded or recorded yet.
    if not audio_file:
        return None, None, gr.update(visible=False)

    print("--- Step 1: Transcribing Audio ---")

    # Build a fresh per-session state and run the transcriber on it directly.
    session_state = transcribe_fast(AgentState(audio_path=audio_file))

    # Hand the transcript to the UI, keep the state for the session,
    # and reveal the "Generate News Report" button.
    return session_state, session_state.transcribed_text, gr.update(visible=True)
34
+
35
def generate_report(current_state):
    """
    Step 2: Take the transcribed state and produce the news report,
    then reveal the human-review controls.

    Returns a (state, report_text, review-group-visibility) tuple for
    the Gradio outputs.
    """
    # Guard clause: Step 1 must have produced a transcript first.
    if not current_state or not current_state.transcribed_text:
        return current_state, "Transcription not found. Please complete Step 1.", gr.update(visible=False)

    print("--- Step 2: Generating News Report ---")

    # Call the reporter tool directly instead of streaming the whole graph:
    # the graph's entry node would re-run transcription from the start.
    from tools.news_reporter import create_news_report
    state_after_report = create_news_report(current_state)

    return state_after_report, state_after_report.news_report, gr.update(visible=True)
62
+
63
+
64
def handle_revision(feedback, current_state):
    """
    Handle the human-feedback revision loop.

    Returns a (state, report_text, status_message) tuple for the Gradio
    outputs.
    """
    # Guard: the user may click "Revise" before any report exists; the
    # original code would raise AttributeError on a None state here.
    if current_state is None:
        return current_state, "", "No report to revise. Please complete Steps 1 and 2 first."

    if not feedback:
        return current_state, current_state.news_report, "Please provide feedback to revise."

    print("Revising with feedback...")
    current_state.feedback = feedback
    current_state.approved = False

    # Re-run the report tool; it reads state.feedback and rewrites the report.
    from tools.news_reporter import create_news_report
    state_after_revision = create_news_report(current_state)

    return state_after_revision, state_after_revision.news_report, "✅ Report revised. Please review again."
80
+
81
+
82
def handle_save(current_state):
    """
    Persist the approved report to disk via the saver tool.

    Returns a status message for the Gradio status box.
    """
    # Guard: saving requires a generated report; the original code would
    # crash (or write "None") when clicked before Step 2.
    if current_state is None or not current_state.news_report:
        return "⚠️ Nothing to save yet. Please generate a report first."

    print("Saving final report...")
    from tools.saver import save_summary
    save_summary(current_state)  # writes news_report.txt

    return "✅ Final report has been saved successfully!"
91
+
92
+
93
+
94
+
95
#####################################
# --- Gradio UI ---
#####################################
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎙️ Audio to News Agent")
    gr.Markdown(f"### Reporting live, {datetime.now().strftime('%B %d, %Y')}.")

    # Per-session agent state; stays None until Step 1 runs.
    agent_state_gr = gr.State(value=None)

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Step 1: Transcribe Audio")
            audio_input = gr.Audio(type="filepath", sources=["upload", "microphone"], label="Upload or Record")
            transcribe_btn = gr.Button("1️⃣ Transcribe Audio", variant="secondary")

            # This button will appear after step 1 is complete
            generate_btn = gr.Button("2️⃣ Generate News Report", variant="primary", visible=False)

        with gr.Column(scale=2):
            transcribed_output = gr.Textbox(label="📝 Transcription Result", lines=5, interactive=False)
            report_output = gr.Textbox(label="📰 Generated News Report", lines=10, interactive=False)

            # Hidden until a report exists; generate_report reveals it.
            with gr.Group(visible=False) as review_group:
                feedback_input = gr.Textbox(label="❌ Provide Feedback for Revision", lines=2)
                with gr.Row():
                    revise_btn = gr.Button("🔁 Revise Report")
                    save_btn = gr.Button("✅ Approve & Save Report", variant="primary")
                status_output = gr.Textbox(label="Status", interactive=False)

    # --- Button Click Logic ---

    # Step 1: transcribe, store the state, reveal the generate button.
    transcribe_btn.click(
        fn=run_transcription,
        inputs=[audio_input],
        outputs=[agent_state_gr, transcribed_output, generate_btn]
    )

    # Step 2: generate the report and reveal the review controls.
    generate_btn.click(
        fn=generate_report,
        inputs=[agent_state_gr],
        outputs=[agent_state_gr, report_output, review_group]
    )

    # Revision loop: feedback in, revised report out.
    revise_btn.click(
        fn=handle_revision,
        inputs=[feedback_input, agent_state_gr],
        outputs=[agent_state_gr, report_output, status_output]
    )

    # Final approval: persist the report to disk.
    save_btn.click(
        fn=handle_save,
        inputs=[agent_state_gr],
        outputs=[status_output]
    )

# NOTE(review): share=True opens a public Gradio tunnel to this app —
# confirm that exposure is intended for the deployment environment.
demo.launch(share=True)
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ faster-whisper==1.1.1
2
+ gradio==5.38.2
3
+ langchain-ollama==0.3.6
4
+ langgraph==0.5.4
5
+ pydantic==2.11.7
6
+ # typing==3.10.0.0  # removed: "typing" is part of the Python 3.10 standard library; the deprecated PyPI backport can break pip installs
startup.sh ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash

# Start the Ollama server in the background
ollama serve &

# Wait until the server actually answers instead of a fixed 5-second sleep,
# which races on slow hosts; give up after ~60 seconds so a broken install
# fails fast instead of hanging on "ollama pull".
for _ in $(seq 1 60); do
    if curl -sf http://127.0.0.1:11434/ > /dev/null; then
        break
    fi
    sleep 1
done

# Pull the model that your application needs
echo "Pulling model: gemma3n:e4b-it-q4_K_M..."
ollama pull gemma3n:e4b-it-q4_K_M

echo "Model pulled. Starting Gradio app..."

# Start the Gradio application (accessible on port 7860).
# exec replaces the shell so the Python process receives container signals
# (clean SIGTERM shutdown).
exec python main.py
tools/__init__.py ADDED
File without changes
tools/feedback.py ADDED
File without changes
tools/news_reporter.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from langchain_ollama.llms import OllamaLLM
from agent_state import AgentState

# Single shared LLM client. Assumes an Ollama server is listening locally
# on the default port (started by startup.sh) and that the model has
# already been pulled — TODO confirm both in the deployment environment.
llm = OllamaLLM(model="gemma3n:e4b-it-q4_K_M",
                base_url="http://127.0.0.1:11434")


def create_news_report(state: AgentState) -> AgentState:
    """Generate (or revise) the news report from the transcription.

    Writes the result to ``state.news_report``, clears ``state.feedback``,
    and resets ``state.approved`` so the human must review the new draft.
    """
    # Build either a revision prompt (when human feedback exists) or a
    # first-draft prompt from the transcription alone.
    if state.feedback:
        prompt = f"""You are revising a news report based on the user's feedback:
        Transcription: "{state.transcribed_text}"
        Old Report: "{state.news_report}"
        Feedback: "{state.feedback}" """
    else:
        prompt = f"""Write a professional news article based on this transcription:
        "{state.transcribed_text}" """

    report = llm.invoke(prompt)
    state.news_report = report
    # Feedback is consumed by this revision pass; approval is reset so the
    # workflow's conditional edge does not skip the human review.
    state.feedback = None
    state.approved = False
    return state
tools/saver.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ from agent_state import AgentState
2
+
3
def save_summary(state: AgentState) -> AgentState:
    """Write the approved news report to news_report.txt and return the state.

    Raises TypeError if ``state.news_report`` is still None.
    """
    # Explicit UTF-8: the report may contain non-ASCII characters (the UI
    # uses emoji throughout) and the platform-default encoding is not
    # guaranteed to handle them.
    with open("news_report.txt", "w", encoding="utf-8") as f:
        f.write(state.news_report)
    print("✅ Report saved to news_report.txt")
    return state
tools/transcriber.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from faster_whisper import WhisperModel
from agent_state import AgentState
import os

# Load the Whisper "base" model once at import time (CPU, int8-quantized)
# so every transcription call reuses the same instance.
# NOTE(review): this makes importing the module slow and memory-heavy —
# confirm that is acceptable everywhere the module is imported.
model = WhisperModel("base", device="cpu", compute_type="int8")
7
def transcribe_fast(state: AgentState) -> AgentState:
    """Transcribe ``state.audio_path`` with Whisper and store the text on the state.

    Raises:
        FileNotFoundError: if the audio file does not exist.
    """
    print("---TRANSCRIBING AUDIO---")
    if not os.path.exists(state.audio_path):
        raise FileNotFoundError(f"File not found: {state.audio_path}")

    # vad_filter=False: transcribe everything, without voice-activity trimming.
    # The returned info (language, duration, ...) is intentionally unused.
    segments, _info = model.transcribe(state.audio_path, vad_filter=False)
    state.transcribed_text = "".join(segment.text for segment in segments)
    return state
15
+
workflow.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langgraph.graph import StateGraph, START, END
2
+ from agent_state import AgentState
3
+ from tools.transcriber import transcribe_fast
4
+ from tools.news_reporter import create_news_report
5
+ from tools.saver import save_summary
6
+
7
+ # In your workflow.py
8
+
9
+ # ... (imports and node definitions for transcribe, create_news_report, save_summary)
10
+
11
def build_graph():
    """Compile the LangGraph workflow: transcribe -> report -> (loop | save)."""
    workflow = StateGraph(AgentState)

    workflow.add_node("transcriber", transcribe_fast)
    workflow.add_node("news_reporter", create_news_report)  # Renamed for clarity
    workflow.add_node("saver", save_summary)

    # This is the conditional logic based on human approval
    def check_approval(state: AgentState):
        return "saver" if state.approved else "news_reporter"

    # Define the graph's structure
    workflow.add_edge(START, "transcriber")
    workflow.add_edge("transcriber", "news_reporter")

    # The conditional edge for the loop/save decision.
    # NOTE(review): create_news_report always resets approved to False, so a
    # plain invocation of this graph can only loop back to "news_reporter"
    # until the recursion limit is hit; main.py sidesteps this by calling the
    # tools directly. Confirm the intended human-in-the-loop mechanism
    # (e.g. a LangGraph interrupt) before running this graph end-to-end.
    workflow.add_conditional_edges(
        "news_reporter",
        check_approval,
        {
            "saver": "saver",
            "news_reporter": "news_reporter"  # This allows looping back if feedback is given
        }
    )
    workflow.add_edge("saver", END)

    return workflow.compile()