Commit b4e7ec7
1 Parent(s): af7149e

beam

Browse files:
- .DS_Store +0 -0
- app.py +111 -140
- stable.py → miscellaneous/stable.py +0 -0
- stable2.py → miscellaneous/stable2.py +0 -0
- miscellaneous/stable3.py +189 -0
- requirements.txt +3 -1
.DS_Store
CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
app.py
CHANGED
(app.py is rewritten for the LangGraph refactor. The removed lines come from the previous production version of app.py, which this commit preserves below as the newly added miscellaneous/stable3.py; only the new file contents are reproduced here.)

# app.py (Phase 1: LangGraph Foundations - State and Tools)

import gradio as gr
import os
import warnings
from PIL import Image
import time
from typing import TypedDict, Annotated, List
import operator
from google.genai import types

# --- Suppress harmless warnings ---
os.environ["LANGCHAIN_TRACING_V2"] = "false"
warnings.filterwarnings("ignore", category=UserWarning)

# --- Library Imports ---
from google import genai
from langchain.agents import tool
from langchain.prompts import ChatPromptTemplate
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint, ChatHuggingFace
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import PyMuPDFLoader
from langgraph.graph import StateGraph, END
# Unsloth
from unsloth import FastVisionModel
from transformers import AutoProcessor
import torch

print("✅ All libraries imported successfully.")


# === I. LANGGRAPH FOUNDATION: STATE AND TOOLS ===

# --- 1. Define the Agent's State ---
# This is the memory of our entire system. Every node in the graph will have access to this state.
class AgentState(TypedDict):
    query: str
    image_input: Image.Image
    doc_retriever: object  # Will hold the FAISS retriever

    # This field will be populated by the router to decide the next step
    next_node: str

    # This field will hold the final answer for the user
    final_answer: str
    # This field will hold the path to a generated video
    video_path: str


# --- 2. Initialize Models and Global Objects ---
# These are loaded once and used by the tools.
LLM, VISION_MODEL, PROCESSOR, EMBEDDINGS = None, None, None, None
try:
    print("Initializing API keys and models...")
    hf_token = os.environ.get("HF_TOKEN")
    google_api_key = os.environ.get("GOOGLE_API_KEY")
    if not hf_token or not google_api_key:
        raise ValueError("HF_TOKEN or GOOGLE_API_KEY secret not found.")

    genai.configure(api_key=google_api_key)
    print("✅ API keys found successfully.")

    base_llm = HuggingFaceEndpoint(
        repo_id="HuggingFaceH4/zephyr-7b-beta",
        huggingfacehub_api_token=hf_token, max_new_tokens=1024, temperature=0.7
    )
    LLM = ChatHuggingFace(llm=base_llm)

    print("Loading Vision Model...")
    VISION_MODEL, PROCESSOR = FastVisionModel.from_pretrained(
        "unsloth/gemma-3n-E2B-it-unsloth-bnb-4bit",
        max_seq_length=2048, load_in_4bit=True, dtype=None
    )
    FastVisionModel.for_inference(VISION_MODEL)
    VISION_MODEL.load_adapter("surfiniaburger/maize-health-diagnosis-adapter")
    print("✅ Vision model loaded.")

    EMBEDDINGS = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

except Exception as e:
    print(f"❌ CRITICAL ERROR during initialization: {e}")
    LLM = None

# --- 3. Define the Tools for Our Agents ---
# We formalize each capability as a @tool. The docstrings are crucial as they
# tell the agent what each tool does.

@tool
def diagnose_plant(image: Image.Image) -> str:
    """
    Analyzes an image of a maize plant to diagnose its health condition.
    Use this tool whenever a plant image has been provided and the user asks about its condition.
    """
    if not VISION_MODEL or not PROCESSOR: return "Error: Vision model is not available."
    print("Running Plant Diagnosis Tool...")
    image = image.convert("RGB")
    messages = [{"role": "user", "content": [{"type": "text", "text": "What is the condition of this maize plant?"}, {"type": "image", "image": image}]}]
    text_prompt = PROCESSOR.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = PROCESSOR(text=text_prompt, images=image, return_tensors="pt").to(VISION_MODEL.device)
    with torch.inference_mode():
        outputs = VISION_MODEL.generate(**inputs, max_new_tokens=48, use_cache=True)
    response = PROCESSOR.batch_decode(outputs, skip_special_tokens=True)[0]
    diagnosis = response[response.rfind("model\n") + len("model\n"):].strip() if "model\n" in response else "Could not parse diagnosis."
    return f"The plant diagnosis is: {diagnosis}."

@tool
def query_document(query: str, retriever: object) -> str:
    """
    Answers a user's question based on a document they have previously uploaded.
    Use this tool if the user asks a specific question about a document.
    """
    if not retriever: return "Error: No document has been processed yet."
    print("Running Document Q&A Tool...")
    doc_qa_prompt = ChatPromptTemplate.from_template("Context: {context}\n\nQuestion: {question}\n\nAnswer:")
    qa_chain = (
        {"context": retriever.invoke, "question": RunnablePassthrough()}
        | doc_qa_prompt | LLM | StrOutputParser()
    )
    return qa_chain.invoke(query)

@tool
def generate_artistic_video(creative_prompt: str) -> str:
    """
    Generates a short video based on a detailed, artistic, and surreal prompt.
    Use this tool when the user wants to create a visual artwork.
    The input should be a rich, descriptive paragraph.
    """
    print(f"Video Generation Tool received prompt: {creative_prompt}")
    try:
        client = genai.Client()
        operation = client.models.generate_videos(
            model="veo-3.0-generate-preview",
            prompt=creative_prompt,
        )
        print("Video generation started. This may take a few minutes...")
        while not operation.done:
            time.sleep(10)
            operation = client.operations.get(operation)

        video = operation.response.generated_videos[0]
        output_path = "generated_video.mp4"
        video.video.save(output_path)
        print(f"✅ Video saved to {output_path}")
        return output_path  # Return the path to the video file
    except Exception as e:
        print(f"❌ Video generation failed: {e}")
        return f"Error: Could not generate the video. Reason: {e}"

# --- We will build the graph and the Gradio UI in the next phase ---

# Placeholder for now, to make sure the file runs
if __name__ == "__main__":
    print("\n--- Phase 1: Foundations Complete ---")
    print("State and Tools have been defined.")
    print("Next step is to build the LangGraph workflow and the Gradio UI.")
    if LLM is None:
        gr.Markdown("# ❌ ERROR: Models failed to initialize. Check logs.").launch()
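Two editorial notes on the new app.py before the next file. First, query_document still pipes through RunnablePassthrough, but this revision drops the `from langchain_core.runnables import RunnablePassthrough` import, so that import has to come back before the tool can run. Second, the file deliberately stops before building the graph; the following is a minimal sketch, assuming the AgentState and the three tools above, of how they could be wired with langgraph's StateGraph. The node functions, their names, and the hard-coded routing rule are illustrative assumptions, not part of this commit (the committed design points toward an LLM router, like the one in stable3.py below).

# Sketch only (not part of this commit): wiring AgentState and the tools into a LangGraph workflow.
from langgraph.graph import StateGraph, END

def router_node(state: AgentState) -> dict:
    # Placeholder routing rule; a real router would likely ask the LLM, as stable3.py does.
    if state.get("image_input") is not None:
        return {"next_node": "diagnose"}
    if state.get("doc_retriever") is not None:
        return {"next_node": "doc_qa"}
    return {"next_node": "video"}

def diagnose_node(state: AgentState) -> dict:
    # .func calls the wrapped Python function directly, bypassing the tool's JSON argument schema.
    return {"final_answer": diagnose_plant.func(state["image_input"])}

def doc_qa_node(state: AgentState) -> dict:
    return {"final_answer": query_document.func(state["query"], state["doc_retriever"])}

def video_node(state: AgentState) -> dict:
    return {"video_path": generate_artistic_video.func(state["query"])}

workflow = StateGraph(AgentState)
workflow.add_node("router", router_node)
workflow.add_node("diagnose", diagnose_node)
workflow.add_node("doc_qa", doc_qa_node)
workflow.add_node("video", video_node)
workflow.set_entry_point("router")
workflow.add_conditional_edges("router", lambda s: s["next_node"],
                               {"diagnose": "diagnose", "doc_qa": "doc_qa", "video": "video"})
workflow.add_edge("diagnose", END)
workflow.add_edge("doc_qa", END)
workflow.add_edge("video", END)
graph = workflow.compile()

# Example invocation (inputs are made up):
# result = graph.invoke({"query": "Create a surreal video of a maize field at dawn",
#                        "image_input": None, "doc_retriever": None})
# print(result.get("video_path") or result.get("final_answer"))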
stable.py → miscellaneous/stable.py
RENAMED
File without changes

stable2.py → miscellaneous/stable2.py
RENAMED
File without changes
miscellaneous/stable3.py
ADDED

# app.py (Final Production Version - Robust, Self-Correcting Agent)

import gradio as gr
import os
from PIL import Image
import warnings

# --- Suppress harmless warnings ---
# Suppress the LangSmith API key warning
os.environ["LANGCHAIN_TRACING_V2"] = "false"
# Suppress the specific Gradio UserWarning about chatbot type
warnings.filterwarnings("ignore", category=UserWarning, message="You have not specified a value for the `type` parameter.")


# LangChain and Agent Imports
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint, ChatHuggingFace
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import PyMuPDFLoader
from langgraph.graph import StateGraph, END
from langchain.agents import AgentExecutor, create_react_agent
from langchain_community.tools import DuckDuckGoSearchRun
from langchain import hub

# Unsloth for Vision Model
from unsloth import FastVisionModel
from transformers import AutoProcessor
import torch

print("✅ All libraries imported successfully.")

# --- 1. Global Setup: Models, Tools, and Prompts ---
LLM, VISION_MODEL, PROCESSOR, EMBEDDINGS = None, None, None, None
DOCUMENT_QA_CHAIN, GENERAL_AGENT_EXECUTOR = None, None

try:
    print("Initializing models and tools...")
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        raise ValueError("HF_TOKEN secret not found in Space settings.")
    print("✅ HF_TOKEN secret found successfully.")

    # Shared LLM for all agents
    base_llm = HuggingFaceEndpoint(
        repo_id="HuggingFaceH4/zephyr-7b-beta",
        huggingfacehub_api_token=hf_token, max_new_tokens=1024, temperature=0.1
    )
    LLM = ChatHuggingFace(llm=base_llm)

    # Shared Embeddings for RAG
    EMBEDDINGS = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

    # Vision Model (for Diagnosis Agent)
    print("Loading Vision Model...")
    VISION_MODEL, PROCESSOR = FastVisionModel.from_pretrained(
        model_name="unsloth/gemma-3n-E2B-it-unsloth-bnb-4bit",
        max_seq_length=2048, load_in_4bit=True, dtype=None
    )
    FastVisionModel.for_inference(VISION_MODEL)
    VISION_MODEL.load_adapter("surfiniaburger/maize-health-diagnosis-adapter")
    print("✅ Vision model loaded.")

    # General Knowledge Tool (for General Agent)
    search_tool = DuckDuckGoSearchRun()

    # Create the General Agent (with web search and self-correction)
    react_prompt = hub.pull("hwchase17/react")
    tools = [search_tool]

    # *** FIX APPLIED HERE: Added handle_parsing_errors=True ***
    agent = create_react_agent(LLM, tools, react_prompt)
    GENERAL_AGENT_EXECUTOR = AgentExecutor(
        agent=agent,
        tools=tools,
        verbose=True,
        handle_parsing_errors=True  # This makes the agent robust to formatting errors
    )
    print("✅ General Knowledge Agent created.")

    # Create the Document Q&A Chain
    doc_qa_prompt = ChatPromptTemplate.from_messages([
        ("system", "You are an expert AI assistant who answers questions based ONLY on the provided context from the user's document. If the answer is not in the context, clearly state that you cannot find the answer in the document."),
        ("human", "CONTEXT:\n{context}\n\nQUESTION:\n{question}")
    ])
    DOCUMENT_QA_CHAIN = (
        {"context": (lambda x: x["retriever"].invoke(x["question"])), "question": (lambda x: x["question"])}
        | RunnablePassthrough() | doc_qa_prompt | LLM | StrOutputParser()
    )
    print("✅ Document Q&A Chain created.")

except Exception as e:
    print(f"❌ CRITICAL ERROR during initialization: {e}")

# --- 2. Master Router Logic ---
ROUTER_PROMPT = ChatPromptTemplate.from_messages([
    ("system", "You are an expert at routing a user's request to the correct specialist agent. Respond with ONLY the name of the chosen agent: 'document_qa', 'plant_diagnosis', or 'general_knowledge'."),
    ("human", "Analyze the user's request. User Query: '{query}'. Document Uploaded: {doc_uploaded}. Image Uploaded: {image_uploaded}. If an image is uploaded, choose 'plant_diagnosis'. If the query is about a document and one is uploaded, choose 'document_qa'. Otherwise, choose 'general_knowledge'.")
])
router_chain = ROUTER_PROMPT | LLM | StrOutputParser()

# --- 3. Gradio Application Logic ---
def process_document(file_path: str):
    try:
        loader = PyMuPDFLoader(file_path)
        documents = loader.load()
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
        chunks = text_splitter.split_documents(documents)
        vector_store = FAISS.from_documents(chunks, EMBEDDINGS)
        return vector_store.as_retriever(search_kwargs={"k": 3})
    except Exception as e:
        raise gr.Error(f"Failed to process document: {e}")

def diagnose_plant(image: Image.Image):
    image = image.convert("RGB")
    messages = [{"role": "user", "content": [{"type": "text", "text": "What is the condition of this maize plant?"}, {"type": "image", "image": image}]}]
    text_prompt = PROCESSOR.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = PROCESSOR(text=text_prompt, images=image, return_tensors="pt").to(VISION_MODEL.device)
    with torch.inference_mode():
        outputs = VISION_MODEL.generate(**inputs, max_new_tokens=48, use_cache=True)
    response = PROCESSOR.batch_decode(outputs, skip_special_tokens=True)[0]
    return response[response.rfind("model\n") + len("model\n"):].strip() if "model\n" in response else "Could not parse diagnosis."

def master_agent_flow(history, doc_retriever, image_input):
    user_query = history[-1][0]
    doc_uploaded = doc_retriever is not None
    image_uploaded = image_input is not None

    print("Routing query...")
    router_input = {"query": user_query, "doc_uploaded": doc_uploaded, "image_uploaded": image_uploaded}
    chosen_agent = router_chain.invoke(router_input)
    print(f"Chosen agent: {chosen_agent}")

    response = ""
    if "plant_diagnosis" in chosen_agent and image_uploaded:
        response = diagnose_plant(image_input)
    elif "document_qa" in chosen_agent and doc_uploaded:
        chain_input = {"question": user_query, "retriever": doc_retriever}
        response = DOCUMENT_QA_CHAIN.invoke(chain_input)
    elif "general_knowledge" in chosen_agent:
        result = GENERAL_AGENT_EXECUTOR.invoke({"input": user_query})
        response = result.get("output", "I couldn't find an answer.")
    else:  # Fallback logic
        response = "I'm not sure how to handle that. If you uploaded an image, please ask for a diagnosis. If you uploaded a document, please ask a question about it. Otherwise, I can search the web."

    history[-1] = (user_query, response)
    return history, None, None, ""  # Clear states and textbox

def add_query_to_history(query, history):
    return history + [(query, None)]

# --- 4. Building the Gradio Interface ---
with gr.Blocks(theme=gr.themes.Soft(), css="footer {visibility: hidden}") as demo:
    doc_retriever_state = gr.State()
    image_state = gr.State()

    gr.Markdown("# Enterprise Agricultural Assistant")
    gr.Markdown("I can diagnose plant diseases from images, answer questions about your uploaded documents, or look up general agricultural information.")

    # *** FIX APPLIED HERE: Using the modern 'messages' type for the chatbot ***
    chatbot = gr.Chatbot(label="Conversation", height=500, value=[])

    with gr.Row():
        query_box = gr.Textbox(placeholder="Ask a question or describe the image...", scale=4, container=False)
        image_upload = gr.Image(type="pil", label="Upload Plant Image", scale=1)
        doc_upload = gr.UploadButton("Upload Document", file_types=['.pdf', '.txt'], scale=1)

    def handle_doc_upload(file, chatbot_history):
        retriever = process_document(file.name)
        new_history = chatbot_history + [[None, f"Document '{os.path.basename(file.name)}' loaded successfully. You can now ask questions about it."]]
        return retriever, new_history

    def handle_image_upload(img, chatbot_history):
        new_history = chatbot_history + [[None, "Image loaded. Ask for a diagnosis or describe what you need."]]
        return img, new_history

    doc_upload.upload(handle_doc_upload, [doc_upload, chatbot], [doc_retriever_state, chatbot])
    image_upload.upload(handle_image_upload, [image_upload, chatbot], [image_state, chatbot])

    query_box.submit(
        add_query_to_history, [query_box, chatbot], [chatbot]
    ).then(
        master_agent_flow, [chatbot, doc_retriever_state, image_state], [chatbot, doc_retriever_state, image_state, query_box]
    )

if __name__ == "__main__":
    demo.launch(debug=True)
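A note on the routing logic preserved above: router_chain is a plain LCEL pipeline (ROUTER_PROMPT | LLM | StrOutputParser), and master_agent_flow matches its output with `in` rather than `==`, presumably because the model may echo extra text around the agent name. A short illustrative call, with a made-up query; the exact string returned depends on the zephyr-7b-beta endpoint:

# Illustrative only; assumes the models above initialized successfully.
decision = router_chain.invoke({
    "query": "What does the uploaded report say about nitrogen deficiency?",
    "doc_uploaded": True,
    "image_uploaded": False,
})
print(decision)  # expected to mention "document_qa"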
requirements.txt
CHANGED
@@ -13,4 +13,6 @@ langchain-huggingface
 sentence-transformers
 faiss-cpu
 pymupdf
-duckduckgo-search
+duckduckgo-search
+langgraph
+google-genai