langchain-multi-agents / pdf_agent.py
Sharath1036's picture
first commit
4a8958c
raw
history blame
3.59 kB
import os
from dotenv import load_dotenv
from langchain.agents import AgentType, Tool, initialize_agent
from langchain_community.agent_toolkits.load_tools import load_tools
from langchain.chains import RetrievalQA
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.text_splitter import CharacterTextSplitter
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from langchain_community.document_loaders import PyPDFLoader
class PDFAgent:
def __init__(self, pdf_path: str, collection_name: str = "test"):
self.pdf_path = pdf_path
self.collection_name = collection_name
self._load_environment()
self.llm = self._initialize_llm()
self.embeddings = self._initialize_embeddings()
self.vector_store = self._initialize_vector_store()
self.qa_chain = self._initialize_qa_chain()
self.tools = self._initialize_tools()
self.agent = self._initialize_agent()
def _load_environment(self):
load_dotenv(override=True)
os.environ["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY")
os.environ["QDRANT_API_KEY"] = os.getenv("QDRANT_API_KEY")
os.environ["QDRANT_URL"] = os.getenv("QDRANT_URL")
os.environ["LANGSMITH_TRACING"]= "true"
os.environ["LANGSMITH_API_KEY"] = os.getenv("LANGSMITH_API_KEY")
def _initialize_llm(self):
return ChatGoogleGenerativeAI(
model="gemini-2.5-flash",
api_key=os.getenv("GOOGLE_API_KEY"),
temperature=0.0,
)
def _initialize_embeddings(self):
return GoogleGenerativeAIEmbeddings(model="gemini-embedding-001")
def _initialize_vector_store(self):
loader = PyPDFLoader(self.pdf_path)
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
split_texts = text_splitter.split_documents(documents)
return QdrantVectorStore.from_documents(
documents=split_texts,
embedding=self.embeddings,
collection_name=self.collection_name,
api_key=os.getenv("QDRANT_API_KEY"),
url=os.getenv("QDRANT_URL"),
force_recreate=True
)
def _initialize_qa_chain(self):
return RetrievalQA.from_chain_type(
llm=self.llm,
chain_type="stuff",
retriever=self.vector_store.as_retriever()
)
def _initialize_tools(self):
tools = load_tools([], llm=self.llm)
tools.append(
Tool(
name="State of Union QA System",
func=self.qa_chain.run,
description=(
"Useful for answering questions from the uploaded PDF. "
"Input should be a fully formed question."
),
)
)
return tools
def _initialize_agent(self):
return initialize_agent(
self.tools,
self.llm,
agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
verbose=True,
)
def ask(self, question: str):
print("Asking:", question)
result = self.agent.run(question)
print("Result:", result)
return result
if __name__ == "__main__":
print("Starting PDF Agent...")
pdf_agent = PDFAgent(pdf_path="Sharath_OnePage.pdf")
print("Agent initialized.")
response = pdf_agent.ask("What all organizations has Sharath worked with?")
print("Response:", response)