Spaces:
Sleeping
Sleeping
| import os | |
| import sys | |
| from dotenv import load_dotenv | |
| from langchain.agents import AgentType, Tool, initialize_agent | |
| from langchain_community.agent_toolkits.load_tools import load_tools | |
| from langchain.chains import RetrievalQA | |
| from langchain_google_genai import GoogleGenerativeAIEmbeddings | |
| from langchain_google_genai import ChatGoogleGenerativeAI | |
| from langchain.text_splitter import CharacterTextSplitter | |
| from langchain_qdrant import QdrantVectorStore | |
| from langchain_community.document_loaders import PyPDFLoader | |
class PDFAgent:
    """Question-answering agent over a single PDF document.

    On construction the PDF is loaded, split into chunks, embedded with
    Google Generative AI embeddings and written to a Qdrant collection.
    A RetrievalQA chain over that collection is then exposed as the only
    custom tool of a zero-shot ReAct agent driven by a Gemini chat model.

    Args:
        pdf_path: Path of the PDF file to index.
        collection_name: Name of the Qdrant collection that receives the
            chunks. ``force_recreate=True`` is passed to Qdrant, so the
            collection is rebuilt each time an instance is created.

    Raises:
        RuntimeError: If a required environment variable is missing.
    """

    # Settings that must be available, either from the process environment
    # or from the .env file read by _load_environment().
    _REQUIRED_ENV_VARS = ("GOOGLE_API_KEY", "QDRANT_API_KEY", "QDRANT_URL")

    def __init__(self, pdf_path: str, collection_name: str = "test"):
        self.pdf_path = pdf_path
        self.collection_name = collection_name
        # Order matters: each step below uses attributes set by earlier ones.
        self._load_environment()
        self.llm = self._initialize_llm()
        self.embeddings = self._initialize_embeddings()
        self.vector_store = self._initialize_vector_store()
        self.qa_chain = self._initialize_qa_chain()
        self.tools = self._initialize_tools()
        self.agent = self._initialize_agent()

    def _load_environment(self) -> None:
        """Load settings from a ``.env`` file and validate them.

        Raises:
            RuntimeError: If any name in ``_REQUIRED_ENV_VARS`` is unset or
                empty after the ``.env`` file has been read.
        """
        # override=True: values from the .env file win over variables that
        # are already present in the process environment.
        load_dotenv(override=True)
        # Fail early with a readable message instead of the opaque
        # "str expected, not NoneType" TypeError that assigning a missing
        # value back into os.environ would produce.
        self._check_required_env(self._REQUIRED_ENV_VARS)
        # LangSmith tracing is optional: enable it only when a key exists so
        # the agent still works without a LangSmith account.
        if os.getenv("LANGSMITH_API_KEY"):
            os.environ["LANGSMITH_TRACING"] = "true"

    @staticmethod
    def _check_required_env(names) -> None:
        """Raise ``RuntimeError`` listing every name in *names* that is unset or empty."""
        missing = [name for name in names if not os.getenv(name)]
        if missing:
            raise RuntimeError(
                "Missing required environment variable(s): "
                + ", ".join(missing)
                + ". Set them in the environment or in a .env file."
            )

    def _initialize_llm(self):
        """Create the Gemini chat model (temperature 0.0) used by the chain and the agent."""
        return ChatGoogleGenerativeAI(
            model="gemini-2.5-flash",
            api_key=os.getenv("GOOGLE_API_KEY"),
            temperature=0.0,
        )

    def _initialize_embeddings(self):
        """Create the Google Generative AI embedding model used for indexing."""
        return GoogleGenerativeAIEmbeddings(model="gemini-embedding-001")

    def _initialize_vector_store(self):
        """Load the PDF, split it into chunks and store them in Qdrant.

        Returns:
            The ``QdrantVectorStore`` built from the chunks of ``self.pdf_path``.
        """
        documents = PyPDFLoader(self.pdf_path).load()
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
        split_texts = text_splitter.split_documents(documents)
        return QdrantVectorStore.from_documents(
            documents=split_texts,
            embedding=self.embeddings,
            collection_name=self.collection_name,
            api_key=os.getenv("QDRANT_API_KEY"),
            url=os.getenv("QDRANT_URL"),
            force_recreate=True,
        )

    def _initialize_qa_chain(self):
        """Build a "stuff" RetrievalQA chain over the vector store's retriever."""
        return RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type="stuff",
            retriever=self.vector_store.as_retriever(),
        )

    def _answer_from_pdf(self, question: str) -> str:
        """Run *question* through the RetrievalQA chain and return its answer text."""
        # Chain.run() is deprecated in LangChain; invoke() with the chain's
        # "query" input key and "result" output key is the equivalent call.
        return self.qa_chain.invoke({"query": question})["result"]

    def _initialize_tools(self):
        """Return the agent's tool list: the PDF question-answering tool."""
        # An empty name list requests no built-in tools; the call is kept as
        # the place to add some later.
        tools = load_tools([], llm=self.llm)
        tools.append(
            Tool(
                # The name is shown to the LLM when it chooses a tool, so it
                # should describe the PDF QA tool rather than an unrelated
                # document.
                name="PDF QA System",
                func=self._answer_from_pdf,
                description=(
                    "Useful for answering questions from the uploaded PDF. "
                    "Input should be a fully formed question."
                ),
            )
        )
        return tools

    def _initialize_agent(self):
        """Create the zero-shot ReAct agent over ``self.tools`` (``verbose=True``)."""
        return initialize_agent(
            self.tools,
            self.llm,
            agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
            verbose=True,
        )

    def ask(self, question: str):
        """Ask the agent *question*, print the exchange and return the answer.

        Args:
            question: Natural-language question about the indexed PDF.

        Returns:
            The agent's final answer.
        """
        print("Asking:", question)
        # invoke() replaces the deprecated run(); the agent executor takes
        # the question under "input" and returns the answer under "output".
        result = self.agent.invoke({"input": question})["output"]
        print("Result:", result)
        return result
if __name__ == "__main__":
    # Demo run: index the sample PDF, then ask the agent a single question.
    demo_pdf = "Sharath_OnePage.pdf"
    demo_question = "What all organizations has Sharath worked with?"

    print("Starting PDF Agent...")
    agent = PDFAgent(pdf_path=demo_pdf)
    print("Agent initialized.")

    print("Response:", agent.ask(demo_question))