Spaces:
Sleeping
Sleeping
| import os | |
| from PyPDF2 import PdfReader | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_huggingface import HuggingFaceEmbeddings | |
| from langchain.chains.question_answering import load_qa_chain | |
| from langchain_community.llms import HuggingFaceHub | |
| import config | |
def analyze_pdf(file_path):
    """
    Builds a question-answering chain and a searchable index for a PDF file.

    The text of every page is extracted, split into overlapping chunks,
    embedded with the model named by ``config.EMBEDDING_MODEL_NAME`` and
    stored in a FAISS vector store.

    Args:
        file_path: The path to the PDF file.

    Returns:
        A ``(chain, vector_store)`` tuple holding the Langchain QA chain
        object and the FAISS vector store built from the PDF text. If
        anything goes wrong the error is printed and ``(None, None)`` is
        returned instead.
    """
    try:
        pdf_reader = PdfReader(file_path)

        # `or ""` guards against pages for which no text comes back (e.g.
        # scanned, image-only pages) so a single such page cannot abort the
        # whole document with a TypeError during concatenation.
        page_texts = [page.extract_text() or "" for page in pdf_reader.pages]

        # Join pages with a newline so the last word of one page is not
        # fused with the first word of the next one.
        text = "\n".join(page_texts)

        # Without any text there is nothing to embed; report that clearly
        # rather than letting the vector store fail on an empty chunk list.
        if not text.strip():
            print("Error analyzing PDF: no extractable text found in the PDF")
            return None, None

        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len
        )
        chunks = text_splitter.split_text(text=text)

        embeddings = HuggingFaceEmbeddings(model_name=config.EMBEDDING_MODEL_NAME)
        vector_store = FAISS.from_texts(chunks, embedding=embeddings)

        llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.5, "max_length":512})
        chain = load_qa_chain(llm=llm, chain_type="stuff")
        return chain, vector_store
    except Exception as e:
        # Deliberate best-effort boundary: callers check for (None, None).
        print(f"Error analyzing PDF: {e}")
        return None, None
def query_pdf(chain, vector_store, query):
    """
    Answers a question about the PDF.

    The three chunks most similar to the question are fetched from the
    vector store and handed to the QA chain together with the question.

    Args:
        chain: The Langchain QA chain object.
        vector_store: The FAISS vector store.
        query: The question to ask the PDF.

    Returns:
        The answer produced by the chain, or a fixed apology message if
        the lookup or the chain raised an exception (the error is printed).
    """
    try:
        relevant_docs = vector_store.similarity_search(query=query, k=3)
        response = chain.run(input_documents=relevant_docs, question=query)
    except Exception as err:
        print(f"Error querying PDF: {err}")
        return "Sorry, I couldn't find an answer to your question in the PDF."
    return response
| import pandas as pd | |
| from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent | |
| from langchain_community.llms import HuggingFaceHub | |
def analyze_spreadsheet(file_path):
    """
    Loads a spreadsheet file and returns a question-answering agent for it.

    Files whose name ends in ``.xls``, ``.xlsx`` or ``.xlsm`` are read with
    ``pandas.read_excel``; everything else is read as CSV, as before.

    Args:
        file_path: The path to the spreadsheet file.

    Returns:
        A Langchain agent object bound to the loaded DataFrame, or ``None``
        if loading the file or building the agent failed (the error is
        printed).
    """
    try:
        # Pick the reader from the file name. File-like objects are looked
        # up via their `name` attribute when they have one; anything that
        # does not look like an Excel workbook keeps going through read_csv.
        source_name = str(getattr(file_path, "name", file_path)).lower()
        if source_name.endswith((".xls", ".xlsx", ".xlsm")):
            df = pd.read_excel(file_path)
        else:
            df = pd.read_csv(file_path)

        llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.5, "max_length":512})
        # NOTE(review): newer langchain_experimental releases appear to
        # require an explicit allow_dangerous_code=True opt-in here because
        # the agent executes generated Python - confirm against the pinned
        # version before enabling it.
        agent = create_pandas_dataframe_agent(llm, df, verbose=True)
        return agent
    except Exception as e:
        # Deliberate best-effort boundary: callers check for None.
        print(f"Error analyzing spreadsheet: {e}")
        return None
def query_spreadsheet(agent, query):
    """
    Answers a question about the spreadsheet by running the agent on it.

    Args:
        agent: The Langchain agent object.
        query: The question to ask the spreadsheet.

    Returns:
        Whatever the agent returns for the question, or a fixed apology
        message if the agent raised an exception (the error is printed).
    """
    try:
        response = agent.run(query)
    except Exception as err:
        print(f"Error querying spreadsheet: {err}")
        return "Sorry, I couldn't find an answer to your question in the spreadsheet."
    return response