# rag-tool / app.py
import os
#!pip install -q gradio langchain pypdf chromadb
import gradio as gr
from langchain.vectorstores import Chroma
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceInferenceAPIEmbeddings
# Use Hugging Face Inference API embeddings
inference_api_key = os.environ['HF']
api_hf_embeddings = HuggingFaceInferenceAPIEmbeddings(
    api_key=inference_api_key,
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)
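# Optional sanity check (a sketch added here, not part of the original script):
# embed_query is part of LangChain's Embeddings interface, so a valid HF token and
# model name should yield a single embedding vector for the sample string below.
sample_vector = api_hf_embeddings.embed_query("ALiBi positional bias")
print(f"Embedding dimension: {len(sample_vector)}")  # all-MiniLM-L6-v2 returns 384-dim vectors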
# Load and process the PDF files
loader = PyPDFLoader("./new_papers/ALiBi.pdf")
documents = loader.load()
print("-----------")
print(documents[0])
print("-----------")
# Split the PDF pages into smaller chunks so each embedding covers a focused span of text
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
docs = text_splitter.split_documents(documents)

# Create Chroma vector store for API embeddings; Chroma embeds each chunk via the HF Inference API
api_db = Chroma.from_documents(docs, api_hf_embeddings, collection_name="api-collection")
# Define the PDF retrieval function
def pdf_retrieval(query):
    # Run the query through the vector store and return the matching chunks as plain text
    results = api_db.similarity_search(query)
    return "\n\n".join(doc.page_content for doc in results)
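# Illustrative smoke test in the spirit of the debug prints above; the query string is
# an assumption and not taken from the original script.
print(pdf_retrieval("What problem does ALiBi address in transformer position encoding?"))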
# Create Gradio interface for the API retriever
api_tool = gr.Interface(
    fn=pdf_retrieval,
    inputs=[gr.Textbox()],
    outputs=gr.Textbox(),
    live=True,
    title="API PDF Retrieval Tool",
    description="This tool indexes PDF documents and retrieves relevant answers based on a given query (HF Inference API Embeddings).",
)
# Launch the Gradio interface
#api_tool.launch()
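# If this script is run directly (outside a hosting environment that starts Gradio apps
# automatically), the interface still needs an explicit launch() call. The guard and the
# server settings below are illustrative additions, not taken from the original file.
if __name__ == "__main__":
    api_tool.launch(server_name="0.0.0.0", server_port=7860)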