Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -3,30 +3,21 @@ import sys
|
|
3 |
import logging
|
4 |
import gradio as gr
|
5 |
from pinecone import Pinecone, ServerlessSpec
|
6 |
-
from
|
7 |
-
from
|
8 |
-
from langchain.text_splitter import CharacterTextSplitter
|
9 |
-
from langchain.chains import RetrievalQA
|
10 |
-
from langchain_community.llms import OpenAI
|
11 |
-
from langchain_openai import OpenAIEmbeddings
|
12 |
|
13 |
# --- Logging ---
|
14 |
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
|
15 |
|
16 |
-
# --- Environment
|
17 |
api_key = os.getenv("PINECONE_API_KEY")
|
18 |
-
openai_api_key = os.getenv("OPENAI_API_KEY")
|
19 |
-
|
20 |
if not api_key:
|
21 |
raise ValueError("Please set the PINECONE_API_KEY as an environment variable.")
|
22 |
-
if not openai_api_key:
|
23 |
-
raise ValueError("Please set the OPENAI_API_KEY as an environment variable.")
|
24 |
-
os.environ["OPENAI_API_KEY"] = openai_api_key
|
25 |
|
26 |
# --- Pinecone Setup ---
|
|
|
27 |
index_name = "quickstart"
|
28 |
dimension = 1536
|
29 |
-
pc = Pinecone(api_key=api_key)
|
30 |
|
31 |
# Create index if not exists
|
32 |
if index_name not in [idx['name'] for idx in pc.list_indexes()]:
|
@@ -37,52 +28,87 @@ if index_name not in [idx['name'] for idx in pc.list_indexes()]:
|
|
37 |
spec=ServerlessSpec(cloud="aws", region="us-east-1")
|
38 |
)
|
39 |
|
40 |
-
|
|
|
|
|
41 |
os.makedirs("data/paul_graham", exist_ok=True)
|
42 |
-
|
43 |
-
if not os.path.exists(file_path):
|
44 |
import requests
|
45 |
url = "https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt"
|
46 |
r = requests.get(url)
|
47 |
-
with open(
|
48 |
f.write(r.text)
|
49 |
|
50 |
-
|
51 |
-
documents = loader.load()
|
52 |
-
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
|
53 |
-
texts = text_splitter.split_documents(documents)
|
54 |
-
|
55 |
-
# --- Embedding and Vector Store ---
|
56 |
-
embeddings = OpenAIEmbeddings()
|
57 |
-
docsearch = PineconeVectorStore.from_documents(texts, embeddings, index_name=index_name)
|
58 |
|
59 |
-
# ---
|
60 |
-
|
61 |
-
|
62 |
-
|
|
|
63 |
|
64 |
# --- Query Function ---
|
65 |
def ask_question(prompt):
|
66 |
try:
|
67 |
-
response =
|
68 |
return str(response)
|
69 |
except Exception as e:
|
70 |
return f"β Error: {str(e)}"
|
71 |
|
72 |
# --- Gradio UI ---
|
73 |
-
with gr.Blocks(css="""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
with gr.Column():
|
75 |
gr.Markdown("""
|
76 |
<div style='text-align: center;'>
|
77 |
<h1>π§ Paul Graham Essay Q&A</h1>
|
78 |
<div style='font-size: 1.1em; color: #6d4c41; margin-bottom: 1em;'>
|
79 |
-
Explore insights from Paul Graham's essay using semantic search powered by <strong>
|
80 |
</div>
|
81 |
</div>
|
82 |
""")
|
|
|
83 |
with gr.Accordion("βΉοΈ What is Pinecone Vector Indexing?", open=False):
|
84 |
-
gr.Markdown("""
|
|
|
|
|
|
|
85 |
gr.Markdown("### π Ask your question below:")
|
|
|
86 |
with gr.Group():
|
87 |
with gr.Row():
|
88 |
user_input = gr.Textbox(
|
@@ -90,12 +116,15 @@ with gr.Blocks(css="""body { background-color: #f5f5dc; font-family: 'Geor
|
|
90 |
label="Your Question",
|
91 |
lines=2
|
92 |
)
|
|
|
93 |
with gr.Row():
|
94 |
output = gr.Textbox(label="Answer", lines=6)
|
|
|
95 |
with gr.Row():
|
96 |
submit_btn = gr.Button("π Search Essay")
|
97 |
clear_btn = gr.Button("π§Ή Clear")
|
|
|
98 |
submit_btn.click(fn=ask_question, inputs=user_input, outputs=output)
|
99 |
clear_btn.click(fn=lambda: ("", ""), inputs=None, outputs=[user_input, output])
|
100 |
|
101 |
-
demo.launch()
|
|
|
3 |
import logging
|
4 |
import gradio as gr
|
5 |
from pinecone import Pinecone, ServerlessSpec
|
6 |
+
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext
|
7 |
+
from llama_index.vector_stores.pinecone import PineconeVectorStore
|
|
|
|
|
|
|
|
|
8 |
|
9 |
# --- Logging ---
|
10 |
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
|
11 |
|
12 |
+
# --- API Key from Environment ---
|
13 |
api_key = os.getenv("PINECONE_API_KEY")
|
|
|
|
|
14 |
if not api_key:
|
15 |
raise ValueError("Please set the PINECONE_API_KEY as an environment variable.")
|
|
|
|
|
|
|
16 |
|
17 |
# --- Pinecone Setup ---
|
18 |
+
pc = Pinecone(api_key=api_key)
|
19 |
index_name = "quickstart"
|
20 |
dimension = 1536
|
|
|
21 |
|
22 |
# Create index if not exists
|
23 |
if index_name not in [idx['name'] for idx in pc.list_indexes()]:
|
|
|
28 |
spec=ServerlessSpec(cloud="aws", region="us-east-1")
|
29 |
)
|
30 |
|
31 |
+
pinecone_index = pc.Index(index_name)
|
32 |
+
|
33 |
+
# --- Load Document ---
|
34 |
os.makedirs("data/paul_graham", exist_ok=True)
|
35 |
+
if not os.path.exists("data/paul_graham/paul_graham_essay.txt"):
|
|
|
36 |
import requests
|
37 |
url = "https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt"
|
38 |
r = requests.get(url)
|
39 |
+
with open("data/paul_graham/paul_graham_essay.txt", "w") as f:
|
40 |
f.write(r.text)
|
41 |
|
42 |
+
documents = SimpleDirectoryReader("data/paul_graham").load_data()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
|
44 |
+
# --- Indexing ---
|
45 |
+
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
|
46 |
+
storage_context = StorageContext.from_defaults(vector_store=vector_store)
|
47 |
+
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
|
48 |
+
query_engine = index.as_query_engine()
|
49 |
|
50 |
# --- Query Function ---
|
51 |
def ask_question(prompt):
    """Answer *prompt* with the Pinecone-backed query engine.

    Returns the engine's answer as a string, or an error message string
    if the query fails (keeps the Gradio UI from surfacing a traceback).
    """
    try:
        return str(query_engine.query(prompt))
    except Exception as e:
        return f"β Error: {str(e)}"
|
57 |
|
58 |
# --- Gradio UI ---
|
59 |
+
with gr.Blocks(css="""
|
60 |
+
body {
|
61 |
+
background-color: #f5f5dc;
|
62 |
+
font-family: 'Georgia', 'Merriweather', serif;
|
63 |
+
}
|
64 |
+
h1, h2, h3 {
|
65 |
+
color: #4e342e;
|
66 |
+
}
|
67 |
+
.gr-box, .gr-column, .gr-group {
|
68 |
+
border-radius: 15px;
|
69 |
+
padding: 20px;
|
70 |
+
background-color: #fffaf0;
|
71 |
+
box-shadow: 2px 4px 14px rgba(0, 0, 0, 0.1);
|
72 |
+
margin-top: 10px;
|
73 |
+
}
|
74 |
+
textarea, input[type="text"] {
|
75 |
+
background-color: #fffaf0;
|
76 |
+
border: 1px solid #d2b48c;
|
77 |
+
color: #4e342e;
|
78 |
+
border-radius: 8px;
|
79 |
+
}
|
80 |
+
button {
|
81 |
+
background-color: #a1887f;
|
82 |
+
color: white;
|
83 |
+
font-weight: bold;
|
84 |
+
border-radius: 8px;
|
85 |
+
transition: background-color 0.3s ease;
|
86 |
+
}
|
87 |
+
button:hover {
|
88 |
+
background-color: #8d6e63;
|
89 |
+
}
|
90 |
+
.gr-button {
|
91 |
+
border-radius: 8px !important;
|
92 |
+
}
|
93 |
+
""") as demo:
|
94 |
+
|
95 |
with gr.Column():
|
96 |
gr.Markdown("""
|
97 |
<div style='text-align: center;'>
|
98 |
<h1>π§ Paul Graham Essay Q&A</h1>
|
99 |
<div style='font-size: 1.1em; color: #6d4c41; margin-bottom: 1em;'>
|
100 |
+
Explore insights from Paul Graham's essay using semantic search powered by <strong>LlamaIndex</strong> + <strong>Pinecone</strong>.
|
101 |
</div>
|
102 |
</div>
|
103 |
""")
|
104 |
+
|
105 |
with gr.Accordion("βΉοΈ What is Pinecone Vector Indexing?", open=False):
|
106 |
+
gr.Markdown("""
|
107 |
+
**Pinecone** is a vector database that stores document embeddings (numeric representations of meaning). When you ask a question, it's converted into a vector and compared against stored vectors to find the most relevant answers β even if they don't match word-for-word.
|
108 |
+
""")
|
109 |
+
|
110 |
gr.Markdown("### π Ask your question below:")
|
111 |
+
|
112 |
with gr.Group():
|
113 |
with gr.Row():
|
114 |
user_input = gr.Textbox(
|
|
|
116 |
label="Your Question",
|
117 |
lines=2
|
118 |
)
|
119 |
+
|
120 |
with gr.Row():
|
121 |
output = gr.Textbox(label="Answer", lines=6)
|
122 |
+
|
123 |
with gr.Row():
|
124 |
submit_btn = gr.Button("π Search Essay")
|
125 |
clear_btn = gr.Button("π§Ή Clear")
|
126 |
+
|
127 |
submit_btn.click(fn=ask_question, inputs=user_input, outputs=output)
|
128 |
clear_btn.click(fn=lambda: ("", ""), inputs=None, outputs=[user_input, output])
|
129 |
|
130 |
+
demo.launch()
|