import os
import sys
import logging
import gradio as gr
from pinecone import Pinecone, ServerlessSpec
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext
from llama_index.vector_stores.pinecone import PineconeVectorStore
# --- Logging ---
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
# --- API Key from Environment ---
api_key = os.getenv("PINECONE_API_KEY")
if not api_key:
    raise ValueError("Please set the PINECONE_API_KEY environment variable.")
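
# LlamaIndex's default Settings use OpenAI for both the embedding model and the
# LLM, so an OpenAI key is effectively required as well. A minimal sketch of the
# same guard, assuming those defaults have not been overridden:
if not os.getenv("OPENAI_API_KEY"):
    raise ValueError("Please set the OPENAI_API_KEY environment variable.")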
# --- Pinecone Setup ---
pc = Pinecone(api_key=api_key)
index_name = "quickstart"
dimension = 1536  # matches OpenAI's text-embedding-ada-002, LlamaIndex's default embed model
# Create the index if it does not already exist
if index_name not in [idx["name"] for idx in pc.list_indexes()]:
    pc.create_index(
        name=index_name,
        dimension=dimension,
        metric="euclidean",  # cosine is more common for OpenAI embeddings; euclidean kept from the original
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
pinecone_index = pc.Index(index_name)
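
# Optional sanity check: log the index stats so an empty (cold-start) index is
# visible in the Space logs. describe_index_stats() is part of the Pinecone
# client API; this line is a sketch and safe to remove.
logging.info("Pinecone index stats: %s", pinecone_index.describe_index_stats())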
# --- Load Document ---
os.makedirs("data/paul_graham", exist_ok=True)
if not os.path.exists("data/paul_graham/paul_graham_essay.txt"):
    import requests

    url = "https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt"
    r = requests.get(url)
    r.raise_for_status()  # fail loudly instead of silently indexing an error page
    with open("data/paul_graham/paul_graham_essay.txt", "w") as f:
        f.write(r.text)
documents = SimpleDirectoryReader("data/paul_graham").load_data()
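
# Quick sanity check on the corpus size (sketch; safe to drop).
logging.info("Loaded %d document(s) from data/paul_graham", len(documents))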
# --- Indexing ---
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
query_engine = index.as_query_engine()
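
# If answers come back too narrow or too noisy, the retriever can be tuned at
# construction time. similarity_top_k is a standard as_query_engine() argument;
# the value below is an illustrative choice, not a tested one:
# query_engine = index.as_query_engine(similarity_top_k=3)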
# --- Query Function ---
def ask_question(prompt):
    """Run a semantic query against the indexed essay and return the answer text."""
    try:
        response = query_engine.query(prompt)
        return str(response)
    except Exception as e:
        return f"❌ Error: {e}"
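
# Example call for local debugging, outside the Gradio UI (illustrative):
# print(ask_question("What does Paul Graham say about startups?"))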
# --- Gradio UI ---
with gr.Blocks(css="""
    body {
        background-color: #f5f5dc;
        font-family: 'Georgia', 'Merriweather', serif;
    }
    h1, h2, h3 {
        color: #4e342e;
    }
    .gr-box, .gr-column, .gr-group {
        border-radius: 15px;
        padding: 20px;
        background-color: #fffaf0;
        box-shadow: 2px 4px 14px rgba(0, 0, 0, 0.1);
        margin-top: 10px;
    }
    textarea, input[type="text"] {
        background-color: #fffaf0;
        border: 1px solid #d2b48c;
        color: #4e342e;
        border-radius: 8px;
    }
    button {
        background-color: #a1887f;
        color: white;
        font-weight: bold;
        border-radius: 8px;
        transition: background-color 0.3s ease;
    }
    button:hover {
        background-color: #8d6e63;
    }
    .gr-button {
        border-radius: 8px !important;
    }
""") as demo:
    with gr.Column():
        gr.Markdown("""
        <div style='text-align: center;'>
            <h1>🧠 Paul Graham Essay Q&A</h1>
            <div style='font-size: 1.1em; color: #6d4c41; margin-bottom: 1em;'>
                Explore insights from Paul Graham's essay using semantic search powered by <strong>LlamaIndex</strong> + <strong>Pinecone</strong>.
            </div>
        </div>
        """)

        with gr.Accordion("ℹ️ What is Pinecone Vector Indexing?", open=False):
            gr.Markdown("""
            **Pinecone** is a vector database that stores document embeddings (numeric representations of meaning). When you ask a question, it's converted into a vector and compared against the stored vectors to find the most relevant answers, even if they don't match word-for-word.
            """)

        gr.Markdown("### 📖 Ask your question below:")

        with gr.Group():
            with gr.Row():
                user_input = gr.Textbox(
                    placeholder="E.g., What does Paul Graham say about startups?",
                    label="Your Question",
                    lines=2,
                )
            with gr.Row():
                output = gr.Textbox(label="Answer", lines=6)
            with gr.Row():
                submit_btn = gr.Button("🔍 Search Essay")
                clear_btn = gr.Button("🧹 Clear")

    submit_btn.click(fn=ask_question, inputs=user_input, outputs=output)
    clear_btn.click(fn=lambda: ("", ""), inputs=None, outputs=[user_input, output])
demo.launch()
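
# On Spaces the plain launch() above is enough; for local testing, a public
# share link can be requested instead (standard Gradio argument):
# demo.launch(share=True)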