import os
import sys
import logging

import gradio as gr
import requests
from pinecone import Pinecone, ServerlessSpec
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext
from llama_index.vector_stores.pinecone import PineconeVectorStore

# --- Logging ---
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# --- API Keys from Environment ---
api_key = os.getenv("PINECONE_API_KEY")
if not api_key:
    raise ValueError("Please set the PINECONE_API_KEY as an environment variable.")
# LlamaIndex uses OpenAI by default for both embeddings and the LLM,
# so OPENAI_API_KEY must be set as well.
if not os.getenv("OPENAI_API_KEY"):
    raise ValueError("Please set the OPENAI_API_KEY as an environment variable.")

# --- Pinecone Setup ---
pc = Pinecone(api_key=api_key)
index_name = "quickstart"
dimension = 1536  # matches OpenAI's text-embedding-ada-002

# Create the index if it does not already exist
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=dimension,
        # cosine is the more common metric for OpenAI embeddings; rankings are
        # identical here because ada-002 vectors are unit-normalized
        metric="euclidean",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
pinecone_index = pc.Index(index_name)

# --- Load Document ---
essay_path = "data/paul_graham/paul_graham_essay.txt"
os.makedirs("data/paul_graham", exist_ok=True)
if not os.path.exists(essay_path):
    url = "https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt"
    r = requests.get(url)
    r.raise_for_status()  # fail loudly instead of indexing an error page
    with open(essay_path, "w") as f:
        f.write(r.text)

documents = SimpleDirectoryReader("data/paul_graham").load_data()

# --- Indexing ---
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
query_engine = index.as_query_engine()

# --- Query Function ---
def ask_question(prompt):
    try:
        response = query_engine.query(prompt)
        return str(response)
    except Exception as e:
        return f"❌ Error: {str(e)}"

# --- Gradio UI ---
with gr.Blocks(css="""
body { background-color: #f5f5dc; font-family: 'Georgia', 'Merriweather', serif; }
h1, h2, h3 { color: #4e342e; }
.gr-box, .gr-column, .gr-group {
    border-radius: 15px;
    padding: 20px;
    background-color: #fffaf0;
    box-shadow: 2px 4px 14px rgba(0, 0, 0, 0.1);
    margin-top: 10px;
}
textarea, input[type="text"] {
    background-color: #fffaf0;
    border: 1px solid #d2b48c;
    color: #4e342e;
    border-radius: 8px;
}
button {
    background-color: #a1887f;
    color: white;
    font-weight: bold;
    border-radius: 8px;
    transition: background-color 0.3s ease;
}
button:hover { background-color: #8d6e63; }
.gr-button { border-radius: 8px !important; }
""") as demo:
    with gr.Column():
        gr.Markdown("""

# 🧠 Paul Graham Essay Q&A

Explore insights from Paul Graham's essay using semantic search powered by LlamaIndex + Pinecone.
""") with gr.Accordion("โ„น๏ธ What is Pinecone Vector Indexing?", open=False): gr.Markdown(""" **Pinecone** is a vector database that stores document embeddings (numeric representations of meaning). When you ask a question, it's converted into a vector and compared against stored vectors to find the most relevant answers โ€” even if they don't match word-for-word. """) gr.Markdown("### ๐Ÿ“– Ask your question below:") with gr.Group(): with gr.Row(): user_input = gr.Textbox( placeholder="E.g., What does Paul Graham say about startups?", label="Your Question", lines=2 ) with gr.Row(): output = gr.Textbox(label="Answer", lines=6) with gr.Row(): submit_btn = gr.Button("๐Ÿ” Search Essay") clear_btn = gr.Button("๐Ÿงน Clear") submit_btn.click(fn=ask_question, inputs=user_input, outputs=output) clear_btn.click(fn=lambda: ("", ""), inputs=None, outputs=[user_input, output]) demo.launch()