File size: 3,164 Bytes
f9bc7ba
 
 
 
 
 
 
 
 
 
 
 
da4b000
 
 
 
 
 
f9bc7ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import os
import logging
import sys
import gradio as gr
import requests

from pinecone import Pinecone, ServerlessSpec
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext
from llama_index.vector_stores.pinecone import PineconeVectorStore

# --- Logging setup ---
# basicConfig already attaches a stdout StreamHandler; the original code
# added a second StreamHandler on the root logger, which made every log
# record print twice. One call is sufficient.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# --- Required API keys (fail fast with a clear message) ---
# OpenAI key: consumed implicitly by llama_index's default embedding/LLM
# (read from the environment; not passed explicitly anywhere below).
openai_key = os.getenv("OPENAI_API_KEY")
if not openai_key:
    raise ValueError("โŒ OPENAI_API_KEY not found")

# Pinecone key: needed to connect to the vector store below.
api_key = os.getenv("PINECONE_API_KEY")
if not api_key:
    raise ValueError("โŒ PINECONE_API_KEY not found in environment. Set it in Hugging Face secrets.")

# --- Pinecone Init ---
pc = Pinecone(api_key=api_key)
index_name = "quickstart"
dimension = 1536  # presumably matches the OpenAI embedding size — verify against the embed model

# Create the serverless index on first run; subsequent runs reuse it.
existing_names = {info["name"] for info in pc.list_indexes()}
if index_name not in existing_names:
    pc.create_index(
        name=index_name,
        dimension=dimension,
        metric="euclidean",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

pinecone_index = pc.Index(index_name)

# --- Download Paul Graham essay (cached locally after the first run) ---
os.makedirs("data/paul_graham", exist_ok=True)
file_path = "data/paul_graham/paul_graham_essay.txt"
if not os.path.exists(file_path):
    url = "https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt"
    # timeout prevents an indefinite hang; raise_for_status stops a 404/500
    # error page from being silently written into the data file.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    with open(file_path, "w", encoding="utf-8") as f:
        f.write(response.text)

# --- Load documents and create index ---
# Read every file under data/paul_graham into llama_index Document objects.
documents = SimpleDirectoryReader("data/paul_graham").load_data()
# Back the vector index with the Pinecone index created above.
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
# Embeds the documents and upserts vectors into Pinecone; presumably uses
# the default OpenAI embedding model via OPENAI_API_KEY — verify.
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
# Module-level engine reused by ask_question() below.
query_engine = index.as_query_engine()

# --- Sample Questions ---
# Preset prompts offered in the UI dropdown; intended to be answerable
# from the downloaded essay.
sample_questions = [
    "Why did the author switch from philosophy to AI in college?",
    "What was the author's experience with the IBM 1401?",
    "How did Lisp influence the authorโ€™s thinking?",
    "Why did the author start painting while in grad school?",
    "What inspired the idea behind the Viaweb startup?"
]

# --- Query Function ---
def ask_question(dropdown_choice, manual_input):
    """Answer a question about the essay via the RAG query engine.

    Args:
        dropdown_choice: Sample question selected in the dropdown (may be
            None/empty).
        manual_input: Free-text question typed by the user; takes
            precedence over the dropdown when non-blank.

    Returns:
        The engine's answer as a string, a prompt asking for input when
        both fields are empty, or an "Error: ..." message on failure.
    """
    # Prefer the typed question, ignoring whitespace-only input so an
    # accidental space doesn't shadow the dropdown selection.
    query = (manual_input or "").strip() or dropdown_choice
    if not query:
        return "โ— Please select or enter a question."
    try:
        response = query_engine.query(query)
        return str(response)
    except Exception as e:
        # Surface the failure in the UI instead of crashing the app.
        return f"Error: {str(e)}"

# --- Gradio UI ---
with gr.Blocks() as demo:
    # Page heading.
    gr.Markdown("## ๐Ÿ“˜ Ask a Question about Paul Graham's Essay")

    # Input row: pick a canned question or type a custom one.
    with gr.Row():
        question_dropdown = gr.Dropdown(
            choices=sample_questions,
            label="โฌ‡๏ธ Choose a sample question (optional)",
            interactive=True,
        )
        question_textbox = gr.Textbox(label="๐Ÿ“ Or type your own question")

    ask_button = gr.Button("Submit")
    answer_box = gr.Textbox(label="๐Ÿ“– Answer")

    # Wire the button: (dropdown, textbox) -> ask_question -> answer box.
    ask_button.click(fn=ask_question, inputs=[question_dropdown, question_textbox], outputs=answer_box)

demo.launch()