Shafaq25 committed on
Commit
5ec5beb
·
verified ·
1 Parent(s): 312de70

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -34
app.py CHANGED
@@ -3,30 +3,21 @@ import sys
3
  import logging
4
  import gradio as gr
5
  from pinecone import Pinecone, ServerlessSpec
6
- from langchain_pinecone import PineconeVectorStore
7
- from langchain_community.document_loaders import TextLoader
8
- from langchain.text_splitter import CharacterTextSplitter
9
- from langchain.chains import RetrievalQA
10
- from langchain_community.llms import OpenAI
11
- from langchain_openai import OpenAIEmbeddings
12
 
13
  # --- Logging ---
14
  logging.basicConfig(stream=sys.stdout, level=logging.INFO)
15
 
16
- # --- Environment Variables ---
17
  api_key = os.getenv("PINECONE_API_KEY")
18
- openai_api_key = os.getenv("OPENAI_API_KEY")
19
-
20
  if not api_key:
21
  raise ValueError("Please set the PINECONE_API_KEY as an environment variable.")
22
- if not openai_api_key:
23
- raise ValueError("Please set the OPENAI_API_KEY as an environment variable.")
24
- os.environ["OPENAI_API_KEY"] = openai_api_key
25
 
26
  # --- Pinecone Setup ---
 
27
  index_name = "quickstart"
28
  dimension = 1536
29
- pc = Pinecone(api_key=api_key)
30
 
31
  # Create index if not exists
32
  if index_name not in [idx['name'] for idx in pc.list_indexes()]:
@@ -37,52 +28,87 @@ if index_name not in [idx['name'] for idx in pc.list_indexes()]:
37
  spec=ServerlessSpec(cloud="aws", region="us-east-1")
38
  )
39
 
40
- # --- Load and Process Document ---
 
 
41
  os.makedirs("data/paul_graham", exist_ok=True)
42
- file_path = "data/paul_graham/paul_graham_essay.txt"
43
- if not os.path.exists(file_path):
44
  import requests
45
  url = "https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt"
46
  r = requests.get(url)
47
- with open(file_path, "w") as f:
48
  f.write(r.text)
49
 
50
- loader = TextLoader(file_path)
51
- documents = loader.load()
52
- text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
53
- texts = text_splitter.split_documents(documents)
54
-
55
- # --- Embedding and Vector Store ---
56
- embeddings = OpenAIEmbeddings()
57
- docsearch = PineconeVectorStore.from_documents(texts, embeddings, index_name=index_name)
58
 
59
- # --- Query Engine Setup ---
60
- llm = OpenAI()
61
- retriever = docsearch.as_retriever()
62
- qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)
 
63
 
64
# --- Query Function ---
def ask_question(prompt):
    """Run *prompt* through the RetrievalQA chain and return the answer text.

    Any exception raised while querying (network, API, retrieval) is caught
    and returned as an error string so the Gradio UI never crashes.
    """
    try:
        return str(qa.run(prompt))
    except Exception as e:
        return f"❌ Error: {str(e)}"
71
 
72
  # --- Gradio UI ---
73
- with gr.Blocks(css="""body { background-color: #f5f5dc; font-family: 'Georgia', 'Merriweather', serif;}h1, h2, h3 { color: #4e342e;}.gr-box, .gr-column, .gr-group { border-radius: 15px; padding: 20px; background-color: #fffaf0; box-shadow: 2px 4px 14px rgba(0, 0, 0, 0.1); margin-top: 10px;}textarea, input[type="text"] { background-color: #fffaf0; border: 1px solid #d2b48c; color: #4e342e; border-radius: 8px;}button { background-color: #a1887f; color: white; font-weight: bold; border-radius: 8px; transition: background-color 0.3s ease;}button:hover { background-color: #8d6e63;}.gr-button { border-radius: 8px !important;}""") as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  with gr.Column():
75
  gr.Markdown("""
76
  <div style='text-align: center;'>
77
  <h1>🧠 Paul Graham Essay Q&A</h1>
78
  <div style='font-size: 1.1em; color: #6d4c41; margin-bottom: 1em;'>
79
- Explore insights from Paul Graham's essay using semantic search powered by <strong>LangChain</strong> + <strong>Pinecone</strong>.
80
  </div>
81
  </div>
82
  """)
 
83
  with gr.Accordion("ℹ️ What is Pinecone Vector Indexing?", open=False):
84
- gr.Markdown("""**Pinecone** is a vector database that stores document embeddings (numeric representations of meaning). When you ask a question, it's converted into a vector and compared against stored vectors to find the most relevant answers β€” even if they don't match word-for-word.""")
 
 
 
85
  gr.Markdown("### πŸ“– Ask your question below:")
 
86
  with gr.Group():
87
  with gr.Row():
88
  user_input = gr.Textbox(
@@ -90,12 +116,15 @@ with gr.Blocks(css="""body { background-color: #f5f5dc; font-family: 'Geor
90
  label="Your Question",
91
  lines=2
92
  )
 
93
  with gr.Row():
94
  output = gr.Textbox(label="Answer", lines=6)
 
95
  with gr.Row():
96
  submit_btn = gr.Button("πŸ” Search Essay")
97
  clear_btn = gr.Button("🧹 Clear")
 
98
  submit_btn.click(fn=ask_question, inputs=user_input, outputs=output)
99
  clear_btn.click(fn=lambda: ("", ""), inputs=None, outputs=[user_input, output])
100
 
101
- demo.launch()
 
3
  import logging
4
  import gradio as gr
5
  from pinecone import Pinecone, ServerlessSpec
6
+ from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext
7
+ from llama_index.vector_stores.pinecone import PineconeVectorStore
 
 
 
 
8
 
9
# --- Logging ---
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# --- API Key from Environment ---
# Fail fast at startup if the required Pinecone credential is missing.
if not (api_key := os.getenv("PINECONE_API_KEY")):
    raise ValueError("Please set the PINECONE_API_KEY as an environment variable.")

# --- Pinecone Setup ---
pc = Pinecone(api_key=api_key)
index_name = "quickstart"
dimension = 1536
 
21
 
22
  # Create index if not exists
23
  if index_name not in [idx['name'] for idx in pc.list_indexes()]:
 
28
  spec=ServerlessSpec(cloud="aws", region="us-east-1")
29
  )
30
 
31
pinecone_index = pc.Index(index_name)

# --- Load Document ---
# Download the Paul Graham essay once and cache it on disk.
essay_path = "data/paul_graham/paul_graham_essay.txt"
os.makedirs("data/paul_graham", exist_ok=True)
if not os.path.exists(essay_path):
    import requests
    url = "https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt"
    # timeout prevents the app from hanging forever on a dead connection;
    # raise_for_status stops an HTTP error page from being saved as the corpus.
    r = requests.get(url, timeout=30)
    r.raise_for_status()
    with open(essay_path, "w", encoding="utf-8") as f:
        f.write(r.text)

documents = SimpleDirectoryReader("data/paul_graham").load_data()

# --- Indexing ---
# Embed the documents into the Pinecone-backed vector store and expose a
# query engine for the UI callback.
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
query_engine = index.as_query_engine()
49
 
50
  # --- Query Function ---
51
  def ask_question(prompt):
52
  try:
53
+ response = query_engine.query(prompt)
54
  return str(response)
55
  except Exception as e:
56
  return f"❌ Error: {str(e)}"
57
 
58
  # --- Gradio UI ---
59
+ with gr.Blocks(css="""
60
+ body {
61
+ background-color: #f5f5dc;
62
+ font-family: 'Georgia', 'Merriweather', serif;
63
+ }
64
+ h1, h2, h3 {
65
+ color: #4e342e;
66
+ }
67
+ .gr-box, .gr-column, .gr-group {
68
+ border-radius: 15px;
69
+ padding: 20px;
70
+ background-color: #fffaf0;
71
+ box-shadow: 2px 4px 14px rgba(0, 0, 0, 0.1);
72
+ margin-top: 10px;
73
+ }
74
+ textarea, input[type="text"] {
75
+ background-color: #fffaf0;
76
+ border: 1px solid #d2b48c;
77
+ color: #4e342e;
78
+ border-radius: 8px;
79
+ }
80
+ button {
81
+ background-color: #a1887f;
82
+ color: white;
83
+ font-weight: bold;
84
+ border-radius: 8px;
85
+ transition: background-color 0.3s ease;
86
+ }
87
+ button:hover {
88
+ background-color: #8d6e63;
89
+ }
90
+ .gr-button {
91
+ border-radius: 8px !important;
92
+ }
93
+ """) as demo:
94
+
95
  with gr.Column():
96
  gr.Markdown("""
97
  <div style='text-align: center;'>
98
  <h1>🧠 Paul Graham Essay Q&A</h1>
99
  <div style='font-size: 1.1em; color: #6d4c41; margin-bottom: 1em;'>
100
+ Explore insights from Paul Graham's essay using semantic search powered by <strong>LlamaIndex</strong> + <strong>Pinecone</strong>.
101
  </div>
102
  </div>
103
  """)
104
+
105
  with gr.Accordion("ℹ️ What is Pinecone Vector Indexing?", open=False):
106
+ gr.Markdown("""
107
+ **Pinecone** is a vector database that stores document embeddings (numeric representations of meaning). When you ask a question, it's converted into a vector and compared against stored vectors to find the most relevant answers β€” even if they don't match word-for-word.
108
+ """)
109
+
110
  gr.Markdown("### πŸ“– Ask your question below:")
111
+
112
  with gr.Group():
113
  with gr.Row():
114
  user_input = gr.Textbox(
 
116
  label="Your Question",
117
  lines=2
118
  )
119
+
120
  with gr.Row():
121
  output = gr.Textbox(label="Answer", lines=6)
122
+
123
  with gr.Row():
124
  submit_btn = gr.Button("πŸ” Search Essay")
125
  clear_btn = gr.Button("🧹 Clear")
126
+
127
  submit_btn.click(fn=ask_question, inputs=user_input, outputs=output)
128
  clear_btn.click(fn=lambda: ("", ""), inputs=None, outputs=[user_input, output])
129
 
130
+ demo.launch()