DebopamC committed on
Commit
aaaffa8
·
verified ·
1 Parent(s): 78cbb80

Upload 16 files

Browse files
static/database_scema.txt CHANGED
@@ -55,8 +55,8 @@ Rows: 38279, Columns: 4
55
  CREATE TABLE orders (
56
  order_id VARCHAR(255) PRIMARY KEY,
57
  customer_id VARCHAR(255),
58
- order_purchase_timestamp TIMESTAMP,
59
- order_approved_at TIMESTAMP,
60
  FOREIGN KEY (customer_id) REFERENCES customers(customer_id)
61
  );
62
  ```
 
55
  CREATE TABLE orders (
56
  order_id VARCHAR(255) PRIMARY KEY,
57
  customer_id VARCHAR(255),
58
+ order_purchase_timestamp VARCHAR(255),
59
+ order_approved_at VARCHAR(255),
60
  FOREIGN KEY (customer_id) REFERENCES customers(customer_id)
61
  );
62
  ```
static/default_questions.txt CHANGED
@@ -132,6 +132,33 @@ These questions are generated by ChatGpt 4o. Copy and paste the questions in the
132
  ### Hard Questions
133
 
134
  1.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  **Question:**
136
  ```
137
  Write a Query to find the total revenue (sum of `price` + `shipping_charges`) generated for each product category in the `order_items` table, joined with the `products` table.
@@ -149,25 +176,5 @@ These questions are generated by ChatGpt 4o. Copy and paste the questions in the
149
  ORDER BY total_revenue DESC;
150
  ```
151
 
152
- ---
153
-
154
- 2.
155
- **Question:**
156
- ```
157
- Write a Query to identify the top 5 products with the highest total sales value ( sum of `price` ) across all orders.
158
- ```
159
- **Fine-Tuned Model Results:**
160
- ❌ **Fail**
161
- **Answer by ChatGpt 4o:**
162
- ```sql
163
- SELECT
164
- product_id,
165
- SUM(price) AS total_sales
166
- FROM order_items
167
- GROUP BY product_id
168
- ORDER BY total_sales DESC
169
- LIMIT 5;
170
- ```
171
- **Issue:** Misalignment with finer-grained filters or lack of handling for tied ranks.
172
 
173
- ---
 
132
  ### Hard Questions
133
 
134
  1.
135
+ **Question:**
136
+ ```
137
+ Select the most recent 1000 orders, their corresponding product details, customer details, and when they were purchased.
138
+ ```
139
+ **Fine-Tuned Model Results:**
140
+ ✅ **Pass**
141
+ **Answer by ChatGpt 4o:**
142
+ ```sql
143
+ SELECT
144
+ o.order_id,
145
+ o.order_purchase_timestamp,
146
+ c.customer_id,
147
+ c.customer_city,
148
+ c.customer_state,
149
+ oi.product_id,
150
+ p.product_category_name,
151
+ oi.price,
152
+ oi.shipping_charges
153
+ FROM orders o
154
+ JOIN customers c ON o.customer_id = c.customer_id
155
+ JOIN order_items oi ON o.order_id = oi.order_id
156
+ JOIN products p ON oi.product_id = p.product_id
157
+ ORDER BY o.order_purchase_timestamp DESC
158
+ LIMIT 1000;
159
+ ```
160
+
161
+ 2.
162
  **Question:**
163
  ```
164
  Write a Query to find the total revenue (sum of `price` + `shipping_charges`) generated for each product category in the `order_items` table, joined with the `products` table.
 
176
  ORDER BY total_revenue DESC;
177
  ```
178
 
179
+ ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
 
 
utils/__pycache__/llm_logic.cpython-312.pyc CHANGED
Binary files a/utils/__pycache__/llm_logic.cpython-312.pyc and b/utils/__pycache__/llm_logic.cpython-312.pyc differ
 
utils/llm_logic.py CHANGED
@@ -6,6 +6,8 @@ import multiprocessing
6
  from langchain_community.chat_models import ChatLlamaCpp
7
  from langchain_google_genai import ChatGoogleGenerativeAI
8
 
 
 
9
  local_model = "qwen2.5-coder-3b-instruct-q4_k_m.gguf"
10
 
11
  stop = [
@@ -31,7 +33,7 @@ stop = [
31
 
32
 
33
  def get_local_llm():
34
- cache_llm = ChatLlamaCpp(
35
  temperature=0.0,
36
  model_path=local_model,
37
  n_ctx=10000,
@@ -39,11 +41,19 @@ def get_local_llm():
39
  n_batch=1024,
40
  max_tokens=500,
41
  n_threads=multiprocessing.cpu_count() - 1,
42
- top_p=0.97,
43
  verbose=False,
44
  stop=stop,
45
  )
46
- return cache_llm
 
 
 
 
 
 
 
 
47
 
48
 
49
  local_llm = get_local_llm()
@@ -51,14 +61,16 @@ local_llm = get_local_llm()
51
 
52
  def get_gemini_llm():
53
  gemini = ChatGoogleGenerativeAI(
54
- model="gemini-1.5-flash",
55
  temperature=0,
56
  max_tokens=None,
57
  timeout=None,
58
  max_retries=2,
 
59
  )
60
  return gemini
61
 
 
62
  gemini_llm = get_gemini_llm()
63
 
64
 
@@ -115,13 +127,15 @@ Rows: 38279, Columns: 5
115
  | 27442 | hrjNaMt3Wyo5 | toys | 1850 | 37 | 22 | 40 |
116
 
117
  Rows: 38279, Columns: 6
 
118
  """
119
 
120
  # Improved SQL generation prompt
121
  sql_system_prompt = """You are a highly skilled natural language to SQL translator. Your goal is to generate accurate SQL queries based on the provided database schema. You must only return the SQL query and no other text or explanations.
122
-
123
  DATABASE SCHEMA:
124
  {db_schema}
 
 
125
  """
126
  sql_chat_template = """
127
 
@@ -165,7 +179,7 @@ QUESTION: {question}
165
  """
166
 
167
 
168
- def classify_question(question: str, llm , use_default_schema: bool = True):
169
  classification_system_prompt_local = classification_system_prompt # Initialize here
170
  if use_default_schema:
171
  classification_system_prompt_local = classification_system_prompt_local.format(
@@ -184,7 +198,7 @@ def classify_question(question: str, llm , use_default_schema: bool = True):
184
  return response.content.strip().upper()
185
 
186
 
187
- def generate_llm_response(prompt: str, llm: str, use_default_schema: bool = True):
188
 
189
  if llm == "gemini":
190
  llm = gemini_llm
 
6
  from langchain_community.chat_models import ChatLlamaCpp
7
  from langchain_google_genai import ChatGoogleGenerativeAI
8
 
9
+ from langchain_ollama import ChatOllama
10
+
11
  local_model = "qwen2.5-coder-3b-instruct-q4_k_m.gguf"
12
 
13
  stop = [
 
33
 
34
 
35
  def get_local_llm():
36
+ llm = ChatLlamaCpp(
37
  temperature=0.0,
38
  model_path=local_model,
39
  n_ctx=10000,
 
41
  n_batch=1024,
42
  max_tokens=500,
43
  n_threads=multiprocessing.cpu_count() - 1,
44
+ top_p=0.95,
45
  verbose=False,
46
  stop=stop,
47
  )
48
+ # llm = ChatOllama(
49
+ # model="qwen2.5-coder:3b",
50
+ # temperature=0.0,
51
+ # num_predict=150,
52
+ # top_p=0.95,
53
+ # stop=stop,
54
+ # )
55
+
56
+ return llm
57
 
58
 
59
  local_llm = get_local_llm()
 
61
 
62
  def get_gemini_llm():
63
  gemini = ChatGoogleGenerativeAI(
64
+ model="gemini-2.0-flash-exp",
65
  temperature=0,
66
  max_tokens=None,
67
  timeout=None,
68
  max_retries=2,
69
+ top_p=0.95,
70
  )
71
  return gemini
72
 
73
+
74
  gemini_llm = get_gemini_llm()
75
 
76
 
 
127
  | 27442 | hrjNaMt3Wyo5 | toys | 1850 | 37 | 22 | 40 |
128
 
129
  Rows: 38279, Columns: 6
130
+
131
  """
132
 
133
  # Improved SQL generation prompt
134
  sql_system_prompt = """You are a highly skilled natural language to SQL translator. Your goal is to generate accurate SQL queries based on the provided database schema. You must only return the SQL query and no other text or explanations.
 
135
  DATABASE SCHEMA:
136
  {db_schema}
137
+
138
+ The timestamp columns are of type 'VarChar'. I am using DuckDB to execute the queries.
139
  """
140
  sql_chat_template = """
141
 
 
179
  """
180
 
181
 
182
+ def classify_question(question: str, llm, use_default_schema: bool = True):
183
  classification_system_prompt_local = classification_system_prompt # Initialize here
184
  if use_default_schema:
185
  classification_system_prompt_local = classification_system_prompt_local.format(
 
198
  return response.content.strip().upper()
199
 
200
 
201
+ def generate_llm_response(prompt: str, llm: str, use_default_schema: bool = True):
202
 
203
  if llm == "gemini":
204
  llm = gemini_llm
🤖SQL_Agent.py CHANGED
@@ -66,6 +66,7 @@ st.markdown(
66
  unsafe_allow_html=True,
67
  )
68
 
 
69
  with st.popover("Click here to see Database Schema", use_container_width=True):
70
  uploaded_df_schema = st.session_state.get("uploaded_df_schema", False)
71
 
@@ -155,14 +156,14 @@ with col1:
155
  with col2:
156
  llm_option_radio = st.radio(
157
  "Choose LLM Model",
158
- ["Gemini 1.5-Flash", "FineTuned Qwen2.5-Coder-3B for SQL"],
159
  captions=[
160
  "Used via API",
161
  "Run Locally on this Server. Extremely Slow because of Free vCPUs",
162
  ],
163
  label_visibility="collapsed",
164
  )
165
- if llm_option_radio == "Gemini 1.5-Flash":
166
  llm_option = "gemini"
167
  else:
168
  llm_option = "qwen"
@@ -236,7 +237,7 @@ if st.session_state.conversation_turns < MAX_TURNS:
236
  spinner_text = ""
237
  if llm_option == "gemini":
238
  spinner_text = (
239
- "Using Gemini-1.5-Flash to run your query. Please wait...😊"
240
  )
241
  else:
242
  spinner_text = "I know it is taking a lot of time. To run the model I'm using `Free` small vCPUs provided by `HuggingFace Spaces` for deployment. Thank you so much for your patience😊"
@@ -314,3 +315,6 @@ else:
314
  st.chat_input(
315
  "Ask me a SQL query question", disabled=True
316
  ) # Disable the input field
 
 
 
 
66
  unsafe_allow_html=True,
67
  )
68
 
69
+
70
  with st.popover("Click here to see Database Schema", use_container_width=True):
71
  uploaded_df_schema = st.session_state.get("uploaded_df_schema", False)
72
 
 
156
  with col2:
157
  llm_option_radio = st.radio(
158
  "Choose LLM Model",
159
+ ["Gemini-2.0-Flash-Exp", "FineTuned Qwen2.5-Coder-3B for SQL"],
160
  captions=[
161
  "Used via API",
162
  "Run Locally on this Server. Extremely Slow because of Free vCPUs",
163
  ],
164
  label_visibility="collapsed",
165
  )
166
+ if llm_option_radio == "Gemini-2.0-Flash-Exp":
167
  llm_option = "gemini"
168
  else:
169
  llm_option = "qwen"
 
237
  spinner_text = ""
238
  if llm_option == "gemini":
239
  spinner_text = (
240
+ "Using Gemini-2.0-Flash-Exp to run your query. Please wait...😊"
241
  )
242
  else:
243
  spinner_text = "I know it is taking a lot of time. To run the model I'm using `Free` small vCPUs provided by `HuggingFace Spaces` for deployment. Thank you so much for your patience😊"
 
315
  st.chat_input(
316
  "Ask me a SQL query question", disabled=True
317
  ) # Disable the input field
318
+
319
+ with st.sidebar:
320
+ st.caption("Made with ❤️ by @Debopam_Chowdhury")