Spaces:
Runtime error
Runtime error
Upload 16 files
Browse files
- static/database_scema.txt +2 -2
- static/default_questions.txt +28 -21
- utils/__pycache__/llm_logic.cpython-312.pyc +0 -0
- utils/llm_logic.py +21 -7
- 🤖SQL_Agent.py +7 -3
static/database_scema.txt
CHANGED
@@ -55,8 +55,8 @@ Rows: 38279, Columns: 4
|
|
55 |
CREATE TABLE orders (
|
56 |
order_id VARCHAR(255) PRIMARY KEY,
|
57 |
customer_id VARCHAR(255),
|
58 |
-
order_purchase_timestamp
|
59 |
-
order_approved_at
|
60 |
FOREIGN KEY (customer_id) REFERENCES customers(customer_id)
|
61 |
);
|
62 |
```
|
|
|
55 |
CREATE TABLE orders (
|
56 |
order_id VARCHAR(255) PRIMARY KEY,
|
57 |
customer_id VARCHAR(255),
|
58 |
+
order_purchase_timestamp VARCHAR(255),
|
59 |
+
order_approved_at VARCHAR(255),
|
60 |
FOREIGN KEY (customer_id) REFERENCES customers(customer_id)
|
61 |
);
|
62 |
```
|
static/default_questions.txt
CHANGED
@@ -132,6 +132,33 @@ These questions are generated by ChatGpt 4o. Copy and paste the questions in the
|
|
132 |
### Hard Questions
|
133 |
|
134 |
1.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
135 |
**Question:**
|
136 |
```
|
137 |
Write a Query to find the total revenue (sum of `price` + `shipping_charges`) generated for each product category in the `order_items` table, joined with the `products` table.
|
@@ -149,25 +176,5 @@ These questions are generated by ChatGpt 4o. Copy and paste the questions in the
|
|
149 |
ORDER BY total_revenue DESC;
|
150 |
```
|
151 |
|
152 |
-
---
|
153 |
-
|
154 |
-
2.
|
155 |
-
**Question:**
|
156 |
-
```
|
157 |
-
Write a Query to identify the top 5 products with the highest total sales value ( sum of `price` ) across all orders.
|
158 |
-
```
|
159 |
-
**Fine-Tuned Model Results:**
|
160 |
-
❌ **Fail**
|
161 |
-
**Answer by ChatGpt 4o:**
|
162 |
-
```sql
|
163 |
-
SELECT
|
164 |
-
product_id,
|
165 |
-
SUM(price) AS total_sales
|
166 |
-
FROM order_items
|
167 |
-
GROUP BY product_id
|
168 |
-
ORDER BY total_sales DESC
|
169 |
-
LIMIT 5;
|
170 |
-
```
|
171 |
-
**Issue:** Misalignment with finer-grained filters or lack of handling for tied ranks.
|
172 |
|
173 |
-
---
|
|
|
132 |
### Hard Questions
|
133 |
|
134 |
1.
|
135 |
+
**Question:**
|
136 |
+
```
|
137 |
+
Select the most recent 1000 orders, their corresponding product details, customer details, and when they were purchased.
|
138 |
+
```
|
139 |
+
**Fine-Tuned Model Results:**
|
140 |
+
✅ **Pass**
|
141 |
+
**Answer by ChatGpt 4o:**
|
142 |
+
```sql
|
143 |
+
SELECT
|
144 |
+
o.order_id,
|
145 |
+
o.order_purchase_timestamp,
|
146 |
+
c.customer_id,
|
147 |
+
c.customer_city,
|
148 |
+
c.customer_state,
|
149 |
+
oi.product_id,
|
150 |
+
p.product_category_name,
|
151 |
+
oi.price,
|
152 |
+
oi.shipping_charges
|
153 |
+
FROM orders o
|
154 |
+
JOIN customers c ON o.customer_id = c.customer_id
|
155 |
+
JOIN order_items oi ON o.order_id = oi.order_id
|
156 |
+
JOIN products p ON oi.product_id = p.product_id
|
157 |
+
ORDER BY o.order_purchase_timestamp DESC
|
158 |
+
LIMIT 1000;
|
159 |
+
```
|
160 |
+
|
161 |
+
2.
|
162 |
**Question:**
|
163 |
```
|
164 |
Write a Query to find the total revenue (sum of `price` + `shipping_charges`) generated for each product category in the `order_items` table, joined with the `products` table.
|
|
|
176 |
ORDER BY total_revenue DESC;
|
177 |
```
|
178 |
|
179 |
+
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
180 |
|
|
utils/__pycache__/llm_logic.cpython-312.pyc
CHANGED
Binary files a/utils/__pycache__/llm_logic.cpython-312.pyc and b/utils/__pycache__/llm_logic.cpython-312.pyc differ
|
|
utils/llm_logic.py
CHANGED
@@ -6,6 +6,8 @@ import multiprocessing
|
|
6 |
from langchain_community.chat_models import ChatLlamaCpp
|
7 |
from langchain_google_genai import ChatGoogleGenerativeAI
|
8 |
|
|
|
|
|
9 |
local_model = "qwen2.5-coder-3b-instruct-q4_k_m.gguf"
|
10 |
|
11 |
stop = [
|
@@ -31,7 +33,7 @@ stop = [
|
|
31 |
|
32 |
|
33 |
def get_local_llm():
|
34 |
-
|
35 |
temperature=0.0,
|
36 |
model_path=local_model,
|
37 |
n_ctx=10000,
|
@@ -39,11 +41,19 @@ def get_local_llm():
|
|
39 |
n_batch=1024,
|
40 |
max_tokens=500,
|
41 |
n_threads=multiprocessing.cpu_count() - 1,
|
42 |
-
top_p=0.
|
43 |
verbose=False,
|
44 |
stop=stop,
|
45 |
)
|
46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
|
48 |
|
49 |
local_llm = get_local_llm()
|
@@ -51,14 +61,16 @@ local_llm = get_local_llm()
|
|
51 |
|
52 |
def get_gemini_llm():
|
53 |
gemini = ChatGoogleGenerativeAI(
|
54 |
-
model="gemini-
|
55 |
temperature=0,
|
56 |
max_tokens=None,
|
57 |
timeout=None,
|
58 |
max_retries=2,
|
|
|
59 |
)
|
60 |
return gemini
|
61 |
|
|
|
62 |
gemini_llm = get_gemini_llm()
|
63 |
|
64 |
|
@@ -115,13 +127,15 @@ Rows: 38279, Columns: 5
|
|
115 |
| 27442 | hrjNaMt3Wyo5 | toys | 1850 | 37 | 22 | 40 |
|
116 |
|
117 |
Rows: 38279, Columns: 6
|
|
|
118 |
"""
|
119 |
|
120 |
# Improved SQL generation prompt
|
121 |
sql_system_prompt = """You are a highly skilled natural language to SQL translator. Your goal is to generate accurate SQL queries based on the provided database schema. You must only return the SQL query and no other text or explanations.
|
122 |
-
|
123 |
DATABASE SCHEMA:
|
124 |
{db_schema}
|
|
|
|
|
125 |
"""
|
126 |
sql_chat_template = """
|
127 |
|
@@ -165,7 +179,7 @@ QUESTION: {question}
|
|
165 |
"""
|
166 |
|
167 |
|
168 |
-
def classify_question(question: str, llm
|
169 |
classification_system_prompt_local = classification_system_prompt # Initialize here
|
170 |
if use_default_schema:
|
171 |
classification_system_prompt_local = classification_system_prompt_local.format(
|
@@ -184,7 +198,7 @@ def classify_question(question: str, llm , use_default_schema: bool = True):
|
|
184 |
return response.content.strip().upper()
|
185 |
|
186 |
|
187 |
-
def generate_llm_response(prompt: str, llm: str,
|
188 |
|
189 |
if llm == "gemini":
|
190 |
llm = gemini_llm
|
|
|
6 |
from langchain_community.chat_models import ChatLlamaCpp
|
7 |
from langchain_google_genai import ChatGoogleGenerativeAI
|
8 |
|
9 |
+
from langchain_ollama import ChatOllama
|
10 |
+
|
11 |
local_model = "qwen2.5-coder-3b-instruct-q4_k_m.gguf"
|
12 |
|
13 |
stop = [
|
|
|
33 |
|
34 |
|
35 |
def get_local_llm():
|
36 |
+
llm = ChatLlamaCpp(
|
37 |
temperature=0.0,
|
38 |
model_path=local_model,
|
39 |
n_ctx=10000,
|
|
|
41 |
n_batch=1024,
|
42 |
max_tokens=500,
|
43 |
n_threads=multiprocessing.cpu_count() - 1,
|
44 |
+
top_p=0.95,
|
45 |
verbose=False,
|
46 |
stop=stop,
|
47 |
)
|
48 |
+
# llm = ChatOllama(
|
49 |
+
# model="qwen2.5-coder:3b",
|
50 |
+
# temperature=0.0,
|
51 |
+
# num_predict=150,
|
52 |
+
# top_p=0.95,
|
53 |
+
# stop=stop,
|
54 |
+
# )
|
55 |
+
|
56 |
+
return llm
|
57 |
|
58 |
|
59 |
local_llm = get_local_llm()
|
|
|
61 |
|
62 |
def get_gemini_llm():
|
63 |
gemini = ChatGoogleGenerativeAI(
|
64 |
+
model="gemini-2.0-flash-exp",
|
65 |
temperature=0,
|
66 |
max_tokens=None,
|
67 |
timeout=None,
|
68 |
max_retries=2,
|
69 |
+
top_p=0.95,
|
70 |
)
|
71 |
return gemini
|
72 |
|
73 |
+
|
74 |
gemini_llm = get_gemini_llm()
|
75 |
|
76 |
|
|
|
127 |
| 27442 | hrjNaMt3Wyo5 | toys | 1850 | 37 | 22 | 40 |
|
128 |
|
129 |
Rows: 38279, Columns: 6
|
130 |
+
|
131 |
"""
|
132 |
|
133 |
# Improved SQL generation prompt
|
134 |
sql_system_prompt = """You are a highly skilled natural language to SQL translator. Your goal is to generate accurate SQL queries based on the provided database schema. You must only return the SQL query and no other text or explanations.
|
|
|
135 |
DATABASE SCHEMA:
|
136 |
{db_schema}
|
137 |
+
|
138 |
+
The timestamp columns are of type 'VarChar'. I am using DuckDB to execute the queries.
|
139 |
"""
|
140 |
sql_chat_template = """
|
141 |
|
|
|
179 |
"""
|
180 |
|
181 |
|
182 |
+
def classify_question(question: str, llm, use_default_schema: bool = True):
|
183 |
classification_system_prompt_local = classification_system_prompt # Initialize here
|
184 |
if use_default_schema:
|
185 |
classification_system_prompt_local = classification_system_prompt_local.format(
|
|
|
198 |
return response.content.strip().upper()
|
199 |
|
200 |
|
201 |
+
def generate_llm_response(prompt: str, llm: str, use_default_schema: bool = True):
|
202 |
|
203 |
if llm == "gemini":
|
204 |
llm = gemini_llm
|
🤖SQL_Agent.py
CHANGED
@@ -66,6 +66,7 @@ st.markdown(
|
|
66 |
unsafe_allow_html=True,
|
67 |
)
|
68 |
|
|
|
69 |
with st.popover("Click here to see Database Schema", use_container_width=True):
|
70 |
uploaded_df_schema = st.session_state.get("uploaded_df_schema", False)
|
71 |
|
@@ -155,14 +156,14 @@ with col1:
|
|
155 |
with col2:
|
156 |
llm_option_radio = st.radio(
|
157 |
"Choose LLM Model",
|
158 |
-
["Gemini
|
159 |
captions=[
|
160 |
"Used via API",
|
161 |
"Run Locally on this Server. Extremely Slow because of Free vCPUs",
|
162 |
],
|
163 |
label_visibility="collapsed",
|
164 |
)
|
165 |
-
if llm_option_radio == "Gemini
|
166 |
llm_option = "gemini"
|
167 |
else:
|
168 |
llm_option = "qwen"
|
@@ -236,7 +237,7 @@ if st.session_state.conversation_turns < MAX_TURNS:
|
|
236 |
spinner_text = ""
|
237 |
if llm_option == "gemini":
|
238 |
spinner_text = (
|
239 |
-
"Using Gemini-
|
240 |
)
|
241 |
else:
|
242 |
spinner_text = "I know it is taking a lot of time. To run the model I'm using `Free` small vCPUs provided by `HuggingFace Spaces` for deployment. Thank you so much for your patience😊"
|
@@ -314,3 +315,6 @@ else:
|
|
314 |
st.chat_input(
|
315 |
"Ask me a SQL query question", disabled=True
|
316 |
) # Disable the input field
|
|
|
|
|
|
|
|
66 |
unsafe_allow_html=True,
|
67 |
)
|
68 |
|
69 |
+
|
70 |
with st.popover("Click here to see Database Schema", use_container_width=True):
|
71 |
uploaded_df_schema = st.session_state.get("uploaded_df_schema", False)
|
72 |
|
|
|
156 |
with col2:
|
157 |
llm_option_radio = st.radio(
|
158 |
"Choose LLM Model",
|
159 |
+
["Gemini-2.0-Flash-Exp", "FineTuned Qwen2.5-Coder-3B for SQL"],
|
160 |
captions=[
|
161 |
"Used via API",
|
162 |
"Run Locally on this Server. Extremely Slow because of Free vCPUs",
|
163 |
],
|
164 |
label_visibility="collapsed",
|
165 |
)
|
166 |
+
if llm_option_radio == "Gemini-2.0-Flash-Exp":
|
167 |
llm_option = "gemini"
|
168 |
else:
|
169 |
llm_option = "qwen"
|
|
|
237 |
spinner_text = ""
|
238 |
if llm_option == "gemini":
|
239 |
spinner_text = (
|
240 |
+
"Using Gemini-2.0-Flash-Exp to run your query. Please wait...😊"
|
241 |
)
|
242 |
else:
|
243 |
spinner_text = "I know it is taking a lot of time. To run the model I'm using `Free` small vCPUs provided by `HuggingFace Spaces` for deployment. Thank you so much for your patience😊"
|
|
|
315 |
st.chat_input(
|
316 |
"Ask me a SQL query question", disabled=True
|
317 |
) # Disable the input field
|
318 |
+
|
319 |
+
with st.sidebar:
|
320 |
+
st.caption("Made with ❤️ by @Debopam_Chowdhury")
|