Upload 20 files
- .dockerignore +91 -0
- .gitignore +171 -0
- .streamlit/config.toml +7 -0
- Dockerfile +28 -0
- pages/2 📊Manual SQL Executer.py +129 -0
- pages/3 📂File Upload for SQL.py +90 -0
- pages/4 👨🎓Connect with me.py +276 -0
- requirements.txt +0 -0
- static/database_scema.txt +103 -0
- static/default_questions.txt +173 -0
- static/df_Customers.csv +0 -0
- static/df_OrderItems.csv +0 -0
- static/df_Orders.csv +0 -0
- static/df_Payments.csv +0 -0
- static/df_Products.csv +0 -0
- utils/__init__.py +0 -0
- utils/handle_sql_commands.py +18 -0
- utils/llm_logic.py +230 -0
- utils/sql_utils.py +64 -0
- 🤖SQL_Agent.py +310 -0
.dockerignore
ADDED
@@ -0,0 +1,91 @@
+# Git
+.git
+.gitignore
+.gitattributes
+
+
+# CI
+.codeclimate.yml
+.travis.yml
+.taskcluster.yml
+
+# Docker
+docker-compose.yml
+Dockerfile
+.docker
+.dockerignore
+
+# Byte-compiled / optimized / DLL files
+**/__pycache__/
+**/*.py[cod]
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.cache
+nosetests.xml
+coverage.xml
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Virtual environment
+.env
+.venv/
+venv/
+
+# PyCharm
+.idea
+
+# Python mode for VIM
+.ropeproject
+**/.ropeproject
+
+# Vim swap files
+**/*.swp
+
+# VS Code
+.vscode/
+
+start.sh
.gitignore
ADDED
@@ -0,0 +1,171 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# UV
+# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+#uv.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+# PyPI configuration file
+.pypirc
.streamlit/config.toml
ADDED
@@ -0,0 +1,7 @@
+[server]
+enableStaticServing = true
+maxUploadSize = 30
+
+[client]
+showErrorDetails = true
+
Dockerfile
ADDED
@@ -0,0 +1,28 @@
+FROM python:3.12-slim-bookworm
+
+WORKDIR /app
+
+# Update package index and install necessary tools, including curl
+RUN apt-get update && apt-get install -y curl
+
+# Download the model
+RUN curl -Lo qwen2.5-coder-3b-instruct-q4_k_m.gguf https://huggingface.co/Qwen/Qwen2.5-Coder-3B-Instruct-GGUF/resolve/main/qwen2.5-coder-3b-instruct-q4_k_m.gguf?download=true
+
+# Install build tools required for llama-cpp-python
+RUN apt-get update && apt-get install -y build-essential
+
+# Copy requirements and install dependencies
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Redundant command to invalidate cache
+RUN echo "Intentionally invalidating cache"
+
+# Copy the application code
+COPY . .
+
+# Expose the port that Streamlit will run on
+EXPOSE 8501
+
+# Define the entrypoint to run the Streamlit application
+ENTRYPOINT ["streamlit", "run", "🤖SQL_Agent.py", "--server.port=8501", "--server.address=0.0.0.0"]
pages/2 📊Manual SQL Executer.py
ADDED
@@ -0,0 +1,129 @@
+import streamlit as st
+from utils.sql_utils import load_data, load_defaultdb_schema_text
+from utils.handle_sql_commands import execute_sql_duckdb
+
+st.markdown(
+    """
+    <style>
+    /* Base styles for both themes */
+    .stPageLink {
+        background-image: linear-gradient(to right, #007BFF, #6610F2); /* Gradient background */
+        color: white !important; /* Ensure text is readable on the gradient */
+        padding: 12px 20px !important; /* Slightly larger padding */
+        border-radius: 8px !important; /* More rounded corners */
+        border: none !important; /* Remove default border */
+        text-decoration: none !important;
+        font-weight: 500 !important; /* Slightly lighter font weight */
+        transition: transform 0.2s ease-in-out, box-shadow 0.2s ease-in-out; /* Smooth transitions */
+        box-shadow: 0 2px 5px rgba(0, 0, 0, 0.15); /* Subtle shadow for depth */
+        display: inline-flex;
+        align-items: center;
+        justify-content: center;
+    }
+
+    .stPageLink:hover {
+        transform: scale(1.03); /* Slight scale up on hover */
+        box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2); /* Increased shadow on hover */
+    }
+
+    .stPageLink span { /* Style the label text */
+        margin-left: 5px; /* Space between icon and text */
+    }
+
+    /* Dark theme adjustments (optional, if needed for better contrast) */
+    /* Consider using Streamlit's theme variables if possible for a more robust solution */
+    /* For simplicity, this example uses fixed colors that should work reasonably well */
+    /* [data-theme="dark"] .stPageLink {
+    }
+
+    [data-theme="dark"] .stPageLink:hover {
+    } */
+    </style>
+    """,
+    unsafe_allow_html=True,
+)
+
+default_dfs = load_data()
+
+if "uploaded_dataframes" not in st.session_state:
+    st.session_state.uploaded_dataframes = {}
+
+uploaded_dataframes = st.session_state.uploaded_dataframes
+
+
+with st.popover("Click here to see Database Schema", use_container_width=True):
+    uploaded_df_schema = st.session_state.get("uploaded_df_schema", False)
+
+    choice = st.segmented_control(
+        "Choose",
+        ["Default DB", "Uploaded Files"],
+        label_visibility="collapsed",
+        disabled=uploaded_df_schema == False,
+        default="Default DB" if uploaded_df_schema == False else "Uploaded Files",
+    )
+
+    if uploaded_df_schema is False:
+        st.markdown("""> If you want to use your own files(csv/xls), click here - """)
+        st.page_link(
+            page="pages/3 📂File Upload for SQL.py",
+            label="Upload your own CSV or Excel files",
+            icon="📜",
+        )
+        schema = load_defaultdb_schema_text()
+        st.markdown(schema, unsafe_allow_html=True)
+    elif choice == "Default DB":
+        schema = load_defaultdb_schema_text()
+        st.markdown(schema, unsafe_allow_html=True)
+    else:
+        pretty_schema, markdown = st.tabs(["Schema", "Copy Schema in Markdown"])
+        with pretty_schema:
+            st.info(
+                "You can copy this schema, and give it to any state of the art LLM models like (Gemini /ChatGPT /Claude etc) to cross check your answers.\n You can run the queries directly here, by using ***Manual Query Executer*** in the sidebar and download your results 😊",
+                icon="ℹ️",
+            )
+            st.markdown(uploaded_df_schema, unsafe_allow_html=True)
+        with markdown:
+            st.info(
+                "You can copy this schema, and give it to any state of the art LLM models like (Gemini /ChatGPT /Claude etc) to cross check your answers.\n You can run the queries directly here, by using ***Manual Query Executer*** in the sidebar and download your results 😊",
+                icon="ℹ️",
+            )
+            st.markdown(f"```\n{uploaded_df_schema}\n```")
+
+
+data_source = None
+if uploaded_dataframes is None or len(uploaded_dataframes) == 0:
+    data_source = "Default Database"
+    col1, col2 = st.columns([4, 1])
+    with col1:
+        st.caption(
+            "Use this dialog to execute SQL commands on the Default Database. To use your own Excel/CSV files"
+        )
+    with col2:
+        st.page_link(page="pages/3 📂File Upload for SQL.py", label="Click Here")
+else:
+    data_source = st.radio("Select Data Source", ["Default Database", "Uploaded Files"])
+
+if data_source == "Default Database":
+    dataframes = default_dfs
+    if uploaded_dataframes is not None and len(uploaded_dataframes) > 0:
+        st.caption(
+            "Use this dialog to execute SQL commands on the Default Database. To use your own Excel/CSV files"
+        )
+else:
+    dataframes = uploaded_dataframes
+    st.caption("Use this dialog to execute SQL commands on Your Uploaded Files")
+
+sql_command = st.text_area("Enter your SQL command here:")
+if st.button("Execute"):
+    df = execute_sql_duckdb(sql_command, dataframes)
+    if df is not None:
+        st.dataframe(df)
+        st.info(f"Rows x Columns: {df.shape[0]} x {df.shape[1]}")
+        st.subheader("Data Description:")
+        st.markdown(df.describe().T.to_markdown())
+        st.subheader("Data Types:")
+        st.write(df.dtypes)
+    else:
+        st.error(
+            "An error occurred while executing the SQL command. Please cross check your command as well as tables & columns names."
+        )
pages/3 📂File Upload for SQL.py
ADDED
@@ -0,0 +1,90 @@
+import streamlit as st
+import pandas as pd
+from utils.sql_utils import create_schema
+
+MAX_FILES = 5
+
+if "uploaded_dataframes" not in st.session_state:
+    st.session_state.uploaded_dataframes = {}
+
+st.header("Upload your CSV or Excel files")
+st.caption("Maximum 5 files can be uploaded.")
+
+num_uploaded_files = len(st.session_state.uploaded_dataframes)
+
+disabled = num_uploaded_files >= MAX_FILES
+
+uploaded_files = st.file_uploader(
+    "Choose files",
+    type=["csv", "xlsx"],
+    accept_multiple_files=True,
+    disabled=disabled,
+)
+
+if uploaded_files and not disabled:
+    uploaded_count = 0
+    for uploaded_file in uploaded_files:
+        if len(st.session_state.uploaded_dataframes) < MAX_FILES:
+            df = None
+            try:
+                if uploaded_file.name.endswith(".csv"):
+                    df = pd.read_csv(uploaded_file)
+                elif uploaded_file.name.endswith(".xlsx"):
+                    df = pd.read_excel(uploaded_file)
+
+                if uploaded_file.name in st.session_state.uploaded_dataframes:
+                    st.toast(
+                        f"File {uploaded_file.name} already uploaded. Skipping...",
+                        icon="⚠️",
+                    )
+                else:
+                    key = (
+                        uploaded_file.name.lower()
+                        .strip()
+                        .replace(" ", "_")
+                        .replace(".csv", "")
+                        .replace(".xlsx", "")
+                    )
+                    st.session_state.uploaded_dataframes[key] = df
+                    uploaded_count += 1
+                    print(f"Uploaded file: {str(uploaded_file.name)}")
+            except Exception as e:
+                st.error(f"Error reading file {uploaded_file.name}: {e}")
+        else:
+            st.warning(
+                f"Maximum number of files({MAX_FILES}) reached. Cannot upload more files."
+            )
+            break
+    if uploaded_count > 0:
+        st.success(
+            f"{uploaded_count} File(s) uploaded successfully. Total {len(st.session_state.uploaded_dataframes)} File(s)"
+        )
+
+
+if len(st.session_state.uploaded_dataframes) >= MAX_FILES:
+    st.warning(
+        f"Maximum number of files({MAX_FILES}) reached. Cannot upload more files."
+    )
+
+
+dataframes = st.session_state.uploaded_dataframes
+
+if dataframes:
+    st.header("Schema of Uploaded Files📚")
+    schema = create_schema(dataframes)
+    if "uploaded_df_schema" not in st.session_state:
+        st.session_state.uploaded_df_schema = schema
+    pretty_schema, markdown = st.tabs(["Schema", "Copy Schema in Markdown"])
+    with pretty_schema:
+        st.info(
+            "You can copy this schema, and give it to any state of the art LLM models like (Gemini /ChatGPT /Claude etc) to cross check your answers.\n You can run the queries directly here, by using ***Manual Query Executer*** in the sidebar and download your results 😊",
+            icon="ℹ️",
+        )
+        st.markdown(schema, unsafe_allow_html=True)
+    with markdown:
+        st.info(
+            "You can copy this schema, and give it to any state of the art LLM models like (Gemini /ChatGPT /Claude etc) to cross check your answers.\n You can run the queries directly here, by using ***Manual Query Executer*** in the sidebar and download your results 😊",
+            icon="ℹ️",
+        )
+        st.markdown(f"```\n{schema}\n```")
+
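Note: the key normalization above is what turns an uploaded filename into the SQL table name the other pages query. A minimal standalone sketch of that chain (the sample filenames are hypothetical):

```python
# Standalone sketch of the upload-key normalization used above.
# Sample filenames are hypothetical; the chain mirrors the page's logic.
def normalize_key(filename: str) -> str:
    return (
        filename.lower()
        .strip()
        .replace(" ", "_")
        .replace(".csv", "")
        .replace(".xlsx", "")
    )

print(normalize_key("My Orders.CSV"))  # -> my_orders
print(normalize_key("Q1 Sales.xlsx"))  # -> q1_sales
```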
pages/4 👨🎓Connect with me.py
ADDED
@@ -0,0 +1,276 @@
+import streamlit as st
+
+st.markdown(
+    """
+    <style>
+    body {
+        background-color: #1e1e1e;
+        color: #f0f0f0;
+        font-family: sans-serif;
+    }
+    h1, h2, h3, h4, h5, h6 {
+        color: #f0f0f0;
+    }
+    a {
+        color: #89b4fa;
+        text-decoration: none;
+    }
+    a:hover {
+        text-decoration: underline;
+    }
+    .social-link-button {
+        display: inline-block;
+        padding: 8px 16px;
+        margin: 4px;
+        background-color: #333;
+        color: #fff;
+        border-radius: 5px;
+        text-decoration: none;
+    }
+    .social-link-button:hover {
+        background-color: #555;
+    }
+    .project-card {
+        background-color: #282828;
+        border: 1px solid #444;
+    }
+    .skills, .experience, .projects, .volunteering, .achievements, .certifications, .education {
+        background-color: #282828;
+        border: 1px solid #444;
+    }
+    .status.deployed {
+        color: #a3be8c;
+    }
+    .status.ongoing {
+        color: #f9bb6d;
+    }
+    footer {
+        background-color: #333;
+        color: #f0f0f0;
+    }
+    </style>
+    """,
+    unsafe_allow_html=True,
+)
+
+st.html(
+    """<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Debopam Chowdhury (Param)</title>
+</head>
+<body>
+    <header style="text-align: center; padding: 2rem 0; border-bottom: 1px solid #444;">
+        <img src="https://tinyurl.com/ysa6yekw" alt="Debopam Chowdhury (Param)" class="profile-image" style="width: 150px; height: 150px; border-radius: 50%; object-fit: cover; margin-bottom: 1rem;">
+        <h1>Debopam Chowdhury <span class="nickname" style="font-weight: normal; color: #999;">(Param)</span></h1>
+        <p class="tagline" style="color: #ccc; margin-bottom: 1rem;">Creator of <a href="http://aigymbuddy.in" target="_blank" style="color: #89b4fa;">AiGymBuddy.in</a> | Machine Learning | Deep Learning | Flutter | Math | MLops | TensorFlow | FastAPI</p>
+        <p class="pronouns" style="font-size: 0.9rem; color: #999; margin-bottom: 1rem;">(He/Him)</p>
+        <div class="social-links">
+            <a href="https://www.linkedin.com/in/debopam-chowdhury-param-600619229/" target="_blank" class="social-link-button">LinkedIn</a>
+            <a href="https://www.youtube.com/@DCparam/featured" target="_blank" class="social-link-button">YouTube</a>
+            <a href="https://github.com/DebopamParam" target="_blank" class="social-link-button">GitHub</a>
+            <a href="https://www.instagram.com/debopam_param.ai/" target="_blank" class="social-link-button">Instagram</a>
+        </div>
+    </header>
+
+    <section class="about-me" style="padding: 2rem; margin-bottom: 1.5rem; border-radius: 8px;">
+        <h2>About Me</h2>
+        <p style="line-height: 1.7;">Passionate about Machine Learning, Computer Science and Mathematics, with a strong grip on Machine & Deep Learning fundamentals, Computer Networks, OS and DSA. Solved 100+ problems on LeetCode. I have a keen interest in learning Mathematics, Deep Learning and Statistics, and I like to understand things in a deeper way.</p>
+    </section>
+
+    <section class="skills" style="padding: 2rem; margin-bottom: 1.5rem; border-radius: 8px;">
+        <h2>Skills</h2>
+        <div class="skills-grid" style="display: grid; grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); gap: 1.5rem;">
+            <div class="skill-category">
+                <h3>Technical</h3>
+                <ul style="padding-left: 0; list-style: none;">
+                    <li style="margin-bottom: 0.5rem; font-size: 0.95rem;"><strong>Programming:</strong> Python, Java, Dart, VS Code, Git, GitHub, Jupyter Notebooks, CI/CD</li>
+                    <li style="margin-bottom: 0.5rem; font-size: 0.95rem;"><strong>Machine Learning & Deep Learning:</strong> TensorFlow, PyTorch, Scikit-learn, Keras, Supervised Learning, Unsupervised Learning, Neural Networks, Sequence Modeling, Convolution, Attention Mechanisms, Transformer, GPT, BERT, Hyperparameter Optimization</li>
+                    <li style="margin-bottom: 0.5rem; font-size: 0.95rem;"><strong>Data Handling:</strong> Pandas, NumPy, Data Manipulation, Data Preparation, SQL, Pyspark, NoSQL</li>
+                    <li style="margin-bottom: 0.5rem; font-size: 0.95rem;"><strong>Cloud & DevOps:</strong> Docker, AWS, Cloud-AI, FastAPI</li>
+                    <li style="margin-bottom: 0.5rem; font-size: 0.95rem;"><strong>Mathematics:</strong> Linear Algebra, Probability, Statistics, Boosting Methods</li>
+                    <li style="margin-bottom: 0.5rem; font-size: 0.95rem;"><strong>Frameworks & Tools:</strong> Flutter, Firebase, Deep Learning Frameworks, Model Training & Optimization, Version Control, LLM FineTuning</li>
+                    <li style="margin-bottom: 0.5rem; font-size: 0.95rem;"><strong>Generative AI:</strong> Vector Embeddings, Indexing, Chunking, RAG pipelines, LlamaIndex, LangChain, Colpali, Byaldi, Chroma DB</li>
+                </ul>
+            </div>
+        </div>
+    </section>
+
+    <section class="experience" style="padding: 2rem; margin-bottom: 1.5rem; border-radius: 8px;">
+        <h2>Experience</h2>
+        <div class="experience-item" style="margin-bottom: 1.5rem;">
+            <h3>GENERATIVE AI ENGINEER (Contract - Remote)</h3>
+            <p class="company" style="font-weight: bold; color: #bbb; margin-bottom: 0.25rem;">Private Client | Sydney, Australia</p>
+            <p class="duration" style="font-size: 0.9rem; color: #999; margin-bottom: 0.75rem;">October - November 2024</p>
+            <ul style="margin-top: 0.5rem; padding-left: 20px;">
+                <li style="margin-bottom: 0.5rem; font-size: 0.95rem;">Developed secure, on-premise solutions for private complex data (complex PDFs with images and charts) Q&A and knowledge retrieval.</li>
+                <li style="margin-bottom: 0.5rem; font-size: 0.95rem;">Built data ingestion pipeline (100-500 documents daily) with vision embeddings and task scheduler updates.</li>
+                <li style="margin-bottom: 0.5rem; font-size: 0.95rem;">Built multimodal RAG pipelines (Byaldi, Colqwen2, Pixtral 12B) optimized for diverse document types (70% accuracy improvement).</li>
+                <li style="margin-bottom: 0.5rem; font-size: 0.95rem;">Containerized the application with Docker for deployment flexibility.</li>
+                <li style="margin-bottom: 0.5rem; font-size: 0.95rem;"><strong>Open Source Contribution - Byaldi - 575✩:</strong> made while working on it</li>
+                <li style="margin-bottom: 0.5rem; font-size: 0.95rem;"><strong>Technologies:</strong> NLP, Vision Embeddings, Local Multimodal RAG, LangChain, Pixtral 12B, Col-Qwen2, Byaldi</li>
+            </ul>
+        </div>
+    </section>
+
+    <section class="projects" style="padding: 2rem; margin-bottom: 1.5rem; border-radius: 8px;">
+        <h2>Projects</h2>
+        <div class="project-list" style="display: grid; grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); gap: 1.5rem;">
+            <div class="project-card" style="border: 1px solid #444; padding: 1.5rem; border-radius: 8px;">
+                <h3 style="margin-top: 0; margin-bottom: 0.75rem;">LLM-finetuning and SQL Agent with Auto-Execution via DuckDB, Schema Retriever from CSVs, and Manual SQL Executer</h3>
+                <p class="status ongoing" style="font-size: 0.85rem; font-weight: bold; margin-top: 0.5rem; color: #f9bb6d;">The current web app you are using</p>
+                <p class="status deployed" style="font-size: 0.85rem; font-weight: bold; margin-top: 0.5rem; color: #a3be8c;">Deployed <i class="fas fa-check-circle"></i></p>
+            </div>
+
+            <div class="project-card" style="border: 1px solid #444; padding: 1.5rem; border-radius: 8px;">
+                <h3 style="margin-top: 0; margin-bottom: 0.75rem;">Deep Learning Based Recommendation System</h3>
+                <p style="margin-bottom: 0.75rem; font-size: 0.95rem;">Initially, I aimed to build a recommendation system from scratch using TensorFlow Recommenders (TFRS) on the MovieLens 1M dataset. This involved creating user and movie embeddings with a candidate generation and ranking model.</p>
+
+                <p style="margin-bottom: 0.75rem; font-size: 0.95rem;">However, this TFRS approach proved too resource-intensive and time-consuming for effective training and testing. Crucially, the initial results weren't satisfactory for deployment.</p>
+
+                <p style="margin-bottom: 0.75rem; font-size: 0.95rem;">Therefore, for the deployed web application, I switched to pre-trained models (BGE embeddings and re-ranking). This offered:</p>
+                <ul style="padding-left: 20px;">
+                    <li style="margin-bottom: 0.5rem; font-size: 0.95rem;"><b>Better Performance:</b> More relevant recommendations.</li>
+                    <li style="margin-bottom: 0.5rem; font-size: 0.95rem;"><b>Reduced Resources/Time:</b> Faster training and deployment.</li>
+                </ul>
+
+                <p style="margin-bottom: 0.75rem; font-size: 0.95rem;">While the TFRS code is also included in the web app, the pre-trained model approach was chosen for its superior results and efficiency in a deployment setting. A future improvement could be fine-tuning the pre-trained models for even better performance.</p>
+                <p style="margin-bottom: 0.75rem; font-size: 0.95rem;"><strong>Technologies used:</strong> TensorFlow Recommenders, Scann, Vector DB, Distributed GPU Training, Langchain, Streamlit, BAAI BGE Models</p>
+                <div class="project-links">
+                    <a href="https://debopam-movie-recommendation-system.streamlit.app/" target="_blank" style="display: inline-block; margin-right: 1rem; font-size: 0.9rem;">Try it out APP Live</a>
+                </div>
+                <p class="status deployed" style="font-size: 0.85rem; font-weight: bold; margin-top: 0.5rem; color: #a3be8c;">Deployed <i class="fas fa-check-circle"></i></p>
+            </div>
+
+            <div class="project-card" style="border: 1px solid #444; padding: 1.5rem; border-radius: 8px;">
+                <h3 style="margin-top: 0; margin-bottom: 0.75rem;">IBM EMPLOYEE ATTRITION PREDICTOR (End to End with Deployment to AWS, FastAPI with Proxy Server)</h3>
+                <p style="margin-bottom: 0.75rem; font-size: 0.95rem;"><strong>Objective:</strong> Predicted employee attrition with 85% AUC to improve employee retention and business performance.</p>
+                <p style="margin-bottom: 0.75rem; font-size: 0.95rem;"><strong>Model Development:</strong> Hyperparameter-optimized Multi-Layer Perceptron, XGBoost, and Logistic Regression with both inference and training pipelines</p>
+                <p style="margin-bottom: 0.75rem; font-size: 0.95rem;"><strong>Backend:</strong> Developed a FastAPI backend for real-time predictions, using Pydantic for schema validation of incoming and outgoing requests</p>
+                <p style="margin-bottom: 0.75rem; font-size: 0.95rem;"><strong>Deployment:</strong> Containerized with Docker, deployed on AWS EC2, managed via AWS ECR.</p>
+                <p style="margin-bottom: 0.75rem; font-size: 0.95rem;"><strong>CI/CD:</strong> Set up an automated CI/CD pipeline using GitHub Actions for seamless updates.</p>
+                <p style="margin-bottom: 0.75rem; font-size: 0.95rem;"><strong>Web Application:</strong> Built a user-friendly interface using Flutter Web for real-time interaction.</p>
+                <p style="margin-bottom: 0.75rem; font-size: 0.95rem;"><strong>Security:</strong> Handled HTTPS requests using Caddy as a reverse proxy server</p>
+                <p style="margin-bottom: 0.75rem; font-size: 0.95rem;"><strong>Technologies used:</strong> TensorFlow, AWS, Docker, FastAPI, CI/CD Pipeline, Multi-Layer Perceptron, Neural Network, XGBoost, Logistic Regression, Hyperparameter Tuned Models, GitHub Actions, Pydantic, Flutter Web, Reverse-Proxy-Server: Caddy</p>
+                <div class="project-links">
+                    <a href="https://www.linkedin.com/posts/debopam-chowdhury-param-600619229_machinelearning-deeplearning-aws-activity-7244476917884608512-DfbD/?utm_source=share&utm_medium=member_desktop" target="_blank" style="display: inline-block; margin-right: 1rem; font-size: 0.9rem;">Explanation & Live Demo</a>
+                    <a href="http://www.debopamchowdhury.works" target="_blank" style="display: inline-block; margin-right: 1rem; font-size: 0.9rem;">Click to try out Live</a>
+                </div>
+                <p class="status deployed" style="font-size: 0.85rem; font-weight: bold; margin-top: 0.5rem; color: #a3be8c;">Deployed <i class="fas fa-check-circle"></i></p>
+            </div>
+
+            <div class="project-card" style="border: 1px solid #444; padding: 1.5rem; border-radius: 8px;">
+                <h3 style="margin-top: 0; margin-bottom: 0.75rem;">AI GYM BUDDY (Langchain | Flutter | Riverpod | Gemini)</h3>
+                <p style="margin-bottom: 0.75rem; font-size: 0.95rem;">Personalized AI-Driven Workouts with Smart Equipment Detection and Progress Tracking</p>
+                <p style="margin-bottom: 0.75rem; font-size: 0.95rem;"><strong>Features:</strong> AI Instrument Detection (Camera or Gallery), Exercises based on Available Equipment, Time, Preferred Muscle Groups & Custom requests, Dynamic Video Tutorial Finder for each exercise, Super personalized AI generated routine, Workout History Tracker, Easy SignUp/Login with Google OAuth</p>
+                <p style="margin-bottom: 0.75rem; font-size: 0.95rem;"><strong>Technologies used:</strong> Dart, flutter, firebase, gemini 1.5 flash, riverpod, langchain, fastapi, google oauth</p>
+                <p style="margin-bottom: 0.75rem; font-size: 0.95rem;"><strong>Licenses:</strong> The code of this app/website is written from scratch and I hold all the rights over distribution</p>
+                <div class="project-links">
+                    <a href="https://www.youtube.com/shorts/0ZR0IWiZJQE" target="_blank" style="display: inline-block; margin-right: 1rem; font-size: 0.9rem;">1 Minute Video Demo</a>
+                    <a href="https://play.google.com/store/apps/details?id=com.aigymbuddy.me&hl=en" target="_blank" style="display: inline-block; margin-right: 1rem; font-size: 0.9rem;">
+                        <img src="https://static-00.iconduck.com/assets.00/google-play-icon-2048x2048-487quz63.png" alt="Google Play Store" class="playstore-icon" style="width: 16px; height: 16px; vertical-align: middle; margin-right: 0.25rem;"> Google Play Store (Android)
+                    </a>
+                    <a href="http://www.aigymbuddy.in" target="_blank" style="display: inline-block; margin-right: 1rem; font-size: 0.9rem;">Prototype for WEB www.aigymbuddy.in</a>
+                </div>
+                <p class="status deployed" style="font-size: 0.85rem; font-weight: bold; margin-top: 0.5rem; color: #a3be8c;">Deployed <i class="fas fa-check-circle"></i></p>
+            </div>
+
+            <div class="project-card" style="border: 1px solid #444; padding: 1.5rem; border-radius: 8px;">
+                <h3 style="margin-top: 0; margin-bottom: 0.75rem;">Non-Sequential Breast Cancer Classification System</h3>
+                <p style="margin-bottom: 0.75rem; font-size: 0.95rem;"><strong>Multi-Modal Cancer Detection:</strong> Developed a novel multi-output deep learning model for breast cancer detection, predicting cancer presence, invasiveness, and difficult-negative case status. The model incorporates both mammogram images and tabular clinical data, leveraging a non-sequential architecture to process distinct data modalities.</p>
+                <p style="margin-bottom: 0.75rem; font-size: 0.95rem;"><strong>Fine-Tuned Image Feature Extraction:</strong> Utilized a pre-trained EfficientNetV2B3 model for image feature extraction, fine-tuning layers from block 6 onwards to enhance its applicability to the specific task, thus improving the quality of learned representations and potentially making the model more robust and accurate.</p>
+                <p style="margin-bottom: 0.75rem; font-size: 0.95rem;"><strong>Distributed Training:</strong> Accelerated model training through distributed training using TensorFlow's MirroredStrategy on 2xT4 GPUs for 9 hours on Kaggle, demonstrating proficiency in optimizing model training with limited computational resources.</p>
+                <p style="margin-bottom: 0.75rem; font-size: 0.95rem;"><strong>Technologies used:</strong> TensorFlow, Transfer Learning, EfficientNetV2, Fused MB-CNN</p>
+                <div class="project-links">
+                    <a href="https://debopamparam-bcd-inference-vvyb1v.streamlit.app/" target="_blank" style="display: inline-block; margin-right: 1rem; font-size: 0.9rem;">Live Webapp + Architecture + Training Code + Evaluation Metrics</a>
+                </div>
+                <p class="status deployed" style="font-size: 0.85rem; font-weight: bold; margin-top: 0.5rem; color: #a3be8c;">Deployed <i class="fas fa-check-circle"></i></p>
+            </div>
+
+            <div class="project-card" style="border: 1px solid #444; padding: 1.5rem; border-radius: 8px;">
+                <h3 style="margin-top: 0; margin-bottom: 0.75rem;">Image Entity Extraction with Qwen2 VL: Large-Scale Inference</h3>
+                <p style="margin-bottom: 0.75rem; font-size: 0.95rem;"><strong>Problem Statement:</strong> E-commerce and healthcare industries struggle to efficiently extract product details (weight, volume, dimensions) from images at scale.</p>
+                <p style="margin-bottom: 0.75rem; font-size: 0.95rem;"><strong>Action:</strong> Developed a large-scale image-to-text inference pipeline using Qwen2 VL: 2B, incorporating image preprocessing, Regex, and parallel processing. Processed 84,000 of 131,000 test images.</p>
+                <p style="margin-bottom: 0.75rem; font-size: 0.95rem;"><strong>Result:</strong> Successfully extracted product values from a significant portion of the dataset. Our team of four ranked 172nd out of ~75,000 in the Amazon ML Challenge with F1-Score=0.47, demonstrating the solution's potential for automated product information extraction.</p>
+                <p style="margin-bottom: 0.75rem; font-size: 0.95rem;"><strong>Technologies used:</strong> Qwen2 VL, Python, Regex, Parallel Processing</p>
+                <div class="project-links">
+                    <a href="https://colab.research.google.com/drive/1V5F1XMlYNHzv-hA9xmIJ-Jx5vuD0fKAR?usp=sharing" target="_blank" style="display: inline-block; margin-right: 1rem; font-size: 0.9rem;">Click Here to see the code</a>
+                </div>
+            </div>
+
+            <div class="project-card" style="border: 1px solid #444; padding: 1.5rem; border-radius: 8px;">
+                <h3 style="margin-top: 0; margin-bottom: 0.75rem;">LLM based ATS System using VertexAI Embedding</h3>
+                <p style="margin-bottom: 0.75rem; font-size: 0.95rem;"><strong>Technologies used:</strong> Langchain, VertexAI Embedding, StreamLit, PostGresVector</p>
+                <p class="status ongoing" style="font-size: 0.85rem; font-weight: bold; margin-top: 0.5rem; color: #f9bb6d;">Ongoing</p>
+            </div>
+        </div>
+    </section>
+
+    <section class="volunteering" style="padding: 2rem; margin-bottom: 1.5rem; border-radius: 8px;">
+        <h2>Volunteering</h2>
+        <div class="volunteering-item" style="margin-bottom: 1rem;">
+            <h3>Git/GitHub Instructor (Volunteer)</h3>
+            <p style="color: #ccc;">Carried out sessions to teach juniors the fundamentals of Git and GitHub, covering version control, collaboration, and best practices.</p>
+        </div>
+        <div class="volunteering-item" style="margin-bottom: 1rem;">
+            <h3>Event Coordinator</h3>
+            <p style="color: #ccc;">Acharya Technical Club - Steigen</p>
+        </div>
+    </section>
+
+    <section class="achievements" style="padding: 2rem; margin-bottom: 1.5rem; border-radius: 8px;">
+        <h2>Achievements</h2>
+        <ul style="padding-left: 20px;">
+            <li style="margin-bottom: 0.75rem; font-size: 0.95rem;">Secured rank 172 out of 75000 participants in AMAZON ML Challenge Hackathon 2024.</li>
+            <li style="margin-bottom: 0.75rem; font-size: 0.95rem;">2nd place out of 60 in the TechnioD Hackathon.</li>
+            <li style="margin-bottom: 0.75rem; font-size: 0.95rem;"><strong><a href="https://github.com/AnswerDotAI/byaldi/pull/50" target="_blank" style="color: #89b4fa;">Open-Source Contribution - Byaldi - 575☆</a></strong>
+                <ul style="padding-left: 20px;">
+                    <li style="margin-bottom: 0.5rem; font-size: 0.95rem;">Fix langchain integration not present in pypi tar & whl -- pyproject.toml</li>
+                </ul>
+            </li>
+            <li style="margin-bottom: 0.75rem; font-size: 0.95rem;"><strong>NON TECHNICAL:</strong>
+                <ul style="padding-left: 20px;">
+                    <li style="margin-bottom: 0.5rem; font-size: 0.95rem;">Performed in IIT BOMBAY'S Mood Indigo Bengaluru Event - Finalist.</li>
+                </ul>
+            </li>
+        </ul>
+    </section>
+
+    <section class="certifications" style="padding: 2rem; margin-bottom: 1.5rem; border-radius: 8px;">
+        <h2>Certifications</h2>
+        <ul style="padding-left: 20px;">
+            <li style="margin-bottom: 0.75rem; font-size: 0.95rem;"><strong>COURSERA:</strong> Advanced Learning Algorithms | <a href="https://coursera.org/share/d540318c8cb4a7e802d8c4964a471d34" target="_blank" style="color: #89b4fa;">View Certificate</a></li>
+            <li style="margin-bottom: 0.75rem; font-size: 0.95rem;"><strong>COURSERA:</strong> Supervised Machine Learning: Regression and Classification | <a href="https://coursera.org/share/d540318c8cb4a7e802d8c4964a471d34" target="_blank" style="color: #89b4fa;">View Certificate</a></li>
+        </ul>
+    </section>
+
+    <section class="education" style="padding: 2rem; margin-bottom: 1.5rem; border-radius: 8px;">
+        <h2>Education</h2>
+        <div class="education-item" style="margin-bottom: 0.75rem; font-size: 0.95rem;">
+            <h3>BE in Information Science</h3>
+            <p class="institution" style="color: #ccc;">Acharya Institute of Technology, Bangalore</p>
+            <p class="duration" style="font-size: 0.9rem; color: #999;">2021-2025</p>
+            <p>CGPA-8.12</p>
+        </div>
+        <div class="education-item" style="margin-bottom: 0.75rem; font-size: 0.95rem;">
+            <h3>Higher Secondary Education</h3>
+            <p class="institution" style="color: #ccc;">Kalyani Public School, Barasat, Kolkata</p>
+            <p class="duration" style="font-size: 0.9rem; color: #999;">2021</p>
+            <p>77% (Auto Pass Covid Batch)</p>
+        </div>
+        <div class="education-item" style="margin-bottom: 0.75rem; font-size: 0.95rem;">
+            <h3>Secondary Education</h3>
+            <p class="institution" style="color: #ccc;">Sacred Heart Day High School, Kolkata</p>
+            <p class="duration" style="font-size: 0.9rem; color: #999;">2019</p>
+            <p>90%</p>
+        </div>
+    </section>
+
+    <footer style="text-align: center; padding: 1rem 0; font-size: 0.9rem;">
+        <p>© 2024 Debopam Chowdhury (Param)</p>
+    </footer>
+</body>
+</html>"""
+)
requirements.txt
ADDED
Binary file (180 Bytes).
static/database_scema.txt
ADDED
@@ -0,0 +1,103 @@
+## The Tables in the Default Database are:
+`customers`, `order_items`, `orders`, `payments`, and `products`.
+
+### **customers**
+| | customer_id | customer_zip_code_prefix | customer_city | customer_state |
+|------:|:--------------|---------------------------:|:----------------|:-----------------|
+| 21921 | 0tgYlOTGgpO6 | 79230 | russas | CE |
+| 9748 | jGhRQF3CIew4 | 81460 | joao monlevade | MG |
+| 22679 | 1UutQTIhBvcP | 94480 | pelotas | RS |
+
+Rows: 38279, Columns: 4
+
+```sql
+CREATE TABLE customers (
+    customer_id VARCHAR(255) PRIMARY KEY,
+    customer_zip_code_prefix INT,
+    customer_city VARCHAR(255),
+    customer_state VARCHAR(2)
+);
+```
+
+### **order_items**
+| | order_id | product_id | seller_id | price | shipping_charges |
+|------:|:-------------|:-------------|:-------------|--------:|-------------------:|
+| 19729 | PDEzZdebLSn3 | aBpYjaBcwz6e | bzfcwRPnZzVO | 55.83 | 27.8 |
+| 6001 | R7bIPjjYqlHP | ZM2JJXV5m9hl | Ivbw25fb5t2Z | 100 | 42.05 |
+| 282 | Biqo21nETaMO | XqmdGKRbTetH | P2nCHWuo0HC0 | 113.49 | 91.32 |
+
+Rows: 38279, Columns: 5
+
+```sql
+CREATE TABLE order_items (
+    order_id VARCHAR(255),
+    product_id VARCHAR(255),
+    seller_id VARCHAR(255),
+    price DECIMAL(10, 2),
+    shipping_charges DECIMAL(10, 2),
+    PRIMARY KEY (order_id, product_id),
+    FOREIGN KEY (order_id) REFERENCES orders(order_id),
+    FOREIGN KEY (product_id) REFERENCES products(product_id)
+);
+```
+
+### **orders**
+
+| | order_id | customer_id | order_purchase_timestamp | order_approved_at |
+|------:|:-------------|:--------------|:---------------------------|:--------------------|
+| 7294 | PMqwQc01iDTJ | c9ueC6k6V5WS | 2018-06-19 21:23:48 | 2018-06-20 08:38:30 |
+| 13800 | P4l8R2Qat5n7 | ovKkGaXi5TmN | 2018-01-05 08:26:03 | 2018-01-05 08:47:20 |
+| 17679 | NxIseZjAQCdC | o9qzmUQVJOxA | 2018-01-28 23:46:53 | 2018-01-28 23:58:31 |
+
+Rows: 38279, Columns: 4
+
+```sql
+CREATE TABLE orders (
+    order_id VARCHAR(255) PRIMARY KEY,
+    customer_id VARCHAR(255),
+    order_purchase_timestamp TIMESTAMP,
+    order_approved_at TIMESTAMP,
+    FOREIGN KEY (customer_id) REFERENCES customers(customer_id)
+);
+```
+
+### **payments**
+| | order_id | payment_sequential | payment_type | payment_installments | payment_value |
+|------:|:-------------|---------------------:|:---------------|-----------------------:|----------------:|
+| 35526 | cQXl0pQtiMad | 1 | wallet | 1 | 172.58 |
+| 35799 | olImD2k316Gz | 1 | credit_card | 3 | 16.78 |
+| 13278 | G9MJYXXtPZSz | 1 | credit_card | 10 | 221.86 |
+
+Rows: 38279, Columns: 5
+
+```sql
+CREATE TABLE payments (
+    order_id VARCHAR(255),
+    payment_sequential INT,
+    payment_type VARCHAR(50),
+    payment_installments INT,
+    payment_value DECIMAL(10, 2),
+    PRIMARY KEY (order_id, payment_sequential),
+    FOREIGN KEY (order_id) REFERENCES orders(order_id)
+);
+```
+
+### **products**
+| | product_id | product_category_name | product_weight_g | product_length_cm | product_height_cm | product_width_cm |
+|------:|:-------------|:------------------------|-------------------:|--------------------:|--------------------:|-------------------:|
+| 18191 | hpiXwRzTkhkL | bed_bath_table | 1150 | 40 | 9 | 50 |
+| 2202 | iPoRkE7dkmlc | toys | 15800 | 38 | 62 | 57 |
+| 27442 | hrjNaMt3Wyo5 | toys | 1850 | 37 | 22 | 40 |
+
+Rows: 38279, Columns: 6
+
+```sql
+CREATE TABLE products (
+    product_id VARCHAR(255) PRIMARY KEY,
+    product_category_name VARCHAR(255),
+    product_weight_g INT,
+    product_length_cm INT,
+    product_height_cm INT,
+    product_width_cm INT
+);
+```
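Note: this schema describes the bundled static/df_*.csv files, so it can be exercised directly with DuckDB. A minimal sketch, assuming the CSV headers match the column names shown above:

```python
# Minimal sketch: run one of the documented queries over the bundled CSV.
# Assumes static/df_Customers.csv has the columns listed in the schema above.
import duckdb
import pandas as pd

customers = pd.read_csv("static/df_Customers.csv")

con = duckdb.connect(database=":memory:")
con.register("customers", customers)  # expose the DataFrame as a SQL table

print(
    con.execute(
        "SELECT customer_city, COUNT(*) AS customer_count "
        "FROM customers GROUP BY customer_city "
        "ORDER BY customer_count DESC LIMIT 5"
    ).fetchdf()
)
con.close()
```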
static/default_questions.txt
ADDED
@@ -0,0 +1,173 @@
+These questions are generated by ChatGpt 4o. Copy and paste the questions in the ChatBox - run it - get the results - compare the results yourself.
+> You can also upload your own files, to get your schemas. You can then use these schemas to cross check your answers from ChatGpt 4o with any bigger LLM models. You can cross-check directly with our - Manual SQL Executer😊.
+- Ask Questions
+- Run Queries: automatic + manual
+- Download Results
+
+### Easy Questions
+
+1.
+**Question:**
+```
+Retrieve all customer IDs and their corresponding cities from the `customers` table.
+```
+**Fine-Tuned Model Results:**
+✅ **Pass**
+**Answer by ChatGpt 4o:**
+```sql
+SELECT customer_id, customer_city FROM customers;
+```
+
+---
+
+2.
+**Question:**
+```
+List all products along with their category names from the `products` table.
+```
+**Fine-Tuned Model Results:**
+✅ **Pass**
+**Answer by ChatGpt 4o:**
+```sql
+SELECT product_id, product_category_name FROM products;
+```
+
+---
+
+3.
+**Question:**
+```
+Fetch the order IDs and their purchase timestamps from the `orders` table.
+```
+**Fine-Tuned Model Results:**
+✅ **Pass**
+**Answer by ChatGpt 4o:**
+```sql
+SELECT order_id, order_purchase_timestamp FROM orders;
+```
+
+---
+
+4.
+**Question:**
+```
+Display the distinct payment types available in the `payments` table.
+```
+**Fine-Tuned Model Results:**
+✅ **Pass**
+**Answer by ChatGpt 4o:**
+```sql
+SELECT DISTINCT payment_type FROM payments;
+```
+
+---
+
+5.
+**Question:**
+```
+Find the total number of rows in the `customers` table.
+```
+**Fine-Tuned Model Results:**
+✅ **Pass**
+**Answer by ChatGpt 4o:**
+```sql
+SELECT COUNT(*) AS total_customers FROM customers;
+```
+
+---
+
+### Medium Questions
+
+1.
+**Question:**
+```
+Retrieve the total payment value for each order from the `payments` table, grouped by `order_id`.
+```
+**Fine-Tuned Model Results:**
+✅ **Pass**
+**Answer by ChatGpt 4o:**
+```sql
+SELECT order_id, SUM(payment_value) AS total_payment
+FROM payments
+GROUP BY order_id;
+```
+
+---
+
+2.
+**Question:**
+```
+Find all orders where the total shipping charges (sum of `shipping_charges`) exceed 100.
+```
+**Fine-Tuned Model Results:**
+❌ **Fail**
+**Answer by ChatGpt 4o:**
+```sql
+SELECT order_id
+FROM order_items
+GROUP BY order_id
+HAVING SUM(shipping_charges) > 100;
+```
+**Issue:** Missing validation for null or non-existent data.
+
+---
+
+3.
+**Question:**
+```
+List the names of cities and the number of customers in each city, sorted in descending order of the number of customers.
+```
+**Fine-Tuned Model Results:**
+✅ **Pass**
+**Answer by ChatGpt 4o:**
+```sql
+SELECT customer_city, COUNT(*) AS customer_count
+FROM customers
+GROUP BY customer_city
+ORDER BY customer_count DESC;
+```
+
+---
+
+### Hard Questions
+
+1.
+**Question:**
+```
+Write a Query to find the total revenue (sum of `price` + `shipping_charges`) generated for each product category in the `order_items` table, joined with the `products` table.
+```
+**Fine-Tuned Model Results:**
+✅ **Pass**
+**Answer by ChatGpt 4o:**
+```sql
+SELECT
+    p.product_category_name,
+    SUM(o.price + o.shipping_charges) AS total_revenue
+FROM order_items o
+JOIN products p ON o.product_id = p.product_id
+GROUP BY p.product_category_name
+ORDER BY total_revenue DESC;
+```
+
+---
+
+2.
+**Question:**
+```
+Write a Query to identify the top 5 products with the highest total sales value (sum of `price`) across all orders.
+```
+**Fine-Tuned Model Results:**
+❌ **Fail**
+**Answer by ChatGpt 4o:**
+```sql
+SELECT
+    product_id,
+    SUM(price) AS total_sales
+FROM order_items
+GROUP BY product_id
+ORDER BY total_sales DESC
+LIMIT 5;
+```
+**Issue:** Misalignment with finer-grained filters or lack of handling for tied ranks.
+
+---
static/df_Customers.csv
ADDED
The diff for this file is too large to render.
static/df_OrderItems.csv
ADDED
The diff for this file is too large to render.
static/df_Orders.csv
ADDED
The diff for this file is too large to render.
static/df_Payments.csv
ADDED
The diff for this file is too large to render.
static/df_Products.csv
ADDED
The diff for this file is too large to render.
utils/__init__.py
ADDED
File without changes
utils/handle_sql_commands.py
ADDED
@@ -0,0 +1,18 @@
+# handle_sql_commands.py
+import duckdb
+import pandas as pd
+import streamlit as st
+
+
+def execute_sql_duckdb(sql_command: str, dataframes: dict) -> pd.DataFrame or None:
+    try:
+        con = duckdb.connect(database=":memory:", read_only=False)
+        for df_name, df in dataframes.items():
+            con.register(df_name, df)
+        result_df = con.execute(sql_command).fetchdf()
+        con.close()
+        return result_df
+    except duckdb.Error as e:
+        st.error(f"DuckDB Error: {e}")
+        return None
+
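Note: execute_sql_duckdb registers each key of the dict as a DuckDB table name, so any pandas DataFrame becomes queryable by SQL. A small usage sketch with made-up data:

```python
# Usage sketch for execute_sql_duckdb; the sample data here is made up.
import pandas as pd

from utils.handle_sql_commands import execute_sql_duckdb

dataframes = {
    "orders": pd.DataFrame({"order_id": ["a1", "a2"], "price": [10.0, 25.5]}),
}
result = execute_sql_duckdb("SELECT SUM(price) AS total FROM orders", dataframes)
print(result)  # one row: total = 35.5
```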
utils/llm_logic.py
ADDED
@@ -0,0 +1,230 @@
# llm_logic.py
# from langchain_ollama import ChatOllama
from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
import streamlit as st
import multiprocessing
from langchain_community.chat_models import ChatLlamaCpp

local_model = "qwen2.5-coder-3b-instruct-q4_k_m.gguf"

stop = [
    "<|image_pad|>",
    "<|endoftext|>",
    "<|quad_end|>",
    "<|object_ref_end|>",
    "<|object_ref_start|>",
    "<|file_sep|>",
    "<|repo_name|>",
    "<|PAD_TOKEN|>",
    "<|quad_start|>",
    "<|box_start|>",
    "<|box_end|>",
    "<|im_start|>",
    "</tool_call>",
    "<|video_pad|>",
    "<tool_call>",
    "<|im_end|>",
    "<|vision_",
    "<|fim_",
]


def get_llm():
    cache_llm = ChatLlamaCpp(
        temperature=0.0,
        model_path=local_model,
        n_ctx=10000,
        n_gpu_layers=0,
        n_batch=1024,
        max_tokens=500,
        n_threads=multiprocessing.cpu_count() - 1,
        top_p=0.97,
        verbose=False,
        stop=stop,
    )
    return cache_llm


llm = get_llm()


db_schema = """### **customers**
| | customer_id | customer_zip_code_prefix | customer_city | customer_state |
|------:|:--------------|---------------------------:|:----------------|:-----------------|
| 21921 | 0tgYlOTGgpO6 | 79230 | russas | CE |
| 9748 | jGhRQF3CIew4 | 81460 | joao monlevade | MG |
| 22679 | 1UutQTIhBvcP | 94480 | pelotas | RS |

Rows: 38279, Columns: 4

---

### **order_items**
| | order_id | product_id | seller_id | price | shipping_charges |
|------:|:-------------|:-------------|:-------------|--------:|-------------------:|
| 19729 | PDEzZdebLSn3 | aBpYjaBcwz6e | bzfcwRPnZzVO | 55.83 | 27.8 |
| 6001 | R7bIPjjYqlHP | ZM2JJXV5m9hl | Ivbw25fb5t2Z | 100 | 42.05 |
| 282 | Biqo21nETaMO | XqmdGKRbTetH | P2nCHWuo0HC0 | 113.49 | 91.32 |

Rows: 38279, Columns: 5

---

### **orders**

| | order_id | customer_id | order_purchase_timestamp | order_approved_at |
|------:|:-------------|:--------------|:---------------------------|:--------------------|
| 7294 | PMqwQc01iDTJ | c9ueC6k6V5WS | 2018-06-19 21:23:48 | 2018-06-20 08:38:30 |
| 13800 | P4l8R2Qat5n7 | ovKkGaXi5TmN | 2018-01-05 08:26:03 | 2018-01-05 08:47:20 |
| 17679 | NxIseZjAQCdC | o9qzmUQVJOxA | 2018-01-28 23:46:53 | 2018-01-28 23:58:31 |

Rows: 38279, Columns: 4

---

### **payments**
| | order_id | payment_sequential | payment_type | payment_installments | payment_value |
|------:|:-------------|---------------------:|:---------------|-----------------------:|----------------:|
| 35526 | cQXl0pQtiMad | 1 | wallet | 1 | 172.58 |
| 35799 | olImD2k316Gz | 1 | credit_card | 3 | 16.78 |
| 13278 | G9MJYXXtPZSz | 1 | credit_card | 10 | 221.86 |

Rows: 38279, Columns: 5

---

### **products**
| | product_id | product_category_name | product_weight_g | product_length_cm | product_height_cm | product_width_cm |
|------:|:-------------|:------------------------|-------------------:|--------------------:|--------------------:|-------------------:|
| 18191 | hpiXwRzTkhkL | bed_bath_table | 1150 | 40 | 9 | 50 |
| 2202 | iPoRkE7dkmlc | toys | 15800 | 38 | 62 | 57 |
| 27442 | hrjNaMt3Wyo5 | toys | 1850 | 37 | 22 | 40 |

Rows: 38279, Columns: 6
"""

# Improved SQL generation prompt
sql_system_prompt = """You are a highly skilled natural language to SQL translator. Your goal is to generate accurate SQL queries based on the provided database schema. You must only return the SQL query and no other text or explanations.

DATABASE SCHEMA:
{db_schema}
"""
sql_chat_template = """

Translate the following natural language question into an accurate SQL query. Return only the SQL query.

QUESTION: {question}

### assistant:

"""

# Improved prompt for classifying the question
classification_system_prompt = """You are an expert at classifying user questions as requiring a SQL query or being generic based on the provided database schema. Your response should be ONLY 'SQL' or 'GENERIC'.

A question requires a SQL query if it asks for specific data that can be retrieved from the tables in the schema. A question is generic if it asks for explanations, definitions, or information not directly retrievable through a SQL query on the given schema.

Consider the following database schema:
{db_schema}

Here are some examples:

Question: What are the names of all customers?
Response: SQL

Question: Tell me about the sales table.
Response: GENERIC

Question: How much did product 'Product A' sell for?
Response: SQL

Question: What is a primary key?
Response: GENERIC
"""
classification_chat_template = """

Determine if the following question requires a SQL query based on the database schema. Respond with 'SQL' or 'GENERIC'.

QUESTION: {question}

### assistant:
"""


def classify_question(question: str, use_default_schema: bool = True):
    classification_system_prompt_local = classification_system_prompt  # Initialize here
    if use_default_schema:
        classification_system_prompt_local = classification_system_prompt_local.format(
            db_schema=db_schema
        )
    else:
        uploaded_schema = st.session_state.uploaded_df_schema
        classification_system_prompt_local = classification_system_prompt_local.format(
            db_schema=uploaded_schema
        )
    classification_messages = [
        SystemMessage(content=classification_system_prompt_local),
        HumanMessage(content=classification_chat_template.format(question=question)),
    ]
    response = llm.invoke(classification_messages)
    return response.content.strip().upper()


def generate_llm_response(prompt: str, use_default_schema: bool = True):
    question_type = classify_question(prompt, use_default_schema)
    chosen_schema = None
    if use_default_schema:
        chosen_schema = db_schema
        sql_system_prompt_local = sql_system_prompt.format(db_schema=chosen_schema)
    else:
        uploaded_schema = st.session_state.uploaded_df_schema
        chosen_schema = uploaded_schema
        sql_system_prompt_local = sql_system_prompt.format(db_schema=chosen_schema)

    # Retrieve the chat history from the session state
    chat_history = st.session_state.get("chat_history", [])

    if "SQL" in question_type:
        print("SQL question detected")
        st.toast("Detected Task: SQL Query Generation", icon="🚨")
        formatted_prompt = sql_chat_template.format(question=prompt)

        # Create the messages list, including the system prompt and the chat history
        messages_for_llm = [SystemMessage(content=sql_system_prompt_local)]
        for message in chat_history:
            if isinstance(message, HumanMessage):
                messages_for_llm.append(HumanMessage(content=message.content))
            elif isinstance(message, AIMessage):
                # Only include the assistant's text response, not the additional kwargs
                messages_for_llm.append(AIMessage(content=message.content))
        messages_for_llm.append(HumanMessage(content=formatted_prompt))

        full_response = ""
        for chunk in llm.stream(messages_for_llm):
            full_response += chunk.content
            yield f"<sql>\n```sql\n{full_response.strip()}\n```\n</sql>"
    elif "GENERIC" in question_type:
        print("Generic question detected")
        st.toast("Detected Task: Generic QA", icon="🚨")
        generic_prompt = f"Answer the following question related to SQL or coding:\n\nQUESTION: {prompt}\n\n### assistant:"

        # Create the messages list, including the system prompt and the chat history
        messages_for_generic = [
            SystemMessage(
                content=f"You are a helpful assistant finetuned from Qwen2.5-coder:3B-Instruct for answering questions about SQL.\nYou have a database with the Database Schema:\n{chosen_schema}.\n"
            )
        ]
        for message in chat_history:
            if isinstance(message, HumanMessage):
                messages_for_generic.append(HumanMessage(content=message.content))
            elif isinstance(message, AIMessage):
                # Only include the assistant's text response, not the additional kwargs
                messages_for_generic.append(AIMessage(content=message.content))
        messages_for_generic.append(HumanMessage(content=generic_prompt))

        generic_response = ""
        for chunk in llm.stream(messages_for_generic):
            generic_response += chunk.content
            yield generic_response
    else:
        yield "I am sorry, I am a small language model fine-tuned specifically to answer questions that can be solved using SQL. I won't be able to answer this question."
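Note the streaming contract: `generate_llm_response` is a generator, and each `yield` carries the full response accumulated so far rather than a delta, which lets the Streamlit caller simply re-render the latest string. A minimal consumption sketch (assuming a default-schema question, since the uploaded-schema path reads `st.session_state`):

```python
# Hypothetical driver for the generator; each yielded item supersedes the previous one
from utils.llm_logic import generate_llm_response

latest = ""
for partial in generate_llm_response("Find the total number of rows in the customers table."):
    latest = partial
print(latest)  # for SQL questions the final string is wrapped in <sql>...</sql> tags
```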
utils/sql_utils.py
ADDED
@@ -0,0 +1,64 @@
import re
import streamlit as st
import pandas as pd

def extract_sql_command(text):
    """
    Extracts the SQL command enclosed within ```sql ``` delimiters from a given string.

    Args:
        text: The input string containing the SQL command.

    Returns:
        The extracted SQL command as a string, or None if no SQL command is found.
    """
    pattern = r"```sql\s*([\s\S]*?)\s*```"
    match = re.search(pattern, text)
    if match:
        return match.group(1).strip()
    else:
        return None

def create_schema(dataframes):
    schema = ""
    for df_name, df in dataframes.items():
        schema += f"### {df_name}\n"
        schema += df.head(3).to_markdown(index=False)
        schema += "\n\nRows: " + str(df.shape[0]) + ", Columns: " + str(df.shape[1]) + "\n\n---\n\n"
    return schema


@st.cache_resource
def load_defaultdb_schema_text():
    with open("static/database_scema.txt", "r", encoding="utf-8") as file:
        return file.read()


@st.cache_resource
def load_defaultdb_queries():
    with open("static/default_questions.txt", "r", encoding="utf-8") as file:
        return file.read()


@st.cache_data
def convert_df(df):
    # IMPORTANT: Cache the conversion to prevent computation on every rerun
    return df.to_csv().encode("utf-8")


# Load CSV files into pandas default_dfs
@st.cache_resource
def load_data():
    # text-to-sql-streamlit\static\df_Customers.csv
    df_customers = pd.read_csv("static/df_Customers.csv")
    df_order_items = pd.read_csv("static/df_OrderItems.csv")
    df_orders = pd.read_csv("static/df_Orders.csv")
    df_payments = pd.read_csv("static/df_Payments.csv")
    df_products = pd.read_csv("static/df_Products.csv")
    return {
        "customers": df_customers,
        "order_items": df_order_items,
        "orders": df_orders,
        "payments": df_payments,
        "products": df_products,
    }
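A quick illustrative check of the fence extractor (hypothetical input strings):

```python
from utils.sql_utils import extract_sql_command

text = "Here is the query:\n```sql\nSELECT 1;\n``` Done."
assert extract_sql_command(text) == "SELECT 1;"
assert extract_sql_command("no fenced sql here") is None
```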
🤖SQL_Agent.py
ADDED
@@ -0,0 +1,310 @@
import re
import streamlit as st
from langchain_core.messages import HumanMessage, AIMessage
from utils.llm_logic import generate_llm_response
from utils.sql_utils import (
    extract_sql_command,
    load_defaultdb_schema_text,
    load_defaultdb_queries,
    load_data,
)
from utils.handle_sql_commands import execute_sql_duckdb


st.set_page_config(
    page_title="Text-to-SQL Agent",
    page_icon="🤖",
    layout="wide",
    initial_sidebar_state="expanded",
)

default_db_questions = {
    "easy": [
        "Retrieve all customer IDs and their corresponding cities from the `customers` table.",
        "List all products along with their category names from the `products` table.",
        "Fetch the order IDs and their purchase timestamps from the `orders` table.",
        "Display the distinct payment types available in the `payments` table.",
        "Find the total number of rows in the `customers` table.",
    ],
    "medium": [
        "Retrieve the total payment value for each order from the `payments` table, grouped by `order_id`.",
        "Find all orders where the total shipping charges (sum of `shipping_charges`) exceed 100.",
        "List the names of cities and the number of customers in each city, sorted in descending order of the number of customers.",
    ],
    "hard": [
        "Write a query to find the total revenue (sum of `price` + `shipping_charges`) generated for each product category in the `order_items` table, joined with the `products` table.",
        "Identify the top 5 products with the highest total sales value (sum of `price`) across all orders.",
    ],
}


default_dfs = load_data()
selected_df = default_dfs
use_default_schema = True

st.markdown(
    """
<style>
/* Base styles for both themes */
.stPageLink {
    background-image: linear-gradient(to right, #007BFF, #6610F2); /* Gradient background */
    color: white !important; /* Ensure text is readable on the gradient */
    padding: 12px 20px !important; /* Slightly larger padding */
    border-radius: 8px !important; /* More rounded corners */
    border: none !important; /* Remove default border */
    text-decoration: none !important;
    font-weight: 500 !important; /* Slightly lighter font weight */
    transition: transform 0.2s ease-in-out, box-shadow 0.2s ease-in-out; /* Smooth transitions */
    box-shadow: 0 2px 5px rgba(0, 0, 0, 0.15); /* Subtle shadow for depth */
    display: inline-flex;
    align-items: center;
    justify-content: center;
}

.stPageLink:hover {
    transform: scale(1.03); /* Slight scale up on hover */
    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2); /* Increased shadow on hover */
}

.stPageLink span { /* Style the label text */
    margin-left: 5px; /* Space between icon and text */
}

/* Dark theme adjustments (optional, if needed for better contrast) */
/* Consider using Streamlit's theme variables if possible for a more robust solution */
/* For simplicity, this example uses fixed colors that should work reasonably well */
/* [data-theme="dark"] .stPageLink {
}

[data-theme="dark"] .stPageLink:hover {
} */
</style>
""",
    unsafe_allow_html=True,
)

with st.popover("Click here to see Database Schema", use_container_width=True):
    uploaded_df_schema = st.session_state.get("uploaded_df_schema", False)

    choice = st.segmented_control(
        "Choose",
        ["Default DB", "Uploaded Files"],
        label_visibility="collapsed",
        disabled=uploaded_df_schema is False,
        default="Default DB" if uploaded_df_schema is False else "Uploaded Files",
    )

    if uploaded_df_schema is False:
        st.markdown(
            """> You can also upload your own files, to get your schemas. You can then use those schemas to cross-check our answers with ChatGpt/Gemini/Claude (Preferred if the Question is very Complex). You can run the queries directly with our Manual SQL Executer😊.
- Ask Questions
- Run Queries: automatic + manual
- Download Results """
        )
        st.page_link(
            page="pages/3 📂File Upload for SQL.py",
            label="Upload your own CSV or Excel files",
            icon="📜",
        )
        schema = load_defaultdb_schema_text()
        st.markdown(schema, unsafe_allow_html=True)
    elif choice == "Default DB":
        schema = load_defaultdb_schema_text()
        st.markdown(schema, unsafe_allow_html=True)
    else:
        pretty_schema, markdown = st.tabs(["Schema", "Copy Schema in Markdown"])
        with pretty_schema:
            st.info(
                "You can copy this schema, and give it to any state of the art LLM models like (Gemini /ChatGPT /Claude etc) to cross check your answers.\n You can run the queries directly here, by using ***Manual Query Executer*** in the sidebar and download your results 😊",
                icon="ℹ️",
            )
            st.markdown(uploaded_df_schema, unsafe_allow_html=True)
        with markdown:
            st.info(
                "You can copy this schema, and give it to any state of the art LLM models like (Gemini /ChatGPT /Claude etc) to cross check your answers.\n You can run the queries directly here, by using ***Manual Query Executer*** in the sidebar and download your results 😊",
                icon="ℹ️",
            )
            st.markdown(f"```\n{uploaded_df_schema}\n```")


col1, col2 = st.columns([2, 1], vertical_alignment="bottom")
with col1:
    st.header("Natural Language to SQL Query Agent🤖")

with col2:
    st.caption("> ***Execute on the Go!*** 🚀 In-Built DuckDB Execution Engine")

st.caption(
    "This is a Qwen2.5-Coder-3B model fine-tuned for SQL queries integrated with langchain for Agentic Workflow. To see the Fine-Tuning code - [click here](https://www.kaggle.com/code/debopamchowdhury/qwen-2-5coder-3b-instruct-finetuning)."
)


col1, col2 = st.columns([2, 1], vertical_alignment="bottom")
with col1:
    # Button to refresh the conversation
    if st.button("Start New Conversation", type="primary"):
        st.session_state.chat_history = []
        st.session_state.conversation_turns = 0
        st.rerun()
with col2:
    disabled_selection = True
    if (
        "uploaded_dataframes" in st.session_state
    ) and st.session_state.uploaded_dataframes:
        disabled_selection = False
    options = ["default_db", "uploaded_files"]
    selected = st.segmented_control(
        "Choose",
        options,
        selection_mode="single",
        disabled=disabled_selection,
        label_visibility="collapsed",
        default="default_db" if disabled_selection else "uploaded_files",
    )
    if not disabled_selection:
        if selected == "uploaded_files":
            selected_df = st.session_state.uploaded_dataframes
            print(selected_df)
            use_default_schema = False
        else:
            selected_df = default_dfs
            print(selected_df)
            use_default_schema = True
if selected_df is default_dfs:  # identity check; == would compare DataFrames element-wise
    with st.popover("Default Database Queries 📚 - Trial"):
        default_db_questions = load_defaultdb_queries()
        st.markdown(default_db_questions)

# Initialize chat history in session state
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []

# Initialize conversation turn counter
if "conversation_turns" not in st.session_state:
    st.session_state.conversation_turns = 0

# Set the maximum number of conversation turns
MAX_TURNS = 5

# Display existing chat messages
for message in st.session_state.chat_history:
    with st.chat_message(message.type):
        st.markdown(message.content)
        if (
            isinstance(message, AIMessage)
            and "response_df" in message.additional_kwargs
            and message.additional_kwargs["response_df"] is not None
            and not message.additional_kwargs["response_df"].empty
        ):
            with st.expander("View SQL-Query Execution Result"):
                df = message.additional_kwargs["response_df"]
                # download_csv = convert_df(df)
                # st.download_button(
                #     label="Download data as CSV",
                #     data=download_csv,
                #     file_name="query_results.csv",
                #     mime="text/csv",
                # )
                # renderer = StreamlitRenderer(
                #     df,
                #     spec_io_mode="rw",
                #     default_tab="data",
                #     appearance="dark",
                #     kernel_computation=True,
                # )
                # renderer.explorer(default_tab="data")
                st.dataframe(df)
                st.info(f"Rows x Columns: {df.shape[0]} x {df.shape[1]}")
                st.subheader("Data Description:")
                st.markdown(df.describe().T.to_markdown())
                st.subheader("Data Types:")
                st.write(df.dtypes)

# Get user input only if the conversation turn limit is not reached
if st.session_state.conversation_turns < MAX_TURNS:
    if prompt := st.chat_input("Ask me a SQL query question"):
        # Add user message to chat history in session state
        st.session_state.chat_history.append(HumanMessage(content=prompt))
        # Display user message in chat
        with st.chat_message("user"):
            st.markdown(prompt)

        duckdb_result = None
        # Get assistant response with streaming
        with st.chat_message("assistant"):
            message_placeholder = st.empty()
            full_response = ""
            with st.spinner(
                "I know it is taking a lot of time. To run the model I'm using `Free` small vCPUs provided by `HuggingFace Spaces` for deployment. Thank you so much for your patience😊"
            ):
                for response_so_far in generate_llm_response(
                    prompt, use_default_schema
                ):
                    # Remove <sql> and </sql> tags for streaming display
                    streaming_response = response_so_far.replace("<sql>", "").replace(
                        "</sql>", ""
                    )
                    # Remove duplicate ```sql tags with or without space for streaming display
                    streaming_response = re.sub(
                        r"```sql\s*```sql", "```sql", streaming_response
                    )
                    message_placeholder.markdown(streaming_response + "▌")
                    full_response = response_so_far

            # Remove <sql> and </sql> tags from the full response
            full_response = full_response.replace("<sql>", "").replace("</sql>", "")
            # Remove duplicate ```sql tags with or without space from the full response
            full_response = re.sub(r"```sql\s*```sql", "```sql", full_response)
            # Remove trailing duplicate ``` tags from the full response
            full_response = re.sub(r"[\s\n]*`+$", "```", full_response)
            message_placeholder.markdown(full_response)
            # st.text(extract_sql_command(full_response))

            sql_command = extract_sql_command(full_response)
            # dataframe_html = None
            if sql_command:
                # st.text("Extracted SQL Command:")
                # st.code(sql_command, language="sql")
                duckdb_result = execute_sql_duckdb(sql_command, selected_df)
                if duckdb_result is not None:
                    st.text("Query Execution Result:")
                    with st.expander("View Result"):
                        st.dataframe(duckdb_result)
                        st.info(
                            f"Rows x Columns: {duckdb_result.shape[0]} x {duckdb_result.shape[1]}"
                        )
                        st.subheader("Data Description:")
                        st.markdown(duckdb_result.describe().T.to_markdown())
                        st.subheader("Data Types:")
                        st.write(duckdb_result.dtypes)
                        # renderer = StreamlitRenderer(
                        #     duckdb_result,
                        #     spec_io_mode="rw",
                        #     default_tab="data",
                        #     appearance="dark",
                        #     kernel_computation=True,
                        # )
                        # renderer.explorer(default_tab="data")

            else:
                # st.warning("No SQL command found in the response.")
                pass

        # Add assistant response to chat history in session state
        st.session_state.chat_history.append(
            AIMessage(
                content=full_response,
                additional_kwargs={"response_df": duckdb_result},
            )
        )

        # Increment the conversation turn counter
        st.session_state.conversation_turns += 1
else:
    st.warning(
        "Maximum number of questions reached. Please click 'Start New Conversation' to continue."
    )
    st.chat_input(
        "Ask me a SQL query question", disabled=True
    )  # Disable the input field
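One subtle step above is the fence cleanup applied to the streamed answer. A quick illustration of what the two `re.sub` calls normalize (hypothetical model output):

```python
import re

raw = "```sql ```sql\nSELECT 1;\n``"                # doubled opening fence, ragged closing ticks
raw = re.sub(r"```sql\s*```sql", "```sql", raw)     # collapse the duplicated opening fence
raw = re.sub(r"[\s\n]*`+$", "```", raw)             # normalize trailing backticks to one closing fence
print(raw)  # -> "```sql\nSELECT 1;```", a single well-formed fenced block
```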