Shreneek committed on
Commit
28545e3
·
verified ·
1 Parent(s): fe6105e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +231 -0
app.py ADDED
@@ -0,0 +1,231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
+ from ydata_profiling import ProfileReport
7
+ import json
8
+ import os
9
+ from langchain.llms import HuggingFaceHub
10
+ from langchain.chains import LLMChain
11
+ from langchain.prompts import PromptTemplate
12
+ from langchain_core.output_parsers import StrOutputParser
13
+ from langchain.tools.python.tool import PythonAstREPLTool
14
+ from langchain.agents import AgentExecutor, create_react_agent
15
+ from langchain_experimental.agents.agent_toolkits.pandas.base import create_pandas_dataframe_agent
16
+ from langchain.agents.agent_types import AgentType
17
+
18
# Set page configuration (must be the first Streamlit call in the script).
st.set_page_config(
    page_title="Interactive Data Profiler & Chat",
    layout="wide",
    page_icon="📊",
)

# Create session-state slots for the DataFrame, chat history and suggestions
# if they don't exist yet. Factories are used so every session gets its own
# fresh list instead of sharing one mutable default.
for _state_key, _make_default in (
    ("df", lambda: None),
    ("chat_history", list),
    ("suggestions", list),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _make_default()
28
+
29
+ # Initialize Hugging Face API
30
def get_llm():
    """Build the Hugging Face Hub LLM client used for data questions.

    Returns:
        A ``HuggingFaceHub`` instance for ``google/flan-t5-large`` configured
        with temperature 0.1 and max_length 512. The API token is read from
        the ``HUGGINGFACE_API_TOKEN`` environment variable; an empty string
        is passed when the variable is unset.
    """
    api_token = os.environ.get("HUGGINGFACE_API_TOKEN", "")
    # Using a small but capable open-source model
    generation_options = {"temperature": 0.1, "max_length": 512}
    return HuggingFaceHub(
        repo_id="google/flan-t5-large",
        model_kwargs=generation_options,
        huggingfacehub_api_token=api_token,
    )
38
+
39
+ # Function to generate report
40
def generate_profile_report(df):
    """Create a profiling report for *df* while a spinner is displayed.

    Args:
        df: The DataFrame to profile.

    Returns:
        The ``ProfileReport`` built with the title "Profiling Report" and
        both ``explorative`` and ``minimal`` enabled.
    """
    report_options = {
        "title": "Profiling Report",
        "explorative": True,
        "minimal": True,  # Minimal for faster processing
    }
    with st.spinner("Generating profile report..."):
        report = ProfileReport(df, **report_options)
    return report
47
+
48
+ # Function to generate query suggestions
49
def generate_suggestions(df):
    """Build a list of starter questions tailored to the columns of *df*.

    Three generic questions are always included. An "average of" question is
    added for the first numeric column (and omitted when there is none, so a
    text column is never offered for averaging). The first three columns then
    contribute mean/maximum questions when numeric, or unique/missing-value
    questions when they hold text/object data.

    Args:
        df: The DataFrame the user uploaded.

    Returns:
        A list of suggestion strings, in display order.
    """
    dtype_checks = pd.api.types

    def _is_number(dtype):
        # Booleans count as numeric in pandas, but "mean of a flag" is not a
        # useful suggestion, so they are excluded. Using the dtype API also
        # recognises nullable dtypes such as Int64/Float64.
        return dtype_checks.is_numeric_dtype(dtype) and not dtype_checks.is_bool_dtype(dtype)

    def _is_text(dtype):
        return dtype_checks.is_object_dtype(dtype) or dtype_checks.is_string_dtype(dtype)

    suggestions = [
        "How many rows are in this dataset?",
        "What are all the column names?",
        "Show me the first 5 rows",
    ]

    first_numeric = next(
        (col for col, dtype in df.dtypes.items() if _is_number(dtype)),
        None,
    )
    if first_numeric is not None:
        suggestions.append(f"What is the average of {first_numeric}")

    # Add column-specific suggestions for (at most) the first three columns.
    for col, dtype in df.dtypes.iloc[:3].items():
        if _is_number(dtype):
            suggestions.append(f"What is the mean value of {col}?")
            suggestions.append(f"What is the maximum value of {col}?")
        elif _is_text(dtype):
            suggestions.append(f"What are the unique values in {col}?")
            suggestions.append(f"How many missing values in {col}?")

    return suggestions
74
+
75
+ # Function to execute pandas operations safely
76
def _fallback_answer(df, query, error):
    """Answer a handful of common questions directly with pandas.

    Used only when the LLM agent fails. Matching is by keyword on the
    lower-cased query; anything unrecognised yields a message that includes
    the original error.
    """
    import re  # local import: only needed on the fallback path

    text = query.lower()
    asks_how_many = "how many" in text

    if "rows" in text and asks_how_many:
        return f"The dataset has {df.shape[0]} rows."
    if "columns" in text and asks_how_many:
        return f"The dataset has {df.shape[1]} columns."
    if "column names" in text:
        # Column labels are not guaranteed to be strings (e.g. integer headers).
        return f"The column names are: {', '.join(map(str, df.columns))}"
    if "first" in text and "rows" in text:
        # Take the first standalone number, tolerating adjacent punctuation
        # ("5.", "(5)") but ignoring digits embedded in identifiers ("col2").
        match = re.search(r"\b[0-9]+\b", query)
        num = int(match.group()) if match else 5  # Default
        return df.head(num).to_string()
    if "describe" in text:
        return df.describe().to_string()
    return f"I couldn't process that query. Error: {str(error)}"


def execute_pandas_query(df, query):
    """Answer a natural-language *query* about *df*.

    A LangChain pandas DataFrame agent backed by ``get_llm()`` is tried
    first. If building or running the agent raises, a small set of keyword
    based pandas answers is used instead (best effort).

    Args:
        df: The DataFrame to query.
        query: The user's question.

    Returns:
        The agent's answer, or a fallback string (row/column counts, column
        names, ``head``/``describe`` output, or an error message).
    """
    try:
        # NOTE(security): the pandas agent executes Python generated by the
        # LLM against the live DataFrame. Treat questions as untrusted input
        # and run this app only in a sandboxed environment.
        agent = create_pandas_dataframe_agent(
            llm=get_llm(),
            df=df,
            agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
            verbose=True,
        )
        return agent.run(query)
    except Exception as e:
        # Deliberate catch-all: any agent failure degrades to basic answers.
        return _fallback_answer(df, query, e)
108
+
109
# Main app header
st.title("🔍 Interactive Data Profiler & Chat")
st.markdown("""
Upload your CSV file to get detailed profiling and ask questions about your data!
This app combines interactive data profiling with a chat interface for data exploration.
""")

# File uploader (returns None until the user provides a CSV)
uploaded_file = st.file_uploader("Upload a CSV file", type="csv")
118
+
119
# --- Widget callbacks -------------------------------------------------------
# Streamlit runs callbacks before the script reruns, which is the only point
# where a keyed widget's session-state value may be reassigned. Routing every
# question through a one-shot "pending" slot also guarantees it is answered
# exactly once instead of on every rerun while the text box is non-empty.

def _queue_question(question):
    """Remember *question* so the next script run answers it once."""
    st.session_state.pending_question = question


def _submit_typed_question():
    """Queue the text typed into the question box, then clear the box."""
    _queue_question(st.session_state.question_input.strip())
    st.session_state.question_input = ""


def _clear_chat_history():
    """Drop all stored chat messages."""
    st.session_state.chat_history = []


# Process uploaded file
if uploaded_file is not None:
    try:
        # Read CSV into DataFrame
        df = pd.read_csv(uploaded_file)
        st.session_state.df = df
        st.success(f"✅ File uploaded successfully! Found {df.shape[0]} rows and {df.shape[1]} columns.")

        # Detect a newly uploaded file so per-file artefacts are refreshed
        # instead of lingering from the previous dataset.
        file_key = (uploaded_file.name, uploaded_file.size)
        is_new_file = st.session_state.get("file_key") != file_key
        if is_new_file:
            st.session_state.file_key = file_key
            st.session_state.report_html = None

        # Generate suggestions when a new file is uploaded
        if is_new_file or not st.session_state.suggestions:
            st.session_state.suggestions = generate_suggestions(df)

        # Create tabs for different functionalities
        tab1, tab2 = st.tabs(["📊 Data Profiling", "💬 Data Chat"])

        # Tab 1: Data Profiling
        with tab1:
            st.header("Data Profiling")

            # Basic info
            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("Rows", df.shape[0])
            with col2:
                st.metric("Columns", df.shape[1])
            with col3:
                # int() converts the numpy integer to a plain Python int.
                st.metric("Missing Values", int(df.isna().sum().sum()))

            # Show raw data sample
            with st.expander("Preview Data"):
                st.dataframe(df.head(10))

            # Generate the profile report once per file; every chat
            # interaction reruns the script and profiling is expensive.
            report_html = st.session_state.get("report_html")
            if report_html is None:
                report_html = generate_profile_report(df).to_html()
                st.session_state.report_html = report_html

            # Display the HTML report
            st.components.v1.html(report_html, height=1000, scrolling=True)

            # Provide download button
            st.write("### Download the Profiling Report")
            st.download_button(
                label="Download Report (HTML)",
                data=report_html.encode("utf-8"),
                file_name="profiling_report.html",
                mime="text/html",
            )

        # Tab 2: Interactive Chat
        with tab2:
            st.header("Chat with Your Data")
            st.info("Ask questions about your data and get instant answers!")

            # Chat input and suggested questions
            st.text_input(
                "Your question:",
                key="question_input",
                on_change=_submit_typed_question,
            )

            # Show suggestion chips
            st.write("Suggested questions:")
            cols = st.columns(2)
            for i, suggestion in enumerate(st.session_state.suggestions):
                with cols[i % 2]:
                    st.button(
                        suggestion,
                        key=f"suggestion_{i}",
                        on_click=_queue_question,
                        args=(suggestion,),
                    )

            # Process the pending question (if any) exactly once
            user_question = st.session_state.get("pending_question")
            if user_question:
                st.session_state.pending_question = None
                st.session_state.chat_history.append({"role": "user", "content": user_question})

                # Get answer
                with st.spinner("Thinking..."):
                    answer = execute_pandas_query(df, user_question)

                # Add answer to chat history
                st.session_state.chat_history.append({"role": "assistant", "content": answer})

            # Display chat history
            st.write("### Conversation History")
            for message in st.session_state.chat_history:
                if message["role"] == "user":
                    st.markdown(f"**You:** {message['content']}")
                else:
                    st.markdown(f"**Assistant:** {message['content']}")
                    st.markdown("---")

            # Clear chat button (the callback runs before the history above
            # is rendered on the rerun, so no explicit rerun is needed)
            st.button("Clear Chat History", on_click=_clear_chat_history)

    except Exception as e:
        # Top-level boundary: surface any failure to the user in the UI.
        st.error(f"An error occurred: {str(e)}")
else:
    st.info("👆 Please upload a CSV file to begin.")

    # Placeholder visuals
    st.markdown("### What you can do with this app:")
    col1, col2 = st.columns(2)
    with col1:
        st.markdown("**📊 Data Profiling**")
        st.markdown("- Automatic data quality assessment")
        st.markdown("- Column statistics and distributions")
        st.markdown("- Correlation analysis")
        st.markdown("- Missing values analysis")
    with col2:
        st.markdown("**💬 Interactive Data Chat**")
        st.markdown("- Ask natural language questions")
        st.markdown("- Get instant insights")
        st.markdown("- Suggested questions for quick exploration")
        st.markdown("- No coding required!")