arad1367 committed on
Commit
b0e246d
·
verified ·
1 Parent(s): aa01a23

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +153 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import io
3
+ import pandas as pd
4
+ import matplotlib.pyplot as plt
5
+ from contextlib import redirect_stdout
6
+ from pejmanai_data_analysis.app import (
7
+ read_csv, data_description, data_preprocessing,
8
+ data_visualization, data_prediction, data_classification
9
+ )
10
+
11
+ # Function to capture printed output with error handling
12
def capture_output(func, *args, **kwargs):
    """Run ``func`` and return everything it printed to stdout.

    Args:
        func: Callable to invoke. Its return value is discarded.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        The captured stdout text. If ``func`` raises, the text printed
        before the failure (if any) followed by ``"Error occurred: <msg>"``.
        Exceptions are reported in the returned string, never re-raised.
    """
    buffer = io.StringIO()
    try:
        with redirect_stdout(buffer):
            func(*args, **kwargs)
    except Exception as e:  # UI boundary: report the failure as text.
        error_message = f"Error occurred: {e}"
        partial_output = buffer.getvalue()
        if not partial_output:
            return error_message
        # Keep whatever was printed before the failure so the user can see
        # how far the call got instead of losing that output.
        separator = "" if partial_output.endswith("\n") else "\n"
        return f"{partial_output}{separator}{error_message}"
    return buffer.getvalue()
20
+
21
+ # Function to handle regression workflow with error handling
22
def _resolve_csv_path(csv_file):
    """Return the CSV path for an upload given as a file object or a path string."""
    # Objects exposing ``.name`` are used through that attribute; anything
    # else (for example a plain path string) is passed through unchanged.
    return getattr(csv_file, "name", csv_file)


def _build_visualization(csv_path, df_preprocessed, x_column, y_column):
    """Return the current matplotlib figure for the two selected columns.

    When both columns are numeric the package's ``data_visualization`` is
    called on a large figure; otherwise a placeholder figure carrying an
    explanatory message is produced.
    """
    columns_are_numeric = (
        pd.api.types.is_numeric_dtype(df_preprocessed[x_column])
        and pd.api.types.is_numeric_dtype(df_preprocessed[y_column])
    )
    if columns_are_numeric:
        plt.figure(figsize=(16, 12))
        data_visualization(csv_path, x_column, y_column)
    else:
        plt.figure()
        plt.text(0.5, 0.5, 'Selected columns are not numeric.', fontsize=12, ha='center')
    # NOTE(review): figures created here are never closed; confirm whether
    # the plot component still needs them open before adding plt.close().
    return plt.gcf()


def _run_workflow(kind, csv_file, x_column, y_column, target_column):
    """Run description, preprocessing, visualization and modelling in order.

    Args:
        kind: ``"regression"`` or ``"classification"``; selects the model
            function and is used in error messages.
        csv_file: Uploaded file (object with ``.name``) or a path string.
        x_column: Column name for the visualization X axis.
        y_column: Column name for the visualization Y axis.
        target_column: Column passed to the model function.

    Returns:
        ``(data_description_text, preprocessed_data, figure, model_text)``
        on success, or ``(error_message, None, None, None)`` on failure.
    """
    error_prefix = f"Error occurred during {kind} workflow"

    # Guard first: without an upload there is nothing to analyse, and the
    # attribute error that would otherwise surface is not helpful to users.
    if csv_file is None:
        return f"{error_prefix}: no CSV file was uploaded.", None, None, None

    try:
        csv_path = _resolve_csv_path(csv_file)
        model_func = data_prediction if kind == "regression" else data_classification

        # Capture data description output
        data_desc = capture_output(data_description, csv_path)

        # Step b) Data Preprocessing
        df_preprocessed = data_preprocessing(csv_path)

        # Report unknown column names explicitly instead of letting a bare
        # KeyError message (just the quoted name) reach the user.
        # Assumes the preprocessing result exposes ``.columns`` -- TODO confirm.
        missing = [
            name for name in (x_column, y_column)
            if name not in df_preprocessed.columns
        ]
        if missing:
            missing_names = ", ".join(repr(name) for name in missing)
            return (
                f"{error_prefix}: column(s) not found in the preprocessed data: {missing_names}",
                None,
                None,
                None,
            )

        # Step c) Data Visualization
        visualization_output = _build_visualization(
            csv_path, df_preprocessed, x_column, y_column
        )

        # Capture model output
        model_output = capture_output(model_func, csv_path, target_column)

        return data_desc, df_preprocessed, visualization_output, model_output
    except Exception as e:  # UI boundary: surface the failure as text.
        return f"{error_prefix}: {e}", None, None, None


def regression_workflow(csv_file, x_column, y_column, target_column):
    """Run the full analysis using ``data_prediction`` as the model step.

    Returns a 4-tuple ``(description, preprocessed_data, figure, model_text)``
    or ``(error_message, None, None, None)`` when anything fails.
    """
    return _run_workflow("regression", csv_file, x_column, y_column, target_column)


# Function to handle classification workflow with error handling
def classification_workflow(csv_file, x_column, y_column, target_column):
    """Run the full analysis using ``data_classification`` as the model step.

    Returns a 4-tuple ``(description, preprocessed_data, figure, model_text)``
    or ``(error_message, None, None, None)`` when anything fails.
    """
    return _run_workflow("classification", csv_file, x_column, y_column, target_column)
72
+
73
+ # Main Gradio interface function with error handling
74
def gradio_interface(option, csv_file, x_column, y_column, target_column):
    """Dispatch the submitted form to the workflow matching ``option``.

    Args:
        option: Selected problem type, ``"Regression Problem"`` or
            ``"Classification Problem"``.
        csv_file: Uploaded CSV file.
        x_column: Column name for the visualization X axis.
        y_column: Column name for the visualization Y axis.
        target_column: Column used as the model target.

    Returns:
        The 4-tuple produced by the selected workflow, or
        ``(message, None, None, None)`` when no valid problem type is chosen.
    """
    if option == "Regression Problem":
        return regression_workflow(csv_file, x_column, y_column, target_column)
    if option == "Classification Problem":
        return classification_workflow(csv_file, x_column, y_column, target_column)

    # Previously this path fell through and returned None, which cannot be
    # spread over the four output components wired to this callback.
    return (
        "Please select a problem type (Regression Problem or Classification Problem) "
        "before running the analysis.",
        None,
        None,
        None,
    )
79
+
80
+ # Reset function to clear outputs
81
def reset_all():
    """Return cleared values for the four output components.

    The order is: description text, preprocessed data, figure, model text.
    """
    cleared_text = ""
    cleared_component = None
    return cleared_text, cleared_component, cleared_component, cleared_text
83
+
84
# Explanation text: Markdown rendered at the top of the page via gr.Markdown.
# It links to the package on PyPI and GitHub and lists usage notes.
# NOTE(review): the first bullet says non-numeric columns produce no output,
# while the workflows draw a placeholder figure reading
# 'Selected columns are not numeric.' -- confirm which wording is intended.
explanation = """
### PejmanAI Data Analysis Tool

This app uses the `pejmanai_data_analysis` package, available on [PyPI](https://pypi.org/project/pejmanai-data-analysis/).
The GitHub repository for the project is available [here](https://github.com/arad1367/pejmanai_data_analysis_pypi_package).

**About the app:**
- In the visualization part, you must use two numerical columns. If you select string columns, you will not see any output.
- The target column is the dependent variable on which you want to make predictions.
- Due to the nature of the `pejmanai_data_analysis` package, the data description and model output are shown in a captured format (this will be addressed in the next version).
"""
96
+
97
# Footer HTML: a centered block of author links plus a credit line,
# rendered at the bottom of the page via gr.HTML.
footer = """
<div style="text-align: center; margin-top: 20px;">
<a href="https://www.linkedin.com/in/pejman-ebrahimi-4a60151a7/" target="_blank">LinkedIn</a> |
<a href="https://github.com/arad1367" target="_blank">GitHub</a> |
<a href="https://arad1367.pythonanywhere.com/" target="_blank">Live demo of my PhD defense</a>
<br>
Made with 💖 by Pejman Ebrahimi
</div>
"""
107
+
108
+ # Set up the Gradio interface with UI adjustments
109
with gr.Blocks(theme='JohnSmith9982/small_and_pretty') as interface:
    # Page header: package links and usage notes.
    gr.Markdown(explanation)

    # Inputs -- each component sits in its own row.
    with gr.Row():
        problem_type = gr.Radio(
            ["Regression Problem", "Classification Problem"],
            label="Select Problem Type",
        )
    with gr.Row():
        csv_file = gr.File(label="Upload CSV File")
    with gr.Row():
        x_column = gr.Textbox(label="Enter X Column for Visualization")
    with gr.Row():
        y_column = gr.Textbox(label="Enter Y Column for Visualization")
    with gr.Row():
        target_column = gr.Textbox(label="Enter Target Column for Model Training")

    with gr.Row():
        submit_button = gr.Button("Run Analysis")

    # Outputs -- same order as the tuples returned by the callbacks.
    with gr.Row():
        data_desc_output = gr.Textbox(
            label="Data Description",
            lines=20,
            placeholder="Data Description Output",
        )
    with gr.Row():
        df_preprocessed_output = gr.Dataframe(label="Data Preprocessing Output")
    with gr.Row():
        visualization_output = gr.Plot(label="Data Visualization Output")
    with gr.Row():
        model_output = gr.Textbox(
            label="Model Output",
            lines=20,
            placeholder="Model Output",
        )

    with gr.Row():
        reset_button = gr.Button("Reset Outputs")

    # Both buttons write to the same four components, so the list is built once.
    analysis_outputs = [
        data_desc_output,
        df_preprocessed_output,
        visualization_output,
        model_output,
    ]

    # Clearing takes no inputs and blanks every output component.
    reset_button.click(fn=reset_all, inputs=[], outputs=analysis_outputs)

    # Running the analysis reads the whole form and fills every output.
    submit_button.click(
        fn=gradio_interface,
        inputs=[problem_type, csv_file, x_column, y_column, target_column],
        outputs=analysis_outputs,
    )

    gr.HTML(footer)

# Launch the Gradio interface
interface.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio
2
+ contextlib
3
+ pejmanai_data_analysis