raaraya committed on
Commit
dba8586
1 Parent(s): 6d0f80a

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +260 -0
app.py ADDED
@@ -0,0 +1,260 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from sklearn import datasets
3
+ from KNN import KNN_st
4
+ from SVC import SVC_st
5
+ from Logit import Logit_st
6
+ from Decision_tree import Decision_tree_st
7
+ from Random_forest import random_forest_st
8
+ from Naive_bayes import naive_bayes_st
9
+ from Ada_boost import ada_boost_st
10
+ from Linear_regression import linear_regression_st
11
+ from SVR import SVR_st
12
+ from Perceptron import perceptron_st
13
+ from k_mean_clustering import k_mean_clustering_st, plot
14
+ from PCA import PCA_st
15
+ from ICA import ICA_st
16
+ from Agglomerative_clustering import agglomerative_clustering_st
17
+ from LDA import LDA_st
18
+
19
+ st.write('''
20
+ # **Machine Learning**
21
+
22
+ Esta DEMO tiene por objetivo mostrar de manera did谩ctica algunos de los algoritmos
23
+ que m谩s frecuentemente se utilizan en **Machine Learning**. As铆, la biblioteca de
24
+ `sklearn` la podr铆amos separar en 2 grandes grupos, los cuales se encuentran demarcados
25
+ en funci贸n del objetivo que se pretende conseguir.
26
+
27
+ - **Supervised Learning**
28
+ - **Unsupervised Learning**
29
+
30
+ ''')
31
+
32
+ task = st.sidebar.selectbox('Tipo de algoritmo:', options=['Supervised Learning', 'Unsupervised Learning'])
33
+
34
+ # ----------------------------------------Supervised Learning-------------------------------
35
+ if task == 'Supervised Learning':
36
+ st.write('''
37
+ #
38
+ ## **Supervised Learning**
39
+
40
+ **Supervised learning** consiste en aprender sobre la relaci贸n entre dos conjuntos de datos:
41
+ - Las observaciones (X)
42
+ - La variable externa (y), sobre la cual generalmente se pretende predecir (target o label)
43
+
44
+ Todos los estimadores de la biblioteca de sklearn tiene implementado el m茅todo
45
+ `fit(X, y)` para ajustar el algoritmo a los datos y el m茅todo `predict(X)` para
46
+ etiquetar las observaciones X.
47
+
48
+ **Classification and regression**
49
+
50
+ Si la tarea sobre la predicci贸n consiste en clasificar las observaciones en
51
+ un numero finito de "etiquetas" (en otras palabras, nombrar el objeto mostrado),
52
+ entonces se dice que estamos hablando de una tarea de **Clasificaci贸n**.
53
+ Por otro lado, si la predicci贸n es sobre una variable continua, entonces estamos
54
+ hablando de una tarea de **Regresi贸n**.
55
+ ''')
56
+
57
+ type = st.sidebar.radio('Objetivo del algoritmo:', options=['Classification', 'Regression'])
58
+ if type == 'Classification':
59
+ dataset_selected = None
60
+
61
+ # Seleccionamos la base de datos (estas son para clcasificacion)
62
+ with st.expander('Base de datos'):
63
+ class_sets = ['iris', 'digits', 'breast cancer', 'wine']
64
+ dataset_name = st.selectbox('Escoja una base de datos', options=class_sets)
65
+ if dataset_name == 'iris':
66
+ dataset_selected = datasets.load_iris()
67
+ st.write(f'{dataset_selected.DESCR}')
68
+ elif dataset_name == 'digits':
69
+ dataset_selected = datasets.load_digits()
70
+ st.write(f'{dataset_selected.DESCR}')
71
+ elif dataset_name == 'breast cancer':
72
+ dataset_selected = datasets.load_breast_cancer()
73
+ st.write(f'{dataset_selected.DESCR}')
74
+ elif dataset_name == 'wine':
75
+ dataset_selected = datasets.load_wine()
76
+ st.write(f'{dataset_selected.DESCR}')
77
+
78
+
79
+ alg_selected = st.sidebar.selectbox('Algoritmo:', ['SVC (Support Vector Classification)',
80
+ 'KNN (K Nearest Neighborns)',
81
+ 'Logistic Regression',
82
+ 'Decision Tree',
83
+ 'Random Forest',
84
+ 'Naive Bayes',
85
+ 'Ada Boost'])
86
+ # seleccionar el algoritmo
87
+ if alg_selected == 'KNN (K Nearest Neighborns)': algorithm = KNN_st(dataset_selected)
88
+ elif alg_selected == 'SVC (Support Vector Classification)': algorithm = SVC_st(dataset_selected)
89
+ elif alg_selected == 'Logistic Regression': algorithm = Logit_st(dataset_selected)
90
+ elif alg_selected == 'Decision Tree': algorithm = Decision_tree_st(dataset_selected)
91
+ elif alg_selected == 'Random Forest': algorithm = random_forest_st(dataset_selected)
92
+ elif alg_selected == 'Naive Bayes': algorithm = naive_bayes_st(dataset_selected)
93
+ elif alg_selected == 'Ada Boost': algorithm = ada_boost_st(dataset_selected)
94
+
95
+
96
+ with st.expander('Explicacion del algoritmo'):
97
+ if alg_selected == 'KNN (K Nearest Neighborns)': algorithm.desc
98
+ elif alg_selected == 'SVC (Support Vector Classification)': algorithm.desc
99
+ elif alg_selected == 'Logistic Regression': algorithm.desc
100
+ elif alg_selected == 'Decision Tree': algorithm.desc
101
+ elif alg_selected == 'Random Forest': algorithm.desc
102
+ elif alg_selected == 'Naive Bayes': algorithm.desc
103
+ elif alg_selected == 'Ada Boost': algorithm.desc
104
+
105
+
106
+ with st.expander('Ajustes de parametros'):
107
+ if alg_selected == 'KNN (K Nearest Neighborns)': algorithm.params()
108
+ elif alg_selected == 'SVC (Support Vector Classification)': algorithm.params()
109
+ elif alg_selected == 'Logistic Regression': algorithm.params()
110
+ elif alg_selected == 'Decision Tree': algorithm.params()
111
+ elif alg_selected == 'Random Forest': algorithm.params()
112
+ elif alg_selected == 'Naive Bayes': pass
113
+ elif alg_selected == 'Ada Boost': algorithm.params()
114
+
115
+ with st.expander('Resultados'):
116
+ if alg_selected == 'KNN (K Nearest Neighborns)': algorithm.solve()
117
+ elif alg_selected == 'SVC (Support Vector Classification)': algorithm.solve()
118
+ elif alg_selected == 'Logistic Regression': algorithm.solve()
119
+ elif alg_selected == 'Decision Tree': algorithm.solve()
120
+ elif alg_selected == 'Random Forest': algorithm.solve()
121
+ elif alg_selected == 'Naive Bayes': algorithm.solve()
122
+ elif alg_selected == 'Ada Boost': algorithm.solve()
123
+
124
+ with st.expander('Visualizacion'):
125
+ c = st.container()
126
+ if alg_selected == 'KNN (K Nearest Neighborns)': c.pyplot(algorithm.visualization())
127
+ elif alg_selected == 'SVC (Support Vector Classification)': c.pyplot(algorithm.visualization())
128
+ elif alg_selected == 'Logistic Regression': c.pyplot(algorithm.visualization())
129
+ elif alg_selected == 'Decision Tree': c.pyplot(algorithm.visualization())
130
+ elif alg_selected == 'Random Forest': c.pyplot(algorithm.visualization())
131
+ elif alg_selected == 'Naive Bayes': c.pyplot(algorithm.visualization())
132
+ elif alg_selected == 'Ada Boost': c.pyplot(algorithm.visualization())
133
+
134
+
135
+
136
+ elif type == 'Regression':
137
+ dataset_selected = None
138
+
139
+ # Seleccionamos la base de datos (estas son para Regresiones)
140
+ with st.expander('Base de datos'):
141
+ class_sets = ['diabetes', 'boston']
142
+ dataset_name = st.selectbox('Escoja una base de datos', options=class_sets)
143
+ if dataset_name == 'diabetes':
144
+ dataset_selected = datasets.load_diabetes()
145
+ st.write(f'{dataset_selected.DESCR}')
146
+ elif dataset_name == 'boston':
147
+ dataset_selected = datasets.load_boston()
148
+ st.write(f'{dataset_selected.DESCR}')
149
+
150
+
151
+ alg_selected = st.sidebar.selectbox('Algoritmo:', ['Linear Regression',
152
+ 'SVR (Support Vector Regression)',
153
+ 'Perceptron'])
154
+ # seleccionar el algoritmo
155
+ if alg_selected == 'Linear Regression': algorithm = linear_regression_st(dataset_selected)
156
+ elif alg_selected == 'SVR (Support Vector Regression)': algorithm = SVR_st(dataset_selected)
157
+ elif alg_selected == 'Perceptron': algorithm = perceptron_st(dataset_selected)
158
+
159
+ with st.expander('Explicacion del algoritmo'):
160
+ if alg_selected == 'Linear Regression': algorithm.desc
161
+ elif alg_selected == 'SVR (Support Vector Regression)': algorithm.desc
162
+ elif alg_selected == 'Perceptron': algorithm.desc
163
+
164
+ with st.expander('Ajustes de parametros'):
165
+ if alg_selected == 'Linear Regression': pass
166
+ elif alg_selected == 'SVR (Support Vector Regression)': algorithm.params()
167
+ elif alg_selected == 'Perceptron': pass
168
+
169
+ with st.expander('Resultados'):
170
+ if alg_selected == 'Linear Regression': algorithm.solve()
171
+ elif alg_selected == 'SVR (Support Vector Regression)': algorithm.solve()
172
+ elif alg_selected == 'Perceptron': algorithm.solve()
173
+
174
+ with st.expander('Visualizaci贸n'):
175
+ c = st.container()
176
+ if alg_selected == 'Linear Regression': c.pyplot(algorithm.visualization())
177
+ elif alg_selected == 'SVR (Support Vector Regression)': c.pyplot(algorithm.visualization())
178
+ elif alg_selected == 'Perceptron': c.pyplot(algorithm.visualization())
179
+
180
+
181
+ # ------------------------------------Unsupervised learning-----------------------------------
182
+
183
+ elif task == 'Unsupervised Learning':
184
+ st.write('''
185
+ #
186
+ ## **Unsupervised Learning**
187
+
188
+ **Unsupervised learning**: Para este tipo de aprendizaje los datos no vienen
189
+ con un objetivo (**target**). De esta manera, lo que se busca es descubrir los
190
+ grupos con mayores caracter铆sticas similares (**clustering**) o determinar
191
+ la distribuci贸n de los datos en el espacio (luego si esta distribuci贸n se
192
+ encuentra en muchas dimensiones, la podemos reducir a 2 o 3 con fin de poder
193
+ visualizar los datos)
194
+
195
+ ''')
196
+ alg_selected = st.sidebar.selectbox('Algoritmo:', ['K-means Clustering',
197
+ 'Agglomerative Clustering',
198
+ 'PCA (Principal Component Analysis)',
199
+ 'ICA (Independent Component Analysis)',
200
+ 'LDA (Linear Discrimination Analysis)'])
201
+ dataset_selected = None
202
+
203
+ # Seleccionamos la base de datos (todas las bases sirven)
204
+ with st.expander('Base de datos'):
205
+ class_sets = ['iris', 'digits', 'breast cancer', 'diabetes', 'wine', 'boston']
206
+ dataset_name = st.selectbox('Escoja una base de datos', options=class_sets)
207
+ if dataset_name == 'iris':
208
+ dataset_selected = datasets.load_iris()
209
+ st.write(f'{dataset_selected.DESCR}')
210
+ elif dataset_name == 'digits':
211
+ dataset_selected = datasets.load_digits()
212
+ st.write(f'{dataset_selected.DESCR}')
213
+ elif dataset_name == 'breast cancer':
214
+ dataset_selected = datasets.load_breast_cancer()
215
+ st.write(f'{dataset_selected.DESCR}')
216
+ elif dataset_name == 'diabetes':
217
+ dataset_selected = datasets.load_diabetes()
218
+ st.write(f'{dataset_selected.DESCR}')
219
+ elif dataset_name == 'wine':
220
+ dataset_selected = datasets.load_wine()
221
+ st.write(f'{dataset_selected.DESCR}')
222
+ elif dataset_name == 'boston':
223
+ dataset_selected = datasets.load_boston()
224
+ st.write(f'{dataset_selected.DESCR}')
225
+
226
+
227
+
228
+ # seleccionar el algoritmo
229
+ if alg_selected == 'K-means Clustering': algorithm = k_mean_clustering_st(dataset_selected)
230
+ elif alg_selected == 'PCA (Principal Component Analysis)': algorithm = PCA_st(dataset_selected)
231
+ elif alg_selected == 'ICA (Independent Component Analysis)': algorithm = ICA_st(dataset_selected)
232
+ elif alg_selected == 'Agglomerative Clustering': algorithm = agglomerative_clustering_st(dataset_selected)
233
+ elif alg_selected == 'LDA (Linear Discrimination Analysis)': algorithm = LDA_st(dataset_selected)
234
+
235
+ with st.expander('Explicacion del algoritmo'):
236
+ if alg_selected == 'K-means Clustering': algorithm.desc
237
+ elif alg_selected == 'PCA (Principal Component Analysis)': algorithm.desc
238
+ elif alg_selected == 'ICA (Independent Component Analysis)': algorithm.desc
239
+ elif alg_selected == 'Agglomerative Clustering': algorithm.desc
240
+ elif alg_selected == 'LDA (Linear Discrimination Analysis)': algorithm.desc
241
+
242
+ with st.expander('Ajustes de parametros'):
243
+ if alg_selected == 'K-means Clustering': algorithm.params()
244
+ elif alg_selected == 'PCA (Principal Component Analysis)': algorithm.params()
245
+ elif alg_selected == 'ICA (Independent Component Analysis)': algorithm.params()
246
+ elif alg_selected == 'Agglomerative Clustering': algorithm.params()
247
+ elif alg_selected == 'LDA (Linear Discrimination Analysis)': algorithm.params()
248
+
249
+ with st.expander('Resultados'):
250
+ c = st.container()
251
+ if alg_selected == 'K-means Clustering': c.pyplot(algorithm.solve())
252
+ elif alg_selected == 'PCA (Principal Component Analysis)': c.pyplot(algorithm.solve())
253
+ elif alg_selected == 'ICA (Independent Component Analysis)': c.pyplot(algorithm.solve())
254
+ elif alg_selected == 'Agglomerative Clustering': c.pyplot(algorithm.solve())
255
+ elif alg_selected == 'LDA (Linear Discrimination Analysis)' and (dataset_selected.DESCR).split()[1] not in ['_diabetes_dataset:', '_boston_dataset:']:
256
+ c.pyplot(algorithm.solve())
257
+ else:
258
+ st.write('''
259
+ **Nota:** LDA solo puede resolver problemas de clasificaci贸n, ya que require de las
260
+ **etiquetas** de las observaciones para funcionar''')