Spaces:
Sleeping
Sleeping
File size: 4,837 Bytes
42d0bac |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
import streamlit as st
import pandas as pd
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
class SVC_st:
    """Streamlit front-end for sklearn's SVC.

    Renders an explanatory Markdown/LaTeX description, hyper-parameter
    widgets, train/test metrics (accuracy + confusion matrix) and a 2-D
    decision-boundary plot for a dataset in sklearn "Bunch" form
    (``.data``, ``.target``, ``.target_names``).
    """

    def __init__(self, database, test_size=0.2):
        # database: sklearn Bunch-like object exposing .data, .target, .target_names
        self.database = database
        # fraction of samples held out for testing in train_test_split
        self.test_size = test_size
        # Markdown/LaTeX description rendered by the app. Kept in Spanish
        # (user-facing text); mojibake repaired and the second margin
        # condition corrected to "<= -1" so the three equations really do
        # collapse into y_i(w x_i - b) >= 1.
        self.desc = r'''
# **Support Vector Machine**
Este algoritmo tiene por objetivo la búsqueda de un hiperplano que segregue los datos atendiendo a estas dos condiciones:
$$
wx - b = 0
$$
$$
max \quad \frac{2}{||w||}
$$
**Linear model (2 categorías (1 y -1))**
$$
wx - b = 0
$$
$$
wx_{i} - b \geq 1 \quad si \quad y_{i} = 1
$$
$$
wx_{i} - b \leq -1 \quad si \quad y_{i} = -1
$$
**Estas 3 ecuaciones se resumen en la siguiente:**
$$
y_{i}(wx_{i} - b) \geq 1
$$
**Función de costos (loss)**
$$
loss = \lambda||w||^2 + \frac{1}{n} \sum_{i=1}^{n} max(0, 1-y_{i}(wx_{i}-b))
$$
De esta manera las **derivadas** en función de los parámetros siguen las siguientes reglas:
- si $y_{i}(xw - b) \geq 1$:
$$
\left[\begin{array}{ll} \frac{d_{loss}}{d_{w_{k}}} \\ \frac{d_{loss}}{db} \end{array} \right] = \left [\begin{array}{ll} 2 \lambda w_{k} \\ 0 \end{array} \right]
$$
- si $y_{i}(xw - b) < 1$:
$$
\left[\begin{array}{ll}\frac{d_{loss}}{d_{w_{k}}} \\ \frac{d_{loss}}{db} \end{array} \right] = \left[\begin{array}{ll} 2\lambda w_{k} - y_{i} \cdot x_{i} \\ y_{i} \end{array} \right]
$$
**Reglas de actualización (Gradient Descent)**
- Inicializar parámetros
- Iterar
    - Calcular loss
    - Calcular gradiente
    - Actualizar parámetros
$$
w = w - lr \cdot dw
$$
$$
b = b - lr \cdot db
$$
- Terminar de iterar
'''
        # Default hyper-parameters; overridden by the widgets in params()
        self.kernel = 'linear'
        self.gamma = 2
        self.degree = 3

    def params(self):
        """Render the hyper-parameter widgets and store the chosen values."""
        tipo = st.selectbox('Tipo de kernel', options=['linear',
                                                       'poly',
                                                       'rbf'])
        self.kernel = tipo
        # NOTE: sklearn ignores gamma for the 'linear' kernel; it only
        # affects 'poly' and 'rbf'.
        self.gamma = st.slider('Parametro gamma', 1, 10, 2)
        if tipo == 'poly':
            # degree is only meaningful for the polynomial kernel
            self.degree = st.slider('Cantidad de grados del polinomio', 1, 10, 3)

    def solve(self):
        """Train an SVC on a train/test split and show accuracy + confusion matrix."""
        self.X, self.y = self.database.data, self.database.target
        X_train, X_test, y_train, y_test = train_test_split(
            self.X, self.y, test_size=self.test_size, random_state=1234)
        # BUG FIX: degree was collected in params() but never passed to SVC,
        # so the polynomial-degree slider had no effect.
        self.sklearn_clf = svm.SVC(kernel=self.kernel, gamma=self.gamma,
                                   degree=self.degree, random_state=1234)
        self.sklearn_clf.fit(X_train, y_train)
        y_pred = self.sklearn_clf.predict(X_test)
        # sklearn convention is (y_true, y_pred)
        acc = accuracy_score(y_test, y_pred)
        c1, c2 = st.columns([4, 1])
        # percent formatting avoids the float artifact of np.round(acc, 2)*100
        # (e.g. 97.00000000000001%)
        c2.metric('Acierto', value=f'{acc:.1%}')
        # BUG FIX: arguments were swapped, transposing the matrix while both
        # axes were labeled with target_names. Rows = true, columns = predicted.
        df = pd.DataFrame(confusion_matrix(y_test, y_pred))
        labels = self.database.target_names
        df.columns = labels
        df.index = labels
        c1.write('**Confusion Matrix**')
        c1.dataframe(df)

    def visualization(self):
        """Train on two user-selected features and plot the 2-D decision regions."""
        n_features = int(self.database.data.shape[1])
        # 1-based feature indices chosen by the user for each axis
        self.x_feature = st.slider('Variables en eje x', 1, n_features, 1)
        self.y_feature = st.slider('Variables en eje y', 1, n_features, 2)
        self.X = np.c_[self.database.data[:, self.x_feature - 1:self.x_feature],
                       self.database.data[:, self.y_feature - 1:self.y_feature]]
        self.y = self.database.target
        X_train, X_test, y_train, y_test = train_test_split(
            self.X, self.y, test_size=self.test_size, random_state=1234)
        # BUG FIX: pass degree here too (see solve())
        self.sklearn_clf = svm.SVC(kernel=self.kernel, gamma=self.gamma,
                                   degree=self.degree, random_state=1234)
        self.sklearn_clf.fit(X_train, y_train)
        # Mesh over the feature plane, padded by 0.5 on every side
        x1_min, x1_max = self.X[:, 0].min() - 0.5, self.X[:, 0].max() + 0.5
        x2_min, x2_max = self.X[:, 1].min() - 0.5, self.X[:, 1].max() + 0.5
        h = 0.02  # grid step size
        x1_i = np.arange(x1_min, x1_max, h)
        x2_i = np.arange(x2_min, x2_max, h)
        x1_x1, x2_x2 = np.meshgrid(x1_i, x2_i)
        y_pred = self.sklearn_clf.predict(np.c_[x1_x1.ravel(), x2_x2.ravel()])
        y_pred = y_pred.reshape(x1_x1.shape)
        # BUG FIX: plt.figure(1, ...) reused one global figure, accumulating
        # artists across Streamlit reruns; create a fresh figure instead.
        fig = plt.figure(figsize=(12, 8))
        plt.pcolormesh(x1_x1, x2_x2, y_pred, cmap=plt.cm.Paired)
        plt.scatter(self.X[:, 0], self.X[:, 1], c=self.y, edgecolors='k', cmap=plt.cm.Paired)
        plt.xlim(x1_x1.min(), x1_x1.max())
        plt.ylim(x2_x2.min(), x2_x2.max())
        return fig
|