|
|
|
import streamlit as st |
|
|
|
from numpy import vstack |
|
from pandas import read_csv |
|
from sklearn.preprocessing import LabelEncoder |
|
from sklearn.metrics import accuracy_score |
|
from torch.utils.data import Dataset |
|
from torch.utils.data import DataLoader |
|
from torch.utils.data import random_split |
|
from torch import Tensor |
|
from torch.nn import Linear |
|
from torch.nn import ReLU |
|
from torch.nn import Sigmoid |
|
from torch.nn import Module |
|
from torch.optim import SGD |
|
from torch.nn import BCELoss |
|
from torch.nn.init import kaiming_uniform_ |
|
from torch.nn.init import xavier_uniform_ |
|
|
|
class CSVDataset(Dataset):
    """Binary-classification dataset backed by a headerless CSV file.

    All columns except the last are treated as float32 features; the
    last column is a class label, encoded to integers and stored as a
    float32 column vector so it can feed BCE-style losses directly.
    """

    def __init__(self, path):
        frame = read_csv(path, header=None)
        values = frame.values
        # every column but the last is an input feature
        self.x = values[:, :-1].astype('float32')
        # encode the label column ('g'/'b' etc.) to 0..n-1 integers
        labels = LabelEncoder().fit_transform(values[:, -1])
        # shape (n_rows, 1) to match the model's (batch, 1) output
        self.y = labels.astype('float32').reshape((-1, 1))

    def __len__(self):
        """Number of rows in the dataset."""
        return self.x.shape[0]

    def __getitem__(self, idx):
        """Return [features, label] for row *idx*."""
        return [self.x[idx], self.y[idx]]

    def get_splits(self, n_test=0.33):
        """Randomly split into (train, test) subsets.

        *n_test* is the fraction of rows assigned to the test split.
        """
        n_rows = len(self.x)
        test_size = round(n_test * n_rows)
        return random_split(self, [n_rows - test_size, test_size])
|
|
|
class MLP(Module):
    """Multilayer perceptron for binary classification.

    Architecture: n_inputs -> 10 -> 8 -> 1, with ReLU activations on the
    hidden layers and a final sigmoid so the output is a probability in
    (0, 1), suitable for BCELoss.
    """

    def __init__(self, n_inputs):
        super(MLP, self).__init__()
        # first hidden layer; He (kaiming) init pairs with ReLU
        self.hidden1 = Linear(n_inputs, 10)
        # FIX: keyword was misspelled 'nonlinearty', a TypeError at init
        kaiming_uniform_(self.hidden1.weight, nonlinearity='relu')
        self.act1 = ReLU()
        # second hidden layer
        self.hidden2 = Linear(10, 8)
        kaiming_uniform_(self.hidden2.weight, nonlinearity='relu')
        self.act2 = ReLU()
        # output layer; Xavier init suits the sigmoid activation
        self.hidden3 = Linear(8, 1)
        xavier_uniform_(self.hidden3.weight)
        self.act3 = Sigmoid()

    def forward(self, x):
        """Map a (batch, n_inputs) tensor to a (batch, 1) probability."""
        x = self.hidden1(x)
        x = self.act1(x)
        x = self.hidden2(x)
        # FIX: was `self.act(2)` — no such attribute, and it dropped x
        x = self.act2(x)
        x = self.hidden3(x)
        x = self.act3(x)
        return x
|
|
|
def prepare_data(path):
    """Load the CSV at *path* and return (train, test) DataLoaders.

    Training batches are shuffled 32-row minibatches; the test loader
    uses one large unshuffled batch for evaluation.
    """
    data = CSVDataset(path)
    train_set, test_set = data.get_splits()
    train_loader = DataLoader(train_set, batch_size=32, shuffle=True)
    test_loader = DataLoader(test_set, batch_size=1024, shuffle=False)
    return train_loader, test_loader
|
|
|
def train_model(train_dl, model, n_epochs=100, lr=0.01, momentum=0.9):
    """Train *model* on batches from *train_dl* with SGD and BCE loss.

    Args:
        train_dl: DataLoader yielding (inputs, targets) batches; targets
            must be float tensors shaped like the model output.
        model: network to optimize; must emit values in (0, 1) (BCELoss).
        n_epochs: full passes over the data (default 100, the value that
            was previously hard-coded).
        lr: SGD learning rate (previously hard-coded 0.01).
        momentum: SGD momentum term (previously hard-coded 0.9).
    """
    criterion = BCELoss()
    optimizer = SGD(model.parameters(), lr=lr, momentum=momentum)
    for epoch in range(n_epochs):
        # the batch index was never used, so enumerate() is dropped
        for inputs, targets in train_dl:
            # clear gradients accumulated by the previous step
            optimizer.zero_grad()
            yhat = model(inputs)
            loss = criterion(yhat, targets)
            loss.backward()
            optimizer.step()
|
|
|
def evaluate_model(test_dl, model):
    """Return the classification accuracy of *model* over *test_dl*.

    Each sigmoid output is thresholded at 0.5 (via round) and compared
    against the true 0/1 label; the result is the fraction correct.
    """
    predictions, actuals = list(), list()
    for inputs, targets in test_dl:
        yhat = model(inputs)
        # detach from the graph and threshold at 0.5
        yhat = yhat.detach().numpy().round()
        # labels as a (batch, 1) column to match the predictions
        actual = targets.numpy().reshape((-1, 1))
        predictions.append(yhat)
        actuals.append(actual)
    # FIX: was vstack(preictions) — NameError at runtime
    predictions, actuals = vstack(predictions), vstack(actuals)
    # FIX: was accuracy_score(actuals, prediction) — NameError at runtime
    acc = accuracy_score(actuals, predictions)
    return acc
|
|
|
def predict(row, model):
    """Run *model* on a single data row (a plain list of floats).

    Returns the raw model output as a (1, 1) numpy array.
    """
    # wrap the row in a batch dimension so the model sees (1, n_features)
    data = Tensor([row])
    output = model(data)
    return output.detach().numpy()
|
|
|
def main():
    """Download the ionosphere data, train the MLP, and demo a prediction."""
    path = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/ionosphere.csv'
    train_dl, test_dl = prepare_data(path)
    print(len(train_dl.dataset), len(test_dl.dataset))
    # the ionosphere dataset has 34 input features
    model = MLP(34)
    train_model(train_dl, model)
    acc = evaluate_model(test_dl, model)
    print('Accuracy: %.3f' % acc)
    # one known row from the dataset
    row = [1,0,0.99539,-0.05889,0.85243,0.02306,0.83398,-0.37708,1,0.03760,0.85243,-0.17755,0.59755,-0.44945,0.60536,-0.38223,0.84356,-0.38542,0.58212,-0.32192,0.56971,-0.29674,0.36946,-0.47357,0.56811,-0.51171,0.41078,-0.46168,0.21266,-0.34090,0.42267,-0.54487,0.18641,-0.45300]
    yhat = predict(row, model)
    # FIX: predict returns a (1, 1) array; %-formatting a size-1 array
    # with %.3f/%d is an error on modern NumPy — extract the scalar first
    prob = float(yhat[0][0])
    print('Predicted: %.3f (class=%d)' % (prob, round(prob)))


if __name__ == '__main__':
    main()