Kolpitor committed on
Commit
5fa8ed1
·
1 Parent(s): c33411e

Delete streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +0 -128
streamlit_app.py DELETED
@@ -1,128 +0,0 @@
import os
import subprocess

# Environment bootstrap: build XGBoost from source and install the Python
# packages the app imports further down.
#
# Each command runs in its own child process, so a separate `cd` command cannot
# affect the commands that follow it. The working directory is therefore passed
# explicitly to every step that has to run inside the cloned repository.
#
# NOTE(review): these steps execute every time the script runs; consider moving
# them into the deployment environment's dependency configuration instead.

_XGBOOST_DIR = "xgboost"
_XGBOOST_PYTHON_DIR = os.path.join(_XGBOOST_DIR, "python-package")


def _run_setup_step(command, cwd=None):
    """Run one setup command on a best-effort basis.

    Args:
        command: Program and arguments as a list (no shell is involved).
        cwd: Directory to run the command in, or None for the current one.

    Returns:
        The command's exit status, or -1 when it could not be started
        (for example because the executable or ``cwd`` does not exist).
    """
    try:
        return subprocess.run(command, cwd=cwd, check=False).returncode
    except OSError:
        # Failures are tolerated, exactly as the ignored os.system() statuses were.
        return -1


_run_setup_step(["git", "clone", "--recursive", "https://github.com/dmlc/xgboost"])

# Native build, executed inside the cloned repository.
_run_setup_step(["sudo", "cp", "make/minimum.mk", "./config.mk"], cwd=_XGBOOST_DIR)
_run_setup_step(["sudo", "make", "-j4"], cwd=_XGBOOST_DIR)
_run_setup_step(["sh", "build.sh"], cwd=_XGBOOST_DIR)

# Python bindings, installed from the repository's python-package directory.
_run_setup_step(["python", "setup.py", "install"], cwd=_XGBOOST_PYTHON_DIR)

# Plotting and modelling dependencies.
_run_setup_step(["pip", "install", "graphviz"])
_run_setup_step(["pip", "install", "python-pydot"])
_run_setup_step(["pip", "install", "python-pydot-ng"])
_run_setup_step(["pip", "install", "-U", "scikit-learn", "scipy", "matplotlib"])
13
-
14
- from collections import namedtuple
15
- import altair as alt
16
- import math
17
- import streamlit as st
18
- import pandas
19
- import numpy
20
- import xgboost
21
- import graphviz
22
- from sklearn.metrics import mean_squared_error
23
- from sklearn.model_selection import train_test_split
24
- import matplotlib.pyplot
25
-
# Bare string expression: Streamlit's "magic" renders it as a Markdown heading.
"""
# MLOPS
"""


# Hyper-parameter controls. Each slider is (label, min, max, default); the
# chosen values are forwarded to XGBoost further down in the script.
max_depth_input = st.slider("Max depth", 1, 100, 5)
# XGBoost only accepts colsample_bytree in the half-open range (0, 1], so the
# lower bound sits just above zero instead of at 0.0 (which fails at fit time).
colsample_bytree_input = st.slider("Colsample bytree", 0.01, 1.0, 0.5)
learning_rate_input = st.slider("Learning rate", 0.0, 1.0, 0.2)
alpha_input = st.slider("Alpha", 1, 100, 10)
n_estimators_input = st.slider("n estimators", 1, 100, 20)

# City whose rows are kept for training; the first entry is preselected.
city_input = st.selectbox(
    'Which city do you want to predict rain ?',
    (
        "Canberra",
        "Albury",
        "Penrith",
        "Sydney",
        "MountGinini",
        "Bendigo",
        "Brisbane",
        "Portland",
    ),
    index=0,
)
46
-
# Load the complete weather table from the CSV file in the working directory.
dataset = pandas.read_csv('weatherAUS.csv')

# Distinct values are collected from the *unfiltered* table so that the integer
# codes derived from them later do not depend on the selected city.
location_dataset, wind_dataset, date_dataset = (
    dataset[column_name].unique()
    for column_name in ("Location", "WindGustDir", "Date")
)

# Keep only the rows of the selected city (all other rows are dropped in place).
rows_of_other_cities = dataset.index[dataset['Location'] != city_input]
dataset.drop(rows_of_other_cities, inplace=True)

# Positional indices of the columns that get re-encoded below. "Location" is
# not encoded: the frame has already been reduced to a single city.
column_position = dataset.columns.get_loc
i_RainTomorrow = column_position("RainTomorrow")
i_WindGustDir = column_position("WindGustDir")
i_Date = column_position("Date")

# NOTE(review): the two label values are sampled from fixed row positions
# (8 and 0) of the filtered frame. This presumes row 8 is a rainy day and
# row 0 is not for every selectable city -- confirm, or compare against the
# literal labels instead.
yes = dataset.iat[8, i_RainTomorrow]
no = dataset.iat[0, i_RainTomorrow]
61
-
# Encode the label and the two categorical columns column-wise instead of
# scanning the lookup arrays once per row.

# RainTomorrow becomes True where the cell equals the sampled `yes` label and
# False everywhere else (missing labels included).
dataset["RainTomorrow"] = (dataset["RainTomorrow"] == yes).astype("bool")

# WindGustDir: 0 for a missing direction, otherwise 1 + the direction's
# position in `wind_dataset`. Positions are counted over the full array, so a
# missing entry inside `wind_dataset` still occupies its slot.
wind_codes = {
    direction: position + 1
    for position, direction in enumerate(wind_dataset)
    if not pandas.isna(direction)
}
dataset["WindGustDir"] = (
    dataset["WindGustDir"].map(wind_codes).fillna(0).astype("int")
)

# Date: the position of the date in `date_dataset`.
date_codes = {day: position for position, day in enumerate(date_dataset)}
dataset["Date"] = dataset["Date"].map(date_codes).astype("int")
79
-
# Discard the per-time-of-day wind/temperature readings and the RainToday flag.
dataset.drop(
    columns=[
        "WindDir9am",
        "WindDir3pm",
        "WindSpeed9am",
        "WindSpeed3pm",
        "Temp9am",
        "Temp3pm",
        "RainToday",
    ],
    inplace=True,
)
# Remove every row that still contains a missing value. dropna() is used rather
# than drop(labels, 0, ...): recent pandas releases no longer accept `axis` as
# a positional argument of DataFrame.drop.
dataset.dropna(axis=0, how="any", inplace=True)
82
-
# Collapse each pair of 9am/3pm readings into a single daily mean. The new
# columns are appended in the order Humidity, Pressure, Cloud, which the
# positional column selection further down relies on.
for daily_name, morning_name, afternoon_name in (
    ("Humidity", "Humidity9am", "Humidity3pm"),
    ("Pressure", "Pressure9am", "Pressure3pm"),
    ("Cloud", "Cloud9am", "Cloud3pm"),
):
    dataset[daily_name] = (dataset[morning_name] + dataset[afternoon_name]) / 2

# The individual readings are no longer needed once the means exist.
dataset.drop(
    columns=[
        "Humidity9am",
        "Humidity3pm",
        "Pressure9am",
        "Pressure3pm",
        "Cloud9am",
        "Cloud3pm",
    ],
    inplace=True,
)
96
-
# Feature/target split by column position. The mask spans exactly 13 columns
# and leaves out positions 0, 1 and 4; position 4 is the regression target.
feature_mask = [position not in (0, 1, 4) for position in range(13)]
x = dataset.iloc[:, feature_mask]
y = dataset.iloc[:, 4]

# DMatrix over the full data, used by the cross-validation further down.
data_dmatrix = xgboost.DMatrix(data=x, label=y)

# Hold out 20% of the rows for evaluation, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=123
)

# Regressor configured from the slider values.
regressor_settings = {
    "colsample_bytree": colsample_bytree_input,
    "learning_rate": learning_rate_input,
    "max_depth": max_depth_input,
    "alpha": alpha_input,
    "n_estimators": n_estimators_input,
}
xg_reg = xgboost.XGBRegressor(**regressor_settings)
xg_reg.fit(X_train, y_train)

# Root-mean-squared error on the held-out rows, shown in the app.
preds = xg_reg.predict(X_test)
rmse = numpy.sqrt(mean_squared_error(y_test, preds))
st.write("RMSE: %f" % (rmse))
111
-
# Booster parameters taken from the same slider values as the regressor above.
params = dict(
    colsample_bytree=colsample_bytree_input,
    learning_rate=learning_rate_input,
    max_depth=max_depth_input,
    alpha=alpha_input,
)

# 3-fold cross-validation on the full DMatrix: at most 50 boosting rounds,
# stopping early after 10 rounds without improvement, scored by RMSE.
cv_results = xgboost.cv(
    params=params,
    dtrain=data_dmatrix,
    num_boost_round=50,
    nfold=3,
    metrics="rmse",
    early_stopping_rounds=10,
    as_pandas=True,
    seed=123,
)

# Show the mean test RMSE of the last recorded round.
last_round_test_rmse = cv_results["test-rmse-mean"].tail(1)
st.write(last_round_test_rmse)

# Train a 10-round booster on all rows; this rebinds `xg_reg`.
xg_reg = xgboost.train(params=params, dtrain=data_dmatrix, num_boost_round=10)
121
-
122
- #xgboost.plot_tree(xg_reg,num_trees=0)
123
- #matplotlib.pyplot.rcParams['figure.figsize'] = [200, 200]
124
- #matplotlib.pyplot.show()
125
-
126
- #xgboost.plot_importance(xg_reg)
127
- #matplotlib.pyplot.rcParams['figure.figsize'] = [5, 5]
128
- #matplotlib.pyplot.show()