darly9991 committed on
Commit
b028bf3
·
verified ·
1 Parent(s): 149c786

Upload 10 files

Browse files
Files changed (10) hide show
  1. app.py +10 -0
  2. best_pipe.pkl +3 -0
  3. eda.py +268 -0
  4. num_col.txt +1 -0
  5. output.png +0 -0
  6. prediction.py +78 -0
  7. requirements.txt +9 -0
  8. water.jpg +0 -0
  9. water1.jpeg +0 -0
  10. water_quality.csv +0 -0
app.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import eda
3
+ import prediction
4
+
5
# Sidebar navigation: the selected entry decides which page module renders.
page = st.sidebar.selectbox('Pilih halaman ', ('EDA', 'Prediction'))

# Any choice other than 'EDA' falls through to the prediction page.
render_page = eda.run if page == 'EDA' else prediction.run
render_page()
best_pipe.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:daed01dfbcddab5d570ea0ed6da1f34b7de61df4673c349844f1b5951005a8f9
3
+ size 136083
eda.py ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ from PIL import Image
5
+ import streamlit as st
6
+ import pandas as pd
7
+ import random
8
+ import matplotlib.pyplot as plt
9
+ from bokeh.plotting import figure
10
+ from bokeh.models import ColumnDataSource, HoverTool
11
+ from bokeh.models.formatters import NumeralTickFormatter
12
+ from bokeh.plotting import figure
13
+ from bokeh.models import HoverTool, NumeralTickFormatter
14
+ from bokeh.layouts import gridplot
15
+ from bokeh.palettes import Category20
16
+ import seaborn as sns
17
+
18
+ # Fungsi EDA Scatterplot dengan Regresi menggunakan Bokeh
19
def scatter_plot_regression(df, x_col='Year', y_col='Salary', x_label='Tahun', y_label='Gaji Rata-rata', title='Diagram Sebaran dengan Garis Regresi'):
    """Draw a Bokeh scatter plot of two columns with a fitted regression line.

    A first-degree polynomial is fitted with ``np.polyfit`` and drawn in red
    over the points; the finished figure is rendered with ``st.bokeh_chart``.

    Args:
        df: DataFrame holding both columns.
        x_col: Column plotted on the x axis.
        y_col: Column plotted on the y axis.
        x_label: Axis and tooltip label for the x values.
        y_label: Axis, tooltip and legend label for the y values.
        title: Figure title.
    """
    # Fit only on rows where both values are present: a NaN in either input
    # makes the least-squares fit fail or come back as NaN.
    complete = df[x_col].notna() & df[y_col].notna()
    x_data = df.loc[complete, x_col].values
    y_data = df.loc[complete, y_col].values

    # Degree-1 fit, evaluated at the observed x values.
    coefficients = np.polyfit(x_data, y_data, 1)
    y_pred = np.polyval(coefficients, x_data)

    source = ColumnDataSource(data={x_col: x_data, y_col: y_data, 'regression_line': y_pred})

    p = figure(title=title, x_axis_label=x_label, y_axis_label=y_label, width=800, height=400,
               tools="pan,box_zoom,wheel_zoom,reset,save")

    # scatter() is the marker call histogram_boxplot already uses; newer Bokeh
    # releases deprecate circle() with a size argument in its favour.
    p.scatter(x_col, y_col, source=source, size=8, color="navy", alpha=0.5, legend_label=y_label)
    p.line(x_col, 'regression_line', source=source, line_width=2, line_color="red", legend_label="Garis Regresi")

    # Brace-quoted field names (@{name}) also work for column names that
    # contain spaces or other special characters.
    hover = HoverTool()
    hover.tooltips = [(x_label, f"@{{{x_col}}}"), (y_label, f"@{{{y_col}}}")]
    p.add_tools(hover)

    p.xaxis.axis_label_text_font_style = "bold"
    p.yaxis.axis_label_text_font_style = "bold"

    # Plain integers on the y axis instead of scientific notation.
    p.yaxis.formatter = NumeralTickFormatter(format="0")

    p.legend.location = "top_left"
    p.legend.click_policy = "hide"

    st.bokeh_chart(p)
59
+
60
+ # Fungsi EDA untuk IQR plot & Histogram 1 Kolom
61
def histogram_boxplot(df, nama_kolom, judul="Contoh Bar Plot"):
    """Render a histogram and a box plot of one column side by side.

    The histogram uses 20 bins with randomly chosen Category20 colours. The
    box plot draws the quartiles, whiskers at the most extreme values lying
    within 1.5 * IQR of the quartiles, and every point beyond the whiskers as
    a red marker. Both figures are placed in one grid row and rendered with
    ``st.bokeh_chart``.

    Args:
        df: DataFrame holding the column.
        nama_kolom: Name of the column to plot.
        judul: Title prefix for both figures and x-axis label of the histogram.
    """
    # np.histogram raises ValueError when the data contains NaN, whereas the
    # quantile/min/max calls below already skip NaN; drop them once up front
    # so both plots describe the same values.
    data_kolom = df[nama_kolom].dropna()

    # --- Histogram ---
    hist, edges = np.histogram(data_kolom, bins=20)

    # One random palette colour per bar.
    colors = random.choices(Category20[20], k=len(hist))

    p1 = figure(title=f"{judul} (Histogram)", tools="save,hover", background_fill_color="#fafafa",
                width=600, height=400, tooltips=[("Jumlah", "@top"), ("Interval", "@left{0.00} hingga @right{0.00}")],
                x_axis_label=judul, y_axis_label="Frequency")

    p1.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:],
            fill_color=colors, line_color="white", alpha=0.7)

    # --- Box plot statistics ---
    q1 = data_kolom.quantile(0.25)
    q2 = data_kolom.quantile(0.50)
    q3 = data_kolom.quantile(0.75)
    iqr = q3 - q1

    # Whiskers stop at the furthest observations still inside the 1.5 * IQR fences.
    lower_whisker = data_kolom[data_kolom >= (q1 - 1.5 * iqr)].min()
    upper_whisker = data_kolom[data_kolom <= (q3 + 1.5 * iqr)].max()

    outliers = data_kolom[(data_kolom > upper_whisker) | (data_kolom < lower_whisker)]

    # The summary statistics are baked into the tooltip text as fixed strings.
    p2 = figure(title=f"{judul} (Boxplot)", tools="save,hover", background_fill_color="#fafafa",
                width=400, height=400, tooltips=[("Nilai", "@y"), ("Q1", f"{q1:.2f}"),
                                                 ("Q2 (Median)", f"{q2:.2f}"), ("Q3", f"{q3:.2f}"),
                                                 ("Lower Whisker", f"{lower_whisker:.2f}"),
                                                 ("Upper Whisker", f"{upper_whisker:.2f}")])

    # Stems, the quartile box, and a zero-height bar acting as the median line.
    p2.segment(1, lower_whisker, 1, q1, line_color="black")
    p2.segment(1, q3, 1, upper_whisker, line_color="black")
    p2.vbar(1, 0.7, q1, q3, fill_color="navy", line_color="black")
    p2.vbar(1, 0.7, q2, q2, line_color="black")

    # Whisker caps.
    p2.rect(1, lower_whisker, 0.2, 0.01, line_color="black")
    p2.rect(1, upper_whisker, 0.2, 0.01, line_color="black")

    # Outliers.
    p2.scatter([1]*len(outliers), outliers, size=6, color="red", fill_alpha=0.6)

    # The box plot carries no axis labels or ticks.
    p2.xaxis.axis_label = ""
    p2.yaxis.axis_label = ""
    p2.xaxis.visible = False
    p2.yaxis.visible = False

    # Histogram axis typography.
    p1.xaxis.axis_label_text_font_style = "bold"
    p1.xaxis.axis_label_text_font_size = "10pt"
    p1.xaxis.major_label_text_font_style = "bold"
    p1.xaxis.major_label_text_font_size = "8pt"
    p1.yaxis.axis_label_text_font_style = "bold"
    p1.yaxis.axis_label_text_font_size = "10pt"
    p1.yaxis.major_label_text_font_style = "bold"
    p1.yaxis.major_label_text_font_size = "8pt"

    # No grid lines on either figure.
    p1.grid.grid_line_color = None
    p2.grid.grid_line_color = None

    # Keep histogram tick labels out of scientific notation.
    p1.yaxis.formatter.use_scientific = False
    p1.xaxis.formatter.use_scientific = False

    grid = gridplot([[p1, p2]])

    st.bokeh_chart(grid)
137
+
138
+
139
def _evaluate_distribution(col):
    """Return mean, median, first mode and a rough shape label for a column.

    The column is labelled 'Normal Distribution' when both the median and the
    mode lie within 5% of the mean; otherwise the sign of (mean - median)
    selects 'Positive Skewness' or 'Negative Skewness'.
    """
    mean = col.mean()
    median = col.median()
    mode = col.mode()[0]  # first mode when several values tie

    # Measure the 5% band against |mean|: dividing by a signed mean labelled
    # every negative-mean column as normal and divided by zero for mean == 0.
    tolerance = 0.05 * abs(mean)
    if abs(mean - median) <= tolerance and abs(mean - mode) <= tolerance:
        evaluasi = 'Normal Distribution'
    elif mean > median:
        evaluasi = 'Positive Skewness'
    else:
        evaluasi = 'Negative Skewness'
    return pd.Series({'Mean': mean, 'Median': median, 'Mode': mode, 'Evaluasi': evaluasi})


def _show_correlation(df):
    """Show the pairwise correlation table and a heatmap of the numeric columns."""
    # Restrict to numeric columns so a text column cannot make corr() fail.
    cor = df.select_dtypes(include=[np.number]).corr()

    # Long format: one row per (variable, variable) pair.
    cor_df = cor.stack().reset_index()
    cor_df.columns = ['Variable 1', 'Variable 2', 'Correlation']

    # NOTE(review): this compares the correlation coefficient itself with 0.05,
    # which looks like a p-value rule applied to the wrong quantity (a
    # coefficient of 1.0 is labelled 'Tidak ada korelasi'). Behaviour is kept
    # as-is; confirm the intended rule before changing it.
    cor_df['Interpretation'] = np.where(cor_df['Correlation'] < 0.05, 'Ada korelasi', 'Tidak ada korelasi')

    st.dataframe(cor_df)

    st.write("Heatmap Korelasi:")
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(cor, annot=True, fmt=".2f", cmap='coolwarm', ax=ax)
    st.pyplot(fig)


def _plot_pie_chart(df, column):
    """Render a pie chart of the value counts of ``column``.

    Not called by ``run()``; kept available as a helper.
    """
    value_counts = df[column].value_counts()

    fig, ax = plt.subplots(figsize=(8, 8))
    ax.pie(value_counts, labels=value_counts.index, autopct='%1.1f%%', startangle=90, colors=plt.cm.Paired.colors)
    ax.set_title(f'Pie Chart for {column}')

    st.pyplot(fig)


def run():
    """Render the EDA page of the water-quality Streamlit app.

    Shows, in order: the banner image, the raw data from ``water_quality.csv``,
    per-column descriptive statistics, four scatter plots with regression
    lines, an on-demand correlation table and heatmap, histogram/box plots for
    aluminium and arsenic, the feature-selection image, and an optional extra
    image behind a button.
    """
    st.title('Water Quality')
    st.header('Water Quality Data Visualization', divider='gray')

    # Close the image file as soon as Streamlit has consumed it.
    with Image.open('water.jpg') as image:
        st.image(image, caption = 'Water Pollution (wallpapers.com)', channels='RGB')
    st.divider()

    st.header('Dataframe', divider='gray')
    df = pd.read_csv('water_quality.csv')
    st.dataframe(df)
    st.divider()

    # Mean / median / mode and shape label for every numeric column.
    numerical_cols = df.select_dtypes(include=[np.number])
    result = numerical_cols.apply(_evaluate_distribution)
    st.header('Descriptive Statistics', divider='gray')
    st.write(result)
    st.divider()

    # (x column, y column, x label, y label, title); the section header reuses
    # the plot title so the two can no longer drift apart.
    correlation_sections = (
        ('bacteria', 'viruses', 'Bacteria', 'Viruses', 'Correlation Bacteria and Viruses'),
        ('arsenic', 'selenium', 'Arsenic', 'Selenium', 'Correlation Arsenic and Selenium'),
        ('lead', 'copper', 'Lead', 'copper', 'Correlation Lead and Copper'),
        ('chloramine', 'bacteria', 'Chloramine', 'Bacteria', 'Correlation Chloramine and Bacteria'),
    )
    for x_col, y_col, x_label, y_label, title in correlation_sections:
        st.header(title, divider='gray')
        scatter_plot_regression(df, x_col=x_col, y_col=y_col, x_label=x_label, y_label=y_label, title=title)
        st.divider()

    # The correlation view is computed only on request.
    if st.button('Hitung Korelasi'):
        _show_correlation(df)

    st.divider()

    st.header('Aluminium Bar Plot', divider='gray')
    histogram_boxplot(df, 'aluminium', judul="Aluminium Plot")
    st.divider()

    st.header('Arsenic Bar Plot', divider='gray')
    histogram_boxplot(df, 'arsenic', judul="Arsenic Plot")
    st.divider()

    st.header('Feature Selection', divider='gray')
    with Image.open('output.png') as image2:
        st.image(image2, caption = 'Feature Selection', channels='RGB')

    # Extra image shown only after the button is pressed.
    if st.button('Show Image'):
        image_path = 'water1.jpeg'
        st.image(image_path, caption='Nickel Processing Factory (Smelter) in the Obi Island Industrial Area, North Maluku Province.', channels='RGB')


if __name__ == '__main__':
    run()
num_col.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ["aluminium", "ammonia", "arsenic", "cadmium", "chloramine", "copper", "bacteria", "viruses", "lead", "nitrites", "perchlorate", "radium", "selenium", "silver", "uranium"]
output.png ADDED
prediction.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import pickle
5
+ import json
6
+
7
+ #Load All files
8
+ #Load model
9
+
10
# Trained pipeline used for inference.
# NOTE: pickle.load executes code embedded in the file, so best_pipe.pkl must
# only ever come from a trusted source.
with open('best_pipe.pkl', 'rb') as model_file:
    best_pipe = pickle.load(model_file)

# Feature names, stored as a JSON list.
with open('num_col.txt', 'r') as columns_file:
    num_col = json.load(columns_file)
14
+
15
def run():
    """Render the prediction page: an input form and the model's rating.

    One numeric input is created per feature name in ``num_col``. The current
    values are shown as a one-row DataFrame; once the form is submitted they
    are passed to ``best_pipe.predict`` and the result is reported as
    'Dangerous' (prediction 0) or 'Safe' (any other value).
    """
    st.title('Water Quality Data Input Form')

    with st.form('Water_Quality'):
        # Drive the inputs from the loaded feature list instead of fifteen
        # hand-written widgets, so form fields and DataFrame columns cannot
        # drift apart. Labels are the capitalised feature names.
        inputs = {
            column: st.number_input(column.capitalize(), value=0.0, min_value=0.0)
            for column in num_col
        }

        submitted = st.form_submit_button('Submit')

    # Single-row frame in the shape the pipeline is called with.
    data_inf = pd.DataFrame([inputs])
    st.dataframe(data_inf)

    if submitted:
        y_pred_inf = best_pipe.predict(data_inf)

        # predict() returns one value per row; read the only row explicitly
        # rather than relying on the truthiness of a one-element array.
        if y_pred_inf[0] == 0:
            st.write('## Rating : Dangerous')
        else:
            st.write('## Rating : Safe')
73
+
74
+ # Menjalankan aplikasi
75
# Entry point when the module is executed directly. This module defines
# run(), not main(); calling main() raised NameError.
if __name__ == "__main__":
    run()
77
+
78
+
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ seaborn
4
+ numpy
5
+ matplotlib
6
+ scipy
7
+ scikit-learn==1.2.2
8
+ Pillow
9
+ plotly
water.jpg ADDED
water1.jpeg ADDED
water_quality.csv ADDED
The diff for this file is too large to render. See raw diff