Spaces:

darly9991
/

Water_Quality

Sleeping

App Files Files Community

darly9991 committed on Jun 6, 2024

Commit

b028bf3

verified ·

1 Parent(s): 149c786

Upload 10 files

Browse files

Files changed (10) hide show

app.py +10 -0
best_pipe.pkl +3 -0
eda.py +268 -0
num_col.txt +1 -0
output.png +0 -0
prediction.py +78 -0
requirements.txt +9 -0
water.jpg +0 -0
water1.jpeg +0 -0
water_quality.csv +0 -0

app.py ADDED Viewed

	@@ -0,0 +1,10 @@

+import streamlit as st
+import eda
+import prediction
+page = st.sidebar.selectbox('Pilih halaman ', ('EDA', 'Prediction'))
+if page == 'EDA':
+    eda.run()
+else:
+    prediction.run()

best_pipe.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:daed01dfbcddab5d570ea0ed6da1f34b7de61df4673c349844f1b5951005a8f9
+size 136083

eda.py ADDED Viewed

	@@ -0,0 +1,268 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+from PIL import Image
+import streamlit as st
+import pandas as pd
+import random
+import matplotlib.pyplot as plt
+from bokeh.plotting import figure
+from bokeh.models import ColumnDataSource, HoverTool
+from bokeh.models.formatters import NumeralTickFormatter
+from bokeh.plotting import figure
+from bokeh.models import HoverTool, NumeralTickFormatter
+from bokeh.layouts import gridplot
+from bokeh.palettes import Category20
+import seaborn as sns
+# Fungsi EDA Scatterplot dengan Regresi menggunakan Bokeh
+def scatter_plot_regression(df, x_col='Year', y_col='Salary', x_label='Tahun', y_label='Gaji Rata-rata', title='Diagram Sebaran dengan Garis Regresi'):
+    # Konversi data menjadi array numpy
+    x_data = df[x_col].values
+    y_data = df[y_col].values
+    # Membuat model regresi linier
+    model = np.polyfit(x_data, y_data, 1)
+    y_pred = np.polyval(model, x_data)
+    # Buat ColumnDataSource
+    source = ColumnDataSource(data={x_col: x_data, y_col: y_data, 'regression_line': y_pred})
+    # Buat plot baru dengan judul dan label sumbu
+    p = figure(title=title, x_axis_label=x_label, y_axis_label=y_label, width=800, height=400,
+               tools="pan,box_zoom,wheel_zoom,reset,save")
+    # Tambahkan diagram sebaran
+    p.circle(x_col, y_col, source=source, size=8, color="navy", alpha=0.5, legend_label=y_label)
+    # Tambahkan garis regresi
+    p.line(x_col, 'regression_line', source=source, line_width=2, line_color="red", legend_label="Garis Regresi")
+    # Tambahkan alat hover untuk menampilkan nilai data
+    hover = HoverTool()
+    hover.tooltips = [(x_label, f"@{x_col}"), (y_label, f"@{y_col}")]
+    p.add_tools(hover)
+    # Sesuaikan gaya label sumbu
+    p.xaxis.axis_label_text_font_style = "bold"
+    p.yaxis.axis_label_text_font_style = "bold"
+    # Atur format untuk sumbu Y agar menampilkan nilai tanpa notasi ilmiah
+    p.yaxis.formatter = NumeralTickFormatter(format="0")
+    # Sesuaikan plot
+    p.legend.location = "top_left"
+    p.legend.click_policy = "hide"
+    # Tampilkan plot menggunakan st.bokeh_chart()
+    st.bokeh_chart(p)
+# Fungsi EDA untuk IQR plot & Histogram 1 Kolom
+def histogram_boxplot(df, nama_kolom, judul="Contoh Bar Plot"):
+    # Ekstrak data kolom
+    data_kolom = df[nama_kolom]
+    # Plot histogram
+    hist, edges = np.histogram(data_kolom, bins=20)
+    # Generate random colors for the bars
+    colors = random.choices(Category20[20], k=len(hist))
+    p1 = figure(title=f"{judul} (Histogram)", tools="save,hover", background_fill_color="#fafafa",
+                width=600, height=400, tooltips=[("Jumlah", "@top"), ("Interval", "@left{0.00} hingga @right{0.00}")],
+                x_axis_label=judul, y_axis_label="Frequency")
+    p1.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:],
+            fill_color=colors, line_color="white", alpha=0.7)
+    # Box plot
+    q1 = data_kolom.quantile(0.25)
+    q2 = data_kolom.quantile(0.50)
+    q3 = data_kolom.quantile(0.75)
+    iqr = q3 - q1
+    lower_whisker = data_kolom[data_kolom >= (q1 - 1.5 * iqr)].min()
+    upper_whisker = data_kolom[data_kolom <= (q3 + 1.5 * iqr)].max()
+    outliers = data_kolom[(data_kolom > upper_whisker) | (data_kolom < lower_whisker)]
+    p2 = figure(title=f"{judul} (Boxplot)", tools="save,hover", background_fill_color="#fafafa",
+                width=400, height=400, tooltips=[("Nilai", "@y"), ("Q1", f"{q1:.2f}"),
+                                                 ("Q2 (Median)", f"{q2:.2f}"), ("Q3", f"{q3:.2f}"),
+                                                 ("Lower Whisker", f"{lower_whisker:.2f}"),
+                                                 ("Upper Whisker", f"{upper_whisker:.2f}")])
+    # Menambahkan elemen diagram kotak
+    p2.segment(1, lower_whisker, 1, q1, line_color="black")
+    p2.segment(1, q3, 1, upper_whisker, line_color="black")
+    p2.vbar(1, 0.7, q1, q3, fill_color="navy", line_color="black")
+    p2.vbar(1, 0.7, q2, q2, line_color="black")
+    # Whiskers
+    p2.rect(1, lower_whisker, 0.2, 0.01, line_color="black")
+    p2.rect(1, upper_whisker, 0.2, 0.01, line_color="black")
+    # Outliers
+    p2.scatter([1]*len(outliers), outliers, size=6, color="red", fill_alpha=0.6)
+    # Menghapus label sumbu dan tanda sumbu pada boxplot
+    p2.xaxis.axis_label = ""
+    p2.yaxis.axis_label = ""
+    p2.xaxis.visible = False
+    p2.yaxis.visible = False
+    # Set gaya label sumbu dan tanda sumbu
+    p1.xaxis.axis_label_text_font_style = "bold"
+    p1.xaxis.axis_label_text_font_size = "10pt"
+    p1.xaxis.major_label_text_font_style = "bold"
+    p1.xaxis.major_label_text_font_size = "8pt"
+    p1.yaxis.axis_label_text_font_style = "bold"
+    p1.yaxis.axis_label_text_font_size = "10pt"
+    p1.yaxis.major_label_text_font_style = "bold"
+    p1.yaxis.major_label_text_font_size = "8pt"
+    # Menghapus garis grid
+    p1.grid.grid_line_color = None
+    p2.grid.grid_line_color = None
+    # Mengatur formatter sumbu agar tidak menggunakan notasi ilmiah
+    p1.yaxis.formatter.use_scientific = False
+    p1.xaxis.formatter.use_scientific = False
+    # Menata plot dalam grid
+    grid = gridplot([[p1, p2]])
+    # Tampilkan plot menggunakan st.bokeh_chart()
+    st.bokeh_chart(grid)
+def run():
+    # Membuat judul
+    st.title('Water Quality')
+    # Membuat Sub Header
+    st.header('Water Quality Data Visualization', divider='gray')
+    # Menambahkan Gambar
+    image = Image.open('water.jpg')
+    st.image(image, caption = 'Water Pollution (wallpapers.com)', channels='RGB')
+    # Menambahkan Divider
+    st.divider()
+    # Menampilkan Dataframe
+    st.header('Dataframe', divider='gray')
+    df = pd.read_csv('water_quality.csv')
+    st.dataframe(df)
+    st.divider()
+    # Display descriptive statistics for all numeric columns
+    # Fungsi Untuk Menghitung Mean, Median, Mode dan Mengevaluasi Distribusi
+    def evaluate_distribution(col):
+        mean = col.mean()
+        median = col.median()
+        mode = col.mode()[0]  # Ambil mode pertama jika ada beberapa mode
+        if (abs(mean - median) / mean <= 0.05 and abs(mean - mode) / mean <= 0.05):
+            evaluasi = 'Normal Distribution'
+        elif mean > median:
+            evaluasi = 'Positive Skewness'
+        else:
+            evaluasi = 'Negative Skewness'
+        return pd.Series({'Mean': mean, 'Median': median, 'Mode': mode, 'Evaluasi': evaluasi})
+    # Memilih Hanya Kolom Numerik
+    numerical_cols = df.select_dtypes(include=[np.number])
+    # Terapkan Fungsi ke Setiap Kolom Numerik dalam DataFrame
+    result = numerical_cols.apply(evaluate_distribution)
+    st.header('Descriptive Statistics', divider='gray')
+    st.write(result)
+    st.divider()
+    st.header('Correlation Bacteria and Viruses', divider='gray')
+    scatter_plot_regression(df, x_col='bacteria', y_col='viruses', x_label='Bacteria', y_label='Viruses', title='Correlation Bacteria and Viruses')
+    st.divider()
+    st.header('Correlation Arsenic and Selenium', divider='gray')
+    scatter_plot_regression(df, x_col='arsenic', y_col='selenium', x_label='Arsenic', y_label='Selenium', title='Correlation Arsenic and Selenium')
+    st.divider()
+    st.header('Correlation Lead and Copper', divider='gray')
+    scatter_plot_regression(df, x_col='lead', y_col='copper', x_label='Lead', y_label='copper', title='Correlation Lead and Copper')
+    st.divider()
+    st.header('Correlation Lead and Copper', divider='gray')
+    scatter_plot_regression(df, x_col='chloramine', y_col='bacteria', x_label='Chloramine', y_label='Bacteria', title='Correlation Chloramine and Bacteria')
+    st.divider()
+    # Fungsi untuk menghitung korelasi dan menampilkan hasilnya di Streamlit
+    def tampilkan_korelasi(df):
+        # Hitung korelasi
+        cor = df.corr()
+        # Membuat DataFrame dari Matriks Korelasi
+        cor_df = pd.DataFrame(cor.stack(), columns=['Correlation'])
+        # Menambahkan Kolom Baris dan Kolom
+        cor_df.reset_index(inplace=True)
+        cor_df.columns = ['Variable 1', 'Variable 2', 'Correlation']
+        # Menambahkan Kolom Interpretasi
+        cor_df['Interpretation'] = np.where(cor_df['Correlation'] < 0.05, 'Ada korelasi', 'Tidak ada korelasi')
+        # Menampilkan DataFrame menggunakan Streamlit
+        st.dataframe(cor_df)
+        # Menampilkan heatmap korelasi
+        st.write("Heatmap Korelasi:")
+        fig, ax = plt.subplots(figsize=(10, 8))
+        sns.heatmap(cor, annot=True, fmt=".2f", cmap='coolwarm', ax=ax)
+        st.pyplot(fig)
+    # Implementasi Streamlit
+    def main():
+        # Tombol untuk menghitung dan menampilkan korelasi
+        if st.button('Hitung Korelasi'):
+            tampilkan_korelasi(df)
+    st.divider()
+    st.header('Aluminium Bar Plot', divider='gray')
+    histogram_boxplot(df, 'aluminium', judul="Aluminium Plot")
+    st.divider()
+    st.header('Arsenic Bar Plot', divider='gray')
+    histogram_boxplot(df, 'arsenic', judul="Arsenic Plot")
+    st.divider()
+    # Fungsi untuk membuat pie chart
+    def plot_pie_chart(df, column):
+        # Menghitung distribusi nilai dalam kolom
+        value_counts = df[column].value_counts()
+        # Set up the matplotlib figure
+        fig, ax = plt.subplots(figsize=(8, 8))
+        # Create the pie chart
+        ax.pie(value_counts, labels=value_counts.index, autopct='%1.1f%%', startangle=90, colors=plt.cm.Paired.colors)
+        # Set title
+        plt.title(f'Pie Chart for {column}')
+        # Display the plot
+        st.pyplot(fig)
+    # Menambahkan Gambar
+    image2 = Image.open('output.png')
+    st.header('Feature Selection', divider='gray')
+    st.image(image2, caption = 'Feature Selection', channels='RGB')
+    # Menampilkan gambar menggunakan tombol
+    if st.button('Show Image'):
+        # Gantilah 'image2.png' dengan jalur ke file gambar Anda
+        image_path = 'water1.jpeg'
+        st.image(image_path, caption='Nickel Processing Factory (Smelter) in the Obi Island Industrial Area, North Maluku Province.', channels='RGB')
+if __name__ == '__main__':
+   run()

num_col.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["aluminium", "ammonia", "arsenic", "cadmium", "chloramine", "copper", "bacteria", "viruses", "lead", "nitrites", "perchlorate", "radium", "selenium", "silver", "uranium"]

output.png ADDED Viewed

prediction.py ADDED Viewed

	@@ -0,0 +1,78 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+import pickle
+import json
+#Load All files
+#Load model
+with open('best_pipe.pkl', 'rb') as file_1:
+    best_pipe = pickle.load(file_1)
+with open('num_col.txt', 'r') as file_2:
+    num_col = json.load(file_2)
+def run():
+    st.title('Water Quality Data Input Form')
+    # Membuat form
+    with st.form('Water_Quality'):
+        aluminium = st.number_input('Aluminium', value=0.0, min_value=0.0)
+        ammonia = st.number_input('Ammonia', value=0.0, min_value=0.0)
+        arsenic = st.number_input('Arsenic', value=0.0, min_value=0.0)
+        cadmium = st.number_input('Cadmium', value=0.0, min_value=0.0)
+        chloramine = st.number_input('Chloramine', value=0.0, min_value=0.0)
+        copper = st.number_input('Copper', value=0.0, min_value=0.0)
+        bacteria = st.number_input('Bacteria', value=0.0, min_value=0.0)
+        viruses = st.number_input('Viruses', value=0.0, min_value=0.0)
+        lead = st.number_input('Lead', value=0.0, min_value=0.0)
+        nitrites = st.number_input('Nitrites', value=0.0, min_value=0.0)
+        perchlorate = st.number_input('Perchlorate', value=0.0, min_value=0.0)
+        radium = st.number_input('Radium', value=0.0, min_value=0.0)
+        selenium = st.number_input('Selenium', value=0.0, min_value=0.0)
+        silver = st.number_input('Silver', value=0.0, min_value=0.0)
+        uranium = st.number_input('Uranium', value=0.0, min_value=0.0)
+        # Membuat tombol submit
+        submitted = st.form_submit_button('Submit')
+    # Mengumpulkan data inputan ke dalam dictionary
+    data_inf = {
+        'aluminium': aluminium,
+        'ammonia': ammonia,
+        'arsenic': arsenic,
+        'cadmium': cadmium,
+        'chloramine': chloramine,
+        'copper': copper,
+        'bacteria': bacteria,
+        'viruses': viruses,
+        'lead': lead,
+        'nitrites': nitrites,
+        'perchlorate': perchlorate,
+        'radium': radium,
+        'selenium': selenium,
+        'silver': silver,
+        'uranium': uranium
+    }
+    # Mengubah dictionary menjadi DataFrame
+    data_inf = pd.DataFrame([data_inf])
+    # Menampilkan DataFrame
+    st.dataframe(data_inf)
+    # Memproses data setelah tombol submit ditekan
+    if submitted:
+        # Prediksi menggunakan model
+        y_pred_inf = best_pipe.predict(data_inf)
+        if y_pred_inf == 0:
+            st.write('## Rating : Dangerous')
+        else:
+            st.write('## Rating : Safe')
+# Menjalankan aplikasi
+if __name__ == "__main__":
+    main()

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+streamlit
+pandas
+seaborn
+numpy
+matplotlib
+scipy
+scikit-learn==1.2.2
+Pillow
+plotly

water.jpg ADDED Viewed

water1.jpeg ADDED Viewed

water_quality.csv ADDED Viewed

The diff for this file is too large to render. See raw diff