import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st
from bokeh.layouts import gridplot
from bokeh.models import ColumnDataSource, HoverTool, NumeralTickFormatter
from bokeh.palettes import Category20
from bokeh.plotting import figure
from PIL import Image


def scatter_plot_regression(df, x_col='Year', y_col='Salary', x_label='Tahun',
                            y_label='Gaji Rata-rata',
                            title='Diagram Sebaran dengan Garis Regresi'):
    """Render a Bokeh scatter plot with a least-squares regression line.

    The chart is sent to the Streamlit page with ``st.bokeh_chart``; nothing
    is returned.

    Args:
        df: DataFrame that holds both columns.
        x_col: Name of the column plotted on the x axis.
        y_col: Name of the column plotted on the y axis.
        x_label: X-axis label, also used as the hover caption for x values.
        y_label: Y-axis label, also used as the scatter legend entry and the
            hover caption for y values.
        title: Plot title.

    Rows in which either value is missing, non-numeric or infinite are left
    out, because ``np.polyfit`` cannot fit data that contains NaN. When no
    usable row remains a Streamlit warning is shown instead of a chart. When
    fewer than two distinct x values remain the points are still drawn, but
    the regression line is omitted because a slope cannot be determined.
    """
    # Coerce to float so stray text cells become NaN, then keep finite pairs.
    x_all = pd.to_numeric(df[x_col], errors='coerce').to_numpy(
        dtype=float, na_value=np.nan)
    y_all = pd.to_numeric(df[y_col], errors='coerce').to_numpy(
        dtype=float, na_value=np.nan)
    usable = np.isfinite(x_all) & np.isfinite(y_all)
    x_data, y_data = x_all[usable], y_all[usable]

    if x_data.size == 0:
        st.warning(
            f"Tidak ada data numerik yang valid pada kolom '{x_col}' "
            f"dan '{y_col}'."
        )
        return

    # New figure with title and axis labels.
    p = figure(title=title, x_axis_label=x_label, y_axis_label=y_label,
               width=800, height=400,
               tools="pan,box_zoom,wheel_zoom,reset,save")

    # Scatter points (same glyph method the box plot in this file uses).
    source = ColumnDataSource(data={x_col: x_data, y_col: y_data})
    points = p.scatter(x_col, y_col, source=source, size=8, color="navy",
                       alpha=0.5, legend_label=y_label)

    # Regression line: a straight line only needs its two end points, and
    # keeping it out of the scatter source avoids clashing with a data
    # column that happens to share the helper key's name.
    if x_data.size >= 2 and np.ptp(x_data) > 0:
        slope, intercept = np.polyfit(x_data, y_data, 1)
        line_x = np.array([x_data.min(), x_data.max()])
        p.line(line_x, slope * line_x + intercept, line_width=2,
               line_color="red", legend_label="Garis Regresi")

    # Hover shows the data values of the points only. The @{name} form is
    # required by Bokeh for column names containing spaces or punctuation.
    hover = HoverTool(
        renderers=[points],
        tooltips=[(x_label, f"@{{{x_col}}}"), (y_label, f"@{{{y_col}}}")],
    )
    p.add_tools(hover)

    # Axis label styling.
    p.xaxis.axis_label_text_font_style = "bold"
    p.yaxis.axis_label_text_font_style = "bold"

    # Show y ticks as plain numbers rather than scientific notation.
    p.yaxis.formatter = NumeralTickFormatter(format="0")

    # Legend placement; clicking an entry hides that glyph.
    p.legend.location = "top_left"
    p.legend.click_policy = "hide"

    st.bokeh_chart(p)
# EDA helper: histogram and IQR box plot for a single column
def histogram_boxplot(df, nama_kolom, judul="Contoh Bar Plot"):
    """Show a 20-bin histogram and a box plot of one column side by side.

    Args:
        df: DataFrame that holds the column.
        nama_kolom: Name of the column to plot.
        judul: Base title; also used as the histogram's x-axis label.

    Missing values are dropped first because ``np.histogram`` cannot derive
    a bin range from NaN. If nothing is left, a Streamlit warning is shown
    and no chart is drawn. The charts are rendered with ``st.bokeh_chart``;
    nothing is returned.
    """
    data_kolom = df[nama_kolom].dropna()
    if data_kolom.empty:
        st.warning(
            f"Kolom '{nama_kolom}' tidak memiliki data untuk divisualisasikan."
        )
        return

    # Histogram
    hist, edges = np.histogram(data_kolom, bins=20)

    # Bar colours are drawn at random from the palette on every call.
    colors = random.choices(Category20[20], k=len(hist))

    p1 = figure(title=f"{judul} (Histogram)", tools="save,hover",
                background_fill_color="#fafafa", width=600, height=400,
                tooltips=[("Jumlah", "@top"),
                          ("Interval", "@left{0.00} hingga @right{0.00}")],
                x_axis_label=judul, y_axis_label="Frequency")
    p1.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:],
            fill_color=colors, line_color="white", alpha=0.7)

    # Box plot statistics: whiskers reach the most extreme observations that
    # lie within 1.5 * IQR of the quartiles; everything beyond is an outlier.
    q1 = data_kolom.quantile(0.25)
    q2 = data_kolom.quantile(0.50)
    q3 = data_kolom.quantile(0.75)
    iqr = q3 - q1
    lower_whisker = data_kolom[data_kolom >= (q1 - 1.5 * iqr)].min()
    upper_whisker = data_kolom[data_kolom <= (q3 + 1.5 * iqr)].max()
    outliers = data_kolom[(data_kolom > upper_whisker)
                          | (data_kolom < lower_whisker)]

    p2 = figure(title=f"{judul} (Boxplot)", tools="save,hover",
                background_fill_color="#fafafa", width=400, height=400,
                tooltips=[("Nilai", "@y"),
                          ("Q1", f"{q1:.2f}"),
                          ("Q2 (Median)", f"{q2:.2f}"),
                          ("Q3", f"{q3:.2f}"),
                          ("Lower Whisker", f"{lower_whisker:.2f}"),
                          ("Upper Whisker", f"{upper_whisker:.2f}")])

    # Whisker stems, the Q1-Q3 box, and a zero-height bar as the median line.
    p2.segment(1, lower_whisker, 1, q1, line_color="black")
    p2.segment(1, q3, 1, upper_whisker, line_color="black")
    p2.vbar(x=1, width=0.7, top=q3, bottom=q1, fill_color="navy",
            line_color="black")
    p2.vbar(x=1, width=0.7, top=q2, bottom=q2, line_color="black")

    # Whisker caps
    p2.rect(1, lower_whisker, 0.2, 0.01, line_color="black")
    p2.rect(1, upper_whisker, 0.2, 0.01, line_color="black")

    # Outliers
    p2.scatter([1] * len(outliers), outliers, size=6, color="red",
               fill_alpha=0.6)

    # The box plot carries no axis labels or ticks.
    p2.xaxis.axis_label = ""
    p2.yaxis.axis_label = ""
    p2.xaxis.visible = False
    p2.yaxis.visible = False

    # Histogram axis label and tick styling.
    for axis in (p1.xaxis, p1.yaxis):
        axis.axis_label_text_font_style = "bold"
        axis.axis_label_text_font_size = "10pt"
        axis.major_label_text_font_style = "bold"
        axis.major_label_text_font_size = "8pt"

    # Remove grid lines.
    p1.grid.grid_line_color = None
    p2.grid.grid_line_color = None

    # Plain numbers instead of scientific notation on the histogram axes.
    p1.yaxis.formatter.use_scientific = False
    p1.xaxis.formatter.use_scientific = False

    # Lay both plots out in one row and render them.
    grid = gridplot([[p1, p2]])
    st.bokeh_chart(grid)


def evaluate_distribution(col, tolerance=0.05):
    """Summarise one numeric column and label the shape of its distribution.

    Args:
        col: Numeric Series.
        tolerance: Largest distance of the median and of the mode from the
            mean, relative to ``abs(mean)``, for the column to be labelled
            'Normal Distribution'.

    Returns:
        A Series with the keys 'Mean', 'Median', 'Mode' (the first mode when
        there are several) and 'Evaluasi'. 'Evaluasi' is 'Normal
        Distribution', 'Positive Skewness' (mean > median), 'Negative
        Skewness' (otherwise), or 'No Data' when the column has no non-null
        values.
    """
    values = col.dropna()
    if values.empty:
        return pd.Series({'Mean': np.nan, 'Median': np.nan, 'Mode': np.nan,
                          'Evaluasi': 'No Data'})

    mean = values.mean()
    median = values.median()
    mode = values.mode().iloc[0]  # first mode when several exist

    # The distance is measured relative to |mean|: dividing by a signed mean
    # makes every negative-mean column pass the check, and a zero mean makes
    # the ratio undefined, so that case falls back to exact equality.
    scale = abs(mean)
    if scale == 0:
        looks_normal = median == mean and mode == mean
    else:
        looks_normal = (abs(mean - median) / scale <= tolerance
                        and abs(mean - mode) / scale <= tolerance)

    if looks_normal:
        evaluasi = 'Normal Distribution'
    elif mean > median:
        evaluasi = 'Positive Skewness'
    else:
        evaluasi = 'Negative Skewness'

    return pd.Series({'Mean': mean, 'Median': median, 'Mode': mode,
                      'Evaluasi': evaluasi})


def correlation_table(df, threshold=0.05):
    """Compute the correlation matrix of the numeric columns in long form.

    Args:
        df: Source DataFrame; non-numeric columns are ignored so that text
            columns cannot make ``DataFrame.corr`` raise.
        threshold: Smallest absolute coefficient labelled 'Ada korelasi'.

    Returns:
        ``(cor, cor_df)``: the square correlation matrix, and a DataFrame
        with one row per ordered pair and the columns 'Variable 1',
        'Variable 2', 'Correlation' and 'Interpretation'. Pairs whose
        coefficient is undefined (NaN) are left out of ``cor_df``.
    """
    cor = df.select_dtypes(include=[np.number]).corr()

    names = cor.columns.to_numpy()
    size = len(names)
    cor_df = pd.DataFrame({
        'Variable 1': np.repeat(names, size),
        'Variable 2': np.tile(names, size),
        'Correlation': cor.to_numpy(dtype=float).ravel(),
    })
    cor_df = cor_df.dropna(subset=['Correlation']).reset_index(drop=True)

    # The strength of a correlation is the magnitude of the coefficient, so
    # strong negative and strong positive values are treated alike.
    # NOTE(review): 0.05 is carried over from the original code, where it was
    # compared against the signed coefficient; it looks like a p-value
    # significance level - confirm the intended cut-off for |r|.
    cor_df['Interpretation'] = np.where(
        cor_df['Correlation'].abs() >= threshold,
        'Ada korelasi',
        'Tidak ada korelasi',
    )
    return cor, cor_df


def tampilkan_korelasi(df):
    """Show the pairwise correlation table and a heatmap in Streamlit."""
    cor, cor_df = correlation_table(df)
    if cor_df.empty:
        st.warning("Tidak ada kolom numerik untuk dihitung korelasinya.")
        return

    st.dataframe(cor_df)

    st.write("Heatmap Korelasi:")
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(cor, annot=True, fmt=".2f", cmap='coolwarm', ax=ax)
    st.pyplot(fig)
    # Close the figure so reruns of the script do not pile up open figures.
    plt.close(fig)


def plot_pie_chart(df, column):
    """Show a pie chart of the value counts of ``column`` in Streamlit."""
    value_counts = df[column].value_counts()

    fig, ax = plt.subplots(figsize=(8, 8))
    ax.pie(value_counts, labels=value_counts.index, autopct='%1.1f%%',
           startangle=90, colors=plt.cm.Paired.colors)
    ax.set_title(f'Pie Chart for {column}')

    st.pyplot(fig)
    # Close the figure so reruns of the script do not pile up open figures.
    plt.close(fig)


def run():
    """Build the 'Water Quality' Streamlit page.

    Reads 'water_quality.csv' and the image files 'water.jpg', 'output.png'
    and 'water1.jpeg' from the current working directory.
    """
    # Title and sub header
    st.title('Water Quality')
    st.header('Water Quality Data Visualization', divider='gray')

    # Banner image
    with Image.open('water.jpg') as image:
        st.image(image, caption='Water Pollution (wallpapers.com)',
                 channels='RGB')
    st.divider()

    # Raw data
    st.header('Dataframe', divider='gray')
    df = pd.read_csv('water_quality.csv')
    st.dataframe(df)
    st.divider()

    # Mean / median / mode and a distribution label for each numeric column
    numerical_cols = df.select_dtypes(include=[np.number])
    result = numerical_cols.apply(evaluate_distribution)
    st.header('Descriptive Statistics', divider='gray')
    st.write(result)
    st.divider()

    # Scatter plots with a regression line:
    # (section title, x column, y column, x label, y label)
    scatter_sections = (
        ('Correlation Bacteria and Viruses',
         'bacteria', 'viruses', 'Bacteria', 'Viruses'),
        ('Correlation Arsenic and Selenium',
         'arsenic', 'selenium', 'Arsenic', 'Selenium'),
        ('Correlation Lead and Copper',
         'lead', 'copper', 'Lead', 'copper'),
        ('Correlation Chloramine and Bacteria',
         'chloramine', 'bacteria', 'Chloramine', 'Bacteria'),
    )
    for section_title, x_col, y_col, x_label, y_label in scatter_sections:
        st.header(section_title, divider='gray')
        scatter_plot_regression(df, x_col=x_col, y_col=y_col,
                                x_label=x_label, y_label=y_label,
                                title=section_title)
        st.divider()

    # Correlation table and heatmap on demand
    if st.button('Hitung Korelasi'):
        tampilkan_korelasi(df)
    st.divider()

    st.header('Aluminium Bar Plot', divider='gray')
    histogram_boxplot(df, 'aluminium', judul="Aluminium Plot")
    st.divider()

    st.header('Arsenic Bar Plot', divider='gray')
    histogram_boxplot(df, 'arsenic', judul="Arsenic Plot")
    st.divider()

    # Feature selection image
    st.header('Feature Selection', divider='gray')
    with Image.open('output.png') as image2:
        st.image(image2, caption='Feature Selection', channels='RGB')

    # Extra image shown on demand
    if st.button('Show Image'):
        # Replace this with the path to your own image file.
        image_path = 'water1.jpeg'
        st.image(image_path,
                 caption='Nickel Processing Factory (Smelter) in the Obi Island Industrial Area, North Maluku Province.',
                 channels='RGB')


if __name__ == '__main__':
    run()