nextdrought / pages /15_📈_Table_Data_Clean.py
Peter Yang
back to visualization
8015fc4
raw
history blame
3.88 kB
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import io
import base64
# Configure this page to use Streamlit's "wide" layout.
st.set_page_config(layout="wide")
def _align_to_first_header(first: pd.DataFrame, other: pd.DataFrame) -> pd.DataFrame:
    """Return *other* with its columns selected by name in *first*'s order.

    Raises:
        ValueError: if *other* has a column that is absent from *first*
            (the same condition under which assigning the intersection as
            the new header would fail with a length mismatch).
    """
    shared = first.columns.intersection(other.columns)
    if len(shared) != len(other.columns):
        raise ValueError("Columns do not match the header of the first file.")
    # Select by name rather than overwriting the labels positionally, so a
    # file whose columns are merely in a different order is not mislabelled.
    return other[list(shared)]


def _merge_frames(frames, *, align_headers: bool, drop_duplicates: bool,
                  drop_empty: bool) -> pd.DataFrame:
    """Concatenate *frames* row-wise into a single DataFrame.

    Args:
        frames: sequence of DataFrames; the first one supplies the header
            when *align_headers* is true.
        align_headers: check every later frame against the first frame's
            columns before concatenating.
        drop_duplicates: remove duplicated rows from the merged result.
        drop_empty: remove rows in which every value is missing.

    Raises:
        ValueError: if header alignment fails, or if ``pd.concat`` rejects
            the inputs.
    """
    if align_headers:
        first = frames[0]
        frames = [first] + [_align_to_first_header(first, frame) for frame in frames[1:]]
    # Outer join: a column missing from some frame is kept and filled with NaN.
    merged = pd.concat(frames, ignore_index=True, join='outer')
    if drop_duplicates:
        merged = merged.drop_duplicates()
    if drop_empty:
        merged = merged.dropna(how="all")
    return merged


def _download_link(df: pd.DataFrame, number: int) -> str:
    """Build an HTML anchor that embeds *df* as a base64-encoded CSV data URI."""
    csv = df.to_csv(index=False)
    b64 = base64.b64encode(csv.encode()).decode()
    return f'<a href="data:file/csv;base64,{b64}" download="cleaned_data_{number}.csv">Download cleaned_data_{number}.csv</a>'


# Function for the CSV Visualization App
def app():
    """Render the page: upload CSV files, optionally merge them, preview, download."""
    st.title('CSV Data Cleaning and Visualization')
    st.markdown("Upload one or multiple CSV files to preprocess and clean your files quickly and stress free.")

    # File uploader allows user to add their own CSV
    uploaded_files = st.file_uploader("Choose CSV files", type="csv", accept_multiple_files=True)

    if not uploaded_files:
        st.warning("Please upload CSV file(s).")
        st.stop()

    # The list must exist before the loop appends to it; without this
    # initialisation the first append fails with UnboundLocalError.
    dataframes = []
    for file in uploaded_files:
        # Rewind before parsing (presumably the buffer may already have been
        # read during an earlier rerun of the script).
        file.seek(0)
        dataframes.append(pd.read_csv(file))

    if len(dataframes) > 1:
        merge = st.checkbox("Merge uploaded CSV files")
        if merge:
            # Merge options
            keep_first_header_only = st.selectbox("Keep only the header (first row) of the first file", ["Yes", "No"])
            remove_duplicate_rows = st.selectbox("Remove duplicate rows", ["No", "Yes"])
            remove_empty_rows = st.selectbox("Remove empty rows", ["Yes", "No"])
            # NOTE(review): the selected line ending is shown to the user but
            # is not applied to the generated CSV anywhere in this function.
            st.selectbox("End line", ["\\n", "\\r\\n"])

            try:
                merged_df = _merge_frames(
                    dataframes,
                    align_headers=keep_first_header_only == "Yes",
                    drop_duplicates=remove_duplicate_rows == "Yes",
                    drop_empty=remove_empty_rows == "Yes",
                )
            except ValueError:
                st.error("Please make sure columns match in all files. If you don't want them to match, select 'No' in the first option.")
                st.stop()
            else:
                dataframes = [merged_df]

    # Show or hide DataFrames
    show_dataframes = st.checkbox("Show DataFrames", value=True)
    if show_dataframes:
        for i, df in enumerate(dataframes):
            st.write(f"DataFrame {i + 1}")
            st.dataframe(df)

    if st.button("Download cleaned data"):
        for i, df in enumerate(dataframes):
            st.markdown(_download_link(df, i + 1), unsafe_allow_html=True)

    # Page footer with attribution links.
    st.markdown("")
    st.markdown("---")
    st.markdown("")
    st.markdown("<p style='text-align: center'><a href='https://github.com/Kaludii'>Github</a> | <a href='https://huggingface.co/Kaludi'>HuggingFace</a></p>", unsafe_allow_html=True)
# uploaded_file = st.file_uploader("Upload your input CSV file", type=["csv"])
# Pandas DataFrame is created from the CSV file
# if uploaded_file is not None:
# df = pd.read_csv(uploaded_file)
# st.write(df) # Display the dataframe on the app
# # Create a selectbox for user to choose the column to visualize
# columns = df.columns.tolist()
# selected_column = st.selectbox('Select a column to visualize', columns)
# # Using seaborn to create a count plot
# fig, ax = plt.subplots()
# sns.countplot(data=df, x=selected_column, ax=ax)
# plt.xticks(rotation=45) # Rotate X-axis labels to 45 degrees
# # Show the plot
# st.pyplot(fig)
# Run the page when this script is executed.
app()