File size: 3,880 Bytes
8015fc4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import io
import base64


# Configure the Streamlit page with the "wide" layout before any other
# Streamlit element is rendered.
st.set_page_config(layout="wide")

# Function for the CSV Visualization App
def app():
    """Render the CSV cleaning page: upload, optional merge, preview, download.

    Flow:
      1. Ask the user for one or more CSV files and load each one into a
         pandas DataFrame.
      2. When more than one file was uploaded, offer to merge them into a
         single DataFrame (optionally validating headers against the first
         file, dropping duplicate rows and dropping fully empty rows).
      3. Optionally display the resulting DataFrame(s).
      4. On request, emit one base64 download link per DataFrame.

    The function returns nothing; all output goes through Streamlit. When no
    file is uploaded, or an upload cannot be parsed or merged, a message is
    shown and the script run is halted with ``st.stop()``.
    """
    st.title('CSV Data Cleaning and Visualization')

    st.markdown("Upload one or multiple CSV files to preprocess and clean your files quickly and stress free.")

    # File uploader allows user to add their own CSV
    uploaded_files = st.file_uploader("Choose CSV files", type="csv", accept_multiple_files=True)

    # Guard clause: nothing to do until at least one file is provided.
    if not uploaded_files:
        st.warning("Please upload CSV file(s).")
        st.stop()

    # The list must exist before the first append; leaving it undefined made
    # every upload fail with UnboundLocalError.
    dataframes = []
    for file in uploaded_files:
        # Rewind so the file is always parsed from its first byte.
        file.seek(0)
        try:
            dataframes.append(pd.read_csv(file))
        except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
            # Report unreadable uploads in the page instead of a traceback.
            st.error(f"Could not read '{file.name}' as CSV: {exc}")
            st.stop()

    # The merge checkbox is only offered when there is something to merge.
    if len(dataframes) > 1 and st.checkbox("Merge uploaded CSV files"):
        # Merge options
        keep_first_header_only = st.selectbox("Keep only the header (first row) of the first file", ["Yes", "No"])
        remove_duplicate_rows = st.selectbox("Remove duplicate rows", ["No", "Yes"])
        remove_empty_rows = st.selectbox("Remove empty rows", ["Yes", "No"])
        # NOTE(review): the selected line ending is collected from the user
        # but is not applied to the generated CSV anywhere in this function.
        st.selectbox("End line", ["\\n", "\\r\\n"])

        try:
            merged_df = _merge_dataframes(
                dataframes,
                require_first_header=keep_first_header_only == "Yes",
                drop_duplicates=remove_duplicate_rows == "Yes",
                drop_empty_rows=remove_empty_rows == "Yes",
            )
        except ValueError:
            st.error("Please make sure columns match in all files. If you don't want them to match, select 'No' in the first option.")
            st.stop()

        dataframes = [merged_df]

    # Show or hide DataFrames
    show_dataframes = st.checkbox("Show DataFrames", value=True)

    if show_dataframes:
        for i, df in enumerate(dataframes, start=1):
            st.write(f"DataFrame {i}")
            st.dataframe(df)

    if st.button("Download cleaned data"):
        for i, df in enumerate(dataframes, start=1):
            st.markdown(_download_link(df, f"cleaned_data_{i}.csv"), unsafe_allow_html=True)

    st.markdown("")
    st.markdown("---")
    st.markdown("")
    st.markdown("<p style='text-align: center'><a href='https://github.com/Kaludii'>Github</a> | <a href='https://huggingface.co/Kaludi'>HuggingFace</a></p>", unsafe_allow_html=True)


def _merge_dataframes(dataframes, *, require_first_header, drop_duplicates, drop_empty_rows):
    """Concatenate ``dataframes`` row-wise and apply the selected clean-ups.

    Args:
        dataframes: Non-empty list of DataFrames; the first one defines the
            reference header.
        require_first_header: When true, every later frame may only contain
            columns that also exist in the first frame.
        drop_duplicates: When true, duplicate rows are removed from the
            merged result.
        drop_empty_rows: When true, rows whose values are all missing are
            removed from the merged result.

    Returns:
        The merged DataFrame.

    Raises:
        ValueError: If ``require_first_header`` is true and a later frame
            has a column that the first frame does not have.
    """
    if require_first_header:
        known_columns = set(dataframes[0].columns)
        for df in dataframes[1:]:
            unexpected = [col for col in df.columns if col not in known_columns]
            if unexpected:
                raise ValueError(f"Columns not present in the first file: {unexpected}")

    # Columns are aligned by name here, so a file whose columns appear in a
    # different order keeps its data under the correct headers (assigning the
    # first file's labels positionally would silently mislabel them).
    merged = pd.concat(dataframes, ignore_index=True, join='outer')

    if drop_duplicates:
        merged = merged.drop_duplicates()

    if drop_empty_rows:
        merged = merged.dropna(how="all")

    return merged


def _download_link(df, filename):
    """Return an HTML anchor embedding ``df`` as a base64-encoded CSV download."""
    csv = df.to_csv(index=False)
    b64 = base64.b64encode(csv.encode()).decode()
    return f'<a href="data:file/csv;base64,{b64}" download="{filename}">Download {filename}</a>'

    
    # uploaded_file = st.file_uploader("Upload your input CSV file", type=["csv"])
    # Pandas DataFrame is created from the CSV file
    # if uploaded_file is not None:
    #     df = pd.read_csv(uploaded_file)
    #     st.write(df)  # Display the dataframe on the app

    #     # Create a selectbox for user to choose the column to visualize
    #     columns = df.columns.tolist()
    #     selected_column = st.selectbox('Select a column to visualize', columns)

    #     # Using seaborn to create a count plot
    #     fig, ax = plt.subplots()
    #     sns.countplot(data=df, x=selected_column, ax=ax)
    #     plt.xticks(rotation=45)  # Rotate X-axis labels to 45 degrees
    #     # Show the plot
    #     st.pyplot(fig)

# Build the page: this call runs at module level, every time the script is executed.
app()