# csv_corrector / app.py
# danielRamon - "Create app.py" (commit 6ebb80b, verified)
from transformers import pipeline
from tqdm import tqdm
import pandas as pd
import streamlit as st
from io import StringIO
def correct_text(uploaded_file, column_to_correct):
    """
    Correct the text of one column using a text correction model.

    Every non-missing cell of the chosen column is converted to ``str``,
    prefixed with the ``<es>`` tag and passed through the
    ``sdadas/byt5-text-correction`` text2text-generation pipeline. A
    Streamlit progress bar and status line are updated after each row.

    Args:
        uploaded_file: DataFrame containing the text to correct. It is
            modified in place and is also the object that is returned.
        column_to_correct: Zero-based position of the column to correct.

    Returns:
        The same DataFrame. Corrections that differ from the original text
        are stored under the column label ``column_to_correct + 1``; other
        rows are left untouched in that column.
    """
    corrector = pipeline("text2text-generation",
                         model="sdadas/byt5-text-correction")
    df = uploaded_file
    total_rows = len(df)

    # NOTE(review): the output label is the integer ``position + 1``. With
    # string column labels this adds a new column; if the frame already has
    # a column with that exact label, it is overwritten instead.
    target_label = column_to_correct + 1

    progress_bar = st.progress(0)
    status_text = st.text("Correcting text 🧠...")

    # Iterate only the column being corrected. Counting rows positionally
    # keeps the progress fraction within (0, 1] whatever the index labels
    # are (non-zero-based, non-contiguous or non-numeric indexes).
    source_column = df.iloc[:, column_to_correct]
    for position, (row_label, value) in enumerate(
            source_column.items(), start=1):
        if pd.notna(value):
            original_text = str(value)
            corrected_text = corrector(
                "<es>" + original_text, max_length=1024)[0]['generated_text']
            # Save corrected text only if different from original
            if corrected_text != original_text:
                df.loc[row_label, target_label] = corrected_text

        progress = position / total_rows
        progress_bar.progress(progress)
        status_text.text(f"Progress: {int(progress * 100)}% completed ")
    return df
def choose_columns(dataframe):
    """
    Let the user pick the column to correct and preview its data.

    Renders a select box populated with the DataFrame's column labels,
    shows the first non-missing values of the chosen column, and offers a
    "Correct Text" button.

    Args:
        dataframe: Input DataFrame whose columns are offered for selection.

    Returns:
        The location of the selected column as reported by
        ``dataframe.columns.get_loc`` when the button is pressed with a
        column selected; otherwise ``None``.
    """
    st.write("Choose the columns to correct 🔍")
    column_to_correct = st.selectbox(
        "Select columns to correct", dataframe.columns)

    # Compare against None rather than relying on truthiness so that falsy
    # labels (e.g. 0 or "") still get a preview, matching the check used
    # when the button is pressed.
    has_selection = column_to_correct is not None
    if has_selection:
        st.write("Preview of data in selected columns 👀:")
        non_empty_data = dataframe[dataframe[column_to_correct].notna()]
        st.dataframe(non_empty_data[column_to_correct].head())

    if st.button("Correct Text"):
        if has_selection:
            return dataframe.columns.get_loc(column_to_correct)
        st.error("Please select a column before correcting text ❌")
    return None
def main():
    """
    Run the CSV text correction application.

    Shows the page title and a CSV uploader. Once a file is uploaded it is
    read as UTF-8 into a DataFrame, the user chooses a column, and the
    corrected DataFrame is rendered on the page. Decoding failures and any
    other exception raised along the way are reported with ``st.error``
    instead of propagating.
    """
    st.title("CSV text Correction App ✔")
    uploaded_file = st.file_uploader("Choose a CSV file 📄", type=["csv"])
    if uploaded_file is None:
        # Nothing to do until the user provides a file.
        return

    try:
        dataframe = pd.read_csv(uploaded_file, encoding='utf-8')
        column_index = choose_columns(dataframe)
        if column_index is not None:
            st.write(correct_text(dataframe, column_index))
    except UnicodeDecodeError:
        st.error(
            "Error: Unable to decode the file. Please check the file encoding or try another file.")
    except Exception as e:
        # Top-level UI boundary: surface the problem to the user.
        st.error(f"An unexpected error occurred: {e}")
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()