# Spaces: Sleeping  (page status text captured together with the source; not program code)
from transformers import pipeline | |
from tqdm import tqdm | |
import pandas as pd | |
import streamlit as st | |
from io import StringIO | |
def correct_text(uploaded_file, column_to_correct, *, prefix="<es>", max_length=1024):
    """
    Corrects text in the specified column using a text correction model.

    Every non-missing cell of the selected column is converted to ``str``,
    prefixed with ``prefix`` and sent through the
    ``sdadas/byt5-text-correction`` text2text pipeline. A Streamlit progress
    bar and status line are updated after each row.

    Args:
        uploaded_file: DataFrame containing the text to correct. It is
            modified in place (no copy is made) and also returned.
        column_to_correct: Positional (0-based) index of the column to correct.
        prefix: String prepended to every input before it is passed to the
            model. Defaults to "<es>" (presumably a language tag expected by
            the model; confirm against the model card before changing it).
        max_length: ``max_length`` forwarded to the pipeline call.

    Returns:
        The same DataFrame. Rows whose corrected text differs from the
        original get the corrected text stored under the column label
        ``column_to_correct + 1``; that column is only created once at least
        one row differs.

    Raises:
        IndexError: If ``column_to_correct`` is out of range for the frame.
    """
    # NOTE(review): the model is loaded on every call; if this runs on each
    # Streamlit rerun, consider caching the pipeline at the app level.
    corrector = pipeline(
        "text2text-generation",
        model="sdadas/byt5-text-correction",
    )
    df = uploaded_file
    total_rows = len(df)

    # NOTE(review): this is used as a column *label* with .loc, not as a
    # position. With string headers it creates a new, integer-labelled column;
    # with integer headers it can address an existing column. Kept as-is for
    # backward compatibility.
    target_label = column_to_correct + 1

    progress_bar = st.progress(0)
    status_text = st.text("Correcting text π§ ...")

    # Iterate over the single column of interest instead of df.iterrows():
    # no per-row Series is built and cell values keep their own dtype.
    source_column = df.iloc[:, column_to_correct]
    for position, (index, value) in enumerate(source_column.items(), start=1):
        if pd.notna(value):
            original_text = str(value)
            corrected_text = corrector(
                prefix + original_text, max_length=max_length
            )[0]['generated_text']
            # Save corrected text only if different from original
            if corrected_text != original_text:
                # Assumes index labels are unique (true for a freshly read
                # CSV); a duplicated label would write to every matching row.
                df.loc[index, target_label] = corrected_text

        # Progress is based on the row's position, not its index label, so it
        # stays within 0..1 for filtered, shuffled or non-numeric indexes.
        progress = position / total_rows
        progress_bar.progress(progress)
        status_text.text(f"Progress: {int(progress * 100)}% completed ")
    return df
def choose_columns(dataframe):
    """
    Lets user select a column to correct and displays a preview of its data.

    Renders a select box with the frame's column labels, shows the first
    non-missing values of the chosen column, and renders a "Correct Text"
    button.

    Args:
        dataframe: Input DataFrame

    Returns:
        Position of the selected column (result of ``columns.get_loc``) when
        the button was pressed and a column is selected; otherwise None.
    """
    st.write("Choose the columns to correct π")
    column_to_correct = st.selectbox(
        "Select columns to correct", dataframe.columns)

    # Compare against None explicitly: a legitimate column label may be falsy
    # (e.g. 0 or ""), and the button branch below already uses this test.
    has_selection = column_to_correct is not None

    if has_selection:
        st.write("Preview of data in selected columns π:")
        # Show only rows that actually contain a value in the chosen column.
        st.dataframe(dataframe[column_to_correct].dropna().head())

    if st.button("Correct Text"):
        if has_selection:
            # NOTE(review): get_loc returns an int only for unique labels;
            # duplicated labels would yield a slice or boolean mask.
            return dataframe.columns.get_loc(column_to_correct)
        st.error("Please select a column before correcting text β")
    return None
def main():
    """Run the app: upload a CSV, choose a column, display the corrected frame."""
    st.title("CSV text Correction App β")
    csv_upload = st.file_uploader("Choose a CSV file π", type=["csv"])

    # Nothing to do until the user has provided a file.
    if csv_upload is None:
        return

    try:
        frame = pd.read_csv(csv_upload, encoding='utf-8')
        selected_position = choose_columns(frame)
        if selected_position is not None:
            corrected_frame = correct_text(frame, selected_position)
            st.write(corrected_frame)
    except UnicodeDecodeError:
        # The upload is not valid UTF-8.
        st.error(
            "Error: Unable to decode the file. Please check the file encoding or try another file.")
    except Exception as e:
        # Top-level UI boundary: report any other failure to the user
        # instead of letting it propagate.
        st.error(f"An unexpected error occurred: {e}")
# Start the app only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()