import streamlit as st
import sparknlp
from sparknlp.base import *
from sparknlp.annotator import *
from pyspark.ml import Pipeline

# Page configuration
st.set_page_config(
    layout="wide",
    initial_sidebar_state="auto"
)

# CSS for styling. The stylesheet is assembled from adjacent string literals
# so the emitted markup stays exactly the same while the source stays readable.
st.markdown(
    ' <style>'
    ' .main-title { font-size: 36px; color: #4A90E2; font-weight: bold; text-align: center; }'
    ' .section { background-color: #f9f9f9; padding: 10px; border-radius: 10px; margin-top: 10px; }'
    ' .section p, .section ul { color: #666666; }'
    ' </style> ',
    unsafe_allow_html=True,
)


@st.cache_resource
def init_spark():
    """Start Spark NLP and return the resulting session.

    Wrapped in ``st.cache_resource`` so repeated calls (including Streamlit
    reruns) reuse the cached object instead of calling ``sparknlp.start()``
    again.
    """
    return sparknlp.start()


@st.cache_resource
def create_pipeline():
    """Build the grammar-correction pipeline.

    Stages:
      1. ``DocumentAssembler`` reading the ``text`` column and writing
         ``documents``.
      2. The pretrained ``t5_grammar_error_corrector`` T5 model, run with the
         ``gec:`` task prefix, a maximum output length of 200, and writing its
         output to ``corrections``.

    Returns:
        An unfitted ``pyspark.ml.Pipeline`` holding both stages. The result is
        cached by ``st.cache_resource``.
    """
    document_assembler = (
        DocumentAssembler()
        .setInputCol("text")
        .setOutputCol("documents")
    )

    t5 = (
        T5Transformer.pretrained("t5_grammar_error_corrector")
        .setTask("gec:")
        .setInputCols(["documents"])
        .setMaxOutputLength(200)
        .setOutputCol("corrections")
    )

    return Pipeline().setStages([document_assembler, t5])


def fit_data(pipeline, data):
    """Run ``pipeline`` over a single piece of text and collect the output.

    Args:
        pipeline: The pipeline returned by ``create_pipeline``.
        data: The sentence to correct; it becomes the only row of a
            one-column (``text``) DataFrame.

    Returns:
        The collected rows of ``corrections.result``.
    """
    # Obtain the session from the cached factory rather than relying on a
    # module-level ``spark`` global that is only assigned later in the
    # script; this keeps the function usable regardless of call order.
    session = init_spark()
    df = session.createDataFrame([[data]]).toDF("text")
    result = pipeline.fit(df).transform(df)
    return result.select('corrections.result').collect()


# Sidebar content
# NOTE(review): the selected value is not read anywhere else in the visible
# file; create_pipeline() always loads the same pretrained model.
model = st.sidebar.selectbox(
    "Choose the pretrained model",
    ['t5_grammar_error_corrector'],
    help="For more info about the models visit: https://sparknlp.org/models"
)

# Set up the page layout
title = "Correct Sentences Grammar"
sub_title = (
    "This demo uses a text-to-text model fine-tuned to correct grammatical "
    "errors when the task is set to “gec:”. It is based on Prithiviraj "
    "Damodaran’s Gramformer model."
)
# Imported here because this section is the only one that needs it: it is
# used to escape untrusted text before it is embedded in raw HTML.
import html

# Page header (title and sub_title are fixed strings defined earlier).
st.markdown(f'<div class="main-title">{title}</div>', unsafe_allow_html=True)
st.markdown(f'<div style="text-align: center; color: #666666;">{sub_title}</div>', unsafe_allow_html=True)

# Reference notebook link in sidebar
link = (
    ' <a href="https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/streamlit_notebooks/T5_LINGUISTIC.ipynb#scrollTo=QAZ3vOX_SW7B">'
    ' <img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>'
    ' </a> '
)
st.sidebar.markdown('Reference notebook:')
st.sidebar.markdown(link, unsafe_allow_html=True)

# Define the examples
examples = [
    "She don't knows nothing about what's happening in the office.",
    "They was playing soccer yesterday when it start raining heavily.",
    "This car are more faster than that one, but it costed less money.",
    "I seen him go to the store, but he don't buy nothing from there.",
    "We was going to the park but it start raining before we could leave.",
]

# Text selection and analysis: a non-empty custom sentence takes precedence
# over the example chosen in the select box.
selected_text = st.selectbox("Select an example", examples)
custom_input = st.text_input("Try it with your own sentence!")
text_to_analyze = custom_input or selected_text

st.write('Text to analyze:')
HTML_WRAPPER = (
    '<div class="scroll entities" style="overflow-x: auto; '
    'border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; '
    'margin-bottom: 2.5rem; white-space:pre-wrap">{}</div>'
)
# The text may come straight from the user and is rendered with
# unsafe_allow_html=True, so escape it to prevent HTML/script injection.
st.markdown(HTML_WRAPPER.format(html.escape(text_to_analyze)), unsafe_allow_html=True)

# Initialize Spark and create pipeline
spark = init_spark()
pipeline = create_pipeline()
output = fit_data(pipeline, text_to_analyze)

# Display transformed sentence
st.write("Predicted Sentence:")
# The first field of the first collected row holds the corrected strings;
# fall back to an empty result instead of raising if nothing came back.
corrected_parts = output[0][0] if output else []
output_text = "".join(corrected_parts)
# The model output is derived from user input, so it is escaped as well.
st.markdown(f'<div class="scroll">{html.escape(output_text)}</div>', unsafe_allow_html=True)