Spaces:

spark-nlp
/

sparknlp-pos-tagging

Sleeping

App Files Files Community

sparknlp-pos-tagging / Demo.py

abdullahmubeen10

Update Demo.py

5daa613 verified 6 months ago

raw

history blame contribute delete

4.3 kB

	import streamlit as st
	import sparknlp
	import os
	import pandas as pd

	from sparknlp.base import *
	from sparknlp.annotator import *
	from pyspark.ml import Pipeline
	from sparknlp.pretrained import PretrainedPipeline

	# Page configuration: wide layout, browser-tab title, and automatic sidebar
	# state. This is the first Streamlit call in the visible script; keep it ahead
	# of the other st.* calls (NOTE(review): Streamlit is understood to require
	# set_page_config to run before other commands -- confirm against its docs).
	st.set_page_config(
	layout="wide",
	page_title="Spark NLP Demos App",
	initial_sidebar_state="auto"
	)

	# CSS for styling: injects a <style> element defining the .main-title class
	# (applied to the page heading rendered further down) and the text colour for
	# paragraphs and lists inside .section containers. unsafe_allow_html=True is
	# passed so that st.markdown accepts the raw HTML.
	st.markdown("""
	<style>
	.main-title {
	font-size: 36px;
	color: #4A90E2;
	font-weight: bold;
	text-align: center;
	}
	.section p, .section ul {
	color: #666666;
	}
	</style>
	""", unsafe_allow_html=True)

	@st.cache_resource
	def init_spark():
	    """Start a Spark NLP session with ``sparknlp.start()`` and return it.

	    The function is wrapped in ``st.cache_resource`` so the started session
	    can be reused rather than recreated on every call.
	    """
	    session = sparknlp.start()
	    return session

	@st.cache_resource
	def create_pipeline():
	    """Assemble the unfitted part-of-speech tagging pipeline.

	    Stages, in order:
	      1. ``DocumentAssembler``: ``text`` -> ``document``
	      2. ``Tokenizer``: ``document`` -> ``token``
	      3. pretrained English ``pos_anc`` ``PerceptronModel``:
	         ``document`` + ``token`` -> ``pos``

	    Returns:
	        A ``Pipeline`` holding the three stages above.
	    """
	    stages = [
	        DocumentAssembler().setInputCol("text").setOutputCol("document"),
	        Tokenizer().setInputCols(["document"]).setOutputCol("token"),
	        (
	            PerceptronModel.pretrained("pos_anc", "en")
	            .setInputCols(["document", "token"])
	            .setOutputCol("pos")
	        ),
	    ]
	    return Pipeline(stages=stages)

	def fit_data(pipeline, data):
	    """Fit ``pipeline`` and annotate ``data`` through a ``LightPipeline``.

	    Args:
	        pipeline: Unfitted pipeline; it is fitted on a one-row DataFrame
	            with a single ``text`` column.
	        data: Input handed unchanged to ``LightPipeline.fullAnnotate``.

	    Returns:
	        The value returned by ``fullAnnotate`` for ``data``.
	    """
	    # Resolve the session through the cached factory instead of relying on a
	    # module-level ``spark`` global that is only assigned later in the script;
	    # this keeps the function callable regardless of statement order.
	    session = init_spark()

	    # A placeholder row is used only to give ``fit`` a DataFrame with the
	    # expected ``text`` column.
	    placeholder_df = session.createDataFrame([['']]).toDF('text')
	    fitted_model = pipeline.fit(placeholder_df)

	    return LightPipeline(fitted_model).fullAnnotate(data)

	# Set up the page layout: render the heading inside a div carrying the
	# main-title CSS class.
	st.markdown('<div class="main-title">State-of-the-Art Part-of-Speech Tagging with Spark NLP</div>', unsafe_allow_html=True)

	# Sidebar content: model picker offering a single option.
	# NOTE(review): model_name is not read anywhere in the visible script -- the
	# pipeline builder hard-codes "pos_anc" -- so changing the option list alone
	# would have no effect. Confirm whether the selection should be wired in.
	model_name = st.sidebar.selectbox(
	"Choose the pretrained model",
	['pos_anc'],
	help="For more info about the models visit: https://sparknlp.org/models"
	)

	# Reference notebook link in sidebar
	# NOTE(review): the href targets a coreference-resolution notebook on GitHub,
	# while the badge reads "Open In Colab" and this page demonstrates POS
	# tagging. This looks like a leftover from another demo -- confirm the
	# intended notebook URL.
	link = """
	<a href="https://github.com/JohnSnowLabs/spark-nlp/blob/master/examples/python/annotation/text/english/coreference-resolution/Coreference_Resolution_SpanBertCorefModel.ipynb#L117">
	<img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>
	</a>
	"""
	st.sidebar.markdown('Reference notebook:')
	st.sidebar.markdown(link, unsafe_allow_html=True)

	# Built-in example passages offered in the drop-down.
	examples = [
	    "Alice went to the market. She bought some fresh vegetables there. The tomatoes she purchased were particularly ripe.",
	    "Dr. Smith is a renowned surgeon. He has performed over a thousand successful operations. His colleagues respect him a lot.",
	    "The company announced a new product launch. It is expected to revolutionize the industry. The CEO was very excited about it.",
	    "Jennifer enjoys hiking. She goes to the mountains every weekend. Her favorite spot is the Blue Ridge Mountains.",
	    "The team won the championship. They celebrated their victory with a huge party. Their coach praised their hard work and dedication.",
	    "Michael is studying computer science. He finds artificial intelligence fascinating. His dream is to work at a leading tech company.",
	    "Tom is a skilled guitarist. He plays in a local band. His performances are always energetic and captivating.",
	]

	selected_text = st.selectbox("Select an example", examples)
	custom_input = st.text_input("Try it with your own Sentence!")

	# Prefer the user's own sentence, but treat whitespace-only input as empty so
	# it falls back to the selected example instead of producing an empty analysis.
	text_to_analyze = custom_input.strip() or selected_text

	st.markdown('Full example text')
	st.write(text_to_analyze)

	# Start (or reuse) the Spark session, build the pipeline, and annotate the
	# chosen text.
	spark = init_spark()
	pipeline = create_pipeline()
	output = fit_data(pipeline, text_to_analyze)

	# Show the per-token tagging results.
	st.markdown("Processed output:")

	# Only the first entry of the annotation result is used.
	first_result = output[0]
	token_annotations = first_result['token']
	pos_annotations = first_result['pos']

	# Token text comes from the tokenizer output; offsets and tags come from the
	# POS annotations.
	results = {
	    'Token': [annotation.result for annotation in token_annotations],
	    'Begin': [annotation.begin for annotation in pos_annotations],
	    'End': [annotation.end for annotation in pos_annotations],
	    'POS': [annotation.result for annotation in pos_annotations],
	}

	# Number the rows from 1 rather than from pandas' default of 0.
	df = pd.DataFrame(results)
	df.index = df.index + 1
	st.dataframe(df)