# Scraped GitHub page metadata (kept for provenance): user HuuHuy227, branch "new-modified", commit d6d5bda
import streamlit as st
import spacy
import graphviz
import pandas as pd
import base64
import shutil
import subprocess
# Load the English language model for spaCy once at module import.
# The model must already be installed (e.g. `python -m spacy download en_core_web_md`);
# spacy.load raises OSError if it is missing.
nlp = spacy.load('en_core_web_md')
def check_graphviz_installation():
    """Return True when the Graphviz `dot` executable is installed and runnable.

    First checks the PATH for `dot`, then confirms it actually executes by
    invoking `dot -V`.
    """
    # Guard clause: nothing on PATH means Graphviz is definitely absent.
    if shutil.which('dot') is None:
        return False
    # PATH hit is not enough — verify the binary runs and exits successfully.
    try:
        subprocess.run(['dot', '-V'], capture_output=True, check=True)
    except (subprocess.SubprocessError, OSError):
        return False
    return True
def identify_clauses(doc):
    """
    Identify clauses in the sentence using spaCy, separating the independent
    (main) clause from dependent clauses.

    Args:
        doc: a spaCy Doc for a single sentence.

    Returns:
        list[dict]: one dict per clause with keys "Type" and "Text"; the
        independent clause (if a ROOT is found) comes first, followed by
        each subordinate clause in document order.
    """
    clause_labels = {
        "ccomp": "Complement Clause",
        "xcomp": "Open Complement Clause",
        "advcl": "Adverbial Clause",
        "relcl": "Adjective Clause",
    }
    clauses = []
    # First identify all subordinate clauses and their spans.
    subordinate_spans = []
    for token in doc:
        if token.dep_ in clause_labels:
            # left_edge/right_edge give the full subtree span of the clause head.
            span = doc[token.left_edge.i:token.right_edge.i + 1]
            subordinate_spans.append({
                "span": span,
                "type": clause_labels[token.dep_],
            })
    # Find the root and construct the main clause by excluding subordinate spans.
    root = next((token for token in doc if token.dep_ == "ROOT"), None)
    if root is not None:
        # All tokens under the root, minus any token inside a subordinate clause.
        main_clause_tokens = set(root.subtree)
        for sub_clause in subordinate_spans:
            main_clause_tokens.difference_update(sub_clause["span"])
        # BUGFIX: sort by each token's own document index (token.i). The old
        # key looked up the index of the FIRST token with matching text, which
        # scrambled word order whenever a word appeared twice in the clause.
        ordered = sorted(main_clause_tokens, key=lambda t: t.i)
        main_clause_text = " ".join(t.text for t in ordered)
        main_clause_text = main_clause_text.strip().replace(",", "").replace(".", "")
        clauses.append({"Type": "Independent Clause", "Text": main_clause_text})
    # Add the subordinate clauses after the main clause.
    for sub_clause in subordinate_spans:
        clauses.append({
            "Type": sub_clause["type"],
            "Text": sub_clause["span"].text,
        })
    return clauses
def analyze_clause_functions(doc):
    """
    Describe the grammatical function of each clause found in *doc*.

    Returns a list of {"Type": ..., "Function": ...} dicts, one per token
    whose dependency label marks a clause (ROOT, ccomp, xcomp, advcl, relcl),
    in document order.
    """
    # Dependency label -> (clause type, function description) dispatch table;
    # replaces the original if/elif chain with a single lookup per token.
    descriptions = {
        "ROOT": ("Independent Clause", "Express the primary action or state"),
        "ccomp": ("Complement Clause", "Acts as object of the main verb"),
        "xcomp": ("Open Complement Clause", "Predicate complement without its own subject"),
        "advcl": ("Adverbial Clause", "Modifies the verb like an adverb"),
        "relcl": ("Adjective Clause", "Modifies a noun like an adjective"),
    }
    functions = []
    for token in doc:
        entry = descriptions.get(token.dep_)
        if entry is not None:
            clause_type, description = entry
            functions.append({"Type": clause_type, "Function": description})
    return functions
def create_dependency_graph(doc):
    """
    Build a graphviz Digraph visualizing the dependency parse of *doc*.

    Returns None when Graphviz is not installed on the system, since the
    graph could not be rendered anyway.
    """
    if not check_graphviz_installation():
        return None
    tree = graphviz.Digraph(comment='Dependency Tree')
    # Emit every token as a node first (text plus part-of-speech tag) ...
    for token in doc:
        tree.node(str(token.i), f"{token.text}\n({token.pos_})")
    # ... then one labeled edge per head->child relation; the root token is
    # its own head, so it gets no incoming edge.
    for token in doc:
        if token.head is not token:
            tree.edge(str(token.head.i), str(token.i), token.dep_)
    return tree
def get_graph_download_link(dot):
    """
    Render *dot* to PDF in memory and return an HTML anchor that downloads it
    as a base64 data URI; on failure, return a human-readable error string.
    """
    try:
        # Pipe the graph straight to PDF bytes and embed them as base64.
        encoded = base64.b64encode(dot.pipe(format='pdf')).decode()
        return (
            f'<a href="data:application/pdf;base64,{encoded}" '
            f'download="syntax_tree.pdf">Download Syntax Tree (PDF)</a>'
        )
    except Exception as e:
        # Best-effort: surface the problem to the UI instead of crashing.
        return f"Error generating download link: {str(e)}"
def _styled_table(df):
    """Render *df* as a Streamlit table with the app's translucent dark styling."""
    st.table(df.style.set_properties(**{
        'background-color': 'rgba(0,0,0,0.1)',
        'color': 'white'
    }))

def main():
    """
    Streamlit entry point: read a sentence, then show clause analysis,
    clause functions, the dependency syntax tree, and a POS table.
    """
    # Wide mode gives the two-column layout room for the graph.
    st.set_page_config(layout="wide")
    st.markdown("<h1 style='text-align: center; color: white;'>English Clause Analyzer</h1>", unsafe_allow_html=True)
    st.write("Enter an English sentence to analyze its clauses, their functions, and syntax tree.")
    # Input text (pre-filled with an example containing a dependent clause).
    text = st.text_area("Enter your sentence:", "When I arrived at the station, the train had already left.", height=100)
    if st.button("Analyze"):
        if text:
            # Process the text once; every panel below reuses this Doc.
            doc = nlp(text)
            col1, col2 = st.columns(2)
            with col1:
                # Clause breakdown table.
                clauses = identify_clauses(doc)
                st.subheader("Clauses Analysis")  # was a pointless f-string
                _styled_table(pd.DataFrame(clauses))
                # Clause function table.
                functions = analyze_clause_functions(doc)
                st.subheader("Clause Functions")
                _styled_table(pd.DataFrame(functions))
            with col2:
                st.subheader("Syntax Tree Visualization")
                if not check_graphviz_installation():
                    st.error("Graphviz is not installed. Please install it using:")
                    st.code("sudo apt-get install graphviz")
                    st.markdown("After installation, restart the application.")
                else:
                    dot = create_dependency_graph(doc)
                    st.graphviz_chart(dot)
                    # Offer the rendered tree as a downloadable PDF.
                    st.markdown(get_graph_download_link(dot), unsafe_allow_html=True)
            # Part-of-speech table spans the full width below the columns.
            st.subheader("Part-of-Speech Analysis")
            pos_data = [{"Word": token.text, "Part of Speech": token.pos_,
                         "Description": spacy.explain(token.pos_)} for token in doc]
            _styled_table(pd.DataFrame(pos_data))
# Run the app only when this file is executed directly (e.g. `streamlit run <file>`).
if __name__ == "__main__":
    main()