# English Clause Analyzer — a Streamlit app that identifies the clauses of an
# English sentence, describes their grammatical functions, and renders the
# dependency syntax tree with Graphviz.
# Standard library
import base64
import shutil
import subprocess

# Third-party
import graphviz
import pandas as pd
import spacy
import streamlit as st

# Load the medium English pipeline once at import time; every request served
# by this module reuses the same `nlp` object.
nlp = spacy.load('en_core_web_md')
def check_graphviz_installation():
    """Return True when the Graphviz `dot` binary is on PATH and runnable."""
    # Fast path: not on PATH at all, no point trying to execute it.
    if shutil.which('dot') is None:
        return False
    try:
        # `dot -V` prints the version banner; check=True turns a non-zero
        # exit status into CalledProcessError (a SubprocessError subclass).
        subprocess.run(['dot', '-V'], capture_output=True, check=True)
    except (subprocess.SubprocessError, OSError):
        return False
    return True
def identify_clauses(doc):
    """
    Identify clauses in the sentence, separating the independent (main)
    clause from dependent (subordinate) clauses.

    Args:
        doc: a parsed spaCy ``Doc`` (anything iterable over tokens exposing
            spaCy's token attributes works).

    Returns:
        list[dict]: one ``{"Type": ..., "Text": ...}`` entry per clause,
        with the independent clause first, then subordinate clauses in
        discovery order.
    """
    # Dependency labels that introduce a subordinate clause, mapped to
    # human-readable clause types.
    clause_types = {
        "ccomp": "Complement Clause",
        "xcomp": "Open Complement Clause",
        "advcl": "Adverbial Clause",
        "relcl": "Adjective Clause",
    }

    clauses = []

    # First identify all subordinate clauses and their spans.
    subordinate_spans = []
    for token in doc:
        if token.dep_ in clause_types:
            # left_edge..right_edge covers the clause head's full subtree.
            span = doc[token.left_edge.i:token.right_edge.i + 1]
            subordinate_spans.append({
                "span": span,
                "type": clause_types[token.dep_],
            })

    # Find the root and construct the main clause by excluding subordinate spans.
    root = next((token for token in doc if token.dep_ == "ROOT"), None)
    if root:
        # All tokens in the root's subtree...
        main_clause_tokens = set(root.subtree)
        # ...minus every token that belongs to a subordinate clause.
        for sub_clause in subordinate_spans:
            main_clause_tokens.difference_update(sub_clause["span"])
        # BUG FIX: rebuild the clause in document order by sorting tokens on
        # their own index. The previous code sorted token *texts* by the index
        # of the first token with that text, so any word appearing twice
        # collapsed onto one sort key and came out in the wrong order
        # (and the lookup was O(n^2)).
        ordered = sorted(main_clause_tokens, key=lambda t: t.i)
        main_clause_text = " ".join(t.text for t in ordered)
        main_clause_text = main_clause_text.strip().replace(",", "").replace(".", "")
        clauses.append({"Type": "Independent Clause", "Text": main_clause_text})

    # Add the subordinate clauses after the main clause.
    for sub_clause in subordinate_spans:
        clauses.append({
            "Type": sub_clause["type"],
            "Text": sub_clause["span"].text,
        })

    return clauses
def analyze_clause_functions(doc):
    """
    Describe the grammatical function of each clause found in the parse.

    Returns a list of {"Type": ..., "Function": ...} dicts, one entry per
    token whose dependency label marks a clause, in document order.
    """
    # Dependency label -> (clause type, role the clause plays in the sentence).
    descriptions = {
        "ROOT": ("Independent Clause", "Express the primary action or state"),
        "ccomp": ("Complement Clause", "Acts as object of the main verb"),
        "xcomp": ("Open Complement Clause", "Predicate complement without its own subject"),
        "advcl": ("Adverbial Clause", "Modifies the verb like an adverb"),
        "relcl": ("Adjective Clause", "Modifies a noun like an adjective"),
    }
    functions = []
    for token in doc:
        entry = descriptions.get(token.dep_)
        if entry is not None:
            clause_type, role = entry
            functions.append({"Type": clause_type, "Function": role})
    return functions
def create_dependency_graph(doc):
    """
    Build a graphviz Digraph of the dependency parse: one node per token
    (labelled with its text and POS tag) and one labelled edge per
    head -> child dependency.

    Returns None when the Graphviz binaries are not available.
    """
    if not check_graphviz_installation():
        return None

    graph = graphviz.Digraph(comment='Dependency Tree')
    # Declare every token node first so the emitted dot source lists all
    # nodes before any edges.
    for token in doc:
        graph.node(str(token.i), f"{token.text}\n({token.pos_})")
    # The root token is its own head; drawing that edge would self-loop.
    for token in doc:
        if token.head is not token:
            graph.edge(str(token.head.i), str(token.i), token.dep_)
    return graph
def get_graph_download_link(dot):
    """
    Render the graph to an in-memory PDF and wrap it in a base64 data-URI
    anchor tag so Streamlit can offer it as a download via markdown.

    Returns the HTML anchor string, or an error message string when
    rendering fails.
    """
    try:
        # Rendering can fail for many reasons (missing binary, bad graph);
        # surface the message to the UI instead of crashing the app.
        pdf_bytes = dot.pipe(format='pdf')
        encoded = base64.b64encode(pdf_bytes).decode()
        link = (
            f'<a href="data:application/pdf;base64,{encoded}" '
            f'download="syntax_tree.pdf">Download Syntax Tree (PDF)</a>'
        )
        return link
    except Exception as e:
        return f"Error generating download link: {str(e)}"
def main():
    """Streamlit entry point: render the full clause-analysis UI.

    Layout: a text input and Analyze button; on click, a two-column view
    (clause tables on the left, syntax tree on the right) followed by a
    full-width part-of-speech table.
    """
    # Set page to wide mode for better visualization
    st.set_page_config(layout="wide")
    st.markdown("<h1 style='text-align: center; color: white;'>English Clause Analyzer</h1>", unsafe_allow_html=True)
    st.write("Enter an English sentence to analyze its clauses, their functions, and syntax tree.")
    # Input text (pre-filled with an example containing an adverbial clause)
    text = st.text_area("Enter your sentence:", "When I arrived at the station, the train had already left.", height=100)
    if st.button("Analyze"):
        if text:
            # Process the text through the module-level spaCy pipeline
            doc = nlp(text)
            # Create two columns for layout
            col1, col2 = st.columns(2)
            with col1:
                # Identify clauses
                clauses = identify_clauses(doc)
                st.subheader(f"Clauses Analysis")
                # Convert clauses to DataFrame for better presentation
                df_clauses = pd.DataFrame(clauses)
                st.table(df_clauses.style.set_properties(**{
                    'background-color': 'rgba(0,0,0,0.1)',
                    'color': 'white'
                }))
                # Display clause functions
                functions = analyze_clause_functions(doc)
                st.subheader("Clause Functions")
                df_functions = pd.DataFrame(functions)
                st.table(df_functions.style.set_properties(**{
                    'background-color': 'rgba(0,0,0,0.1)',
                    'color': 'white'
                }))
            with col2:
                # Display dependency visualization
                st.subheader("Syntax Tree Visualization")
                if not check_graphviz_installation():
                    # Graceful degradation: tell the user how to install Graphviz
                    st.error("Graphviz is not installed. Please install it using:")
                    st.code("sudo apt-get install graphviz")
                    st.markdown("After installation, restart the application.")
                else:
                    dot = create_dependency_graph(doc)
                    st.graphviz_chart(dot)
                    # Add download button for the graph
                    st.markdown(get_graph_download_link(dot), unsafe_allow_html=True)
            # Display part-of-speech tags in a table.
            # NOTE(review): the original indentation was lost in extraction;
            # this table is rendered full-width below the two columns, which
            # matches the sequence of statements — confirm intended nesting.
            st.subheader("Part-of-Speech Analysis")
            pos_data = [{"Word": token.text, "Part of Speech": token.pos_,
                         "Description": spacy.explain(token.pos_)} for token in doc]
            df_pos = pd.DataFrame(pos_data)
            st.table(df_pos.style.set_properties(**{
                'background-color': 'rgba(0,0,0,0.1)',
                'color': 'white'
            }))


# Standard script entry guard so importing this module has no UI side effects.
if __name__ == "__main__":
    main()