Spaces:
Running
Running
| import base64 | |
| from collections import Counter | |
| import graphviz | |
| import penman | |
| from multi_amr.data.postprocessing_graph import ParsedStatus | |
| from utils import get_resources, LANGUAGES, translate | |
| import streamlit as st | |
# Page metadata (browser tab title and icon) followed by the on-page title.
# The icon is written as a real emoji; the previous value was a mis-decoded
# byte sequence rather than the intended character.
st.set_page_config(
    page_title="Multilingual text-to-AMR demo by Bram Vanroy",
    page_icon="👩‍💻",
)
st.title("👩‍💻 Multilingual text-to-AMR")
# Seed the session-state keys that back the widgets below, but only when they
# are missing so values already present in the session are never overwritten.
for state_key, default_value in (
    ("text", ""),
    ("language", "English"),
    ("use_multilingual", False),
):
    if state_key not in st.session_state:
        st.session_state[state_key] = default_value

# Input row: a wide text box next to a narrow language selector.
text_col, lang_col = st.columns((4, 1))
text = text_col.text_input(label="Input text", key="text")
src_lang = lang_col.selectbox(label="Language", options=list(LANGUAGES.keys()), index=0, key="language")

multilingual = st.checkbox(
    "Use multilingual model",
    label_visibility="visible",
    key="use_multilingual",
    help="Whether to use a single multilingual model that was trained on English, Spanish and"
         " Dutch together, or (if not checked) language-specific models. Enabling this will"
         " result in worse performance but can be of interest for research purposes.",
)

# Placeholder that is reused further down for progress and validation messages.
error_ct = st.empty()
def _unique_node_labels(triples) -> dict:
    """Return a display label for every variable that has an ``:instance`` triple.

    A concept shared by several variables gets a running number appended so
    the nodes stay distinguishable, e.g. ``{"t": "talk-01 (1)", "t2": "talk-01 (2)"}``.
    Concepts that occur only once are used unchanged.

    :param triples: iterable of ``(source, role, target)`` triples
    :return: mapping of variable name to display label
    """
    concepts = {source: target for source, role, target in triples if role == ":instance"}
    concept_totals = Counter(concepts.values())

    running = Counter()
    labels = {}
    for variable, concept in concepts.items():
        if concept_totals[concept] > 1:
            running[concept] += 1
            labels[variable] = f"{concept} ({running[concept]})"
        else:
            labels[variable] = concept
    return labels


def _build_digraph(graph) -> graphviz.Digraph:
    """Turn the triples of ``graph`` into a graphviz digraph.

    ``:instance`` triples only provide node labels; every other triple becomes
    a labelled edge. Triple endpoints that are not variables are drawn under
    their literal value, and equal display labels share one node.

    :param graph: object exposing ``triples`` as ``(source, role, target)`` tuples
    :return: the populated digraph
    """
    labels = _unique_node_labels(graph.triples)
    digraph = graphviz.Digraph(
        node_attr={"color": "#3aafa9", "style": "rounded,filled", "shape": "box", "fontcolor": "white"}
    )

    # graphviz splits edge endpoints on ":" into node/port/compass, so display
    # strings (which may contain a colon, e.g. "16:30") are never used as node
    # names. Each distinct label gets a synthetic id and is attached as `label`.
    node_ids = {}

    def node_id(name):
        label = labels.get(name, name)
        if label not in node_ids:
            node_ids[label] = f"n{len(node_ids)}"
            digraph.node(node_ids[label], label=label)
        return node_ids[label]

    for source, role, target in graph.triples:
        if role != ":instance":
            digraph.edge(node_id(source), node_id(target), label=role)

    # Variables that take part in no edge (e.g. a single-concept graph) would
    # otherwise be missing from the drawing entirely.
    for variable in labels:
        node_id(variable)

    return digraph


def _create_download_link(img_bytes: bytes) -> str:
    """Return an HTML anchor that embeds ``img_bytes`` as a base64 PNG data URI.

    :param img_bytes: raw PNG bytes
    :return: ``<a>`` element offering the image as ``amr-graph.png``
    """
    encoded = base64.b64encode(img_bytes).decode("utf-8")
    return f'<a href="data:image/png;charset=utf-8;base64,{encoded}" download="amr-graph.png">Download graph</a>'


# Strip before the emptiness check so whitespace-only input is rejected too.
text = text.strip()
if text:
    if st.button("Submit"):
        error_ct.info("Generating abstract meaning representation (AMR)...", icon="💻")
        model, tokenizer = get_resources(multilingual, src_lang)
        gen_kwargs = {
            "max_new_tokens": 512,
            "num_beams": 5,
        }
        outputs = translate(text, src_lang, model, tokenizer, **gen_kwargs)
        # Generation is done: clear the progress message.
        error_ct.empty()

        if outputs["status"][0] == ParsedStatus.BACKOFF:
            st.write("The system could not generate a valid graph no matter how hard it tried.")
        else:
            graph = outputs["graph"][0]
            visualized = _build_digraph(graph)

            st.subheader("Graph visualization")
            st.graphviz_chart(visualized, use_container_width=True)

            # Download link
            # NOTE(review): presumably a plain HTML anchor is used instead of a
            # Streamlit button so that clicking it does not rerun the script
            # and discard the result - confirm.
            img = visualized.pipe(format="png")
            st.markdown(_create_download_link(img), unsafe_allow_html=True)

            # Additional info
            st.subheader("PENMAN representation")
            st.code(penman.encode(graph))
else:
    error_ct.warning("Text cannot be empty!", icon="⚠️")
########################
# Information, socials #
########################
# Static project background and contact details shown below the demo.
# The first block is raw HTML (logo next to the blurb), hence unsafe_allow_html.
st.header("SignON 🤝")
st.markdown("""
<div style="display: flex">
<img style="margin-right: 1em" alt="SignON logo" src="https://signon-project.eu/wp-content/uploads/2021/05/SignOn_Favicon_500x500px.png" width=64 height=64>
<p><a href="https://signon-project.eu/" target="_blank" title="SignON homepage">SignON</a> aims to bridge the
communication gap between deaf, hard-of-hearing and hearing people through an accessible translation service.
This service will translate between languages and modalities with particular attention for sign languages.</p>
</div>""", unsafe_allow_html=True)

st.markdown("""[Abstract meaning representation](https://aclanthology.org/W13-2322/) (AMR)
is a semantic framework to describe meaning relations of sentences as graphs. In the SignON project, AMR is used as
an interlingua to translate between modalities and languages. To this end, I built MBART models for the task of
generating AMR representations from an input sentence, which is show-cased in this demo.
""")

st.header("Contact ✒️")
st.markdown("Would you like additional functionality in the demo, do you have questions, or just want to get in touch?"
            " Give me a shout on [Twitter](https://twitter.com/BramVanroy)"
            " or add me on [LinkedIn](https://www.linkedin.com/in/bramvanroy/)!")