Spaces:

mikeee
/

mlbee

Running

File size: 6,218 Bytes

"""Fetch text from urls and convert to state.ns.list1/list2."""
# pylint: disable=invalid-name, too-many-statements
import streamlit as st
from icecream import ic
from logzero import logger
from streamlit import session_state as state

from st_mlbee.url2txt import url2txt

# Send icecream output through logzero at DEBUG level (switch to logger.info
# for more visibility) and prefix every message with file/line/function.
ic.configureOutput(includeContext=True, outputFunction=logger.debug)


# Demo texts offered as the default content of the url box, per bee type.
_DEMO_URL_BASE = "https://raw.githubusercontent.com/ffreemt/en-de-zh-txt/master/"
_DEMO_FILES = {
    "ezbee": ("test_en.txt", "test_zh.txt"),
    "mlbee": ("test_en.txt", "test_zh.txt"),
    "dzbee": ("sternstunden04-de.txt", "sternstunden04-zh.txt"),
    "debee": ("sternstunden04-de.txt", "sternstunden04-en.txt"),
}


def _nonblank_lines(text):
    """Return the stripped, non-empty lines of *text*."""
    return [line.strip() for line in text.splitlines() if line.strip()]


def _url2txt_or_error(url):
    """Fetch *url* as text; on failure log the error and return its message."""
    try:
        return url2txt(url)
    except Exception as exc:  # best effort: the message is shown to the user
        logger.error(exc)
        return str(exc)


def fetch_urls():
    """Fetch text from urls and convert to state.ns.list1/list2.

    Renders a url input box (pre-filled with demo urls for the current
    ``state.ns.beetype``) and a form showing the fetched text(s) for editing.

    Reads ``state.ns.beetype`` and ``state.ns.sourcecount`` (``2`` means two
    separate texts, anything else means one mixed text).

    Writes ``state.ns.list1`` / ``state.ns.list2`` (non-blank stripped lines)
    when urls are fetched and again when the form is submitted; once the form
    has been submitted also resets ``state.ns.src_filename`` to ``""`` and
    sets ``state.ns.updated`` to ``True``.
    """
    beetype = state.ns.beetype
    two_sources = state.ns.sourcecount == 2  # 2-sep, otherwise 1-mix

    demo_files = _DEMO_FILES.get(beetype)
    value = ""
    if demo_files:
        value = " ".join(_DEMO_URL_BASE + name for name in demo_files)

    def fetch_cb():
        """Fetch text from the submitted urls and store it in session state."""
        ic("fetch_cb")

        # Callbacks run *before* the script reruns, so the value the user
        # just entered is only reachable through the widget's session-state
        # key; a closure variable would still hold the previous value.
        # str.split() with no argument splits on spaces and newlines alike,
        # as promised by the widget's help text.
        urls = state.text_area_urls.split()

        # supply http:// if not startswith http
        urls = [url if url.startswith("http") else "http://" + url for url in urls]

        sep = "\n\t"
        ic(f" urls submitted: \n{sep.join(urls)}")

        fetched = {"text1": "", "text2": ""}
        if state.ns.sourcecount == 2:  # 2-sep: first url -> text1, second -> text2
            for idx, url in enumerate(urls[:2], start=1):
                text = _url2txt_or_error(url)
                fetched[f"text{idx}"] = text
                ic(f"{idx}: [{url}] {text[:100]}")

            ic(fetched["text1"][:10])
            ic(fetched["text2"][:10])
        else:  # 1-mix: concatenate everything into text1
            fetched["text1"] = "".join(_url2txt_or_error(url) for url in urls)
            ic(fetched["text1"][:10])
            # The mixed-text widget has its own key; without this the
            # fetched text would never show up in that text area.
            state.fetched_mixed_text1 = fetched["text1"]

        state.ns.list1 = _nonblank_lines(fetched["text1"])
        state.ns.list2 = _nonblank_lines(fetched["text2"])
        ic(len(state.ns.list1), len(state.ns.list2))

        # Pre-populate the editable text areas (matched by widget key).
        state.fetched_text1 = fetched["text1"]
        state.fetched_text2 = fetched["text2"]

        # streamlit complains if an initial value of
        # a widget with this key is set, hence no
        # state.text_area_urls = ... here

    label = f"{beetype}: Paste urls below and press Ctl+Enter or Space Ctl+Enter to testdrive"
    st.text_area(
        label=label,
        value=value,
        key="text_area_urls",
        # NOTE(review): recent Streamlit releases document a minimum
        # text_area height of 68 px - confirm against the pinned version.
        height=25,
        help=" URLs separated by at least a space or a newline（贴网址，空格分开或另起一行, Ctrl+回车提交）",
        on_change=fetch_cb,
    )

    def text2lists():
        """Convert the submitted (possibly edited) text(s) to list(s)."""
        # Read the submitted values by widget key: this callback runs before
        # the rerun, so closure variables would hold the pre-edit text.
        if state.ns.sourcecount == 2:
            texts = (
                state.get("fetched_text1", ""),
                state.get("fetched_text2", ""),
            )
        else:
            # 1-mix has no second text; list2 is left untouched.
            texts = (state.get("fetched_mixed_text1", ""), "")

        for num, text in enumerate(texts, start=1):
            if not text:
                continue
            try:
                setattr(state.ns, f"list{num}", _nonblank_lines(text))
            except Exception as exc:
                logger.warning("text%s to list%s errors: %s", num, num, exc)

    # show fetched text(s); the content of each text area comes from session
    # state (set in fetch_cb under the widget's key), so no value= is passed.
    edit_label = "Edit when necessary, click Submit when ready"
    if two_sources:  # 2-sep
        with st.form(key="fetched_2texts_in_form"):
            with st.expander(f"{state.ns.beetype}: fetched text", expanded=True):
                col1, col2 = st.columns(2)
                with col1:
                    st.text_area(label=edit_label, key="fetched_text1", height=500)
                with col2:
                    st.text_area(label=edit_label, key="fetched_text2", height=500)

            submitted = st.form_submit_button("Submit", on_click=text2lists)
    else:  # 1-mix
        with st.form(key="fetched_1_text_in_form"):
            with st.expander(
                f"{state.ns.beetype}: fetched mixed text", expanded=True
            ):
                st.text_area(
                    label=edit_label, key="fetched_mixed_text1", height=500
                )
            submitted = st.form_submit_button("Submit", on_click=text2lists)

    if not submitted:
        ic("Submit not yet clicked")
        return

    state.ns.src_filename = ""
    state.ns.updated = True