freemt
committed on
Commit
·
b665d70
1
Parent(s):
78bc728
Update hanzidentifier opencc to handle traditional Chinese
Browse files
- app-.py +0 -140
- app.py +1 -0
- litbee/__main__.py +2 -0
- litbee/app-.py +0 -140
- litbee/app.py +10 -11
- litbee/dzbee_page.py +0 -20
- litbee/ezbee_page.py +0 -129
- litbee/fetch_upload.py +28 -18
- litbee/home.py +14 -9
- litbee/info.py +8 -5
- litbee/multipage.py +18 -17
- litbee/options.py +3 -2
- litbee/settings.py +22 -3
- litbee/t2s.py +39 -0
- litbee/utils.py +2 -1
- poetry.lock +68 -1
- pylintrc +7 -0
- pyproject.toml +6 -1
- run-flake8.sh +1 -0
- tests/test_t2s.py +15 -0
app-.py
DELETED
|
@@ -1,140 +0,0 @@
|
|
| 1 |
-
"""Prep __main__.py.
|
| 2 |
-
|
| 3 |
-
https://share.streamlit.io/deploy
|
| 4 |
-
Advanced settings...
|
| 5 |
-
Python version
|
| 6 |
-
3.7
|
| 7 |
-
3.8
|
| 8 |
-
3.9*
|
| 9 |
-
3.10
|
| 10 |
-
|
| 11 |
-
https://docs.streamlit.io/knowledge-base/using-streamlit/hide-row-indices-displaying-dataframe
|
| 12 |
-
Hide row indices when displaying a dataframe
|
| 13 |
-
# CSS to inject contained in a string
|
| 14 |
-
hide_table_row_index = '''
|
| 15 |
-
<style>
|
| 16 |
-
tbody th {display:none}
|
| 17 |
-
.blank {display:none}
|
| 18 |
-
</style>
|
| 19 |
-
'''
|
| 20 |
-
# Inject CSS with Markdown
|
| 21 |
-
st.markdown(hide_table_row_index, unsafe_allow_html=True)
|
| 22 |
-
|
| 23 |
-
# Display a static table
|
| 24 |
-
st.table(df)
|
| 25 |
-
|
| 26 |
-
# Hide row indices with st.dataframe
|
| 27 |
-
# CSS to inject contained in a string
|
| 28 |
-
hide_dataframe_row_index = '''
|
| 29 |
-
<style>
|
| 30 |
-
.row_heading.level0 {display:none}
|
| 31 |
-
.blank {display:none}
|
| 32 |
-
</style>
|
| 33 |
-
'''
|
| 34 |
-
# Inject CSS with Markdown
|
| 35 |
-
st.markdown(hide_dataframe_row_index, unsafe_allow_html=True)
|
| 36 |
-
|
| 37 |
-
# Display an interactive table
|
| 38 |
-
st.dataframe(df)
|
| 39 |
-
|
| 40 |
-
https://medium.com/@avra42/streamlit-python-cool-tricks-to-make-your-web-application-look-better-8abfc3763a5b
|
| 41 |
-
hide_menu_style = '''
|
| 42 |
-
<style>
|
| 43 |
-
#MainMenu {visibility: hidden; }
|
| 44 |
-
footer {visibility: hidden;}
|
| 45 |
-
</style>
|
| 46 |
-
'''
|
| 47 |
-
st.markdown(hide_menu_style, unsafe_allow_html=True)
|
| 48 |
-
|
| 49 |
-
"""
|
| 50 |
-
# pylint: disable=invalid-name
|
| 51 |
-
import os
|
| 52 |
-
import sys
|
| 53 |
-
import time
|
| 54 |
-
from pathlib import Path
|
| 55 |
-
from types import SimpleNamespace
|
| 56 |
-
from typing import Optional
|
| 57 |
-
|
| 58 |
-
import loguru
|
| 59 |
-
import logzero
|
| 60 |
-
import pandas as pd
|
| 61 |
-
import streamlit as st
|
| 62 |
-
from loguru import logger as loggu
|
| 63 |
-
from logzero import logger
|
| 64 |
-
from set_loglevel import set_loglevel
|
| 65 |
-
from streamlit import session_state as state
|
| 66 |
-
|
| 67 |
-
from litbee import __version__, litbee
|
| 68 |
-
from litbee.options import options
|
| 69 |
-
|
| 70 |
-
# from litbee.files2df import files2df
|
| 71 |
-
# from litbee.utils import sb_front_cover, instructions, menu_items
|
| 72 |
-
# from litbee.ezbee_page import ezbee_page
|
| 73 |
-
# from litbee.dzbee_page import dzbee_page
|
| 74 |
-
# from litbee.xbee_page import xbee_page
|
| 75 |
-
from litbee.utils import menu_items
|
| 76 |
-
|
| 77 |
-
# from ezbee import ezbee
|
| 78 |
-
|
| 79 |
-
curr_py = sys.version[:3]
|
| 80 |
-
msg = f"Some packages litbee depends on can only run with Python 3.8, current python is {curr_py}, sorry..."
|
| 81 |
-
assert curr_py == "3.8", msg
|
| 82 |
-
|
| 83 |
-
os.environ["TZ"] = "Asia/Shanghai"
|
| 84 |
-
time.tzset()
|
| 85 |
-
os.environ["LOGLEVEL"] = "10" # uncomment this in dev
|
| 86 |
-
logzero.loglevel(set_loglevel())
|
| 87 |
-
|
| 88 |
-
loggu.remove()
|
| 89 |
-
_ = (
|
| 90 |
-
"<green>{time:YY-MM-DD HH:mm:ss}</green> | "
|
| 91 |
-
"<level>{level: <5}</level> | <level>{message}</level> "
|
| 92 |
-
"<cyan>{name}</cyan>:<cyan>{line}</cyan>"
|
| 93 |
-
)
|
| 94 |
-
loggu.add(
|
| 95 |
-
sys.stderr,
|
| 96 |
-
format=_,
|
| 97 |
-
level=set_loglevel(),
|
| 98 |
-
colorize=True,
|
| 99 |
-
)
|
| 100 |
-
|
| 101 |
-
# from PIL import Image
|
| 102 |
-
# page_icon=Image.open("icon.ico"),
|
| 103 |
-
st.set_page_config(
|
| 104 |
-
page_title=f"litbee v{__version__}",
|
| 105 |
-
# page_icon="🧊",
|
| 106 |
-
page_icon="🐝",
|
| 107 |
-
# layout="wide",
|
| 108 |
-
initial_sidebar_state="auto", # "auto" or "expanded" or "collapsed",
|
| 109 |
-
menu_items=menu_items,
|
| 110 |
-
)
|
| 111 |
-
|
| 112 |
-
# pd.set_option("precision", 2)
|
| 113 |
-
pd.set_option("display.precision", 2)
|
| 114 |
-
pd.options.display.float_format = "{:,.2f}".format
|
| 115 |
-
|
| 116 |
-
_ = dict(
|
| 117 |
-
beetype="ezbee",
|
| 118 |
-
src_filename="",
|
| 119 |
-
tgt_filename="",
|
| 120 |
-
src_fileio=b"",
|
| 121 |
-
tgt_fileio=b"",
|
| 122 |
-
src_file="",
|
| 123 |
-
tgt_file="",
|
| 124 |
-
list1=[""],
|
| 125 |
-
list2=[""],
|
| 126 |
-
df=None,
|
| 127 |
-
df_a=None,
|
| 128 |
-
df_s_a=None,
|
| 129 |
-
)
|
| 130 |
-
if "ns" not in state:
|
| 131 |
-
state.ns = SimpleNamespace(**_)
|
| 132 |
-
state.ns.list = [*_]
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
def main():
|
| 136 |
-
"""Bootstrap."""
|
| 137 |
-
options()
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
CHANGED
|
@@ -135,6 +135,7 @@ _ = dict(
|
|
| 135 |
beetype="ezbee",
|
| 136 |
sourcetype="upload",
|
| 137 |
sourcecount=2,
|
|
|
|
| 138 |
src_filename="",
|
| 139 |
tgt_filename="",
|
| 140 |
src_fileio=b"",
|
|
|
|
| 135 |
beetype="ezbee",
|
| 136 |
sourcetype="upload",
|
| 137 |
sourcecount=2,
|
| 138 |
+
sent_ali=None,
|
| 139 |
src_filename="",
|
| 140 |
tgt_filename="",
|
| 141 |
src_fileio=b"",
|
litbee/__main__.py
CHANGED
|
@@ -1,5 +1,7 @@
|
|
| 1 |
"""Run streamlit run app.py from __main__.py."""
|
|
|
|
| 2 |
import sys
|
|
|
|
| 3 |
from streamlit import cli
|
| 4 |
|
| 5 |
sys.argv = ["streamlit", "run", "app.py"]
|
|
|
|
| 1 |
"""Run streamlit run app.py from __main__.py."""
|
| 2 |
+
# pylint: disable=no-value-for-parameter
|
| 3 |
import sys
|
| 4 |
+
|
| 5 |
from streamlit import cli
|
| 6 |
|
| 7 |
sys.argv = ["streamlit", "run", "app.py"]
|
litbee/app-.py
DELETED
|
@@ -1,140 +0,0 @@
|
|
| 1 |
-
"""Prep __main__.py.
|
| 2 |
-
|
| 3 |
-
https://share.streamlit.io/deploy
|
| 4 |
-
Advanced settings...
|
| 5 |
-
Python version
|
| 6 |
-
3.7
|
| 7 |
-
3.8
|
| 8 |
-
3.9*
|
| 9 |
-
3.10
|
| 10 |
-
|
| 11 |
-
https://docs.streamlit.io/knowledge-base/using-streamlit/hide-row-indices-displaying-dataframe
|
| 12 |
-
Hide row indices when displaying a dataframe
|
| 13 |
-
# CSS to inject contained in a string
|
| 14 |
-
hide_table_row_index = '''
|
| 15 |
-
<style>
|
| 16 |
-
tbody th {display:none}
|
| 17 |
-
.blank {display:none}
|
| 18 |
-
</style>
|
| 19 |
-
'''
|
| 20 |
-
# Inject CSS with Markdown
|
| 21 |
-
st.markdown(hide_table_row_index, unsafe_allow_html=True)
|
| 22 |
-
|
| 23 |
-
# Display a static table
|
| 24 |
-
st.table(df)
|
| 25 |
-
|
| 26 |
-
# Hide row indices with st.dataframe
|
| 27 |
-
# CSS to inject contained in a string
|
| 28 |
-
hide_dataframe_row_index = '''
|
| 29 |
-
<style>
|
| 30 |
-
.row_heading.level0 {display:none}
|
| 31 |
-
.blank {display:none}
|
| 32 |
-
</style>
|
| 33 |
-
'''
|
| 34 |
-
# Inject CSS with Markdown
|
| 35 |
-
st.markdown(hide_dataframe_row_index, unsafe_allow_html=True)
|
| 36 |
-
|
| 37 |
-
# Display an interactive table
|
| 38 |
-
st.dataframe(df)
|
| 39 |
-
|
| 40 |
-
https://medium.com/@avra42/streamlit-python-cool-tricks-to-make-your-web-application-look-better-8abfc3763a5b
|
| 41 |
-
hide_menu_style = '''
|
| 42 |
-
<style>
|
| 43 |
-
#MainMenu {visibility: hidden; }
|
| 44 |
-
footer {visibility: hidden;}
|
| 45 |
-
</style>
|
| 46 |
-
'''
|
| 47 |
-
st.markdown(hide_menu_style, unsafe_allow_html=True)
|
| 48 |
-
|
| 49 |
-
"""
|
| 50 |
-
# pylint: disable=invalid-name
|
| 51 |
-
import os
|
| 52 |
-
import sys
|
| 53 |
-
import time
|
| 54 |
-
from pathlib import Path
|
| 55 |
-
from types import SimpleNamespace
|
| 56 |
-
from typing import Optional
|
| 57 |
-
|
| 58 |
-
import loguru
|
| 59 |
-
import logzero
|
| 60 |
-
import pandas as pd
|
| 61 |
-
import streamlit as st
|
| 62 |
-
from loguru import logger as loggu
|
| 63 |
-
from logzero import logger
|
| 64 |
-
from set_loglevel import set_loglevel
|
| 65 |
-
from streamlit import session_state as state
|
| 66 |
-
|
| 67 |
-
from litbee import __version__, litbee
|
| 68 |
-
from litbee.options import options
|
| 69 |
-
|
| 70 |
-
# from litbee.files2df import files2df
|
| 71 |
-
# from litbee.utils import sb_front_cover, instructions, menu_items
|
| 72 |
-
# from litbee.ezbee_page import ezbee_page
|
| 73 |
-
# from litbee.dzbee_page import dzbee_page
|
| 74 |
-
# from litbee.xbee_page import xbee_page
|
| 75 |
-
from litbee.utils import menu_items
|
| 76 |
-
|
| 77 |
-
# from ezbee import ezbee
|
| 78 |
-
|
| 79 |
-
curr_py = sys.version[:3]
|
| 80 |
-
msg = f"Some packages litbee depends on can only run with Python 3.8, current python is {curr_py}, sorry..."
|
| 81 |
-
assert curr_py == "3.8", msg
|
| 82 |
-
|
| 83 |
-
os.environ["TZ"] = "Asia/Shanghai"
|
| 84 |
-
time.tzset()
|
| 85 |
-
os.environ["LOGLEVEL"] = "10" # uncomment this in dev
|
| 86 |
-
logzero.loglevel(set_loglevel())
|
| 87 |
-
|
| 88 |
-
loggu.remove()
|
| 89 |
-
_ = (
|
| 90 |
-
"<green>{time:YY-MM-DD HH:mm:ss}</green> | "
|
| 91 |
-
"<level>{level: <5}</level> | <level>{message}</level> "
|
| 92 |
-
"<cyan>{name}</cyan>:<cyan>{line}</cyan>"
|
| 93 |
-
)
|
| 94 |
-
loggu.add(
|
| 95 |
-
sys.stderr,
|
| 96 |
-
format=_,
|
| 97 |
-
level=set_loglevel(),
|
| 98 |
-
colorize=True,
|
| 99 |
-
)
|
| 100 |
-
|
| 101 |
-
# from PIL import Image
|
| 102 |
-
# page_icon=Image.open("icon.ico"),
|
| 103 |
-
st.set_page_config( # type: ignore
|
| 104 |
-
page_title=f"litbee v{__version__}",
|
| 105 |
-
# page_icon="🧊",
|
| 106 |
-
page_icon="🐝",
|
| 107 |
-
# layout="wide",
|
| 108 |
-
initial_sidebar_state="auto", # "auto" or "expanded" or "collapsed",
|
| 109 |
-
menu_items=menu_items,
|
| 110 |
-
)
|
| 111 |
-
|
| 112 |
-
# pd.set_option("precision", 2)
|
| 113 |
-
pd.set_option("display.precision", 2)
|
| 114 |
-
pd.options.display.float_format = "{:,.2f}".format
|
| 115 |
-
|
| 116 |
-
_ = dict(
|
| 117 |
-
beetype="ezbee",
|
| 118 |
-
src_filename="",
|
| 119 |
-
tgt_filename="",
|
| 120 |
-
src_fileio=b"",
|
| 121 |
-
tgt_fileio=b"",
|
| 122 |
-
src_file="",
|
| 123 |
-
tgt_file="",
|
| 124 |
-
list1=[""],
|
| 125 |
-
list2=[""],
|
| 126 |
-
df=None,
|
| 127 |
-
df_a=None,
|
| 128 |
-
df_s_a=None,
|
| 129 |
-
)
|
| 130 |
-
if "ns" not in state:
|
| 131 |
-
state.ns = SimpleNamespace(**_)
|
| 132 |
-
state.ns.list = [*_]
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
def main():
|
| 136 |
-
"""Bootstrap."""
|
| 137 |
-
options()
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
litbee/app.py
CHANGED
|
@@ -55,13 +55,12 @@ from pathlib import Path
|
|
| 55 |
from types import SimpleNamespace
|
| 56 |
from typing import Optional
|
| 57 |
|
|
|
|
|
|
|
|
|
|
| 58 |
import loguru
|
| 59 |
import logzero
|
| 60 |
import pandas as pd
|
| 61 |
-
import ezbee
|
| 62 |
-
import dzbee
|
| 63 |
-
import debee
|
| 64 |
-
|
| 65 |
import streamlit as st
|
| 66 |
from loguru import logger as loggu
|
| 67 |
from logzero import logger
|
|
@@ -69,25 +68,25 @@ from set_loglevel import set_loglevel
|
|
| 69 |
from streamlit import session_state as state
|
| 70 |
|
| 71 |
from litbee import __version__
|
| 72 |
-
|
|
|
|
|
|
|
|
|
|
| 73 |
|
| 74 |
# from litbee.files2df import files2df
|
| 75 |
# from litbee.utils import sb_front_cover, instructions, menu_items
|
| 76 |
# from litbee.ezbee_page import ezbee_page
|
| 77 |
# from litbee.dzbee_page import dzbee_page
|
| 78 |
# from litbee.xbee_page import xbee_page
|
| 79 |
-
from litbee.utils import menu_items
|
|
|
|
|
|
|
| 80 |
|
| 81 |
-
from litbee.multipage import Multipage
|
| 82 |
|
| 83 |
# from litbee.fetch_upload import fetch_upload
|
| 84 |
# from litbee.fetch_paste import fetch_paste
|
| 85 |
# from litbee.fetch_urls import fetch_urls
|
| 86 |
|
| 87 |
-
from litbee.home import home
|
| 88 |
-
from litbee.settings import settings
|
| 89 |
-
from litbee.info import info
|
| 90 |
-
from litbee.utils import style_css
|
| 91 |
|
| 92 |
# from ezbee import ezbee
|
| 93 |
|
|
|
|
| 55 |
from types import SimpleNamespace
|
| 56 |
from typing import Optional
|
| 57 |
|
| 58 |
+
import debee
|
| 59 |
+
import dzbee
|
| 60 |
+
import ezbee
|
| 61 |
import loguru
|
| 62 |
import logzero
|
| 63 |
import pandas as pd
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
import streamlit as st
|
| 65 |
from loguru import logger as loggu
|
| 66 |
from logzero import logger
|
|
|
|
| 68 |
from streamlit import session_state as state
|
| 69 |
|
| 70 |
from litbee import __version__
|
| 71 |
+
from litbee.home import home
|
| 72 |
+
from litbee.info import info
|
| 73 |
+
from litbee.multipage import Multipage
|
| 74 |
+
from litbee.settings import settings
|
| 75 |
|
| 76 |
# from litbee.files2df import files2df
|
| 77 |
# from litbee.utils import sb_front_cover, instructions, menu_items
|
| 78 |
# from litbee.ezbee_page import ezbee_page
|
| 79 |
# from litbee.dzbee_page import dzbee_page
|
| 80 |
# from litbee.xbee_page import xbee_page
|
| 81 |
+
from litbee.utils import menu_items, style_css
|
| 82 |
+
|
| 83 |
+
# from litbee.options import options
|
| 84 |
|
|
|
|
| 85 |
|
| 86 |
# from litbee.fetch_upload import fetch_upload
|
| 87 |
# from litbee.fetch_paste import fetch_paste
|
| 88 |
# from litbee.fetch_urls import fetch_urls
|
| 89 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
|
| 91 |
# from ezbee import ezbee
|
| 92 |
|
litbee/dzbee_page.py
DELETED
|
@@ -1,20 +0,0 @@
|
|
| 1 |
-
"""Display dzbee page."""
|
| 2 |
-
import pandas as pd
|
| 3 |
-
import streamlit as st
|
| 4 |
-
from loguru import logger as loggu
|
| 5 |
-
from logzero import logger
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
def dzbee_page():
|
| 9 |
-
"""Display dzbee page."""
|
| 10 |
-
# st.title('dzbee')
|
| 11 |
-
# st.write('Welcome to app1')
|
| 12 |
-
|
| 13 |
-
try:
|
| 14 |
-
df = st.session_state.ns.df
|
| 15 |
-
except Exception as exc:
|
| 16 |
-
logger.error(exc)
|
| 17 |
-
df = pd.DataFrame([[""]])
|
| 18 |
-
|
| 19 |
-
loggu.debug(" df ")
|
| 20 |
-
st.table(df)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
litbee/ezbee_page.py
DELETED
|
@@ -1,129 +0,0 @@
|
|
| 1 |
-
"""Display ezbee page."""
|
| 2 |
-
from functools import partial
|
| 3 |
-
|
| 4 |
-
import logzero
|
| 5 |
-
import pandas as pd
|
| 6 |
-
import streamlit as st
|
| 7 |
-
from ezbee import ezbee
|
| 8 |
-
from ezbee.gen_pairs import gen_pairs
|
| 9 |
-
from loguru import logger as loggu
|
| 10 |
-
from logzero import logger
|
| 11 |
-
from set_loglevel import set_loglevel
|
| 12 |
-
from st_aggrid import AgGrid
|
| 13 |
-
from st_aggrid.grid_options_builder import GridOptionsBuilder
|
| 14 |
-
from streamlit import session_state as state
|
| 15 |
-
|
| 16 |
-
logzero.loglevel(set_loglevel())
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
def st_radio_horizontal(*args, **kwargs):
|
| 20 |
-
"""Trick to have horizontal st radio to simulate tabs."""
|
| 21 |
-
col, _ = st.columns(2)
|
| 22 |
-
with col:
|
| 23 |
-
# st.write('<style> div[data-testid=column] > div > div > div > div.stRadio > div{flex-direction: row;}</style>', unsafe_allow_html=True)
|
| 24 |
-
# return st.radio(*args, **kwargs)
|
| 25 |
-
st.write(
|
| 26 |
-
"<style> div[data-testid=stSidebar] > div > div > div > div > div > div > div.stRadio > div{flex-direction: row;}</style>",
|
| 27 |
-
unsafe_allow_html=True,
|
| 28 |
-
)
|
| 29 |
-
return st.sidebar.radio(*args, **kwargs)
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
def ezbee_page():
|
| 33 |
-
"""Display ezbee page."""
|
| 34 |
-
# st.title('ezbee')
|
| 35 |
-
# st.write('### ezbee')
|
| 36 |
-
# st.write('Welcome to app1')
|
| 37 |
-
|
| 38 |
-
_ = """
|
| 39 |
-
try:
|
| 40 |
-
df = st.session_state.ns.df
|
| 41 |
-
except Exception as exc:
|
| 42 |
-
logger.error(exc)
|
| 43 |
-
df = pd.DataFrame([[""]])
|
| 44 |
-
# """
|
| 45 |
-
|
| 46 |
-
# st.table(df) # looks alright
|
| 47 |
-
|
| 48 |
-
# stlyed pd dataframe?
|
| 49 |
-
# bigger, no pagination
|
| 50 |
-
# st.markdown(df.to_html(), unsafe_allow_html=True)
|
| 51 |
-
|
| 52 |
-
# ag_grid smallish, editable, probably slower
|
| 53 |
-
|
| 54 |
-
if "df" not in globals():
|
| 55 |
-
logger.debug(" df not defined, return")
|
| 56 |
-
return None
|
| 57 |
-
|
| 58 |
-
df = pd.DataFrame([["", "", ""]], columns=["text1", "text2", "llh"])
|
| 59 |
-
|
| 60 |
-
df_exp = st.expander("to be aligned", expanded=False)
|
| 61 |
-
with df_exp:
|
| 62 |
-
st.write(df) # too small
|
| 63 |
-
|
| 64 |
-
_ = """
|
| 65 |
-
ag_exp = st.expander("done aligned") # , expanded=False
|
| 66 |
-
with ag_exp:
|
| 67 |
-
agdf = AgGrid(
|
| 68 |
-
df,
|
| 69 |
-
# fit_columns_on_grid_load=True,
|
| 70 |
-
editable=True,
|
| 71 |
-
gridOptions=gridOptions,
|
| 72 |
-
key="ag_exp",
|
| 73 |
-
)
|
| 74 |
-
# """
|
| 75 |
-
|
| 76 |
-
list1 = [elm.strip() for elm in df.text1 if elm.strip()]
|
| 77 |
-
list2 = [elm.strip() for elm in df.text2 if elm.strip()]
|
| 78 |
-
logger.info("Processing data...")
|
| 79 |
-
try:
|
| 80 |
-
aset = ezbee(
|
| 81 |
-
list1,
|
| 82 |
-
list2,
|
| 83 |
-
# eps=eps,
|
| 84 |
-
# min_samples=min_samples,
|
| 85 |
-
)
|
| 86 |
-
except Exception as e:
|
| 87 |
-
logger.error("aset = ezbee(...) exc: %s", e)
|
| 88 |
-
aset = ""
|
| 89 |
-
return None
|
| 90 |
-
|
| 91 |
-
# fastlid changed logger.level is changed to 20
|
| 92 |
-
# turn back to loglevel
|
| 93 |
-
logzero.loglevel(set_loglevel())
|
| 94 |
-
if aset:
|
| 95 |
-
logger.debug("aset: %s...%s", aset[:3], aset[-3:])
|
| 96 |
-
|
| 97 |
-
# st.write(aset)
|
| 98 |
-
|
| 99 |
-
aligned_pairs = gen_pairs(list1, list2, aset)
|
| 100 |
-
if aligned_pairs:
|
| 101 |
-
logger.debug("%s...%s", aligned_pairs[:3], aligned_pairs[-3:])
|
| 102 |
-
|
| 103 |
-
df_a = pd.DataFrame(aligned_pairs, columns=["text1", "text2", "llh"])
|
| 104 |
-
|
| 105 |
-
# insert seq no
|
| 106 |
-
df_a.insert(0, "sn", range(len(df_a)))
|
| 107 |
-
|
| 108 |
-
gb = GridOptionsBuilder.from_dataframe(df_a)
|
| 109 |
-
gb.configure_pagination(paginationAutoPageSize=True)
|
| 110 |
-
options = {
|
| 111 |
-
"resizable": True,
|
| 112 |
-
"autoHeight": True,
|
| 113 |
-
"wrapText": True,
|
| 114 |
-
"editable": True,
|
| 115 |
-
}
|
| 116 |
-
gb.configure_default_column(**options)
|
| 117 |
-
gridOptions = gb.build()
|
| 118 |
-
|
| 119 |
-
st.write("aligned (double-click a cell to edit)")
|
| 120 |
-
agdf = AgGrid(
|
| 121 |
-
# df,
|
| 122 |
-
df_a,
|
| 123 |
-
gridOptions=gridOptions,
|
| 124 |
-
key="outside",
|
| 125 |
-
editable=True,
|
| 126 |
-
width="100%",
|
| 127 |
-
height=500,
|
| 128 |
-
# fit_columns_on_grid_load=True,
|
| 129 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
litbee/fetch_upload.py
CHANGED
|
@@ -3,31 +3,33 @@
|
|
| 3 |
org ezbee_page.py.
|
| 4 |
"""
|
| 5 |
# pylint: disable=invalid-name
|
| 6 |
-
|
| 7 |
import inspect
|
|
|
|
|
|
|
| 8 |
from itertools import zip_longest
|
| 9 |
from time import perf_counter
|
| 10 |
|
|
|
|
| 11 |
import logzero
|
| 12 |
import numpy as np
|
| 13 |
import pandas as pd
|
| 14 |
import streamlit as st
|
| 15 |
-
from dzbee import dzbee # noqa
|
| 16 |
-
from ezbee import ezbee # noqa
|
| 17 |
-
from debee import debee # noqa
|
| 18 |
|
| 19 |
-
# from ezbee.gen_pairs import gen_pairs # aset2pairs?
|
| 20 |
from aset2pairs import aset2pairs
|
|
|
|
|
|
|
|
|
|
| 21 |
from fastlid import fastlid
|
| 22 |
from icecream import ic
|
| 23 |
from loguru import logger as loggu
|
| 24 |
from logzero import logger
|
| 25 |
from set_loglevel import set_loglevel
|
| 26 |
-
from st_aggrid import AgGrid,
|
| 27 |
-
|
| 28 |
from streamlit import session_state as state
|
| 29 |
|
| 30 |
-
|
| 31 |
|
| 32 |
|
| 33 |
def fetch_upload(): # noqa
|
|
@@ -43,7 +45,7 @@ def fetch_upload(): # noqa
|
|
| 43 |
return None
|
| 44 |
|
| 45 |
# src_fileio tgt_fileio
|
| 46 |
-
with st.form(key=
|
| 47 |
_ = st.expander(f"{state.ns.beetype}: Pick two files", expanded=True)
|
| 48 |
with _:
|
| 49 |
col1, col2 = st.columns(2)
|
|
@@ -67,7 +69,7 @@ def fetch_upload(): # noqa
|
|
| 67 |
key="tgt_text",
|
| 68 |
# accept_multiple_files=True,
|
| 69 |
)
|
| 70 |
-
submitted = st.form_submit_button(
|
| 71 |
|
| 72 |
# logger.debug(" len(src_fileio): %s", len(src_fileio))
|
| 73 |
# logger.debug(" len(tgt_fileio): %s", len(tgt_fileio))
|
|
@@ -127,10 +129,12 @@ def fetch_upload(): # noqa
|
|
| 127 |
if not (filename1 or filename2):
|
| 128 |
st.write("| no file uploaded")
|
| 129 |
return None
|
| 130 |
-
|
|
|
|
| 131 |
st.write("| file1 not ready")
|
| 132 |
return None
|
| 133 |
-
|
|
|
|
| 134 |
st.write("| file2 not ready")
|
| 135 |
return None
|
| 136 |
|
|
@@ -216,16 +220,18 @@ def fetch_upload(): # noqa
|
|
| 216 |
# logger.debug("fn.__doc__: %s", fn.__doc__)
|
| 217 |
logger.debug("fn.__name__: %s", fn.__name__)
|
| 218 |
|
| 219 |
-
from inspect import getabsfile
|
| 220 |
-
logger.debug("getabsfile(fn): %s", getabsfile(fn))
|
|
|
|
|
|
|
| 221 |
|
| 222 |
with st.spinner(" diggin..."):
|
| 223 |
then = perf_counter()
|
| 224 |
try:
|
| 225 |
# aset = ezbee/dzbee/debee
|
| 226 |
aset = globals()[state.ns.beetype](
|
| 227 |
-
list1,
|
| 228 |
-
list2,
|
| 229 |
# eps=eps,
|
| 230 |
# min_samples=min_samples,
|
| 231 |
)
|
|
@@ -267,7 +273,9 @@ def fetch_upload(): # noqa
|
|
| 267 |
logger.debug("%s...%s", aligned_pairs[:1], aligned_pairs[-1:])
|
| 268 |
# logger.debug("aligned_pairs[:20]: \n%s", aligned_pairs[:20])
|
| 269 |
|
| 270 |
-
df_a = pd.DataFrame(
|
|
|
|
|
|
|
| 271 |
|
| 272 |
# if set_loglevel() <= 10:
|
| 273 |
_ = st.expander("done aligned")
|
|
@@ -301,5 +309,7 @@ def fetch_upload(): # noqa
|
|
| 301 |
# width="100%", # width parameter is deprecated
|
| 302 |
height=750,
|
| 303 |
# fit_columns_on_grid_load=True,
|
| 304 |
-
update_mode=GridUpdateMode.MODEL_CHANGED
|
| 305 |
)
|
|
|
|
|
|
|
|
|
| 3 |
org ezbee_page.py.
|
| 4 |
"""
|
| 5 |
# pylint: disable=invalid-name
|
| 6 |
+
# pylint: disable=too-many-return-statements,too-many-branches,too-many-statements, too-many-locals
|
| 7 |
import inspect
|
| 8 |
+
|
| 9 |
+
from functools import partial
|
| 10 |
from itertools import zip_longest
|
| 11 |
from time import perf_counter
|
| 12 |
|
| 13 |
+
import hanzidentifier
|
| 14 |
import logzero
|
| 15 |
import numpy as np
|
| 16 |
import pandas as pd
|
| 17 |
import streamlit as st
|
|
|
|
|
|
|
|
|
|
| 18 |
|
|
|
|
| 19 |
from aset2pairs import aset2pairs
|
| 20 |
+
from debee import debee # noqa
|
| 21 |
+
from dzbee import dzbee # noqa
|
| 22 |
+
from ezbee import ezbee # noqa
|
| 23 |
from fastlid import fastlid
|
| 24 |
from icecream import ic
|
| 25 |
from loguru import logger as loggu
|
| 26 |
from logzero import logger
|
| 27 |
from set_loglevel import set_loglevel
|
| 28 |
+
from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode
|
| 29 |
+
|
| 30 |
from streamlit import session_state as state
|
| 31 |
|
| 32 |
+
from litbee.t2s import t2s
|
| 33 |
|
| 34 |
|
| 35 |
def fetch_upload(): # noqa
|
|
|
|
| 45 |
return None
|
| 46 |
|
| 47 |
# src_fileio tgt_fileio
|
| 48 |
+
with st.form(key="upload_in_form"):
|
| 49 |
_ = st.expander(f"{state.ns.beetype}: Pick two files", expanded=True)
|
| 50 |
with _:
|
| 51 |
col1, col2 = st.columns(2)
|
|
|
|
| 69 |
key="tgt_text",
|
| 70 |
# accept_multiple_files=True,
|
| 71 |
)
|
| 72 |
+
submitted = st.form_submit_button("Submit")
|
| 73 |
|
| 74 |
# logger.debug(" len(src_fileio): %s", len(src_fileio))
|
| 75 |
# logger.debug(" len(tgt_fileio): %s", len(tgt_fileio))
|
|
|
|
| 129 |
if not (filename1 or filename2):
|
| 130 |
st.write("| no file uploaded")
|
| 131 |
return None
|
| 132 |
+
|
| 133 |
+
if not filename1:
|
| 134 |
st.write("| file1 not ready")
|
| 135 |
return None
|
| 136 |
+
|
| 137 |
+
if not filename2:
|
| 138 |
st.write("| file2 not ready")
|
| 139 |
return None
|
| 140 |
|
|
|
|
| 220 |
# logger.debug("fn.__doc__: %s", fn.__doc__)
|
| 221 |
logger.debug("fn.__name__: %s", fn.__name__)
|
| 222 |
|
| 223 |
+
# from inspect import getabsfile
|
| 224 |
+
# logger.debug("getabsfile(fn): %s", getabsfile(fn))
|
| 225 |
+
|
| 226 |
+
# convert to simplified Chinese if is_traditional
|
| 227 |
|
| 228 |
with st.spinner(" diggin..."):
|
| 229 |
then = perf_counter()
|
| 230 |
try:
|
| 231 |
# aset = ezbee/dzbee/debee
|
| 232 |
aset = globals()[state.ns.beetype](
|
| 233 |
+
t2s(list1), # t2s, handle trand.chinese
|
| 234 |
+
t2s(list2),
|
| 235 |
# eps=eps,
|
| 236 |
# min_samples=min_samples,
|
| 237 |
)
|
|
|
|
| 273 |
logger.debug("%s...%s", aligned_pairs[:1], aligned_pairs[-1:])
|
| 274 |
# logger.debug("aligned_pairs[:20]: \n%s", aligned_pairs[:20])
|
| 275 |
|
| 276 |
+
df_a = pd.DataFrame(
|
| 277 |
+
aligned_pairs, columns=["text1", "text2", "llh"], dtype="object"
|
| 278 |
+
)
|
| 279 |
|
| 280 |
# if set_loglevel() <= 10:
|
| 281 |
_ = st.expander("done aligned")
|
|
|
|
| 309 |
# width="100%", # width parameter is deprecated
|
| 310 |
height=750,
|
| 311 |
# fit_columns_on_grid_load=True,
|
| 312 |
+
update_mode=GridUpdateMode.MODEL_CHANGED,
|
| 313 |
)
|
| 314 |
+
|
| 315 |
+
return None
|
litbee/home.py
CHANGED
|
@@ -2,9 +2,10 @@
|
|
| 2 |
|
| 3 |
org ezbee_page.py.
|
| 4 |
"""
|
|
|
|
|
|
|
| 5 |
# pylint: disable=invalid-name
|
| 6 |
from functools import partial
|
| 7 |
-
import inspect
|
| 8 |
from itertools import zip_longest
|
| 9 |
from time import perf_counter
|
| 10 |
|
|
@@ -12,18 +13,19 @@ import logzero
|
|
| 12 |
import numpy as np
|
| 13 |
import pandas as pd
|
| 14 |
import streamlit as st
|
| 15 |
-
from dzbee import dzbee # noqa
|
| 16 |
-
from ezbee import ezbee # noqa
|
| 17 |
-
from debee import debee # noqa
|
| 18 |
|
| 19 |
# from ezbee.gen_pairs import gen_pairs # aset2pairs?
|
| 20 |
from aset2pairs import aset2pairs
|
|
|
|
|
|
|
|
|
|
| 21 |
from fastlid import fastlid
|
| 22 |
from icecream import ic
|
| 23 |
from loguru import logger as loggu
|
| 24 |
from logzero import logger
|
| 25 |
from set_loglevel import set_loglevel
|
| 26 |
-
from st_aggrid import AgGrid,
|
|
|
|
| 27 |
# from st_aggrid.grid_options_builder import GridOptionsBuilder
|
| 28 |
from streamlit import session_state as state
|
| 29 |
|
|
@@ -43,7 +45,7 @@ def home(): # noqa
|
|
| 43 |
return None
|
| 44 |
|
| 45 |
# src_fileio tgt_fileio
|
| 46 |
-
with st.form(key=
|
| 47 |
_ = st.expander(f"{state.ns.beetype}: Pick two files", expanded=True)
|
| 48 |
with _:
|
| 49 |
col1, col2 = st.columns(2)
|
|
@@ -67,7 +69,7 @@ def home(): # noqa
|
|
| 67 |
key="tgt_text",
|
| 68 |
# accept_multiple_files=True,
|
| 69 |
)
|
| 70 |
-
submitted = st.form_submit_button(
|
| 71 |
|
| 72 |
# logger.debug(" len(src_fileio): %s", len(src_fileio))
|
| 73 |
# logger.debug(" len(tgt_fileio): %s", len(tgt_fileio))
|
|
@@ -217,6 +219,7 @@ def home(): # noqa
|
|
| 217 |
logger.debug("fn.__name__: %s", fn.__name__)
|
| 218 |
|
| 219 |
from inspect import getabsfile
|
|
|
|
| 220 |
logger.debug("getabsfile(fn): %s", getabsfile(fn))
|
| 221 |
|
| 222 |
with st.spinner(" diggin..."):
|
|
@@ -267,7 +270,9 @@ def home(): # noqa
|
|
| 267 |
logger.debug("%s...%s", aligned_pairs[:1], aligned_pairs[-1:])
|
| 268 |
# logger.debug("aligned_pairs[:20]: \n%s", aligned_pairs[:20])
|
| 269 |
|
| 270 |
-
df_a = pd.DataFrame(
|
|
|
|
|
|
|
| 271 |
|
| 272 |
# if set_loglevel() <= 10:
|
| 273 |
_ = st.expander("done aligned")
|
|
@@ -301,5 +306,5 @@ def home(): # noqa
|
|
| 301 |
# width="100%", # width parameter is deprecated
|
| 302 |
height=750,
|
| 303 |
# fit_columns_on_grid_load=True,
|
| 304 |
-
update_mode=GridUpdateMode.MODEL_CHANGED
|
| 305 |
)
|
|
|
|
| 2 |
|
| 3 |
org ezbee_page.py.
|
| 4 |
"""
|
| 5 |
+
import inspect
|
| 6 |
+
|
| 7 |
# pylint: disable=invalid-name
|
| 8 |
from functools import partial
|
|
|
|
| 9 |
from itertools import zip_longest
|
| 10 |
from time import perf_counter
|
| 11 |
|
|
|
|
| 13 |
import numpy as np
|
| 14 |
import pandas as pd
|
| 15 |
import streamlit as st
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
# from ezbee.gen_pairs import gen_pairs # aset2pairs?
|
| 18 |
from aset2pairs import aset2pairs
|
| 19 |
+
from debee import debee # noqa
|
| 20 |
+
from dzbee import dzbee # noqa
|
| 21 |
+
from ezbee import ezbee # noqa
|
| 22 |
from fastlid import fastlid
|
| 23 |
from icecream import ic
|
| 24 |
from loguru import logger as loggu
|
| 25 |
from logzero import logger
|
| 26 |
from set_loglevel import set_loglevel
|
| 27 |
+
from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode
|
| 28 |
+
|
| 29 |
# from st_aggrid.grid_options_builder import GridOptionsBuilder
|
| 30 |
from streamlit import session_state as state
|
| 31 |
|
|
|
|
| 45 |
return None
|
| 46 |
|
| 47 |
# src_fileio tgt_fileio
|
| 48 |
+
with st.form(key="upload_in_form"):
|
| 49 |
_ = st.expander(f"{state.ns.beetype}: Pick two files", expanded=True)
|
| 50 |
with _:
|
| 51 |
col1, col2 = st.columns(2)
|
|
|
|
| 69 |
key="tgt_text",
|
| 70 |
# accept_multiple_files=True,
|
| 71 |
)
|
| 72 |
+
submitted = st.form_submit_button("Submit")
|
| 73 |
|
| 74 |
# logger.debug(" len(src_fileio): %s", len(src_fileio))
|
| 75 |
# logger.debug(" len(tgt_fileio): %s", len(tgt_fileio))
|
|
|
|
| 219 |
logger.debug("fn.__name__: %s", fn.__name__)
|
| 220 |
|
| 221 |
from inspect import getabsfile
|
| 222 |
+
|
| 223 |
logger.debug("getabsfile(fn): %s", getabsfile(fn))
|
| 224 |
|
| 225 |
with st.spinner(" diggin..."):
|
|
|
|
| 270 |
logger.debug("%s...%s", aligned_pairs[:1], aligned_pairs[-1:])
|
| 271 |
# logger.debug("aligned_pairs[:20]: \n%s", aligned_pairs[:20])
|
| 272 |
|
| 273 |
+
df_a = pd.DataFrame(
|
| 274 |
+
aligned_pairs, columns=["text1", "text2", "llh"], dtype="object"
|
| 275 |
+
)
|
| 276 |
|
| 277 |
# if set_loglevel() <= 10:
|
| 278 |
_ = st.expander("done aligned")
|
|
|
|
| 306 |
# width="100%", # width parameter is deprecated
|
| 307 |
height=750,
|
| 308 |
# fit_columns_on_grid_load=True,
|
| 309 |
+
update_mode=GridUpdateMode.MODEL_CHANGED,
|
| 310 |
)
|
litbee/info.py
CHANGED
|
@@ -1,10 +1,11 @@
|
|
| 1 |
"""Present info about litbee."""
|
| 2 |
-
import ezbee
|
| 3 |
-
import dzbee
|
| 4 |
-
import debee
|
| 5 |
-
|
| 6 |
from textwrap import dedent
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
import streamlit as st
|
|
|
|
| 8 |
from litbee import __version__
|
| 9 |
|
| 10 |
# from litbee.utils import style_css
|
|
@@ -23,7 +24,9 @@ msg = dedent(
|
|
| 23 |
<li> xbee/bumblebee: other language pairs, normal para-align
|
| 24 |
</ul>
|
| 25 |
|
| 26 |
-
The algorithm for fast para-align is home-brewn. Two
|
|
|
|
|
|
|
| 27 |
</div>
|
| 28 |
"""
|
| 29 |
).strip()
|
|
|
|
| 1 |
"""Present info about litbee."""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
from textwrap import dedent
|
| 3 |
+
|
| 4 |
+
import debee
|
| 5 |
+
import dzbee
|
| 6 |
+
import ezbee
|
| 7 |
import streamlit as st
|
| 8 |
+
|
| 9 |
from litbee import __version__
|
| 10 |
|
| 11 |
# from litbee.utils import style_css
|
|
|
|
| 24 |
<li> xbee/bumblebee: other language pairs, normal para-align
|
| 25 |
</ul>
|
| 26 |
|
| 27 |
+
The algorithm for fast para-align is home-brewn. Two
|
| 28 |
+
sent-align algorithms are used: one based on Gale-Church,
|
| 29 |
+
the other machine learning.
|
| 30 |
</div>
|
| 31 |
"""
|
| 32 |
).strip()
|
litbee/multipage.py
CHANGED
|
@@ -13,29 +13,24 @@ class Multipage:
|
|
| 13 |
"""Framework for combining multiple streamlit applications."""
|
| 14 |
|
| 15 |
def __init__(self) -> None:
|
| 16 |
-
"""
|
| 17 |
self.pages = []
|
| 18 |
|
| 19 |
def add_page(self, title, icon, func) -> None:
|
| 20 |
-
"""Class Method to Add pages to the project
|
| 21 |
|
| 22 |
Args:
|
| 23 |
title ([str]): The title of page which we are adding to the list of apps
|
| 24 |
-
|
| 25 |
func: Python function to render this page in Streamlit
|
| 26 |
"""
|
| 27 |
-
|
| 28 |
-
self.pages.append(
|
| 29 |
-
{
|
| 30 |
-
"title": title,
|
| 31 |
-
"icon": icon,
|
| 32 |
-
"function": func
|
| 33 |
-
}
|
| 34 |
-
)
|
| 35 |
|
| 36 |
def run(self):
|
| 37 |
-
|
| 38 |
-
|
|
|
|
|
|
|
| 39 |
<style>
|
| 40 |
section[data-testid="stSidebar"] > div:first-of-type {
|
| 41 |
background-color: var(--secondary-background-color);
|
|
@@ -51,12 +46,18 @@ class Multipage:
|
|
| 51 |
padding: 1rem 0;
|
| 52 |
}
|
| 53 |
</style>
|
| 54 |
-
""",
|
|
|
|
|
|
|
| 55 |
|
| 56 |
with st.sidebar:
|
| 57 |
-
selected = option_menu(
|
| 58 |
-
|
| 59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
|
| 61 |
# Run the selected page
|
| 62 |
for index, item in enumerate(self.pages):
|
|
|
|
| 13 |
"""Framework for combining multiple streamlit applications."""
|
| 14 |
|
| 15 |
def __init__(self) -> None:
|
| 16 |
+
"""Construct class to generate a list which will store all our applications as an instance variable."""
|
| 17 |
self.pages = []
|
| 18 |
|
| 19 |
def add_page(self, title, icon, func) -> None:
|
| 20 |
+
"""Class Method to Add pages to the project.
|
| 21 |
|
| 22 |
Args:
|
| 23 |
title ([str]): The title of page which we are adding to the list of apps
|
| 24 |
+
icon: icon from streamlit-menu-option
|
| 25 |
func: Python function to render this page in Streamlit
|
| 26 |
"""
|
| 27 |
+
self.pages.append({"title": title, "icon": icon, "function": func})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
def run(self):
|
| 30 |
+
"""Dropdown to select the page to run."""
|
| 31 |
+
# Dropdown to select the page to run
|
| 32 |
+
st.markdown(
|
| 33 |
+
"""
|
| 34 |
<style>
|
| 35 |
section[data-testid="stSidebar"] > div:first-of-type {
|
| 36 |
background-color: var(--secondary-background-color);
|
|
|
|
| 46 |
padding: 1rem 0;
|
| 47 |
}
|
| 48 |
</style>
|
| 49 |
+
""",
|
| 50 |
+
unsafe_allow_html=True,
|
| 51 |
+
)
|
| 52 |
|
| 53 |
with st.sidebar:
|
| 54 |
+
selected = option_menu(
|
| 55 |
+
None,
|
| 56 |
+
[page["title"] for page in self.pages],
|
| 57 |
+
icons=[page["icon"] for page in self.pages],
|
| 58 |
+
menu_icon="cast",
|
| 59 |
+
default_index=0,
|
| 60 |
+
)
|
| 61 |
|
| 62 |
# Run the selected page
|
| 63 |
for index, item in enumerate(self.pages):
|
litbee/options.py
CHANGED
|
@@ -7,12 +7,13 @@ from loguru import logger as loggu
|
|
| 7 |
from logzero import logger
|
| 8 |
from streamlit import session_state as state
|
| 9 |
|
|
|
|
|
|
|
| 10 |
# from litbee.ezbee_page import ezbee_page
|
| 11 |
# from litbee.dzbee_page import dzbee_page
|
| 12 |
# from litbee.xbee_page import xbee_page
|
| 13 |
from litbee.fetch_upload import fetch_upload
|
| 14 |
from litbee.fetch_urls import fetch_urls
|
| 15 |
-
from litbee.fetch_paste import fetch_paste
|
| 16 |
from litbee.files2df import files2df
|
| 17 |
from litbee.utils import instructions, sb_front_cover
|
| 18 |
|
|
@@ -49,7 +50,7 @@ def options():
|
|
| 49 |
# if beetype not in ["ezbee", "dzbee"]:
|
| 50 |
if beetype not in ["ezbee", "dzbee", "debee"]:
|
| 51 |
st.write("Coming soon")
|
| 52 |
-
return
|
| 53 |
|
| 54 |
# multi-page setup
|
| 55 |
menu = {
|
|
|
|
| 7 |
from logzero import logger
|
| 8 |
from streamlit import session_state as state
|
| 9 |
|
| 10 |
+
from litbee.fetch_paste import fetch_paste
|
| 11 |
+
|
| 12 |
# from litbee.ezbee_page import ezbee_page
|
| 13 |
# from litbee.dzbee_page import dzbee_page
|
| 14 |
# from litbee.xbee_page import xbee_page
|
| 15 |
from litbee.fetch_upload import fetch_upload
|
| 16 |
from litbee.fetch_urls import fetch_urls
|
|
|
|
| 17 |
from litbee.files2df import files2df
|
| 18 |
from litbee.utils import instructions, sb_front_cover
|
| 19 |
|
|
|
|
| 50 |
# if beetype not in ["ezbee", "dzbee"]:
|
| 51 |
if beetype not in ["ezbee", "dzbee", "debee"]:
|
| 52 |
st.write("Coming soon")
|
| 53 |
+
return
|
| 54 |
|
| 55 |
# multi-page setup
|
| 56 |
menu = {
|
litbee/settings.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
"""Prep Settings/Options page."""
|
| 2 |
# pylint: disable=invalid-name
|
| 3 |
from functools import partial
|
|
|
|
| 4 |
import streamlit as st
|
| 5 |
from loguru import logger as loggu
|
| 6 |
from logzero import logger
|
|
@@ -10,7 +11,8 @@ from streamlit import session_state as state
|
|
| 10 |
def settings():
|
| 11 |
"""Prep Settings/Options page.
|
| 12 |
|
| 13 |
-
Refer to options.py
|
|
|
|
| 14 |
# horizotal radio
|
| 15 |
st.write(
|
| 16 |
"<style>div.row-widget.stRadio > div{flex-direction:row;}</style>",
|
|
@@ -24,6 +26,7 @@ def settings():
|
|
| 24 |
# col1, col2 = st.columns(2)
|
| 25 |
|
| 26 |
# with col1:
|
|
|
|
| 27 |
try:
|
| 28 |
index = beetype_list.index(state.ns.beetype)
|
| 29 |
except Exception as e:
|
|
@@ -34,7 +37,7 @@ def settings():
|
|
| 34 |
beetype_list,
|
| 35 |
index=index,
|
| 36 |
format_func=lambda x: f"{x:<7} |",
|
| 37 |
-
help=
|
| 38 |
)
|
| 39 |
state.ns.beetype = beetype
|
| 40 |
|
|
@@ -70,11 +73,27 @@ def settings():
|
|
| 70 |
)
|
| 71 |
state.ns.sourcecount = sourcecount
|
| 72 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
# show state.ns[:6]
|
| 74 |
loggu.debug(f" state.ns.list: {state.ns.list}")
|
| 75 |
|
| 76 |
# beetype, sourcetype, sourcecount, filename1, filename2
|
| 77 |
-
_ = map(partial(getattr, state.ns), state.ns.list[:
|
| 78 |
logger.debug(" state.ns.list[:3]: %s", str([*_]))
|
| 79 |
|
| 80 |
# st.write(f"run: {state.ns.count}")
|
|
|
|
| 1 |
"""Prep Settings/Options page."""
|
| 2 |
# pylint: disable=invalid-name
|
| 3 |
from functools import partial
|
| 4 |
+
|
| 5 |
import streamlit as st
|
| 6 |
from loguru import logger as loggu
|
| 7 |
from logzero import logger
|
|
|
|
| 11 |
def settings():
|
| 12 |
"""Prep Settings/Options page.
|
| 13 |
|
| 14 |
+
Refer to options.py
|
| 15 |
+
"""
|
| 16 |
# horizotal radio
|
| 17 |
st.write(
|
| 18 |
"<style>div.row-widget.stRadio > div{flex-direction:row;}</style>",
|
|
|
|
| 26 |
# col1, col2 = st.columns(2)
|
| 27 |
|
| 28 |
# with col1:
|
| 29 |
+
_ = "ezbee: english-chinese; dzbee: german-chinese, debee: german-english; xbee: other language pairs (slow, approx.1000 pairs/3 min) | ezbee: 英/中; dzbee: 德/中, debee: 德/英; xbee: 其他语言对(慢, 约1000对/3分钟)"
|
| 30 |
try:
|
| 31 |
index = beetype_list.index(state.ns.beetype)
|
| 32 |
except Exception as e:
|
|
|
|
| 37 |
beetype_list,
|
| 38 |
index=index,
|
| 39 |
format_func=lambda x: f"{x:<7} |",
|
| 40 |
+
help=_,
|
| 41 |
)
|
| 42 |
state.ns.beetype = beetype
|
| 43 |
|
|
|
|
| 73 |
)
|
| 74 |
state.ns.sourcecount = sourcecount
|
| 75 |
|
| 76 |
+
sentali_list = [None, "fast", "slow"]
|
| 77 |
+
try:
|
| 78 |
+
index = sentali_list.index(state.ns.sentali)
|
| 79 |
+
except Exception as e:
|
| 80 |
+
logger.error("sentali index error: %s, setting to 0", e)
|
| 81 |
+
index = 0
|
| 82 |
+
sentali = st.radio(
|
| 83 |
+
"Sent Align",
|
| 84 |
+
sentali_list,
|
| 85 |
+
index=index,
|
| 86 |
+
format_func=lambda x: f"{str(x):<4} |",
|
| 87 |
+
help="None: no sent align; fast: gale-church; slow: machine-learning",
|
| 88 |
+
disabled=True,
|
| 89 |
+
)
|
| 90 |
+
state.ns.sentali = sentali
|
| 91 |
+
|
| 92 |
# show state.ns[:6]
|
| 93 |
loggu.debug(f" state.ns.list: {state.ns.list}")
|
| 94 |
|
| 95 |
# beetype, sourcetype, sourcecount, filename1, filename2
|
| 96 |
+
_ = map(partial(getattr, state.ns), state.ns.list[:6])
|
| 97 |
logger.debug(" state.ns.list[:3]: %s", str([*_]))
|
| 98 |
|
| 99 |
# st.write(f"run: {state.ns.count}")
|
litbee/t2s.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Convert list to simlified Chinese for traditional Chinese, do nothing otherwise."""
|
| 2 |
+
# pylint: disable=invalid-name
|
| 3 |
+
from typing import List
|
| 4 |
+
import hanzidentifier
|
| 5 |
+
from logzero import logger
|
| 6 |
+
from opencc import OpenCC
|
| 7 |
+
|
| 8 |
+
convert = OpenCC('t2s').convert
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def t2s(lst: List[str]) -> List[str]:
    """Convert traditional Chinese to simplified Chinese, do nothing otherwise.

    The script is identified once, on at most the first 1000 entries joined
    with spaces. Per-entry conversion is attempted only when that sample is
    identified as traditional or mixed.

    Args:
        lst: list of strings

    Returns:
        The input list itself when the sample is not identified as
        traditional or mixed; otherwise a new list of converted strings, in
        which any entry that fails to convert is kept unchanged.
    """
    try:
        # lst[:1000]: trim down the sample for extremely large docs
        script = hanzidentifier.identify(" ".join(lst[:1000]))
    except Exception as e:
        # best effort: a detection failure must not abort the caller,
        # so fall back to "simplified", which means no conversion
        logger.warning(
            "hanzidentifier.identify error: %s, setting to simplified.", e
        )
        script = hanzidentifier.SIMP

    if script not in (hanzidentifier.TRAD, hanzidentifier.MIXED):
        return lst

    res = []
    for line in lst:
        try:
            converted = convert(line)
        except Exception as e:
            # keep the original entry rather than dropping it
            logger.warning("t2s error: %s, setting to original", e)
            converted = line
        res.append(converted)

    return res
|
litbee/utils.py
CHANGED
|
@@ -40,7 +40,7 @@ msg = dedent(
|
|
| 40 |
|
| 41 |
|
| 42 |
def sb_front_cover():
|
| 43 |
-
"""Prep front cover for sidebar"""
|
| 44 |
st.sidebar.markdown(f"### litbee {__version__} ")
|
| 45 |
|
| 46 |
sb_tit_expander = st.sidebar.expander("More info (click to toggle)", expanded=False)
|
|
@@ -65,6 +65,7 @@ intructins = dedent(
|
|
| 65 |
|
| 66 |
|
| 67 |
def instructions():
|
|
|
|
| 68 |
logger.debug("instructions entry")
|
| 69 |
back_cover_expander = st.expander("Instructions")
|
| 70 |
with back_cover_expander:
|
|
|
|
| 40 |
|
| 41 |
|
| 42 |
def sb_front_cover():
|
| 43 |
+
"""Prep front cover for sidebar."""
|
| 44 |
st.sidebar.markdown(f"### litbee {__version__} ")
|
| 45 |
|
| 46 |
sb_tit_expander = st.sidebar.expander("More info (click to toggle)", expanded=False)
|
|
|
|
| 65 |
|
| 66 |
|
| 67 |
def instructions():
|
| 68 |
+
"""Prep msg."""
|
| 69 |
logger.debug("instructions entry")
|
| 70 |
back_cover_expander = st.expander("Instructions")
|
| 71 |
with back_cover_expander:
|
poetry.lock
CHANGED
|
@@ -519,6 +519,17 @@ python-versions = "*"
|
|
| 519 |
[package.extras]
|
| 520 |
test = ["pytest", "sphinx", "sphinx-autobuild", "wheel", "twine"]
|
| 521 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 522 |
[[package]]
|
| 523 |
name = "icecream"
|
| 524 |
version = "2.1.2"
|
|
@@ -1038,6 +1049,14 @@ category = "main"
|
|
| 1038 |
optional = false
|
| 1039 |
python-versions = ">=3.8"
|
| 1040 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1041 |
[[package]]
|
| 1042 |
name = "packaging"
|
| 1043 |
version = "21.3"
|
|
@@ -1129,6 +1148,17 @@ python-versions = ">=3.7"
|
|
| 1129 |
docs = ["olefile", "sphinx (>=2.4)", "sphinx-copybutton", "sphinx-issues (>=3.0.1)", "sphinx-removed-in", "sphinx-rtd-theme (>=1.0)", "sphinxext-opengraph"]
|
| 1130 |
tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"]
|
| 1131 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1132 |
[[package]]
|
| 1133 |
name = "pluggy"
|
| 1134 |
version = "1.0.0"
|
|
@@ -1689,6 +1719,17 @@ python-versions = ">=3.6"
|
|
| 1689 |
[package.dependencies]
|
| 1690 |
streamlit = ">=0.63"
|
| 1691 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1692 |
[[package]]
|
| 1693 |
name = "terminado"
|
| 1694 |
version = "0.15.0"
|
|
@@ -1952,6 +1993,14 @@ category = "main"
|
|
| 1952 |
optional = false
|
| 1953 |
python-versions = ">=3.4"
|
| 1954 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1955 |
[[package]]
|
| 1956 |
name = "zipp"
|
| 1957 |
version = "3.8.0"
|
|
@@ -1967,7 +2016,7 @@ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-
|
|
| 1967 |
[metadata]
|
| 1968 |
lock-version = "1.1"
|
| 1969 |
python-versions = "^3.8.3"
|
| 1970 |
-
content-hash = "
|
| 1971 |
|
| 1972 |
[metadata.files]
|
| 1973 |
about-time = [
|
|
@@ -2259,6 +2308,9 @@ gitpython = [
|
|
| 2259 |
grapheme = [
|
| 2260 |
{file = "grapheme-0.6.0.tar.gz", hash = "sha256:44c2b9f21bbe77cfb05835fec230bd435954275267fea1858013b102f8603cca"},
|
| 2261 |
]
|
|
|
|
|
|
|
|
|
|
| 2262 |
icecream = [
|
| 2263 |
{file = "icecream-2.1.2-py2.py3-none-any.whl", hash = "sha256:04b9cea4d9931cf6960db0430ebf11fa34464ce7152e384ddf50f96d25b201b4"},
|
| 2264 |
{file = "icecream-2.1.2.tar.gz", hash = "sha256:09300b2d1c678712410cbd47c95198eb1b580f66f311a554ccd6b9e758ece0ee"},
|
|
@@ -2489,6 +2541,10 @@ numpy = [
|
|
| 2489 |
{file = "numpy-1.22.4-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0791fbd1e43bf74b3502133207e378901272f3c156c4df4954cad833b1380207"},
|
| 2490 |
{file = "numpy-1.22.4.zip", hash = "sha256:425b390e4619f58d8526b3dcf656dde069133ae5c240229821f01b5f44ea07af"},
|
| 2491 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2492 |
packaging = [
|
| 2493 |
{file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"},
|
| 2494 |
{file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"},
|
|
@@ -2576,6 +2632,10 @@ pillow = [
|
|
| 2576 |
{file = "Pillow-9.1.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:baf3be0b9446a4083cc0c5bb9f9c964034be5374b5bc09757be89f5d2fa247b8"},
|
| 2577 |
{file = "Pillow-9.1.1.tar.gz", hash = "sha256:7502539939b53d7565f3d11d87c78e7ec900d3c72945d4ee0e2f250d598309a0"},
|
| 2578 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2579 |
pluggy = [
|
| 2580 |
{file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"},
|
| 2581 |
{file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"},
|
|
@@ -3107,6 +3167,10 @@ streamlit-option-menu = [
|
|
| 3107 |
{file = "streamlit-option-menu-0.3.2.tar.gz", hash = "sha256:69d1aef6f30f83f29eda3dc9667733bc2e28cd640eb17b4b6ca315f633484c52"},
|
| 3108 |
{file = "streamlit_option_menu-0.3.2-py3-none-any.whl", hash = "sha256:0b7eae3ffdb0276c81d15750465c72957d57d2f766cb027c586d053519731178"},
|
| 3109 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3110 |
terminado = [
|
| 3111 |
{file = "terminado-0.15.0-py3-none-any.whl", hash = "sha256:0d5f126fbfdb5887b25ae7d9d07b0d716b1cc0ccaacc71c1f3c14d228e065197"},
|
| 3112 |
{file = "terminado-0.15.0.tar.gz", hash = "sha256:ab4eeedccfcc1e6134bfee86106af90852c69d602884ea3a1e8ca6d4486e9bfe"},
|
|
@@ -3263,6 +3327,9 @@ xlsxwriter = [
|
|
| 3263 |
{file = "XlsxWriter-3.0.3-py3-none-any.whl", hash = "sha256:df0aefe5137478d206847eccf9f114715e42aaea077e6a48d0e8a2152e983010"},
|
| 3264 |
{file = "XlsxWriter-3.0.3.tar.gz", hash = "sha256:e89f4a1d2fa2c9ea15cde77de95cd3fd8b0345d0efb3964623f395c8c4988b7f"},
|
| 3265 |
]
|
|
|
|
|
|
|
|
|
|
| 3266 |
zipp = [
|
| 3267 |
{file = "zipp-3.8.0-py3-none-any.whl", hash = "sha256:c4f6e5bbf48e74f7a38e7cc5b0480ff42b0ae5178957d564d18932525d5cf099"},
|
| 3268 |
{file = "zipp-3.8.0.tar.gz", hash = "sha256:56bf8aadb83c24db6c4b577e13de374ccfb67da2078beba1d037c17980bf43ad"},
|
|
|
|
| 519 |
[package.extras]
|
| 520 |
test = ["pytest", "sphinx", "sphinx-autobuild", "wheel", "twine"]
|
| 521 |
|
| 522 |
+
[[package]]
|
| 523 |
+
name = "hanzidentifier"
|
| 524 |
+
version = "1.0.2"
|
| 525 |
+
description = "Python module that identifies Chinese text as Simplified or Traditional."
|
| 526 |
+
category = "main"
|
| 527 |
+
optional = false
|
| 528 |
+
python-versions = "*"
|
| 529 |
+
|
| 530 |
+
[package.dependencies]
|
| 531 |
+
zhon = ">=1.1.3"
|
| 532 |
+
|
| 533 |
[[package]]
|
| 534 |
name = "icecream"
|
| 535 |
version = "2.1.2"
|
|
|
|
| 1049 |
optional = false
|
| 1050 |
python-versions = ">=3.8"
|
| 1051 |
|
| 1052 |
+
[[package]]
|
| 1053 |
+
name = "opencc-python-reimplemented"
|
| 1054 |
+
version = "0.1.6"
|
| 1055 |
+
description = "OpenCC made with Python"
|
| 1056 |
+
category = "main"
|
| 1057 |
+
optional = false
|
| 1058 |
+
python-versions = "*"
|
| 1059 |
+
|
| 1060 |
[[package]]
|
| 1061 |
name = "packaging"
|
| 1062 |
version = "21.3"
|
|
|
|
| 1148 |
docs = ["olefile", "sphinx (>=2.4)", "sphinx-copybutton", "sphinx-issues (>=3.0.1)", "sphinx-removed-in", "sphinx-rtd-theme (>=1.0)", "sphinxext-opengraph"]
|
| 1149 |
tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"]
|
| 1150 |
|
| 1151 |
+
[[package]]
|
| 1152 |
+
name = "plotly"
|
| 1153 |
+
version = "5.8.0"
|
| 1154 |
+
description = "An open-source, interactive data visualization library for Python"
|
| 1155 |
+
category = "main"
|
| 1156 |
+
optional = false
|
| 1157 |
+
python-versions = ">=3.6"
|
| 1158 |
+
|
| 1159 |
+
[package.dependencies]
|
| 1160 |
+
tenacity = ">=6.2.0"
|
| 1161 |
+
|
| 1162 |
[[package]]
|
| 1163 |
name = "pluggy"
|
| 1164 |
version = "1.0.0"
|
|
|
|
| 1719 |
[package.dependencies]
|
| 1720 |
streamlit = ">=0.63"
|
| 1721 |
|
| 1722 |
+
[[package]]
|
| 1723 |
+
name = "tenacity"
|
| 1724 |
+
version = "8.0.1"
|
| 1725 |
+
description = "Retry code until it succeeds"
|
| 1726 |
+
category = "main"
|
| 1727 |
+
optional = false
|
| 1728 |
+
python-versions = ">=3.6"
|
| 1729 |
+
|
| 1730 |
+
[package.extras]
|
| 1731 |
+
doc = ["reno", "sphinx", "tornado (>=4.5)"]
|
| 1732 |
+
|
| 1733 |
[[package]]
|
| 1734 |
name = "terminado"
|
| 1735 |
version = "0.15.0"
|
|
|
|
| 1993 |
optional = false
|
| 1994 |
python-versions = ">=3.4"
|
| 1995 |
|
| 1996 |
+
[[package]]
|
| 1997 |
+
name = "zhon"
|
| 1998 |
+
version = "1.1.5"
|
| 1999 |
+
description = "Zhon provides constants used in Chinese text processing."
|
| 2000 |
+
category = "main"
|
| 2001 |
+
optional = false
|
| 2002 |
+
python-versions = "*"
|
| 2003 |
+
|
| 2004 |
[[package]]
|
| 2005 |
name = "zipp"
|
| 2006 |
version = "3.8.0"
|
|
|
|
| 2016 |
[metadata]
|
| 2017 |
lock-version = "1.1"
|
| 2018 |
python-versions = "^3.8.3"
|
| 2019 |
+
content-hash = "eabc5b9c944b380d2a60b4ec1b1f218f4b1a3aea1426c7fb75fdc51d4889e57e"
|
| 2020 |
|
| 2021 |
[metadata.files]
|
| 2022 |
about-time = [
|
|
|
|
| 2308 |
grapheme = [
|
| 2309 |
{file = "grapheme-0.6.0.tar.gz", hash = "sha256:44c2b9f21bbe77cfb05835fec230bd435954275267fea1858013b102f8603cca"},
|
| 2310 |
]
|
| 2311 |
+
hanzidentifier = [
|
| 2312 |
+
{file = "hanzidentifier-1.0.2.tar.gz", hash = "sha256:793a298430aa9a9d6ab344dc0ca0ab4bd1161d88c7da941d6554571093003cba"},
|
| 2313 |
+
]
|
| 2314 |
icecream = [
|
| 2315 |
{file = "icecream-2.1.2-py2.py3-none-any.whl", hash = "sha256:04b9cea4d9931cf6960db0430ebf11fa34464ce7152e384ddf50f96d25b201b4"},
|
| 2316 |
{file = "icecream-2.1.2.tar.gz", hash = "sha256:09300b2d1c678712410cbd47c95198eb1b580f66f311a554ccd6b9e758ece0ee"},
|
|
|
|
| 2541 |
{file = "numpy-1.22.4-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0791fbd1e43bf74b3502133207e378901272f3c156c4df4954cad833b1380207"},
|
| 2542 |
{file = "numpy-1.22.4.zip", hash = "sha256:425b390e4619f58d8526b3dcf656dde069133ae5c240229821f01b5f44ea07af"},
|
| 2543 |
]
|
| 2544 |
+
opencc-python-reimplemented = [
|
| 2545 |
+
{file = "opencc-python-reimplemented-0.1.6.tar.gz", hash = "sha256:6e4eaae2bd6b04d5c1b5bd7f3f87904ba2f1caa982203fdf2610c8261e47ee24"},
|
| 2546 |
+
{file = "opencc_python_reimplemented-0.1.6-py3.8.egg", hash = "sha256:3071d7ddcecc1b5129434e713e35f73aab9f5bd507d728c908acdbb48879194d"},
|
| 2547 |
+
]
|
| 2548 |
packaging = [
|
| 2549 |
{file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"},
|
| 2550 |
{file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"},
|
|
|
|
| 2632 |
{file = "Pillow-9.1.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:baf3be0b9446a4083cc0c5bb9f9c964034be5374b5bc09757be89f5d2fa247b8"},
|
| 2633 |
{file = "Pillow-9.1.1.tar.gz", hash = "sha256:7502539939b53d7565f3d11d87c78e7ec900d3c72945d4ee0e2f250d598309a0"},
|
| 2634 |
]
|
| 2635 |
+
plotly = [
|
| 2636 |
+
{file = "plotly-5.8.0-py2.py3-none-any.whl", hash = "sha256:0e6e2382aafe2b2978d2c1b10ea93e73ad1ec80fa9a195ff6eea62af7905dfdc"},
|
| 2637 |
+
{file = "plotly-5.8.0.tar.gz", hash = "sha256:58cef3292f5994d82154d51fbc7338c48009fc47ea32ffe052ad29aaa15e0df9"},
|
| 2638 |
+
]
|
| 2639 |
pluggy = [
|
| 2640 |
{file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"},
|
| 2641 |
{file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"},
|
|
|
|
| 3167 |
{file = "streamlit-option-menu-0.3.2.tar.gz", hash = "sha256:69d1aef6f30f83f29eda3dc9667733bc2e28cd640eb17b4b6ca315f633484c52"},
|
| 3168 |
{file = "streamlit_option_menu-0.3.2-py3-none-any.whl", hash = "sha256:0b7eae3ffdb0276c81d15750465c72957d57d2f766cb027c586d053519731178"},
|
| 3169 |
]
|
| 3170 |
+
tenacity = [
|
| 3171 |
+
{file = "tenacity-8.0.1-py3-none-any.whl", hash = "sha256:f78f4ea81b0fabc06728c11dc2a8c01277bfc5181b321a4770471902e3eb844a"},
|
| 3172 |
+
{file = "tenacity-8.0.1.tar.gz", hash = "sha256:43242a20e3e73291a28bcbcacfd6e000b02d3857a9a9fff56b297a27afdc932f"},
|
| 3173 |
+
]
|
| 3174 |
terminado = [
|
| 3175 |
{file = "terminado-0.15.0-py3-none-any.whl", hash = "sha256:0d5f126fbfdb5887b25ae7d9d07b0d716b1cc0ccaacc71c1f3c14d228e065197"},
|
| 3176 |
{file = "terminado-0.15.0.tar.gz", hash = "sha256:ab4eeedccfcc1e6134bfee86106af90852c69d602884ea3a1e8ca6d4486e9bfe"},
|
|
|
|
| 3327 |
{file = "XlsxWriter-3.0.3-py3-none-any.whl", hash = "sha256:df0aefe5137478d206847eccf9f114715e42aaea077e6a48d0e8a2152e983010"},
|
| 3328 |
{file = "XlsxWriter-3.0.3.tar.gz", hash = "sha256:e89f4a1d2fa2c9ea15cde77de95cd3fd8b0345d0efb3964623f395c8c4988b7f"},
|
| 3329 |
]
|
| 3330 |
+
zhon = [
|
| 3331 |
+
{file = "zhon-1.1.5.tar.gz", hash = "sha256:793723575c46f10ace8846c579ce740b04c73e2aa583e04e000aedbd4a47f87f"},
|
| 3332 |
+
]
|
| 3333 |
zipp = [
|
| 3334 |
{file = "zipp-3.8.0-py3-none-any.whl", hash = "sha256:c4f6e5bbf48e74f7a38e7cc5b0480ff42b0ae5178957d564d18932525d5cf099"},
|
| 3335 |
{file = "zipp-3.8.0.tar.gz", hash = "sha256:56bf8aadb83c24db6c4b577e13de374ccfb67da2078beba1d037c17980bf43ad"},
|
pylintrc
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[EXCEPTIONS]
|
| 2 |
+
# C0301 line too long
|
| 3 |
+
# R0801 duplicate-code
|
| 4 |
+
# C0103 invalid-name
|
| 5 |
+
# W0612 unused-variable
|
| 6 |
+
# W0611 unused-import
|
| 7 |
+
disable=W0703,R0801,C0103,C0301,W0612,W0611
|
pyproject.toml
CHANGED
|
@@ -22,6 +22,9 @@ streamlit = "^1.9.2"
|
|
| 22 |
debee = "^0.1.0-alpha.2"
|
| 23 |
ezbee = "^0.1.0"
|
| 24 |
streamlit-option-menu = "^0.3.2"
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
[tool.poe.executor]
|
| 27 |
type = "poetry"
|
|
@@ -33,7 +36,9 @@ build = "poetry build"
|
|
| 33 |
_publish = "poetry publish"
|
| 34 |
release = ["test", "build", "_publish"]
|
| 35 |
lint = { cmd = "pylint litbee" }
|
| 36 |
-
|
|
|
|
|
|
|
| 37 |
docstyle = "pydocstyle --convention=google tests litbee"
|
| 38 |
tunnel = {cmd = "ssh -CN ip_or_hostname_defined_in_hosts -L 9091:127.0.0.1:9091"}
|
| 39 |
|
|
|
|
| 22 |
debee = "^0.1.0-alpha.2"
|
| 23 |
ezbee = "^0.1.0"
|
| 24 |
streamlit-option-menu = "^0.3.2"
|
| 25 |
+
plotly = "^5.8.0"
|
| 26 |
+
hanzidentifier = "^1.0.2"
|
| 27 |
+
opencc-python-reimplemented = "^0.1.6"
|
| 28 |
|
| 29 |
[tool.poe.executor]
|
| 30 |
type = "poetry"
|
|
|
|
| 36 |
_publish = "poetry publish"
|
| 37 |
release = ["test", "build", "_publish"]
|
| 38 |
lint = { cmd = "pylint litbee" }
|
| 39 |
+
isort = "isort tests litbee"
|
| 40 |
+
black = "black tests litbee"
|
| 41 |
+
format = ["isort", "black"]
|
| 42 |
docstyle = "pydocstyle --convention=google tests litbee"
|
| 43 |
tunnel = {cmd = "ssh -CN ip_or_hostname_defined_in_hosts -L 9091:127.0.0.1:9091"}
|
| 44 |
|
run-flake8.sh
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
flake8 --ignore F401,E501,F841
|
tests/test_t2s.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Test t2s."""
|
| 2 |
+
from litbee.t2s import t2s
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
def test_t2s1():
    """Check that t2s returns empty and ASCII-only input unchanged."""
    unchanged_samples = (
        ["", ""],
        ["a\nb", ""],
        ["a\n\nb\n", ""],
    )
    for sample in unchanged_samples:
        # pass a copy so the comparison is against an untouched list
        assert t2s(list(sample)) == sample
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def test_t2s2():
    """Check that traditional characters are converted to simplified ones."""
    cases = [
        # every entry traditional
        (["需攜帶", "需攜帶"], ["需携带", "需携带"]),
        # traditional and simplified entries together, newlines preserved
        (["需攜帶\n\n需攜帶\n", "需携带\n"], ["需携带\n\n需携带\n", "需携带\n"]),
    ]
    for given, expected in cases:
        assert t2s(given) == expected
|