Merge branch 'master' into huggingface
Files changed:
- crazy_functions/latex_utils.py  +33 -18
- request_llm/bridge_all.py  +10 -1
- request_llm/bridge_chatglm.py  +3 -3
- toolbox.py  +1 -1
crazy_functions/latex_utils.py
CHANGED
@@ -27,6 +27,24 @@ def set_forbidden_text(text, mask, pattern, flags=0):
         mask[res.span()[0]:res.span()[1]] = PRESERVE
     return text, mask
 
+def reverse_forbidden_text(text, mask, pattern, flags=0, forbid_wrapper=True):
+    """
+    Move area out of preserve area (make text editable for GPT)
+    count the number of the braces so as to catch complete text area.
+    e.g.
+        \begin{abstract} blablablablablabla. \end{abstract}
+    """
+    if isinstance(pattern, list): pattern = '|'.join(pattern)
+    pattern_compile = re.compile(pattern, flags)
+    for res in pattern_compile.finditer(text):
+        if not forbid_wrapper:
+            mask[res.span()[0]:res.span()[1]] = TRANSFORM
+        else:
+            mask[res.regs[0][0]: res.regs[1][0]] = PRESERVE   # '\\begin{abstract}'
+            mask[res.regs[1][0]: res.regs[1][1]] = TRANSFORM  # abstract body
+            mask[res.regs[1][1]: res.regs[0][1]] = PRESERVE   # '\\end{abstract}'
+    return text, mask
+
 def set_forbidden_text_careful_brace(text, mask, pattern, flags=0):
     """
     Add a preserve text area in this paper (text become untouchable for GPT).

@@ -326,6 +344,7 @@ def split_subprocess(txt, project_folder, return_dict, opts):
     # the reverse operations must come last
     text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\caption\{(.*?)\}", re.DOTALL, forbid_wrapper=True)
     text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\abstract\{(.*?)\}", re.DOTALL, forbid_wrapper=True)
+    text, mask = reverse_forbidden_text(text, mask, r"\\begin\{abstract\}(.*?)\\end\{abstract\}", re.DOTALL, forbid_wrapper=True)
     root = convert_to_linklist(text, mask)
 
     # fix braces
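The new reverse_forbidden_text uses the match's group boundaries (res.regs) to keep the \begin{abstract}/\end{abstract} wrappers preserved while opening only the body up for translation. Below is a minimal sketch, not part of the commit, that replays the same masking logic on a toy string; it assumes PRESERVE and TRANSFORM are small integer flags and the mask is a numpy byte array, which is how latex_utils.py appears to use them (only the distinction between the two values matters for the demo).

```python
import re
import numpy as np

PRESERVE, TRANSFORM = 0, 1   # assumed flag values; only the distinction matters here

text = r"Intro. \begin{abstract} An abstract to translate. \end{abstract} Outro."
mask = np.full(len(text), PRESERVE, dtype=np.uint8)

pattern = re.compile(r"\\begin\{abstract\}(.*?)\\end\{abstract\}", re.DOTALL)
for res in pattern.finditer(text):
    # res.regs[0] is the whole match, res.regs[1] is the captured body:
    # the wrappers stay PRESERVE, only the body becomes TRANSFORM (editable).
    mask[res.regs[0][0]: res.regs[1][0]] = PRESERVE   # '\begin{abstract}'
    mask[res.regs[1][0]: res.regs[1][1]] = TRANSFORM  # body
    mask[res.regs[1][1]: res.regs[0][1]] = PRESERVE   # '\end{abstract}'

editable = "".join(c for c, m in zip(text, mask) if m == TRANSFORM)
print(repr(editable))   # -> ' An abstract to translate. '
```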
@@ -672,10 +691,9 @@ def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work
         print("Fatal error occurred, but we cannot identify error, please download zip, read latex log, and compile manually.")
         return False, -1, [-1]
 
-
-def compile_latex_with_timeout(command, timeout=60):
+def compile_latex_with_timeout(command, cwd, timeout=60):
     import subprocess
-    process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd)
     try:
         stdout, stderr = process.communicate(timeout=timeout)
     except subprocess.TimeoutExpired:
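The helper now takes the working directory explicitly and hands it to subprocess.Popen via cwd=, instead of relying on the process-wide os.chdir used before; that is why the later hunks can drop the os.chdir(current_dir) calls. A self-contained sketch of the idea (illustrative, not the project's exact code; the diff does not show the body of the timeout branch, so the kill-and-return handling here is an assumption):

```python
import subprocess

def compile_latex_with_timeout(command, cwd, timeout=60):
    # The command runs inside `cwd` without touching the interpreter's global
    # working directory, so compiles in different folders cannot interfere.
    process = subprocess.Popen(command, shell=True, cwd=cwd,
                               stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    try:
        stdout, stderr = process.communicate(timeout=timeout)
    except subprocess.TimeoutExpired:
        process.kill()   # assumption: kill the stuck compiler and report failure
        return False
    return process.returncode == 0

# Example call, mirroring the diff (the folder name is hypothetical):
# compile_latex_with_timeout('pdflatex -interaction=batchmode -file-line-error main.tex',
#                            cwd='my_tex_project')
```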
@@ -699,24 +717,24 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
 
         # https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error
         yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF ...', chatbot, history) # refresh the Gradio frontend
-
+        ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
 
         yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译转化后的PDF ...', chatbot, history) # refresh the Gradio frontend
-
+        ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
 
         if ok and os.path.exists(pj(work_folder_modified, f'{main_file_modified}.pdf')):
             # only if the second step succeeded can the following steps continue
             yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译BibTex ...', chatbot, history) # refresh the Gradio frontend
             if not os.path.exists(pj(work_folder_original, f'{main_file_original}.bbl')):
-
+                ok = compile_latex_with_timeout(f'bibtex {main_file_original}.aux', work_folder_original)
             if not os.path.exists(pj(work_folder_modified, f'{main_file_modified}.bbl')):
-
+                ok = compile_latex_with_timeout(f'bibtex {main_file_modified}.aux', work_folder_modified)
 
             yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译文献交叉引用 ...', chatbot, history) # refresh the Gradio frontend
-
-
-
-
+            ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
+            ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
+            ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
+            ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
 
             if mode!='translate_zh':
                 yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # refresh the Gradio frontend

@@ -724,13 +742,11 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
                 ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
 
                 yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history) # refresh the Gradio frontend
-
-
-
-
+                ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
+                ok = compile_latex_with_timeout(f'bibtex merge_diff.aux', work_folder)
+                ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
+                ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
 
-        # <--------------------->
-        os.chdir(current_dir)
 
         # <---------- check the results ----------->
         results_ = ""

@@ -766,7 +782,6 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
             yield from update_ui_lastest_msg(f'由于最为关键的转化PDF编译失败, 将根据报错信息修正tex源文件并重试, 当前报错的latex代码处于第{buggy_lines}行 ...', chatbot, history) # refresh the Gradio frontend
             if not can_retry: break
 
-    os.chdir(current_dir)
     return False # failed
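These hunks route the whole build through the new helper and drop the os.chdir bookkeeping: pdflatex writes the .aux citation keys, bibtex turns them into a .bbl, and two further pdflatex passes let citations and cross-references settle; the latexdiff invocation itself is untouched context in this view. A hedged, self-contained stand-in for that sequence (subprocess.run instead of the project's helper; folder and file names are invented for the demo and a real main.tex/refs.bib must already exist in the folder):

```python
import os
import subprocess

def run(cmd, work, timeout=60):
    # Run one build step inside `work`, mirroring the cwd-based helper above.
    try:
        return subprocess.run(cmd, shell=True, cwd=work, timeout=timeout,
                              capture_output=True).returncode == 0
    except subprocess.TimeoutExpired:
        return False

work = "demo_tex_project"   # hypothetical folder containing main.tex and refs.bib
ok = run("pdflatex -interaction=batchmode -file-line-error main.tex", work)
ok = run("bibtex main.aux", work)
# two more passes so citations and cross-references settle
ok = run("pdflatex -interaction=batchmode -file-line-error main.tex", work)
ok = run("pdflatex -interaction=batchmode -file-line-error main.tex", work)

# Success is judged the same way as in 编译Latex: the PDF actually exists.
print(ok and os.path.exists(os.path.join(work, "main.pdf")))
```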
request_llm/bridge_all.py
CHANGED
@@ -152,7 +152,7 @@ model_info = {
         "token_cnt": get_token_num_gpt4,
     },
 
-    # chatglm
+    # map "chatglm" directly onto chatglm2
     "chatglm": {
         "fn_with_ui": chatglm_ui,
         "fn_without_ui": chatglm_noui,

@@ -161,6 +161,15 @@ model_info = {
         "tokenizer": tokenizer_gpt35,
         "token_cnt": get_token_num_gpt35,
     },
+    "chatglm2": {
+        "fn_with_ui": chatglm_ui,
+        "fn_without_ui": chatglm_noui,
+        "endpoint": None,
+        "max_token": 1024,
+        "tokenizer": tokenizer_gpt35,
+        "token_cnt": get_token_num_gpt35,
+    },
+
     # newbing
     "newbing": {
         "fn_with_ui": newbing_ui,
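The new "chatglm2" entry reuses the chatglm UI/no-UI bridge functions, so the registry is the only place that changes when a new local model name is exposed. A hedged sketch of how such a model_info-style registry is typically consumed (the resolve helper and the stub handlers are illustrative, not bridge_all.py's actual dispatcher):

```python
# Illustrative only: a minimal lookup over a model_info-style registry.
# The handler functions are stand-ins for chatglm_ui / chatglm_noui.
def chatglm_ui(*args, **kwargs): ...
def chatglm_noui(*args, **kwargs): ...

model_info = {
    "chatglm":  {"fn_with_ui": chatglm_ui, "fn_without_ui": chatglm_noui, "max_token": 1024},
    "chatglm2": {"fn_with_ui": chatglm_ui, "fn_without_ui": chatglm_noui, "max_token": 1024},
}

def resolve(llm_model: str):
    # Unknown names fail fast instead of silently falling back to another model.
    if llm_model not in model_info:
        raise ValueError(f"unknown LLM_MODEL: {llm_model}")
    entry = model_info[llm_model]
    return entry["fn_with_ui"], entry["fn_without_ui"], entry["max_token"]

ui_fn, noui_fn, max_token = resolve("chatglm2")
```

Both names currently resolve to the same handlers; keeping separate entries lets "chatglm2" later get its own endpoint, max_token, or tokenizer without touching callers.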
request_llm/bridge_chatglm.py
CHANGED
@@ -40,12 +40,12 @@ class GetGLMHandle(Process):
         while True:
             try:
                 if self.chatglm_model is None:
-                    self.chatglm_tokenizer = AutoTokenizer.from_pretrained("THUDM/
+                    self.chatglm_tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
                     device, = get_conf('LOCAL_MODEL_DEVICE')
                     if device=='cpu':
-                        self.chatglm_model = AutoModel.from_pretrained("THUDM/
+                        self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).float()
                     else:
-                        self.chatglm_model = AutoModel.from_pretrained("THUDM/
+                        self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).half().cuda()
                     self.chatglm_model = self.chatglm_model.eval()
                     break
                 else:
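The bridge now loads THUDM/chatglm2-6b; the removed lines pointed at the previous THUDM checkpoint, whose id is truncated in this view. A standalone sketch of the same loading pattern, assuming the transformers library is installed and the machine has room for the weights; the chat call at the end follows the ChatGLM2-6B model card rather than anything in this commit:

```python
# Minimal sketch of the loading pattern in bridge_chatglm.py: CPU gets a
# float32 copy, anything else gets fp16 on CUDA. Weights download on first use.
from transformers import AutoModel, AutoTokenizer

def load_chatglm2(device: str = "cpu"):
    tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
    if device == "cpu":
        model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).float()
    else:
        model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).half().cuda()
    return tokenizer, model.eval()

# Usage (per the ChatGLM2-6B model card):
# tokenizer, model = load_chatglm2("cuda")
# response, history = model.chat(tokenizer, "你好", history=[])
```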
toolbox.py
CHANGED
@@ -498,7 +498,7 @@ def on_report_generated(cookies, files, chatbot):
     else:
         report_files = find_recent_files('gpt_log')
     if len(report_files) == 0:
-        return None, chatbot
+        return cookies, None, chatbot
     # files.extend(report_files)
     file_links = ''
     for f in report_files: file_links += f'<br/><a href="file={os.path.abspath(f)}" target="_blank">{f}</a>'
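The fix makes the empty-report early return hand back cookies as well, so every exit path of on_report_generated returns the same three values. That matters because a Gradio event handler must return exactly one value per declared output component. A toy illustration of the arity rule; the component wiring below is an assumption for the demo, not taken from the repo:

```python
# Toy illustration (not the project's wiring): a handler bound to three outputs
# must return three values on every code path, including early returns.
import gradio as gr

def on_report_generated(cookies, files, chatbot):
    report_files = []                     # stand-in for find_recent_files('gpt_log')
    if len(report_files) == 0:
        return cookies, None, chatbot     # same arity as the normal path
    return cookies, report_files, chatbot

with gr.Blocks() as demo:
    cookies = gr.State({})
    files = gr.File()
    chatbot = gr.Chatbot()
    btn = gr.Button("report")
    btn.click(on_report_generated, [cookies, files, chatbot], [cookies, files, chatbot])
# demo.launch()
```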