Spaces:
				
			
			
	
			
			
					
		Running
		
	
	
	
			
			
	
	
	
	
		
		
					
		Running
		
	Merge branch 'master' into dev
Browse files
- crazy_functional.py +6 -0
- crazy_functions/crazy_utils.py +11 -5
- crazy_functions/谷歌检索小助手.py +106 -0
- request_llm/bridge_chatgpt.py +4 -1
- version +2 -2
    	
        crazy_functional.py
    CHANGED
    
    | @@ -72,6 +72,7 @@ def get_crazy_functions(): | |
| 72 | 
             
                from crazy_functions.批量总结PDF文档pdfminer import 批量总结PDF文档pdfminer
         | 
| 73 | 
             
                from crazy_functions.总结word文档 import 总结word文档
         | 
| 74 | 
             
                from crazy_functions.批量翻译PDF文档_多线程 import 批量翻译PDF文档
         | 
|  | |
| 75 |  | 
| 76 | 
             
                function_plugins.update({
         | 
| 77 | 
             
                    "批量翻译PDF文档(多线程)": {
         | 
| @@ -90,6 +91,11 @@ def get_crazy_functions(): | |
| 90 | 
             
                        "AsButton": False,  # 加入下拉菜单中
         | 
| 91 | 
             
                        "Function": HotReload(批量总结PDF文档pdfminer)
         | 
| 92 | 
             
                    },
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
| 93 | 
             
                    "批量总结Word文档": {
         | 
| 94 | 
             
                        "Color": "stop",
         | 
| 95 | 
             
                        "Function": HotReload(总结word文档)
         | 
|  | |
| 72 | 
             
                from crazy_functions.批量总结PDF文档pdfminer import 批量总结PDF文档pdfminer
         | 
| 73 | 
             
                from crazy_functions.总结word文档 import 总结word文档
         | 
| 74 | 
             
                from crazy_functions.批量翻译PDF文档_多线程 import 批量翻译PDF文档
         | 
| 75 | 
            +
                from crazy_functions.谷歌检索小助手 import 谷歌检索小助手
         | 
| 76 |  | 
| 77 | 
             
                function_plugins.update({
         | 
| 78 | 
             
                    "批量翻译PDF文档(多线程)": {
         | 
|  | |
| 91 | 
             
                        "AsButton": False,  # 加入下拉菜单中
         | 
| 92 | 
             
                        "Function": HotReload(批量总结PDF文档pdfminer)
         | 
| 93 | 
             
                    },
         | 
| 94 | 
            +
                    "谷歌学术检索助手(输入谷歌学术搜索页url)": {
         | 
| 95 | 
            +
                        "Color": "stop",
         | 
| 96 | 
            +
                        "AsButton": False,  # 加入下拉菜单中
         | 
| 97 | 
            +
                        "Function": HotReload(谷歌检索小助手)
         | 
| 98 | 
            +
                    },
         | 
| 99 | 
             
                    "批量总结Word文档": {
         | 
| 100 | 
             
                        "Color": "stop",
         | 
| 101 | 
             
                        "Function": HotReload(总结word文档)
         | 
    	
        crazy_functions/crazy_utils.py
    CHANGED
    
    | @@ -1,4 +1,4 @@ | |
| 1 | 
            -
             | 
| 2 |  | 
| 3 | 
             
            def request_gpt_model_in_new_thread_with_ui_alive(inputs, inputs_show_user, top_p, temperature, chatbot, history, sys_prompt, refresh_interval=0.2):
         | 
| 4 | 
             
                import time
         | 
| @@ -43,10 +43,16 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(inp | |
| 43 | 
             
                mutable = [["", time.time()] for _ in range(n_frag)]
         | 
| 44 |  | 
| 45 | 
             
                def _req_gpt(index, inputs, history, sys_prompt):
         | 
| 46 | 
            -
                     | 
| 47 | 
            -
                         | 
| 48 | 
            -
                            index]
         | 
| 49 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 50 | 
             
                    return gpt_say
         | 
| 51 | 
             
                # 异步任务开始
         | 
| 52 | 
             
                futures = [executor.submit(_req_gpt, index, inputs, history, sys_prompt) for index, inputs, history, sys_prompt in zip(
         | 
|  | |
| 1 | 
            +
            import traceback
         | 
| 2 |  | 
| 3 | 
             
            def request_gpt_model_in_new_thread_with_ui_alive(inputs, inputs_show_user, top_p, temperature, chatbot, history, sys_prompt, refresh_interval=0.2):
         | 
| 4 | 
             
                import time
         | 
|  | |
| 43 | 
             
                mutable = [["", time.time()] for _ in range(n_frag)]
         | 
| 44 |  | 
| 45 | 
             
                def _req_gpt(index, inputs, history, sys_prompt):
                    """Run one GPT request for fragment ``index`` and never raise.

                    Calls ``predict_no_ui_long_connection`` with the enclosing
                    function's ``top_p`` / ``temperature`` and passes
                    ``mutable[index]`` as the ``observe_window``.

                    Returns:
                        The model reply on success. On failure, a
                        "[Local Message]" string containing the formatted
                        traceback, followed by whatever text is already stored in
                        ``mutable[index][0]`` (presumably the partial reply
                        streamed before the failure -- confirm against
                        ``predict_no_ui_long_connection``).
                    """
                    try:
                        gpt_say = predict_no_ui_long_connection(
                            inputs=inputs, top_p=top_p, temperature=temperature, history=history, sys_prompt=sys_prompt, observe_window=mutable[index]
                        )
                    except Exception:
                        # Clean up after the failure: report the traceback as the
                        # result instead of letting the exception escape the worker.
                        # `Exception` (not a bare except) so SystemExit and
                        # KeyboardInterrupt are not swallowed.
                        tb_str = '```\n' + traceback.format_exc() + '```'
                        gpt_say = f"[Local Message] 线程{index}在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
                        if len(mutable[index][0]) > 0:
                            # Keep the text that had been received before the failure.
                            gpt_say += "此线程失败前收到的回答:" + mutable[index][0]
                    return gpt_say
         | 
| 57 | 
             
                # 异步任务开始
         | 
| 58 | 
             
                futures = [executor.submit(_req_gpt, index, inputs, history, sys_prompt) for index, inputs, history, sys_prompt in zip(
         | 
    	
        crazy_functions/谷歌检索小助手.py
    ADDED
    
    | @@ -0,0 +1,106 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
         | 
| 2 | 
            +
            from toolbox import CatchException, report_execption, write_results_to_file
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            def get_meta_information(url, chatbot, history):
                """Scrape a Google Scholar result page and collect per-paper metadata.

                This is a generator meant to be driven with ``yield from``: after
                each paper it overwrites the last chatbot entry with that paper's
                title/abstract and yields ``(chatbot, [], msg)`` so the UI can
                refresh; the collected list is delivered as the generator's return
                value.

                Args:
                    url: Address of the Google Scholar search-result page.
                    chatbot: Chat message list; must already contain at least one
                        entry, because ``chatbot[-1]`` is rewritten in place.
                    history: Unused here; kept for interface compatibility.

                Returns:
                    A list of dicts with the keys ``title``, ``author``,
                    ``citation``, ``abstract`` and ``is_paper_in_arxiv``.
                """
                import requests
                import arxiv
                import difflib
                from bs4 import BeautifulSoup
                from toolbox import get_conf
                proxies, = get_conf('proxies')
                headers = {
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36',
                }
                # Send the GET request
                response = requests.get(url, proxies=proxies, headers=headers)

                # Parse the page content
                soup = BeautifulSoup(response.text, "html.parser")

                def string_similar(s1, s2):
                    """Return a similarity score for two strings (difflib quick_ratio)."""
                    return difflib.SequenceMatcher(None, s1, s2).quick_ratio()

                def text_of(node, default=''):
                    """Return ``node.text``, or ``default`` when the selector matched nothing."""
                    return default if node is None else node.text

                profile = []
                # Walk over every search result and extract title, author, etc.
                for result in soup.select(".gs_ri"):
                    if result.a is None:
                        # Entry without any link: there is no title to work with.
                        continue
                    title = result.a.text.replace('\n', ' ').replace('  ', ' ')
                    author = text_of(result.select_one(".gs_a"))
                    # The citation count is the text of the "cites" link; it is
                    # absent for papers that have not been cited.
                    citation = text_of(result.select_one(".gs_fl > a[href*='cites']"), 'cited by 0')
                    # The abstract snippet lives in .gs_rs; strip surrounding whitespace.
                    abstract = text_of(result.select_one(".gs_rs")).strip()

                    search = arxiv.Search(
                        query = title,
                        max_results = 1,
                        sort_by = arxiv.SortCriterion.Relevance,
                    )
                    # Use a default: a bare next() on an empty result set raises
                    # StopIteration, which becomes RuntimeError inside a generator.
                    paper = next(search.results(), None)
                    is_paper_in_arxiv = paper is not None and string_similar(title, paper.title) > 0.90
                    if is_paper_in_arxiv:
                        # Same paper: prefer the complete arXiv abstract over the snippet.
                        abstract = paper.summary.replace('\n', ' ')

                    print(title)
                    print(author)
                    print(citation)
                    profile.append({
                        'title':title,
                        'author':author,
                        'citation':citation,
                        'abstract':abstract,
                        'is_paper_in_arxiv':is_paper_in_arxiv,
                    })

                    # Show progress by replacing the answer part of the last chat entry.
                    chatbot[-1] = [chatbot[-1][0], title + f'\n\n是否在arxiv中(不在arxiv中无法获取完整摘要):{is_paper_in_arxiv}\n\n' + abstract]
                    msg = "正常"
                    yield chatbot, [], msg
                return profile
         | 
| 61 | 
            +
             | 
| 62 | 
            +
            @CatchException
            def 谷歌检索小助手(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
                """Plugin entry point: summarise the papers on a Google Scholar search page.

                Generator that yields ``(chatbot, history, msg)`` after each step so
                the UI can refresh.

                Args:
                    txt: User input; passed to ``get_meta_information`` as the URL
                        of the Google Scholar search page.
                    top_p, temperature: Sampling parameters forwarded to the model
                        request.
                    chatbot: Chat message list, appended to / updated in place.
                    history: Incoming history; discarded and rebuilt from the
                        model replies produced here.
                    systemPromptTxt: Not used by this plugin.
                    WEB_PORT: Not used by this plugin.
                """
                # Basic info: what the plugin does and who contributed it
                chatbot.append([
                    "函数插件功能?",
                    "分析用户提供的谷歌学术(google scholar)搜索页面中,出现的所有文章: binary-husky,插件初始化中..."])
                yield chatbot, history, '正常'

                # Try to import the optional dependencies; if one is missing,
                # tell the user how to install it and stop.
                try:
                    import arxiv
                    from bs4 import BeautifulSoup
                except ImportError:
                    report_execption(chatbot, history,
                        a = f"解析项目: {txt}",
                        b = f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade beautifulsoup4 arxiv```。")
                    yield chatbot, history, '正常'
                    return

                # Clear the history so the model input does not overflow
                history = []

                meta_paper_info_list = yield from get_meta_information(txt, chatbot, history)

                # Send the papers to the model in batches. The original used a
                # single `if`, so every paper after the first batch was dropped
                # even though the list was sliced for a next round.
                batch_size = 10
                batch_no = 0
                while len(meta_paper_info_list) > 0:
                    batch_no += 1
                    batch = meta_paper_info_list[:batch_size]
                    # NOTE(review): the item numbering in this prompt repeats "4"
                    # and says "cite" while the data key is "citation"; the text
                    # is left unchanged here.
                    i_say = "下面是一些学术文献的数据,请从中提取出以下内容。" + \
                    "1、英文题目;2、中文题目翻译;3、作者;4、arxiv公开(is_paper_in_arxiv);4、引用数量(cite);5、中文摘要翻译。" + \
                    f"以下是信息源:{str(batch)}"

                    inputs_show_user = f"请分析此页面中出现的所有文章:{txt}"
                    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
                        inputs=i_say, inputs_show_user=inputs_show_user,
                        top_p=top_p, temperature=temperature, chatbot=chatbot, history=[],
                        sys_prompt="你是一个学术翻译,请从数据中提取信息。你必须使用Markdown格式。你必须逐个文献进行处理。"
                    )

                    # Label each reply with its batch number ("batch N").
                    history.extend([ f"第{batch_no}批", gpt_say ])
                    meta_paper_info_list = meta_paper_info_list[batch_size:]

                chatbot.append(["状态?", "已经全部完成"])
                msg = '正常'
                yield chatbot, history, msg
                # Persist the collected replies and show the result returned by
                # the writer in the chat.
                res = write_results_to_file(history)
                chatbot.append(("完成了吗?", res))
                yield chatbot, history, msg
         | 
    	
        request_llm/bridge_chatgpt.py
    CHANGED
    
    | @@ -104,7 +104,10 @@ def predict_no_ui_long_connection(inputs, top_p, temperature, history=[], sys_pr | |
| 104 | 
             
                result = ''
         | 
| 105 | 
             
                while True:
         | 
| 106 | 
             
                    try: chunk = next(stream_response).decode()
         | 
| 107 | 
            -
                    except StopIteration:  | 
|  | |
|  | |
|  | |
| 108 | 
             
                    if len(chunk)==0: continue
         | 
| 109 | 
             
                    if not chunk.startswith('data:'): 
         | 
| 110 | 
             
                        error_msg = get_full_error(chunk.encode('utf8'), stream_response).decode()
         | 
|  | |
| 104 | 
             
                result = ''
         | 
| 105 | 
             
                while True:
         | 
| 106 | 
             
                    try: chunk = next(stream_response).decode()
         | 
| 107 | 
            +
                    except StopIteration: 
         | 
| 108 | 
            +
                        break
         | 
| 109 | 
            +
                    except requests.exceptions.ConnectionError:
         | 
| 110 | 
            +
                        chunk = next(stream_response).decode() # 失败了,重试一次?再失败就没办法了。
         | 
| 111 | 
             
                    if len(chunk)==0: continue
         | 
| 112 | 
             
                    if not chunk.startswith('data:'): 
         | 
| 113 | 
             
                        error_msg = get_full_error(chunk.encode('utf8'), stream_response).decode()
         | 
    	
        version
    CHANGED
    
    | @@ -1,5 +1,5 @@ | |
| 1 | 
             
            {
         | 
| 2 | 
            -
              "version": 2. | 
| 3 | 
             
              "show_feature": true,
         | 
| 4 | 
            -
              "new_feature": "(1)新增PDF全文翻译功能; (2)新增输入区切换位置的功能; (3)新增垂直布局选项; (4)多线程函数插件优化。"
         | 
| 5 | 
             
            }
         | 
|  | |
| 1 | 
             
            {
         | 
| 2 | 
            +
              "version": 2.43,
         | 
| 3 | 
             
              "show_feature": true,
         | 
| 4 | 
            +
              "new_feature": "(1)新增PDF全文翻译功能; (2)新增输入区切换位置的功能; (3)新增垂直布局选项; (4)多线程函数插件优化。(5) 改善多线程运行遇到网络问题时的处理"
         | 
| 5 | 
             
            }
         |