# File size: 2,893 Bytes
import json
import jieba
import re
import requests
import backoff
import time
@backoff.on_exception(backoff.expo, requests.exceptions.RequestException)
def post_url(url, headers, payload, timeout=30):
    """POST *payload* to *url* and return the ``requests`` response.

    The function is wrapped by ``backoff.on_exception``: whenever the call
    raises a ``requests.exceptions.RequestException`` it is attempted again
    with exponentially growing delays.

    Args:
        url: Target URL.
        headers: HTTP headers to send with the request.
        payload: Request body, forwarded as ``data``.
        timeout: Seconds to wait on a single attempt. Without a timeout a
            stalled connection would block forever and the retry decorator
            would never get a chance to run.

    Returns:
        The ``requests.Response`` object. The HTTP status code is not
        inspected here; callers decide what counts as success.
    """
    # Fixed one-second pause before every attempt (retries included);
    # presumably a crude rate limit for the remote API.
    time.sleep(1)
    return requests.request(
        "POST", url, headers=headers, data=payload, timeout=timeout
    )
def seg(text):
    """Split *text* into sentence-like pieces.

    Newlines are first turned into spaces. The text is then cut right after
    each of the characters ``。 ! ? . :``; any whitespace following the
    punctuation is consumed by the split. The punctuation mark stays attached
    to the piece it terminates, and empty pieces are discarded.

    Args:
        text: The string to segment.

    Returns:
        A list of non-empty strings in their original order.
    """
    flattened = " ".join(text.split('\n'))
    pieces = re.split(r'(?<=[。!?.!?:])\s*', flattened)
    # filter(None, ...) drops the empty strings produced by a split at the
    # very end of the text.
    return list(filter(None, pieces))
# Matches URLs that start with an explicit scheme or "www.". Only ASCII URL
# characters are consumed, so CJK text glued to a URL is left untouched.
_URL_PATTERN = re.compile(
    r"(?:https?://|ftp://|www\.)[A-Za-z0-9\-._~:/?#\[\]@!$&'()*+,;=%]+",
    re.IGNORECASE,
)


def clean_text(text):
    """Reduce *text* to space-separated alphabetic words.

    Steps, in order:

    1. Remove URLs (``http://``, ``https://``, ``ftp://`` or ``www.``
       prefixed). This runs first because the later steps rewrite hyphens,
       slashes-with-digits and colons, which would break a URL into pieces
       that can no longer be recognised. The previous pattern was a
       JavaScript regex literal (``/.../i``) whose delimiters were matched
       literally, so URLs were effectively never removed.
    2. Delete newlines (lines are joined without a separator).
    3. Replace hyphens with spaces.
    4. Remove ``d/d/d`` style dates and ``hh:mm`` style times.
    5. Keep only characters for which ``str.isalpha()`` is true, plus spaces.
    6. Drop words of length 1 and join the rest with single spaces.

    Bare domains without a scheme or ``www.`` (e.g. ``example.com``) are not
    treated as URLs, to avoid deleting ordinary ``word.word`` text.

    Args:
        text: Raw input string.

    Returns:
        The cleaned string; empty if nothing survives.
    """
    text = _URL_PATTERN.sub("", text)  # URLs
    text = text.replace('\n', "")
    text = text.replace("-", " ")
    text = re.sub(r"\d+/\d+/\d+", "", text)  # dates
    text = re.sub(r"[0-2]?[0-9]:[0-6][0-9]", "", text)  # times
    pure_text = "".join(ch for ch in text if ch.isalpha() or ch == ' ')
    return ' '.join(word for word in pure_text.split() if len(word) > 1)
def article_to_group(groups, topics):
    """Concatenate the pieces of *groups* that share the same label.

    Each element of *groups* is indexed as ``element[0]`` (the piece) and
    ``element[1]`` (the label). Pieces with equal labels are joined with
    ``+`` in the order they appear.

    Args:
        groups: Iterable of indexable ``(piece, label, ...)`` items.
        topics: Not used by this function; kept for interface compatibility.

    Returns:
        A dict mapping each label to the concatenation of its pieces, with
        labels in first-seen order.
    """
    merged = {}
    for entry in groups:
        label, piece = entry[1], entry[0]
        merged[label] = merged[label] + piece if label in merged else piece
    return merged
def generation(para, max_length, max_retries=None):
    """Summarise every text in *para* via Baidu's ``news_summary`` endpoint.

    An OAuth access token is fetched once, then each text is POSTed through
    ``post_url``. A response without a ``summary`` field is treated as a
    failure and the request is sent again.

    Args:
        para: Mapping of group key -> text to summarise.
        max_length: Sent as ``max_summary_len`` with every request.
        max_retries: Maximum number of re-sends per text when the response
            has no ``summary``. ``None`` (the default) keeps retrying
            indefinitely, which matches the previous behaviour.

    Returns:
        A tuple ``(topic, Ai_abstract)`` where ``topic`` maps each summary to
        its ``(key, text)`` pair and ``Ai_abstract`` lists the summaries in
        the iteration order of *para*. Identical summaries overwrite each
        other in ``topic`` but all appear in ``Ai_abstract``.

    Raises:
        RuntimeError: If the OAuth response carries no access token, or if
            ``max_retries`` is set and exhausted for some text.
    """
    # Nothing to summarise: skip the token round trip entirely.
    if not para:
        return {}, []

    # NOTE(review): credentials are hard-coded in source. They should be
    # moved to configuration / environment variables and rotated.
    API_KEY = "IZt1uK9PAI0LiqleqT0cE30b"
    SECRET_KEY = "Xv5kHB8eyhNuI1B1G7fRgm2SIPdlxGxs"

    def get_access_token():
        """Fetch an OAuth token using the client-credentials grant."""
        url = "https://aip.baidubce.com/oauth/2.0/token"
        params = {"grant_type": "client_credentials",
                  "client_id": API_KEY, "client_secret": SECRET_KEY}
        token = requests.post(url, params=params, timeout=30).json().get("access_token")
        if not token:
            # Previously a missing token was stringified to "None" and every
            # subsequent request failed, retrying forever.
            raise RuntimeError("Baidu OAuth response did not contain an access_token")
        return str(token)

    url = "https://aip.baidubce.com/rpc/2.0/nlp/v1/news_summary?charset=UTF-8&access_token=" + get_access_token()
    # Headers do not depend on the text, so build them once.
    headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json'
    }
    topic = {}
    Ai_abstract = []
    for key, content in para.items():
        payload = json.dumps({
            "content": content,
            "max_summary_len": max_length
        })
        text_dict = json.loads(post_url(url, headers, payload).text)
        attempts = 0
        while 'summary' not in text_dict:
            if max_retries is not None and attempts >= max_retries:
                raise RuntimeError(
                    f"No summary returned after {attempts} retries: {text_dict}"
                )
            attempts += 1
            text_dict = json.loads(post_url(url, headers, payload).text)
            print("ReTrying")
        summary = text_dict['summary']
        topic[summary] = (key, content)
        Ai_abstract.append(summary)
    return topic, Ai_abstract
def formate_text(title_dict,outline_list):
    """Render an outline as a flat list of Markdown-style lines.

    For every entry of *outline_list*:

    * if the entry is not a key of *title_dict*, emit ``"# <entry>"``;
    * otherwise emit ``"## <entry>"`` followed by ``title_dict[entry][1]``.

    Args:
        title_dict: Mapping of heading -> indexable value whose element at
            index 1 is the body to place under the heading.
        outline_list: Headings in the order they should appear.

    Returns:
        A list of heading strings and bodies, in outline order.
    """
    formated = []
    for each in outline_list:
        if each in title_dict:
            formated.append(f"## {each}")
            formated.append(title_dict[each][1])
        else:
            formated.append(f"# {each}")
    return formated