Spaces:

QINGCHE
/

TSA

Sleeping

TSA

File size: 2,893 Bytes

import json
import jieba
import re
import requests
import backoff
import time


@backoff.on_exception(backoff.expo, requests.exceptions.RequestException)
def post_url(url, headers, payload):
    """POST *payload* to *url* and return the ``requests`` response object.

    Every invocation first pauses for one second. The ``backoff`` decorator
    re-invokes the function (with exponential waits) whenever ``requests``
    raises a ``RequestException``, so the pause also applies to each retry.

    NOTE: no ``timeout`` is passed to ``requests``, and the HTTP status code
    is not inspected here; callers receive the response as-is.
    """
    # Fixed one-second pause before each request.
    time.sleep(1)
    return requests.request(
        method="POST",
        url=url,
        data=payload,
        headers=headers,
    )


def seg(text):
    """Split *text* into sentences, keeping each terminator on its sentence.

    Newlines are turned into spaces first. A split happens right after any of
    ``。！？.!?:`` and swallows the whitespace that follows. Empty fragments
    (for example the one produced after a trailing terminator) are dropped.

    Returns a list of sentence strings; an empty input yields ``[]``.
    """
    single_line = " ".join(text.split('\n'))
    fragments = re.split(r'(?<=[。！？.!?:])\s*', single_line)
    # Empty strings are the only falsy values re.split can produce here.
    return list(filter(None, fragments))


# A URL is recognised by an explicit scheme or a leading "www." followed by a
# run of ASCII URL characters. The body is ASCII-only so that a URL glued to
# CJK text or full-width punctuation stops where the URL actually ends.
_URL_PATTERN = re.compile(
    r"(?:(?:https?|ftp)://|www\.)[A-Za-z0-9\-_.~/%&=?#:+@]+",
    re.IGNORECASE,
)


def clean_text(text):
    """Reduce *text* to space-separated alphabetic words of length >= 2.

    Steps, in order:
      1. remove URLs (``http://``, ``https://``, ``ftp://`` or ``www.``);
      2. delete newlines (lines are joined without a separator);
      3. replace hyphens with spaces;
      4. remove ``d/d/d`` style dates and ``h:mm`` style times;
      5. keep only characters for which ``str.isalpha()`` is true, plus spaces;
      6. drop single-character words and re-join with single spaces.

    The previous URL pattern was a JavaScript regex literal (leading ``/`` and
    trailing ``/i`` matched literally), so real URLs were never removed and
    ended up mashed into tokens such as ``httpexamplecompage``.

    Returns the cleaned string (``""`` when nothing survives).
    """
    # URLs go first: the later steps (newline/hyphen/date/time removal) would
    # otherwise cut a URL into pieces that no longer look like one.
    text = _URL_PATTERN.sub("", text)
    text = text.replace('\n', "")
    text = text.replace("-", " ")
    text = re.sub(r"\d+/\d+/\d+", "", text)  # dates
    text = re.sub(r"[0-2]?[0-9]:[0-6][0-9]", "", text)  # times

    # Single pass with join instead of repeated string concatenation.
    pure_text = ''.join(
        letter for letter in text if letter.isalpha() or letter == ' ')

    return ' '.join(word for word in pure_text.split() if len(word) > 1)


def article_to_group(groups, topics):
    """Concatenate the pieces in *groups* that share the same label.

    Each element of *groups* is indexed as ``item[0]`` (the piece) and
    ``item[1]`` (its label). Pieces with equal labels are joined with ``+`` in
    the order they appear; labels keep first-seen order in the result.

    *topics* is accepted but not used by this function.

    Returns a dict mapping label -> concatenated pieces.
    """
    merged = {}
    for item in groups:
        piece, label = item[0], item[1]
        # First occurrence stores the piece itself; later ones append to it.
        merged[label] = merged[label] + piece if label in merged else piece
    return merged


def generation(para, max_length, max_retries=None):
    """Summarise every text in *para* via Baidu's ``news_summary`` endpoint.

    Args:
        para: Mapping of group key -> text. Each text is sent as ``content``.
        max_length: Value sent as ``max_summary_len`` with every request.
        max_retries: Optional cap on how many times one text is re-sent when
            the response carries no ``summary`` field. ``None`` (the default)
            keeps retrying indefinitely, which is the historical behaviour.

    Returns:
        A tuple ``(topic, Ai_abstract)`` where ``topic`` maps each summary
        string to its ``(key, text)`` pair and ``Ai_abstract`` lists the
        summaries in iteration order of *para*. If two texts yield the same
        summary, the later one overwrites the earlier entry in ``topic``.

    Raises:
        RuntimeError: if no access token could be obtained, or if
            *max_retries* is set and exhausted for some text.
    """
    # Nothing to summarise: skip the token round-trip entirely.
    if not para:
        return {}, []

    # NOTE(security): credentials are embedded in source. They are kept here
    # so existing deployments keep working, but they should be rotated and
    # loaded from configuration instead of being committed.
    API_KEY = "IZt1uK9PAI0LiqleqT0cE30b"
    SECRET_KEY = "Xv5kHB8eyhNuI1B1G7fRgm2SIPdlxGxs"

    def get_access_token():
        """Exchange the API key/secret for an OAuth access token."""
        url = "https://aip.baidubce.com/oauth/2.0/token"
        params = {"grant_type": "client_credentials",
                  "client_id": API_KEY, "client_secret": SECRET_KEY}
        token_info = requests.post(url, params=params).json()
        token = token_info.get("access_token")
        if token is None:
            # Previously this became the literal string "None" in the URL,
            # which made every summary request fail and retry forever.
            raise RuntimeError(
                f"Could not obtain access token: {token_info!r}")
        return str(token)

    url = "https://aip.baidubce.com/rpc/2.0/nlp/v1/news_summary?charset=UTF-8&access_token=" + get_access_token()

    # Identical for every request, so build it once.
    headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json'
    }

    topic = {}
    Ai_abstract = []
    for key, content in para.items():
        payload = json.dumps({
            "content": content,
            "max_summary_len": max_length
        })

        retries = 0
        while True:
            response = post_url(url, headers, payload)
            text_dict = json.loads(response.text)
            if 'summary' in text_dict:
                break
            # A response without 'summary' is treated as a failure and the
            # same payload is sent again.
            if max_retries is not None and retries >= max_retries:
                raise RuntimeError(
                    f"No summary returned after {retries} retries: "
                    f"{text_dict!r}")
            retries += 1
            print("ReTrying")

        summary = text_dict['summary']
        topic[summary] = (key, content)
        Ai_abstract.append(summary)
    return topic, Ai_abstract
def formate_text(title_dict, outline_list):
    """Render *outline_list* as a flat list of Markdown-style lines.

    An entry that is a key of *title_dict* becomes a ``## `` heading followed
    by ``title_dict[entry][1]``; any other entry becomes a ``# `` heading.

    Returns the list of lines in outline order.
    """
    lines = []
    for heading in outline_list:
        if heading in title_dict:
            lines.extend((f"## {heading}", title_dict[heading][1]))
        else:
            lines.append(f"# {heading}")
    return lines