add UI
Browse files- __pycache__/classification.cpython-39.pyc +0 -0
- __pycache__/run.cpython-39.pyc +0 -0
- __pycache__/textInput.cpython-39.pyc +0 -0
- __pycache__/util.cpython-39.pyc +0 -0
- app.py +35 -0
- classification.py +1 -1
- run.py +47 -56
- textInput.py +12 -0
__pycache__/classification.cpython-39.pyc
CHANGED
|
Binary files a/__pycache__/classification.cpython-39.pyc and b/__pycache__/classification.cpython-39.pyc differ
|
|
|
__pycache__/run.cpython-39.pyc
ADDED
|
Binary file (1.11 kB). View file
|
|
|
__pycache__/textInput.cpython-39.pyc
ADDED
|
Binary file (705 Bytes). View file
|
|
|
__pycache__/util.cpython-39.pyc
CHANGED
|
Binary files a/__pycache__/util.cpython-39.pyc and b/__pycache__/util.cpython-39.pyc differ
|
|
|
app.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
import textInput

# Gradio UI for the text classification / summarization system.
# The "生成" button runs textInput.text_dump_to_json, which returns
# (classification keys, formatted summary text) for the two output tabs.
with gr.Blocks() as demo:
    # Page header, written with Markdown syntax.
    gr.Markdown("# 文本分类系统")
    gr.Markdown("请选择要输入的文件或填入文本")
    # Generation parameters; Gradio textboxes deliver these as strings,
    # so the handler is responsible for parsing them.
    topic_num = gr.Textbox()
    max_length = gr.Textbox()
    # Input tabs: free text or an uploaded file.
    with gr.Tabs():
        with gr.Tab("文本输入"):
            text_input = gr.Textbox()
            text_button = gr.Button("生成")

        with gr.Tab("文件输入"):
            gr.Markdown("目前支持的格式有PDF、Word、txt")
            # NOTE(review): no click handler is wired to file_input yet;
            # textInput.file_dump_to_json is still a stub.
            file_input = gr.File()
    # Output tabs: classification result and abstract/summary.
    with gr.Tabs():
        with gr.Tab("分类页"):
            text_keys_output = gr.Textbox()

        with gr.Tab("摘要页"):
            text_ab_output = gr.Textbox()
    # with gr.Accordion("Open for More!"):
    #     gr.Markdown("Look at me...")
    # NOTE(review): three inputs are wired here, so text_dump_to_json must
    # accept (text, topic_num, max_length) — confirm its signature matches.
    text_button.click(
        textInput.text_dump_to_json,
        inputs=[text_input, topic_num, max_length],
        outputs=[text_keys_output, text_ab_output],
    )
    # image_button.click(flip_image, inputs=image_input, outputs=image_output)

demo.launch()
|
classification.py
CHANGED
|
@@ -8,7 +8,7 @@ import torch
|
|
| 8 |
|
| 9 |
def classify_by_topic(articles, central_topics):
|
| 10 |
|
| 11 |
-
#
|
| 12 |
def compute_similarity(articles, central_topics):
|
| 13 |
|
| 14 |
model = AutoModel.from_pretrained("distilbert-base-multilingual-cased")
|
|
|
|
| 8 |
|
| 9 |
def classify_by_topic(articles, central_topics):
|
| 10 |
|
| 11 |
+
# 计算与每个中心主题的相似度,返回一个矩阵
|
| 12 |
def compute_similarity(articles, central_topics):
|
| 13 |
|
| 14 |
model = AutoModel.from_pretrained("distilbert-base-multilingual-cased")
|
run.py
CHANGED
|
@@ -1,56 +1,47 @@
|
|
| 1 |
-
import util
|
| 2 |
-
import abstract
|
| 3 |
-
import classification
|
| 4 |
-
import inference
|
| 5 |
-
import outline
|
| 6 |
-
from inference import BertClassificationModel
|
| 7 |
-
# input:file/text,topic_num,max_length,output_choice
|
| 8 |
-
# output:file/text/topic_sentence
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
# file_process:
|
| 12 |
-
# in util
|
| 13 |
-
# read file code
|
| 14 |
-
# file to json_text
|
| 15 |
-
|
| 16 |
-
# convert:
|
| 17 |
-
# in util
|
| 18 |
-
# convert code
|
| 19 |
-
# json_text to text
|
| 20 |
-
|
| 21 |
-
# process:
|
| 22 |
-
# in util
|
| 23 |
-
# text process code
|
| 24 |
-
# del stop seg
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
article
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
matrix = inference.inference_matrix(title)
|
| 50 |
-
print(matrix)
|
| 51 |
-
|
| 52 |
-
text_outline,outline_list = outline.passage_outline(matrix,title)
|
| 53 |
-
print(text_outline)
|
| 54 |
-
|
| 55 |
-
output = util.formate_text(title_dict,outline_list)
|
| 56 |
-
print (output)
|
|
|
|
| 1 |
+
import util
|
| 2 |
+
import abstract
|
| 3 |
+
import classification
|
| 4 |
+
import inference
|
| 5 |
+
import outline
|
| 6 |
+
from inference import BertClassificationModel
|
| 7 |
+
# input:file/text,topic_num,max_length,output_choice
|
| 8 |
+
# output:file/text/topic_sentence
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# file_process:
|
| 12 |
+
# in util
|
| 13 |
+
# read file code
|
| 14 |
+
# file to json_text
|
| 15 |
+
|
| 16 |
+
# convert:
|
| 17 |
+
# in util
|
| 18 |
+
# convert code
|
| 19 |
+
# json_text to text
|
| 20 |
+
|
| 21 |
+
# process:
|
| 22 |
+
# in util
|
| 23 |
+
# text process code
|
| 24 |
+
# del stop seg
|
| 25 |
+
|
| 26 |
+
def texClear(article):
    """Return a copy of *article* with every sentence passed through
    util.clean_text (stop-word / noise cleanup lives in util)."""
    cleaned = []
    for raw_sentence in article:
        cleaned.append(util.clean_text(raw_sentence))
    return cleaned
|
| 29 |
+
|
| 30 |
+
def textToAb(sentences, article, topic_num, max_length):
    """Run the full pipeline: cluster -> classify -> title -> outline -> format.

    sentences:  cleaned sentences (output of texClear).
    article:    the original sentence list, classified against the topics.
    topic_num:  number of central topics to extract.
    max_length: length cap passed to title generation.

    Returns (keys, output): the generated title keys and the formatted text.
    """
    # Pick topic_num central sentences to act as cluster seeds.
    central_sentences = abstract.abstruct_main(sentences, topic_num)
    # Group every original sentence under its nearest central topic.
    groups = classification.classify_by_topic(article, central_sentences)
    groups = util.article_to_group(groups, central_sentences)
    # title_dict: {Ai_abstruct: (main_sentence, paragraph)}; title feeds
    # the outline model below.
    title_dict, title = util.generation(groups, max_length)

    matrix = inference.inference_matrix(title)

    # Only the outline list is needed here; the rendered outline text is dropped.
    _, outline_list = outline.passage_outline(matrix, title)

    output = util.formate_text(title_dict, outline_list)
    # Iterating a dict yields its keys in insertion order.
    keys = list(title)

    return keys, output
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
textInput.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import run
|
| 2 |
+
|
| 3 |
+
def text_dump_to_json(text, topic_num=5, max_length=50):
    """Split raw UI text into lines and run the pipeline.

    app.py wires three Gradio inputs (text, topic_num, max_length) to this
    handler, so it must accept all three; the old one-argument signature
    raised TypeError on click. Defaults keep the previous hard-coded
    behavior (5 topics, length 50) for any existing single-argument callers.

    topic_num / max_length arrive as strings from gr.Textbox; coerce to int
    and fall back to the defaults when blank or non-numeric.

    Returns (keys, output) from run.textToAb.
    """
    lines = [x.strip() for x in text.split("\n") if x.strip() != '']
    try:
        topic_num = int(topic_num)
    except (TypeError, ValueError):
        topic_num = 5
    try:
        max_length = int(max_length)
    except (TypeError, ValueError):
        max_length = 50
    sentences = run.texClear(lines)
    keys, output = run.textToAb(sentences, lines, topic_num, max_length)
    return keys, output
|
| 9 |
+
|
| 10 |
+
def file_dump_to_json(file):
    """Placeholder for file-based input (PDF / Word / txt).

    Not implemented yet; always returns None.
    """
    # TODO: extract text from the uploaded file and delegate to the
    # same pipeline as text_dump_to_json.
    return None
|