import opencc
import streamlit as st
from transformers import T5ForConditionalGeneration, T5Tokenizer
# Hugging Face model id of the summarisation checkpoint (the id advertises
# en/ru/zh support). A local directory such as "./LLM" may be used instead.
local_path = 'utrobinmv/t5_summary_en_ru_zh_base_2048'


@st.cache_resource
def _load_summarizer(path):
    """Load the T5 model and tokenizer stored at *path*, once per process.

    Streamlit re-executes this script from top to bottom on every widget
    interaction. Without caching, the checkpoint would be reloaded each
    time the user presses a button; ``st.cache_resource`` keeps a single
    shared instance alive across reruns instead.

    Args:
        path: Model id or local directory accepted by ``from_pretrained``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    return (
        T5ForConditionalGeneration.from_pretrained(path),
        T5Tokenizer.from_pretrained(path),
    )


# Module-level names are kept so the rest of the script can use them as before.
model, tokenizer = _load_summarizer(local_path)
# Page heading.
st.title("中文文章摘要工具")

# OpenCC converter using the 's2t' profile (Simplified -> Traditional
# Chinese); it is applied to the generated summary before display.
converter = opencc.OpenCC("s2t")

# Multi-line input box for the article to summarise; starts out empty.
article = st.text_area("請輸入文章", value="")
def generate_summary(article):
    """Return a summary of *article* produced by the module-level T5 model.

    The article is prefixed with the task prompt and encoded with the
    module-level ``tokenizer`` (truncated to ``max_length=1024``). The
    module-level ``model`` then generates with 4-beam search, bounded by
    ``min_length=60`` and ``max_length=180``, and the first generated
    sequence is decoded with special tokens removed.

    Args:
        article: Text to summarise.

    Returns:
        The decoded summary string.
    """
    # NOTE(review): the checkpoint's model card appears to document English
    # task prefixes (e.g. "summary: ") — confirm this prefix is intended.
    prompt = "摘要:" + article
    input_ids = tokenizer.encode(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=1024,
    )
    generated = model.generate(
        input_ids,
        num_beams=4,
        early_stopping=True,
        length_penalty=2.0,
        min_length=60,
        max_length=180,
    )
    # Only one prompt was submitted, so the first sequence is the result.
    return tokenizer.decode(generated[0], skip_special_tokens=True)
# Summarise on demand: nothing below runs until the button is pressed.
if st.button("生成摘要"):
    if article.strip():
        summary = generate_summary(article)
        # Pass the model output through the OpenCC converter before display.
        traditional_summary = converter.convert(summary)
        st.subheader("摘要:")
        st.write(traditional_summary)
    else:
        # Blank or whitespace-only input: show an error instead of summarising.
        st.error("請輸入文章。")