"""Streamlit app that summarizes Khmer text.

The app loads the ``songhieng/khmer-mt5-summarization`` checkpoint from the
Hugging Face Hub, lets the user paste text and tune a few generation
settings in the sidebar, and renders the generated summary.
"""

import streamlit as st

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM


# Hugging Face Hub identifier of the summarization checkpoint.
model_identifier = "songhieng/khmer-mt5-summarization"

# Explicit cap on the number of input tokens so that ``truncation=True`` has a
# concrete limit to truncate to.
# NOTE(review): 512 is a conservative choice, not a documented limit of this
# checkpoint -- adjust if longer inputs are needed and memory allows.
MAX_INPUT_TOKENS = 512


# Page configuration is issued first, before any other Streamlit command
# (including the spinner shown by the cached loader below).
st.set_page_config(page_title="Khmer Text Summarization", layout="wide")


@st.cache_resource(show_spinner="Loading summarization model...")
def load_summarizer(identifier):
    """Load the tokenizer and seq2seq model for *identifier* once per process.

    Streamlit re-executes the whole script on every widget interaction;
    caching the loaded objects avoids re-reading the checkpoint each time.

    Args:
        identifier: Hugging Face Hub model id or local path.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(identifier, use_fast=False)
    # ``use_fast`` is a tokenizer option only; it is intentionally not passed
    # to the model loader.
    loaded_model = AutoModelForSeq2SeqLM.from_pretrained(identifier)
    return loaded_tokenizer, loaded_model


tokenizer, model = load_summarizer(model_identifier)


# --- Main page -------------------------------------------------------------
st.title("Khmer Text Summarization")
st.write("Enter Khmer text below to generate a concise summary.")

user_input = st.text_area("Input Text:", height=300)


# --- Sidebar: generation settings -----------------------------------------
st.sidebar.header("Summarization Settings")
max_length = st.sidebar.slider(
    "Maximum Summary Length", min_value=50, max_value=300, value=150, step=10
)
min_length = st.sidebar.slider(
    "Minimum Summary Length", min_value=10, max_value=100, value=30, step=5
)
num_beams = st.sidebar.slider(
    "Number of Beams", min_value=1, max_value=10, value=4, step=1
)

# The slider ranges overlap (minimum can go up to 100, maximum down to 50),
# so clamp the minimum to keep the length constraints feasible.
effective_min_length = min(min_length, max_length)
if effective_min_length != min_length:
    st.sidebar.warning(
        "Minimum length exceeds maximum length; "
        f"using {effective_min_length} as the minimum."
    )


# --- Summarization ---------------------------------------------------------
if st.button("Summarize"):
    if not user_input.strip():
        st.warning("Please enter some text to summarize.")
    else:
        try:
            # Calling the tokenizer (rather than ``encode``) also returns the
            # attention mask, which is forwarded to ``generate``.
            encoded = tokenizer(
                user_input,
                return_tensors="pt",
                truncation=True,
                max_length=MAX_INPUT_TOKENS,
            )

            generation_kwargs = {
                "max_length": max_length,
                "min_length": effective_min_length,
                "num_beams": num_beams,
            }
            # These two options only apply to beam search; skip them when the
            # user selects a single beam.
            if num_beams > 1:
                generation_kwargs["length_penalty"] = 2.0
                generation_kwargs["early_stopping"] = True

            with st.spinner("Generating summary..."):
                summary_ids = model.generate(**encoded, **generation_kwargs)

            summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
        except Exception as e:
            # Top-level UI boundary: surface the failure to the user instead
            # of crashing the app.
            st.error(f"An error occurred during summarization: {e}")
        else:
            st.subheader("Summary:")
            st.write(summary)