import logging
import re
from typing import Any, List, Optional

from langdetect import detect
from transformers import pipeline

from utils.tag_utils import filter_tags

logger = logging.getLogger(__name__)

AiSummaryVersion = 1

# Classification labels scoring at or below this value are discarded.
_MIN_CLASSIFICATION_SCORE = 0.75

# Generated tag strings are split on '&' and ','.
_TAG_SEPARATOR = re.compile(r'[&,]')

summarization_pipeline = pipeline("summarization", model="csebuetnlp/mT5_multilingual_XLSum")
en_translation_pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en")
classification_pipe = pipeline("text-classification", model="Yueh-Huan/news-category-classification-distilbert")
tag_gen_pipe = pipeline("text2text-generation", model="fabiochiu/t5-base-tag-generation")


def _first_field(result: Any, key: str) -> Any:
    """Return ``key`` from a pipeline result that is either a list of dicts or a dict."""
    if isinstance(result, list):
        return result[0][key]
    return result[key]


def summarize(text: str) -> dict:
    """Build the summary/tags payload for ``text``.

    Args:
        text: The text to process. May be ``None``.

    Returns:
        ``{"ver": AiSummaryVersion}`` only, when ``text`` is ``None`` or shorter
        than 10 characters. Otherwise a dict with:

        * ``"ver"``: ``AiSummaryVersion``.
        * ``"summary"``: the result of ``get_summarization`` when ``text`` is
          longer than 100 characters (``None`` if summarization failed),
          otherwise ``text`` itself.
        * ``"tags"``: sorted, de-duplicated tags from ``get_classification``
          and ``get_tags`` (both run on the English translation of the
          summary) after passing through ``filter_tags``.
    """
    if text is None or len(text) < 10:
        return {
            "ver": AiSummaryVersion
        }

    # Texts of 100 characters or fewer are used as their own summary.
    summary = get_summarization(text) if len(text) > 100 else text
    translated = get_en_translation(summary)

    tags1 = get_classification(translated)
    tags2 = get_tags(translated)
    tags = filter_tags(tags1 + tags2)
    tags = sorted(set(tags))

    return {
        "ver": AiSummaryVersion,
        "summary": summary,
        "tags": tags,
    }


def get_summarization(text: str) -> Optional[str]:
    """Summarize ``text`` with ``summarization_pipeline``.

    Returns:
        The ``summary_text`` of the pipeline result, or ``None`` if the
        pipeline call or result unpacking raised.
    """
    try:
        result = summarization_pipeline(text)
        return _first_field(result, 'summary_text')
    except Exception:
        # Best effort: callers treat None as "no summary available".
        logger.exception("Summarization failed")
        return None


def get_en_translation(text: Optional[str]) -> Optional[str]:
    """Translate ``text`` to English with ``en_translation_pipe``.

    Returns:
        ``None`` when ``text`` is ``None``; ``text`` unchanged when
        ``is_english`` reports it as English; otherwise the
        ``translation_text`` of the pipeline result, or ``None`` if
        translation raised.
    """
    if text is None:
        return None
    try:
        if is_english(text):
            return text
        result = en_translation_pipe(text)
        return _first_field(result, 'translation_text')
    except Exception:
        # Best effort: callers treat None as "no translation available".
        logger.exception("Translation to English failed")
        return None


def is_english(text) -> bool:
    """Return ``True`` if ``detect`` reports ``text`` as ``'en'``.

    Any error raised by detection is treated as "not English".
    """
    try:
        return detect(text) == 'en'
    except Exception:
        logger.debug("Language detection failed", exc_info=True)
        return False


def get_tags(text: Optional[str]) -> List[str]:
    """Generate single-word tags for ``text`` with ``tag_gen_pipe``.

    The generated string is split on ``&`` and ``,``; each piece is stripped,
    and only pieces longer than 2 characters that contain no space are kept.

    Returns:
        The list of tags, or ``[]`` when ``text`` is ``None`` or tag
        generation raised.
    """
    if text is None:
        return []
    try:
        result = tag_gen_pipe(text)
        tag_str = _first_field(result, 'generated_text')
        candidates = (tag.strip() for tag in _TAG_SEPARATOR.split(tag_str))
        return [tag for tag in candidates if len(tag) > 2 and ' ' not in tag]
    except Exception:
        # Best effort: callers treat [] as "no tags".
        logger.exception("Tag generation failed")
        return []


def get_classification(text: Optional[str]) -> List[str]:
    """Classify ``text`` with ``classification_pipe``.

    Returns:
        The stripped labels whose score is greater than 0.75, or ``[]`` when
        ``text`` is ``None``, nothing scores high enough, or classification
        raised.
    """
    if text is None:
        return []
    try:
        result = classification_pipe(text)
        # Normalise the single-dict form so both result shapes share one path.
        entries = result if isinstance(result, list) else [result]
        return [
            entry['label'].strip()
            for entry in entries
            if entry['score'] > _MIN_CLASSIFICATION_SCORE
        ]
    except Exception:
        # Best effort: callers treat [] as "no categories".
        logger.exception("Classification failed")
        return []