"""Summarize free text, translate the summary to English and derive tags.

Three Hugging Face pipelines are created at import time and reused by the
helpers below.  Every helper is best-effort: a model failure is logged and
turned into a neutral value (``None`` / ``[]``) instead of being raised.
"""

import logging
from typing import Any, Dict, List, Optional

from langdetect import detect
from transformers import pipeline

logger = logging.getLogger(__name__)

# Version stamp written into every result returned by ``summarize``.
AiSummaryVersion = 1

summarization_pipeline = pipeline(
    "summarization",
    model="csebuetnlp/mT5_multilingual_XLSum",
    max_length=512,
    min_length=50,
)
en_translation_pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en")
text_to_tags_pipe = pipeline(
    'text2text-generation',
    model='PageOrg/t5-small-tagging-text',
    max_length=64,
)


def _first_field(result: Any, key: str) -> Any:
    """Return ``result[0][key]`` when *result* is a list, else ``result[key]``."""
    record = result[0] if isinstance(result, list) else result
    return record[key]


def summarize(id: str, text: str) -> Dict[str, Any]:
    """Build the summary record for *text*.

    Args:
        id: Identifier copied verbatim into the result.
        text: Source text; may be ``None``.

    Returns:
        ``{"ver": AiSummaryVersion}`` only, when *text* is ``None`` or shorter
        than 10 characters.  Otherwise a dict with ``"id"``, ``"ver"``,
        ``"summary"`` and ``"tags"``.  ``"summary"`` is *text* itself when it
        is at most 2000 characters long, else the model summary (``None`` if
        summarization failed).  ``"tags"`` is a sorted list of unique tags
        generated from the English translation of the summary.
    """
    if text is None or len(text) < 10:
        return {"ver": AiSummaryVersion}

    # Only texts longer than 2000 characters go through the summarizer.
    summary = get_summarization(text) if len(text) > 2000 else text
    translated = get_en_translation(summary)
    tags = sorted(set(get_tags(translated)))

    return {
        "id": id,
        "ver": AiSummaryVersion,
        "summary": summary,
        "tags": tags,
    }


def get_summarization(text: str) -> Optional[str]:
    """Return the summarization pipeline's summary of *text*, or ``None`` on failure."""
    try:
        return _first_field(summarization_pipeline(text), 'summary_text')
    except Exception:
        # Best-effort: callers treat None as "no summary available".
        logger.exception("Summarization failed")
        return None


def get_en_translation(text: Optional[str]) -> Optional[str]:
    """Return *text* in English.

    ``None`` input yields ``None``.  Text detected as English is returned
    unchanged; anything else is sent through the translation pipeline.
    Returns ``None`` if translation fails.
    """
    if text is None:
        return None
    if is_english(text):
        return text
    try:
        return _first_field(en_translation_pipe(text), 'translation_text')
    except Exception:
        logger.exception("Translation to English failed")
        return None


def is_english(text: str) -> bool:
    """Return ``True`` when langdetect classifies *text* as ``'en'``.

    Any detection error is treated as "not English".
    NOTE(review): langdetect's README describes detection as non-deterministic
    unless ``DetectorFactory.seed`` is set; confirm whether that matters here.
    """
    try:
        return detect(text) == 'en'
    except Exception:
        return False


def get_tags(text: Optional[str]) -> List[str]:
    """Generate tags for *text* with the tagging pipeline.

    The generated string is split on commas; each piece is stripped and
    empty pieces are dropped.  Returns ``[]`` for ``None`` input or on
    failure.
    """
    if text is None:
        return []
    try:
        tag_str = _first_field(text_to_tags_pipe(text), 'generated_text')
    except Exception:
        logger.exception("Tag generation failed")
        return []
    stripped = (piece.strip() for piece in tag_str.split(','))
    # Empty output or stray commas would otherwise produce '' tags.
    return [tag for tag in stripped if tag]