from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import torch
import streamlit as st

# ✅ Step 1: Emoji translation model (your own fine-tuned model)
emoji_model_id = "JenniferHJF/qwen1.5-emoji-finetuned"
emoji_tokenizer = AutoTokenizer.from_pretrained(emoji_model_id, trust_remote_code=True)
emoji_model = AutoModelForCausalLM.from_pretrained(
    emoji_model_id,
    trust_remote_code=True,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
).to("cuda" if torch.cuda.is_available() else "cpu")
emoji_model.eval()

# ✅ Step 2: Selectable offensive-text classification models
model_options = {
    "Toxic-BERT": "unitary/toxic-bert",
    "Roberta Offensive": "cardiffnlp/twitter-roberta-base-offensive",
    "BERT Emotion": "bhadresh-savani/bert-base-go-emotion"
}

# Model selection in the Streamlit sidebar
selected_model = st.sidebar.selectbox("Choose classification model", list(model_options.keys()))
selected_model_id = model_options[selected_model]
classifier = pipeline(
    "text-classification",
    model=selected_model_id,
    device=0 if torch.cuda.is_available() else -1
)


def classify_emoji_text(text: str):
    """
    Step 1: translate the emoji in the text into plain words.
    Step 2: run the classifier to judge whether the result is offensive.
    """
    # The prompt keeps the Chinese "输入/输出" ("input/output") format used during fine-tuning.
    prompt = f"输入:{text}\n输出:"
    inputs = emoji_tokenizer(prompt, return_tensors="pt").to(emoji_model.device)
    with torch.no_grad():
        output_ids = emoji_model.generate(**inputs, max_new_tokens=64, do_sample=False)
    decoded = emoji_tokenizer.decode(output_ids[0], skip_special_tokens=True)

    # Keep only the generated part (strip the echoed prompt)
    if "输出:" in decoded:
        translated_text = decoded.split("输出:")[-1].strip()
    else:
        translated_text = decoded.strip()

    result = classifier(translated_text)[0]
    label = result["label"]
    score = result["score"]
    return translated_text, label, score
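
# --- Usage sketch (assumption, not part of the original script) ---
# A minimal Streamlit page that wires classify_emoji_text() into a simple UI.
# Widget labels and layout are illustrative; since Streamlit reruns the whole
# script on each interaction, wrapping the model loading above in
# st.cache_resource would avoid reloading the models every time.
st.title("Emoji-aware Offensive Text Detection")
user_text = st.text_area("Enter text containing emoji")
if st.button("Analyze") and user_text.strip():
    with st.spinner("Translating emoji and classifying..."):
        translated, label, score = classify_emoji_text(user_text)
    st.write("**Translated text:**", translated)
    st.write(f"**Prediction:** {label} ({score:.2%})")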