Spaces:

aeresd
/

test_1

Sleeping

test_1 / app.py

Update app.py

932e610 verified 5 months ago

4.98 kB

	from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
	import torch
	import streamlit as st
	from PIL import Image
	import pytesseract
	import pandas as pd
	import plotly.express as px

	# Step 1: emoji translation model (the author's own fine-tuned checkpoint).
	emoji_model_id = "JenniferHJF/qwen1.5-emoji-finetuned"


	# show_spinner=False so the cached call emits no Streamlit element; this code
	# runs before st.set_page_config, which Streamlit has required to be the
	# first Streamlit command on the page.
	@st.cache_resource(show_spinner=False)
	def _load_emoji_translator(model_id: str):
	    """Load the emoji-translation tokenizer and causal LM once and reuse them.

	    Streamlit re-executes the whole script on every widget interaction, so an
	    uncached ``from_pretrained`` call would reload the checkpoint on each
	    rerun. ``st.cache_resource`` keeps the loaded objects alive across reruns.

	    Args:
	        model_id: Hugging Face Hub id of the checkpoint to load.

	    Returns:
	        A ``(tokenizer, model)`` tuple. The model is in eval mode and has been
	        moved to CUDA when available, otherwise to CPU.
	    """
	    use_cuda = torch.cuda.is_available()
	    # NOTE(review): trust_remote_code runs code shipped with the model repo;
	    # kept from the original -- confirm the checkpoint is trusted.
	    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
	    model = AutoModelForCausalLM.from_pretrained(
	        model_id,
	        trust_remote_code=True,
	        # Half precision only on GPU; CPU inference stays in float32.
	        torch_dtype=torch.float16 if use_cuda else torch.float32,
	    ).to("cuda" if use_cuda else "cpu")
	    model.eval()
	    return tokenizer, model


	emoji_tokenizer, emoji_model = _load_emoji_translator(emoji_model_id)

	# Step 2: selectable offensive-text classification models.
	# Keys are the display names; values are the Hugging Face model ids.
	# Insertion order is kept, so the first entry is the first option listed.
	model_options = {
	    "Toxic-BERT": "unitary/toxic-bert",
	    "Roberta Offensive": "cardiffnlp/twitter-roberta-base-offensive",
	    "BERT Emotion": "bhadresh-savani/bert-base-go-emotion",
	}

	# Page configuration.
	st.set_page_config(page_title="Emoji Offensive Text Detector", page_icon="🚨", layout="wide")


	@st.cache_resource
	def _load_classifier(model_id: str):
	    """Build the text-classification pipeline for ``model_id`` once and reuse it.

	    Streamlit re-executes the script on every widget interaction; without
	    caching, the pipeline (and its model weights) would be rebuilt on each
	    rerun. ``st.cache_resource`` keys the cached pipeline on ``model_id``, so
	    each selectable model is loaded at most once.

	    Args:
	        model_id: Hugging Face Hub id of the classification model.

	    Returns:
	        The ``transformers`` text-classification pipeline, on GPU 0 when CUDA
	        is available and on CPU (``device=-1``) otherwise.
	    """
	    return pipeline(
	        "text-classification",
	        model=model_id,
	        device=0 if torch.cuda.is_available() else -1,
	    )


	# Sidebar: model selection.
	with st.sidebar:
	    st.header("🧠 Settings")
	    selected_model = st.selectbox("Choose classification model", list(model_options))
	    selected_model_id = model_options[selected_model]
	    classifier = _load_classifier(selected_model_id)

	# Initialise the per-session analysis history on first run.
	if "history" not in st.session_state:
	    st.session_state.history = []


	def classify_emoji_text(text: str):
	    """Translate ``text`` with the emoji model, classify it, and record the result.

	    The input is wrapped in the 输入/输出 prompt and decoded without sampling
	    (``do_sample=False``, at most 64 new tokens). Whatever follows the last
	    ``输出：`` marker in the decoded string is taken as the translation, which
	    is then scored by the module-level ``classifier``.

	    Args:
	        text: Sentence to analyse.

	    Returns:
	        ``(translated_text, label, score, reasoning)``. ``label`` and
	        ``score`` come from the classifier's first result; ``reasoning`` is a
	        fixed explanatory template that mentions the label.

	    Side effects:
	        Appends a dict with keys ``text``, ``translated``, ``label``,
	        ``score`` and ``reason`` to ``st.session_state.history``.
	    """
	    prompt = f"输入：{text}\n输出："
	    model_inputs = emoji_tokenizer(prompt, return_tensors="pt").to(emoji_model.device)
	    with torch.no_grad():
	        generated = emoji_model.generate(**model_inputs, max_new_tokens=64, do_sample=False)
	    full_text = emoji_tokenizer.decode(generated[0], skip_special_tokens=True)
	    # rpartition yields the whole string as the tail when the marker is
	    # absent, so no separate "marker missing" branch is needed.
	    translated_text = full_text.rpartition("输出：")[2].strip()

	    top_result = classifier(translated_text)[0]
	    label, score = top_result["label"], top_result["score"]
	    # NOTE(review): this wording says "flagged" whatever the label is --
	    # confirm whether benign labels should get a different message.
	    reasoning = (
	        f"The sentence was flagged as '{label}' due to potentially offensive phrases. "
	        "Consider replacing emotionally charged, ambiguous, or abusive terms."
	    )

	    record = {
	        "text": text,
	        "translated": translated_text,
	        "label": label,
	        "score": score,
	        "reason": reasoning,
	    }
	    st.session_state.history.append(record)
	    return translated_text, label, score, reasoning

	# Main page layout: title followed by a horizontal divider.
	st.title("🚨 Emoji Offensive Text Detector & Analysis")
	st.markdown("---")

	# Input and analysis section header.
	st.header("✍️ Input & Moderation")
	def text_moderation_section():
	    """Render the input widgets, run optional OCR, and show the analysis result.

	    The user can type a sentence or upload an image. When an image is
	    uploaded and OCR succeeds, the extracted text replaces the typed text.
	    Clicking the button runs ``classify_emoji_text`` and shows the
	    translation, predicted label, confidence and explanation.

	    Image decoding and OCR failures are reported with ``st.error`` instead of
	    aborting the page, and clicking the button with nothing to analyse shows
	    a warning.
	    """
	    st.markdown("Enter text with emojis or upload an image with text.")
	    text = st.text_area("Sentence (or OCR text will appear here):", height=120)

	    uploaded_file = st.file_uploader("Or upload an image for OCR:", type=["jpg", "jpeg", "png"])
	    if uploaded_file:
	        try:
	            image = Image.open(uploaded_file)
	            # NOTE(review): newer Streamlit releases appear to deprecate
	            # use_column_width -- confirm against the pinned version.
	            st.image(image, caption="Uploaded Image", use_column_width=True)
	            with st.spinner("Extracting text via OCR..."):
	                ocr_text = pytesseract.image_to_string(image, lang="chi_sim+eng").strip()
	        except (OSError, RuntimeError) as exc:
	            # PIL's unreadable-image error and pytesseract's "binary not
	            # found" error derive from OSError; pytesseract's runtime
	            # failures derive from RuntimeError.
	            st.error(f"Could not extract text from the image: {exc}")
	        else:
	            st.text_area("Extracted Text:", value=ocr_text, height=120)
	            # OCR output takes precedence over the typed sentence.
	            text = ocr_text

	    if not st.button("🚦 Analyze Text"):
	        return
	    if not text or not text.strip():
	        st.warning("Nothing to analyze: enter a sentence or upload an image containing text.")
	        return

	    with st.spinner("Processing..."):
	        try:
	            translated, label, score, reason = classify_emoji_text(text)
	        except Exception as exc:  # UI boundary: show any model failure to the user
	            st.error(f"Error during processing: {exc}")
	        else:
	            st.subheader("🔄 Translated Text")
	            st.code(translated)
	            st.subheader(f"🎯 Prediction: {label}")
	            st.write(f"Confidence: {score:.2%}")
	            st.subheader("🧠 Explanation")
	            st.info(reason)

	# Analysis dashboard.
	def analysis_dashboard():
	    """Render the per-input result list and the risk radar chart.

	    Reads ``st.session_state.history``. When it is empty, an info message is
	    shown and nothing else is rendered. Otherwise each record's input,
	    translation, label with score, and suggestion are listed, followed by a
	    polar line chart.
	    """
	    history = st.session_state.history
	    if not history:
	        st.info("No data to display. Please analyze some text first.")
	        return

	    # Suggestion list: one group of bullets per analysed input.
	    st.subheader("📝 Offensive Terms & Suggestions")
	    for item in history:
	        st.markdown(f"- Input: {item['text']}")
	        st.markdown(f" - Translated: {item['translated']}")
	        st.markdown(f" - Label: {item['label']} ({item['score']:.2%})")
	        st.markdown(f" - Suggestion: {item['reason']}")

	    # Radar chart.
	    # NOTE(review): these scores are hard-coded and are not derived from the
	    # history -- confirm whether they are placeholders to be replaced.
	    radar_df = pd.DataFrame({
	        "Category": ["Insult", "Abuse", "Discrimination", "Hate Speech", "Vulgarity"],
	        "Score": [0.7, 0.4, 0.3, 0.5, 0.6],
	    })
	    radar_fig = px.line_polar(
	        radar_df,
	        r='Score',
	        theta='Category',
	        line_close=True,
	        title="⚠️ Risk Radar by Category",
	    )
	    radar_fig.update_traces(line_color='black')
	    st.plotly_chart(radar_fig)


	# Render the sections in display order. Streamlit draws elements in call
	# order, so the dashboard divider and header are emitted here, after the
	# input section, and not at definition time where they would appear above
	# the input widgets.
	text_moderation_section()
	st.markdown("---")
	st.header("📊 Violation Analysis")
	analysis_dashboard()