import streamlit as st
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from cleantext import clean
import hazm
import re


def cleanhtml(raw_html):
    """Strip HTML tags from the input text."""
    cleanr = re.compile('<.*?>')
    cleantext = re.sub(cleanr, '', raw_html)
    return cleantext


def cleaning(text):
    text = text.strip()

    # regular cleaning
    text = clean(text,
                 clean_all=True,
                 punct=True,
                 stopwords=True,
                 stemming=True,
                 extra_spaces=True
                 )

    # cleaning HTML tags
    text = cleanhtml(text)

    # normalizing with hazm
    normalizer = hazm.Normalizer()
    text = normalizer.normalize(text)

    # removing weird patterns (emoji, pictographs, directional marks, ...)
    weird_pattern = re.compile("["
                               u"\U0001F600-\U0001F64F"  # emoticons
                               u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                               u"\U0001F680-\U0001F6FF"  # transport & map symbols
                               u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                               u"\U00002702-\U000027B0"
                               u"\U000024C2-\U0001F251"
                               u"\U0001f926-\U0001f937"
                               u'\U00010000-\U0010ffff'
                               u"\u200d"
                               u"\u2640-\u2642"
                               u"\u2600-\u2B55"
                               u"\u23cf"
                               u"\u23e9"
                               u"\u231a"
                               u"\u3030"
                               u"\ufe0f"
                               u"\u2069"
                               u"\u2066"
                               # u"\u200c"  # ZWNJ left in place (used in Persian orthography)
                               u"\u2068"
                               u"\u2067"
                               "]+", flags=re.UNICODE)
    text = weird_pattern.sub(r'', text)

    # removing hashtags and extra spaces
    text = re.sub("#", "", text)
    text = re.sub(r"\s+", " ", text)

    return text
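

# Load the tokenizer and the fine-tuned offensive-language classifier from the Hugging Face Hub.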
tokenizer = AutoTokenizer.from_pretrained("HamidRezaei/Persian-Offensive-Language-Detection")
model = AutoModelForSequenceClassification.from_pretrained("HamidRezaei/Persian-Offensive-Language-Detection")
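
# Streamlit UI: a title, a message box, and a send button.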
st.title("Offensive or Not?")
prompt = st.text_area(label="Send a message")
button = st.button("send")
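
# Classify as soon as the text area contains a message.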
if prompt:
    # clean and normalize the raw message before tokenizing
    normalized_prompt = cleaning(prompt)
    encoding = tokenizer(normalized_prompt, return_tensors="pt")
    encoding = {k: v.to(model.device) for k, v in encoding.items()}

    # run the model without tracking gradients (inference only)
    with torch.no_grad():
        outputs = model(**encoding)
    logits = outputs.logits

    # apply sigmoid + threshold: the score is the model's probability that the message is offensive
    sigmoid = torch.nn.Sigmoid()
    probs = sigmoid(logits.squeeze().cpu())
    score = probs.item()

    st.markdown(f"Offensive: score {score:.3f}" if score > 0.5 else f"Not Offensive: score {score:.3f}")