import gradio as gr
import numpy as np
import string
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import load_model
import pickle

# Download the NLTK data needed for tokenization, stop-word removal and lemmatization.
nltk.download('stopwords')
nltk.download('omw-1.4')
nltk.download('wordnet')
nltk.download('punkt')

# Load the trained classifier and the tokenizer that was fitted on the training data.
try:
    model = load_model('shubham_english_text_model.h5')
except (OSError, ValueError) as e:
    print(f"Error loading model: {e}")
    raise

with open('shubham_english_text_tokenizer.pkl', 'rb') as handle:
    tokenizer = pickle.load(handle)


def preprocess(text, tokenizer):
    """Clean the input text and turn it into a padded sequence of token ids."""
    lemmatizer = WordNetLemmatizer()
    stop_words = set(stopwords.words('english'))
    tokens = word_tokenize(text)
    # Drop stop words and punctuation, then lemmatize the lowercased tokens.
    tokens = [word for word in tokens if word.lower() not in stop_words and word not in string.punctuation]
    tokens = [lemmatizer.lemmatize(word.lower()) for word in tokens]
    preprocessed_text = ' '.join(tokens)
    # texts_to_sequences expects a list of texts; passing the raw string would
    # make Keras treat every character as a separate text.
    X = tokenizer.texts_to_sequences([preprocessed_text])
    # Pad to the input length the model expects; fall back to the sequence's own
    # length if the model accepts variable-length input.
    max_len = model.input_shape[1] or max(len(seq) for seq in X)
    X = pad_sequences(X, maxlen=max_len)
    return X


def predict(text):
    """Classify a piece of text as normal speech or hate speech."""
    X = preprocess(text, tokenizer)
    pred = model.predict(X)
    # Average over the batch dimension to get a single probability vector.
    probabilities = np.mean(pred, axis=0)
    # The first output is treated as the probability of normal speech.
    if probabilities.tolist()[0] > 0.72:
        prediction = "The string is classified as normal speech."
    else:
        prediction = "The string is classified as hate speech."
    return prediction, probabilities.tolist()
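
# A minimal sanity check, assuming the model and tokenizer files above are present;
# the example sentence is only illustrative. Uncomment to try a single prediction
# without starting the web UI:
# print(predict("Have a nice day"))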

iface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(lines=2, placeholder="Enter text here..."),
    outputs=[gr.Textbox(label="Prediction"), gr.Textbox(label="Probabilities")],
    title="Hate Speech Classifier",
    description="A classifier to detect hate speech in a given text.",
)

if __name__ == "__main__":
    iface.launch()