# The Keras backend must be selected before Keras is imported.
# Available backend options are: "jax", "torch", "tensorflow".
import os
os.environ["KERAS_BACKEND"] = "jax"

import re

import gradio as gr
import keras
import nltk
import spacy
import spacy.cli
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Download the tokenizer data, stopword list, and spaCy English model.
spacy.cli.download("en_core_web_sm")
nltk.download('punkt_tab')
nltk.download('stopwords')

stop_words = set(stopwords.words('english'))
nlp = spacy.load('en_core_web_sm')

# Load the trained classifier from the Hugging Face Hub.
model = keras.saving.load_model("hf://ARI-HIPA-AI-Team/keras_model")
def preprocess_text(text):
    # Keep only alphanumeric characters and whitespace.
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    # Tokenize and remove stopwords.
    tokens = word_tokenize(text.lower())
    tokens = [word for word in tokens if word not in stop_words]
    # Lemmatize with spaCy.
    doc = nlp(' '.join(tokens))
    lemmas = [token.lemma_ for token in doc]
    return ' '.join(lemmas)
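
# Illustrative example (not in the original file): the pipeline above turns
# "The cats are running quickly!" into roughly "cat run quickly" -- the
# punctuation is stripped, "the" and "are" are dropped as stopwords, and the
# surviving tokens are lemmatized.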
def predict(text):
    inputs = preprocess_text(text)
    # Assumes the model accepts a batch of raw strings; if it was trained on
    # padded integer sequences, apply the training-time Tokenizer/padding first.
    outputs = model.predict([inputs])
    # Convert the numeric model output to text for display.
    return "This text is a violation = " + str(outputs[0])
demo = gr.Interface(fn=predict, inputs="text", outputs="text")
demo.launch()
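
# Usage sketch (illustrative, assuming the app is served on Gradio's default
# local port): once demo.launch() is running, the endpoint can be queried with
# the gradio_client package, e.g.:
#
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860")
#   print(client.predict("some example text", api_name="/predict"))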