"""Gradio app that scores free text with a downloaded Keras classifier.

At import time this script downloads the spaCy/NLTK resources and the Keras
model, loads the pickled tokenizer from the working directory, and then
launches a Gradio text-in/text-out interface around ``predict``.
"""

import pickle
import re

import gradio as gr
import nltk
import pandas as pd
import requests
import spacy
import spacy.cli
import tensorflow as tf
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Download necessary resources
spacy.cli.download("en_core_web_sm")
nltk.download('punkt_tab')
nltk.download('stopwords')

stop_words = set(stopwords.words('english'))
nlp = spacy.load('en_core_web_sm')

# Length every tokenized input is padded/truncated to before inference.
MAX_SEQUENCE_LENGTH = 1000

# Seconds to wait for the model host to connect / send data before giving up.
DOWNLOAD_TIMEOUT_SECONDS = 60

# Matches every character that is not an ASCII letter, digit, or whitespace.
_NON_ALNUM_PATTERN = re.compile(r'[^a-zA-Z0-9\s]')

# Download the model file from Hugging Face
model_url = "https://huggingface.co/Zmorell/HIPA_2/resolve/main/saved_keras_model.keras"
local_model_path = "saved_keras_model.keras"

response = requests.get(model_url, timeout=DOWNLOAD_TIMEOUT_SECONDS)
# Fail loudly on 4xx/5xx instead of saving an HTTP error page as the model
# file, which would only surface later as an opaque load_model() failure.
response.raise_for_status()
with open(local_model_path, 'wb') as f:
    f.write(response.content)

print(f"Model downloaded to {local_model_path}")

# Load the downloaded model
model = tf.keras.models.load_model(local_model_path)
print(f"Model loaded from {local_model_path}")

# Load the tokenizer
# NOTE(security): pickle.load can execute arbitrary code; only ship a
# tokenizer.pickle that comes from a trusted source.
tokenizer_file_path = "tokenizer.pickle"
with open(tokenizer_file_path, 'rb') as handle:
    tokenizer = pickle.load(handle)

print("Tokenizer loaded from tokenizer.pickle")


def preprocess_text(text):
    """Normalize raw text into a space-separated string of lemmas.

    Steps: strip every character that is not an ASCII letter, digit, or
    whitespace; lowercase and tokenize with NLTK; drop English stopwords;
    lemmatize the remaining tokens with the spaCy pipeline.

    Args:
        text: The raw input string.

    Returns:
        The lemmas joined by single spaces.
    """
    text = _NON_ALNUM_PATTERN.sub('', text)
    tokens = word_tokenize(text.lower())
    tokens = [word for word in tokens if word not in stop_words]
    # spaCy lemmatizes on a Doc, so the filtered tokens are re-joined first.
    doc = nlp(' '.join(tokens))
    lemmas = [token.lemma_ for token in doc]
    return ' '.join(lemmas)


def predict(text):
    """Run the classifier on ``text`` and return a display string.

    Each intermediate stage is printed for debugging. Any exception raised
    while preprocessing or predicting is caught here, because this is the
    UI boundary, and is reported back as the returned message.

    Args:
        text: The raw string entered in the Gradio text box.

    Returns:
        ``"This text is a violation = <score>"`` with the first model output
        formatted to two decimals, or ``"Error during prediction: <error>"``
        if anything failed.
    """
    try:
        print(f"Input text: {text}")
        inputs = preprocess_text(text)
        print(f"Preprocessed text: {inputs}")

        inputs = tokenizer.texts_to_sequences([inputs])
        print(f"Tokenized text: {inputs}")

        inputs = pad_sequences(inputs, maxlen=MAX_SEQUENCE_LENGTH, padding='post')
        print(f"Padded text: {inputs}")

        outputs = model.predict(inputs)
        print(f"Model outputs: {outputs}")

        # outputs[0][0]: first value of the first (only) batch row.
        return f"This text is a violation = {outputs[0][0]:.2f}"
    except Exception as e:
        print(f"Error during prediction: {e}")
        return f"Error during prediction: {e}"


# Set up the Gradio interface
demo = gr.Interface(fn=predict, inputs="text", outputs="text")
demo.launch()