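# app.py: Gradio app that scores input text for violation risk with a
# Keras model downloaded from the Zmorell/HIPA_2 repo on Hugging Face.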
import gradio as gr
import tensorflow as tf
import nltk
import spacy
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from tensorflow.keras.preprocessing.sequence import pad_sequences
import requests
import pickle

# Download the runtime NLP resources: the spaCy English model and NLTK data
import spacy.cli
spacy.cli.download("en_core_web_sm")
nltk.download('punkt')      # tokenizer data used by older NLTK releases
nltk.download('punkt_tab')  # tokenizer data required by NLTK >= 3.9
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))
nlp = spacy.load('en_core_web_sm')

# Download the model file from Hugging Face
model_url = "https://huggingface.co/Zmorell/HIPA_2/resolve/main/saved_keras_model.keras"
local_model_path = "saved_keras_model.keras"

response = requests.get(model_url, timeout=60)
response.raise_for_status()  # fail fast on a bad status instead of saving an error page
with open(local_model_path, 'wb') as f:
    f.write(response.content)

print(f"Model downloaded to {local_model_path}")

# Load the downloaded model
model = tf.keras.models.load_model(local_model_path)
print(f"Model loaded from {local_model_path}")

# Load the tokenizer
tokenizer_file_path = "tokenizer.pickle"
with open(tokenizer_file_path, 'rb') as handle:
    tokenizer = pickle.load(handle)

print("Tokenizer loaded from tokenizer.pickle")

def preprocess_text(text):
    # Strip punctuation, lowercase, drop stopwords, then lemmatize with spaCy
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    tokens = word_tokenize(text.lower())
    tokens = [word for word in tokens if word not in stop_words]
    doc = nlp(' '.join(tokens))
    lemmas = [token.lemma_ for token in doc]
    return ' '.join(lemmas)
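
# Illustrative example (exact output depends on the spaCy model version):
#   preprocess_text("The patient's records were shared!")
#   -> something like "patient record share"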

def predict(text):
    try:
        print(f"Input text: {text}")
        inputs = preprocess_text(text)
        print(f"Preprocessed text: {inputs}")

        # Map words to the integer ids learned when the tokenizer was fitted
        inputs = tokenizer.texts_to_sequences([inputs])
        print(f"Tokenized text: {inputs}")

        # Pad/truncate to the fixed sequence length the model expects
        inputs = pad_sequences(inputs, maxlen=1000, padding='post')
        print(f"Padded text: {inputs}")

        outputs = model.predict(inputs)
        print(f"Model outputs: {outputs}")

        # Report the model's single output score as the violation likelihood
        return f"This text is a violation = {outputs[0][0]:.2f}"
    except Exception as e:
        print(f"Error during prediction: {e}")
        return f"Error during prediction: {e}"

# Set up the Gradio interface
demo = gr.Interface(fn=predict, inputs="text", outputs="text")
demo.launch()
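
# When running outside Hugging Face Spaces, standard Gradio launch options apply,
# e.g. demo.launch(server_name="0.0.0.0", server_port=7860).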