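# app.py: Gradio app that scores input text for violation risk with a
# Keras model downloaded from the Zmorell/HIPA_2 repo on Hugging Face.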
import gradio as gr
import tensorflow as tf
import nltk
import spacy
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from tensorflow.keras.preprocessing.sequence import pad_sequences
import requests
import pickle

# Download the runtime NLP resources: the spaCy English model and NLTK data
import spacy.cli
spacy.cli.download("en_core_web_sm")
nltk.download('punkt')      # tokenizer data used by older NLTK releases
nltk.download('punkt_tab')  # tokenizer data required by NLTK >= 3.9
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))
nlp = spacy.load('en_core_web_sm')

# Download the model file from Hugging Face
model_url = "https://huggingface.co/Zmorell/HIPA_2/resolve/main/saved_keras_model.keras"
local_model_path = "saved_keras_model.keras"

response = requests.get(model_url, timeout=60)
response.raise_for_status()  # fail fast on a bad status instead of saving an error page
with open(local_model_path, 'wb') as f:
    f.write(response.content)

print(f"Model downloaded to {local_model_path}")

# Load the downloaded model
model = tf.keras.models.load_model(local_model_path)
print(f"Model loaded from {local_model_path}")

# Load the tokenizer
tokenizer_file_path = "tokenizer.pickle"
with open(tokenizer_file_path, 'rb') as handle:
    tokenizer = pickle.load(handle)

print("Tokenizer loaded from tokenizer.pickle")

def preprocess_text(text):
    # Strip punctuation, lowercase, drop stopwords, then lemmatize with spaCy
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    tokens = word_tokenize(text.lower())
    tokens = [word for word in tokens if word not in stop_words]
    doc = nlp(' '.join(tokens))
    lemmas = [token.lemma_ for token in doc]
    return ' '.join(lemmas)
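
# Illustrative example (exact output depends on the spaCy model version):
#   preprocess_text("The patient's records were shared!")
#   -> something like "patient record share"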

def predict(text):
    try:
        print(f"Input text: {text}")
        inputs = preprocess_text(text)
        print(f"Preprocessed text: {inputs}")

        # Map words to the integer ids learned when the tokenizer was fitted
        inputs = tokenizer.texts_to_sequences([inputs])
        print(f"Tokenized text: {inputs}")

        # Pad/truncate to the fixed sequence length the model expects
        inputs = pad_sequences(inputs, maxlen=1000, padding='post')
        print(f"Padded text: {inputs}")

        outputs = model.predict(inputs)
        print(f"Model outputs: {outputs}")

        # Report the model's single output score as the violation likelihood
        return f"This text is a violation = {outputs[0][0]:.2f}"
    except Exception as e:
        print(f"Error during prediction: {e}")
        return f"Error during prediction: {e}"

# Set up the Gradio interface
demo = gr.Interface(fn=predict, inputs="text", outputs="text")
demo.launch()
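
# When running outside Hugging Face Spaces, standard Gradio launch options apply,
# e.g. demo.launch(server_name="0.0.0.0", server_port=7860).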