Update app.py
app.py CHANGED
@@ -2,11 +2,11 @@ import gradio as gr
 import tensorflow as tf
 import numpy as np
 import nltk
+import pickle
 from nltk.corpus import stopwords
 from nltk.tokenize import word_tokenize
 from nltk.stem import WordNetLemmatizer
 from tensorflow.keras.preprocessing.sequence import pad_sequences
-from tensorflow.keras.preprocessing.text import Tokenizer
 import re
 
 # Load the model
@@ -51,12 +51,11 @@ max_url_length = 180
 max_html_length = 2000
 max_words = 10000
 
-
-
-
-
-
-html_tokenizer.fit_on_texts(["dummy"])
+# Load tokenizers
+with open('url_tokenizer.pkl', 'rb') as f:
+    url_tokenizer = pickle.load(f)
+with open('html_tokenizer.pkl', 'rb') as f:
+    html_tokenizer = pickle.load(f)
 
 def preprocess_input(input_text, tokenizer, max_length):
     sequences = tokenizer.texts_to_sequences([input_text])
@@ -80,9 +79,9 @@ def get_prediction(input_text, input_type):
 def phishing_detection(input_text, input_type):
     prediction = get_prediction(input_text, input_type)
     if prediction > 0.7:
-        return f"Warning: This site is likely a phishing site!"
+        return f"Warning: This site is likely a phishing site! ({prediction:.2f})"
     else:
-        return f"Safe: This site is not likely a phishing site."
+        return f"Safe: This site is not likely a phishing site. ({prediction:.2f})"
 
 iface = gr.Interface(
     fn=phishing_detection,