13nishit committed on
Commit
418f62c
·
verified ·
1 Parent(s): fc590d3

Upload 5 files

Browse files
Files changed (5) hide show
  1. app.py +62 -0
  2. letter_image.jpg +0 -0
  3. main.py +73 -0
  4. model.pkl +3 -0
  5. vectorizer.pkl +3 -0
app.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pickle
3
+ import string
4
+ import sklearn
5
+ import nltk
6
+
7
+
8
# Fetch the NLTK data packages this script relies on: the 'punkt'
# tokenizer data and the 'stopwords' corpus.
for _resource in ('punkt', 'stopwords'):
    nltk.download(_resource)
11
+
12
+ from nltk.corpus import stopwords
13
+ from nltk.stem.porter import PorterStemmer
14
+
15
# Module-level stemmer reused by every transform_text call.
ps = PorterStemmer()


def transform_text(text):
    """Normalise a raw message into the space-joined stems given to the vectorizer.

    Steps: lowercase the text, tokenize it with ``nltk.word_tokenize``, keep
    only alphanumeric tokens, drop English stop words, and Porter-stem what
    remains.

    Args:
        text: The raw message string.

    Returns:
        A single string of stemmed tokens separated by spaces (empty when no
        token survives the filters).
    """
    # Look the stop words up once and keep them in a set, so each token costs
    # one hash lookup instead of a fresh corpus read plus a list scan.
    stop_words = set(stopwords.words('english'))

    tokens = nltk.word_tokenize(text.lower())

    # isalnum() already rejects punctuation-only tokens, so no separate
    # string.punctuation test is needed after it.
    kept = [tok for tok in tokens if tok.isalnum() and tok not in stop_words]

    return " ".join(ps.stem(tok) for tok in kept)
39
+
40
# Load the pickled vectorizer and model from the working directory. The
# context managers close each file as soon as it has been read instead of
# leaving the handles open for the life of the process.
# NOTE: unpickling can execute arbitrary code -- only load trusted .pkl files.
with open('vectorizer.pkl', 'rb') as vectorizer_file:
    tfidf = pickle.load(vectorizer_file)

with open('model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)
42
+
43
st.title("Email/SMS Spam Classifier")

input_sms = st.text_input("Enter the message")

# Run the pipeline only on the script run triggered by the button click.
if st.button('Predict'):
    # Preprocess the message, vectorize it, then classify that single sample.
    cleaned = transform_text(input_sms)
    features = tfidf.transform([cleaned])
    label = model.predict(features)[0]

    # Label 1 is displayed as spam; any other label as not spam.
    st.header("SPAM" if label == 1 else "NOT SPAM")
60
+
61
+
62
+
letter_image.jpg ADDED
main.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, render_template, request
2
+ import pickle
3
+ import string
4
+ import nltk
5
+ from nltk.corpus import stopwords
6
+ from nltk.stem.porter import PorterStemmer
7
+
8
app = Flask(__name__)

# Fetch the NLTK data packages this module relies on: the 'punkt'
# tokenizer data and the 'stopwords' corpus.
for _resource in ('punkt', 'stopwords'):
    nltk.download(_resource)
13
+
14
# Module-level stemmer reused by every transform_text call.
ps = PorterStemmer()


def transform_text(text):
    """Normalise a raw message into the space-joined stems given to the vectorizer.

    Steps: lowercase the text, tokenize it with ``nltk.word_tokenize``, keep
    only alphanumeric tokens, drop English stop words, and Porter-stem what
    remains.

    Args:
        text: The raw message string.

    Returns:
        A single string of stemmed tokens separated by spaces (empty when no
        token survives the filters).
    """
    # Look the stop words up once and keep them in a set, so each token costs
    # one hash lookup instead of a fresh corpus read plus a list scan.
    stop_words = set(stopwords.words('english'))

    tokens = nltk.word_tokenize(text.lower())

    # isalnum() already rejects punctuation-only tokens, so no separate
    # string.punctuation test is needed after it.
    kept = [tok for tok in tokens if tok.isalnum() and tok not in stop_words]

    return " ".join(ps.stem(tok) for tok in kept)
38
+
39
def _load_pickle(path):
    """Return the object unpickled from the file at *path*."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# The TF-IDF vectorizer and the model are read once, at import time.
tfidf = _load_pickle('vectorizer.pkl')
model = _load_pickle('model.pkl')
45
+
46
@app.route('/')
def index():
    """Render the ``index.html`` template for the site root."""
    page = render_template('index.html')
    return page
49
+
50
@app.route('/predict', methods=['POST'])
def predict():
    """Classify the submitted message and render the verdict.

    Reads the ``sms`` form field, preprocesses it with ``transform_text``,
    vectorizes it with ``tfidf`` and asks ``model`` for a label.

    Returns:
        The rendered ``result.html`` template, with ``result`` set to
        ``"SPAM"`` when the predicted label is 1 and ``"NOT SPAM"`` otherwise.
    """
    # The route is registered for POST only, so no request.method check is
    # needed and every path through the view returns a response.
    input_sms = request.form['sms']

    # Preprocess, vectorize, then classify the single message.
    transform_sms = transform_text(input_sms)
    vector_input = tfidf.transform([transform_sms])
    result = model.predict(vector_input)[0]

    result_text = "SPAM" if result == 1 else "NOT SPAM"
    return render_template('result.html', result=result_text)
71
+
72
if __name__ == '__main__':
    import os

    # Debug mode turns on the interactive Werkzeug debugger, which allows
    # code execution from the browser, so it is opt-in through the
    # FLASK_DEBUG environment variable rather than always enabled.
    debug_flag = os.environ.get('FLASK_DEBUG', '')
    app.run(debug=debug_flag.lower() in {'1', 'true', 'yes'})
model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d353a616171e314953eabebc9a78df13bb413ce897405b9a2f75bf66628f6b88
3
+ size 96613
vectorizer.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e72be2ef2426d68ec215d4c53863d551f808d70afed6a7d168c70abd3052809
3
+ size 181743