Tochile committed on
Commit
66830f7
·
1 Parent(s): 05254ba

Deploy Flask ML app to offensive space

Browse files
Files changed (1) hide show
  1. classifier.ipynb +0 -1609
classifier.ipynb DELETED
@@ -1,1609 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 1,
6
- "metadata": {},
7
- "outputs": [],
8
- "source": [
9
- "import pandas as pd\n",
10
- "import numpy as np"
11
- ]
12
- },
13
- {
14
- "cell_type": "code",
15
- "execution_count": 2,
16
- "metadata": {},
17
- "outputs": [],
18
- "source": [
19
- "from sklearn.feature_extraction.text import CountVectorizer\n",
20
- "from sklearn.feature_extraction.text import TfidfVectorizer\n"
21
- ]
22
- },
23
- {
24
- "cell_type": "code",
25
- "execution_count": 6,
26
- "metadata": {},
27
- "outputs": [],
28
- "source": [
29
- "import matplotlib.pyplot as plt\n",
30
- "import seaborn as sns"
31
- ]
32
- },
33
- {
34
- "cell_type": "code",
35
- "execution_count": 20,
36
- "metadata": {},
37
- "outputs": [],
38
- "source": [
39
- "df= pd.read_csv(\"data.csv\")"
40
- ]
41
- },
42
- {
43
- "cell_type": "code",
44
- "execution_count": 21,
45
- "metadata": {},
46
- "outputs": [
47
- {
48
- "data": {
49
- "text/html": [
50
- "<div>\n",
51
- "<style scoped>\n",
52
- " .dataframe tbody tr th:only-of-type {\n",
53
- " vertical-align: middle;\n",
54
- " }\n",
55
- "\n",
56
- " .dataframe tbody tr th {\n",
57
- " vertical-align: top;\n",
58
- " }\n",
59
- "\n",
60
- " .dataframe thead th {\n",
61
- " text-align: right;\n",
62
- " }\n",
63
- "</style>\n",
64
- "<table border=\"1\" class=\"dataframe\">\n",
65
- " <thead>\n",
66
- " <tr style=\"text-align: right;\">\n",
67
- " <th></th>\n",
68
- " <th>class</th>\n",
69
- " <th>comments</th>\n",
70
- " </tr>\n",
71
- " </thead>\n",
72
- " <tbody>\n",
73
- " <tr>\n",
74
- " <th>0</th>\n",
75
- " <td>1</td>\n",
76
- " <td>you are sick upstairs</td>\n",
77
- " </tr>\n",
78
- " <tr>\n",
79
- " <th>1</th>\n",
80
- " <td>0</td>\n",
81
- " <td>i really dont understand your pointxa0 it seem...</td>\n",
82
- " </tr>\n",
83
- " <tr>\n",
84
- " <th>2</th>\n",
85
- " <td>0</td>\n",
86
- " <td>axcxa0majority of canadians can and has been w...</td>\n",
87
- " </tr>\n",
88
- " <tr>\n",
89
- " <th>3</th>\n",
90
- " <td>0</td>\n",
91
- " <td>sdl ok but i would hope theyd sign him to a on...</td>\n",
92
- " </tr>\n",
93
- " <tr>\n",
94
- " <th>4</th>\n",
95
- " <td>0</td>\n",
96
- " <td>yeah and where are you now</td>\n",
97
- " </tr>\n",
98
- " </tbody>\n",
99
- "</table>\n",
100
- "</div>"
101
- ],
102
- "text/plain": [
103
- " class comments\n",
104
- "0 1 you are sick upstairs\n",
105
- "1 0 i really dont understand your pointxa0 it seem...\n",
106
- "2 0 axcxa0majority of canadians can and has been w...\n",
107
- "3 0 sdl ok but i would hope theyd sign him to a on...\n",
108
- "4 0 yeah and where are you now"
109
- ]
110
- },
111
- "execution_count": 21,
112
- "metadata": {},
113
- "output_type": "execute_result"
114
- }
115
- ],
116
- "source": [
117
- "df.head()"
118
- ]
119
- },
120
- {
121
- "cell_type": "code",
122
- "execution_count": 22,
123
- "metadata": {},
124
- "outputs": [
125
- {
126
- "data": {
127
- "text/plain": [
128
- "8744"
129
- ]
130
- },
131
- "execution_count": 22,
132
- "metadata": {},
133
- "output_type": "execute_result"
134
- }
135
- ],
136
- "source": [
137
- "df.size"
138
- ]
139
- },
140
- {
141
- "cell_type": "code",
142
- "execution_count": 23,
143
- "metadata": {},
144
- "outputs": [
145
- {
146
- "data": {
147
- "text/plain": [
148
- "Index(['class', 'comments'], dtype='object')"
149
- ]
150
- },
151
- "execution_count": 23,
152
- "metadata": {},
153
- "output_type": "execute_result"
154
- }
155
- ],
156
- "source": [
157
- "df.columns"
158
- ]
159
- },
160
- {
161
- "cell_type": "code",
162
- "execution_count": 24,
163
- "metadata": {},
164
- "outputs": [
165
- {
166
- "data": {
167
- "text/plain": [
168
- "class int64\n",
169
- "comments object\n",
170
- "dtype: object"
171
- ]
172
- },
173
- "execution_count": 24,
174
- "metadata": {},
175
- "output_type": "execute_result"
176
- }
177
- ],
178
- "source": [
179
- "#checking for datatypes\n",
180
- "df.dtypes"
181
- ]
182
- },
183
- {
184
- "cell_type": "code",
185
- "execution_count": 25,
186
- "metadata": {},
187
- "outputs": [
188
- {
189
- "data": {
190
- "text/plain": [
191
- "class 0\n",
192
- "comments 0\n",
193
- "dtype: int64"
194
- ]
195
- },
196
- "execution_count": 25,
197
- "metadata": {},
198
- "output_type": "execute_result"
199
- }
200
- ],
201
- "source": [
202
- "#checking for missing nan\n",
203
- "df.isnull().isnull().sum()"
204
- ]
205
- },
206
- {
207
- "cell_type": "code",
208
- "execution_count": 26,
209
- "metadata": {},
210
- "outputs": [],
211
- "source": [
212
- "df_data = df[[\"class\", \"comments\"]]"
213
- ]
214
- },
215
- {
216
- "cell_type": "code",
217
- "execution_count": 27,
218
- "metadata": {},
219
- "outputs": [
220
- {
221
- "data": {
222
- "text/plain": [
223
- "Index(['class', 'comments'], dtype='object')"
224
- ]
225
- },
226
- "execution_count": 27,
227
- "metadata": {},
228
- "output_type": "execute_result"
229
- }
230
- ],
231
- "source": [
232
- "df_data.columns"
233
- ]
234
- },
235
- {
236
- "cell_type": "code",
237
- "execution_count": 28,
238
- "metadata": {},
239
- "outputs": [],
240
- "source": [
241
- "df_x = df_data[\"comments\"]\n",
242
- "df_y = df_data[\"class\"]"
243
- ]
244
- },
245
- {
246
- "cell_type": "code",
247
- "execution_count": 30,
248
- "metadata": {},
249
- "outputs": [],
250
- "source": [
251
- "#Feature Extraction from Text\n",
252
- "\n",
253
- "cv = CountVectorizer()\n",
254
- "ex = cv.fit_transform([\"Hello baby gitl\", \"My Nigga wetin dey\"])"
255
- ]
256
- },
257
- {
258
- "cell_type": "code",
259
- "execution_count": 31,
260
- "metadata": {},
261
- "outputs": [
262
- {
263
- "data": {
264
- "text/plain": [
265
- "array([[1, 0, 1, 1, 0, 0, 0],\n",
266
- " [0, 1, 0, 0, 1, 1, 1]], dtype=int64)"
267
- ]
268
- },
269
- "execution_count": 31,
270
- "metadata": {},
271
- "output_type": "execute_result"
272
- }
273
- ],
274
- "source": [
275
- "ex.toarray()"
276
- ]
277
- },
278
- {
279
- "cell_type": "code",
280
- "execution_count": 32,
281
- "metadata": {},
282
- "outputs": [
283
- {
284
- "data": {
285
- "text/plain": [
286
- "['baby', 'dey', 'gitl', 'hello', 'my', 'nigga', 'wetin']"
287
- ]
288
- },
289
- "execution_count": 32,
290
- "metadata": {},
291
- "output_type": "execute_result"
292
- }
293
- ],
294
- "source": [
295
- "cv.get_feature_names()"
296
- ]
297
- },
298
- {
299
- "cell_type": "code",
300
- "execution_count": 34,
301
- "metadata": {},
302
- "outputs": [],
303
- "source": [
304
- "corpus = df_x\n",
305
- "cv = CountVectorizer()\n",
306
- "X = cv.fit_transform(corpus)"
307
- ]
308
- },
309
- {
310
- "cell_type": "code",
311
- "execution_count": 35,
312
- "metadata": {},
313
- "outputs": [
314
- {
315
- "data": {
316
- "text/plain": [
317
- "array([[0, 0, 0, ..., 0, 0, 0],\n",
318
- " [0, 0, 0, ..., 0, 0, 0],\n",
319
- " [0, 0, 0, ..., 0, 0, 0],\n",
320
- " ...,\n",
321
- " [0, 0, 0, ..., 0, 0, 0],\n",
322
- " [0, 0, 0, ..., 0, 0, 0],\n",
323
- " [0, 0, 0, ..., 0, 0, 0]], dtype=int64)"
324
- ]
325
- },
326
- "execution_count": 35,
327
- "metadata": {},
328
- "output_type": "execute_result"
329
- }
330
- ],
331
- "source": [
332
- "X.toarray()"
333
- ]
334
- },
335
- {
336
- "cell_type": "code",
337
- "execution_count": 36,
338
- "metadata": {
339
- "collapsed": true
340
- },
341
- "outputs": [
342
- {
343
- "data": {
344
- "text/plain": [
345
- "['00',\n",
346
- " '000',\n",
347
- " '0000',\n",
348
- " '00000',\n",
349
- " '000000',\n",
350
- " '0000harrison',\n",
351
- " '0001nobama',\n",
352
- " '000nnjfeigelsondebevoisecomnnnnnnall',\n",
353
- " '000sxcxa0',\n",
354
- " '001',\n",
355
- " '0011',\n",
356
- " '00gallonxcxa0',\n",
357
- " '00nnemmanueltouheythehillcomnnnnand',\n",
358
- " '00nnthis',\n",
359
- " '00xa0',\n",
360
- " '00xa0islam',\n",
361
- " '01',\n",
362
- " '010',\n",
363
- " '010nndid',\n",
364
- " '010nnthe',\n",
365
- " '010xcxa0maxine',\n",
366
- " '011',\n",
367
- " '011nnthe',\n",
368
- " '01it',\n",
369
- " '01nnnndear',\n",
370
- " '01nthe',\n",
371
- " '01skeeter',\n",
372
- " '0bama',\n",
373
- " '0bstructionism',\n",
374
- " '0f',\n",
375
- " '0fps',\n",
376
- " '0k',\n",
377
- " '0kg',\n",
378
- " '0ld',\n",
379
- " '0nndallas',\n",
380
- " '0nokc',\n",
381
- " '0nsee',\n",
382
- " '0px',\n",
383
- " '0pxparam',\n",
384
- " '0s',\n",
385
- " '0th',\n",
386
- " '0thcentury',\n",
387
- " '0you',\n",
388
- " '10',\n",
389
- " '100',\n",
390
- " '1000',\n",
391
- " '10000',\n",
392
- " '100000',\n",
393
- " '100k',\n",
394
- " '100nthey',\n",
395
- " '100st',\n",
396
- " '100sxa0',\n",
397
- " '101',\n",
398
- " '1011',\n",
399
- " '10110',\n",
400
- " '10k',\n",
401
- " '10lbs',\n",
402
- " '10mp',\n",
403
- " '10nceltics',\n",
404
- " '10nnwhat',\n",
405
- " '10pm',\n",
406
- " '10s',\n",
407
- " '10snnn',\n",
408
- " '10th',\n",
409
- " '10x',\n",
410
- " '10xa0',\n",
411
- " '10xa0ncountries',\n",
412
- " '11',\n",
413
- " '11010',\n",
414
- " '110111',\n",
415
- " '11despite',\n",
416
- " '11nspurs',\n",
417
- " '11th',\n",
418
- " '1500',\n",
419
- " '16',\n",
420
- " '17',\n",
421
- " '18',\n",
422
- " '1976',\n",
423
- " '19800',\n",
424
- " '1am',\n",
425
- " '1bpblogspotcomyvszmvvirwtmokfaugmiaaaaaaaaa0knllaahknniss100kill',\n",
426
- " '1cm',\n",
427
- " '1htmlxcxa0nnfor',\n",
428
- " '1kmonth',\n",
429
- " '1m',\n",
430
- " '1mgdl',\n",
431
- " '1mm',\n",
432
- " '1nbarney',\n",
433
- " '1nbobcats',\n",
434
- " '1nncorporations',\n",
435
- " '1nnndeniece',\n",
436
- " '1nnpar',\n",
437
- " '1nnso',\n",
438
- " '1paterno',\n",
439
- " '1st',\n",
440
- " '1th',\n",
441
- " '1when',\n",
442
- " '1xa0thought',\n",
443
- " '1xa0xa0',\n",
444
- " '1xcxa0frederick',\n",
445
- " '1yearold',\n",
446
- " '1yo',\n",
447
- " '1yr',\n",
448
- " '2017',\n",
449
- " '2m',\n",
450
- " '30',\n",
451
- " '30k',\n",
452
- " '30th',\n",
453
- " '4th',\n",
454
- " '504',\n",
455
- " '50k',\n",
456
- " '59',\n",
457
- " '5k',\n",
458
- " '70',\n",
459
- " '86',\n",
460
- " '900pm',\n",
461
- " '9ice',\n",
462
- " '9ja',\n",
463
- " 'a1',\n",
464
- " 'aaaaaaand',\n",
465
- " 'aaaah',\n",
466
- " 'aaahhh',\n",
467
- " 'aac',\n",
468
- " 'aamir',\n",
469
- " 'aap',\n",
470
- " 'aarongmyers',\n",
471
- " 'ab',\n",
472
- " 'abacha',\n",
473
- " 'abandon',\n",
474
- " 'abandoned',\n",
475
- " 'abandonment',\n",
476
- " 'abc',\n",
477
- " 'abe',\n",
478
- " 'abeds',\n",
479
- " 'abeg',\n",
480
- " 'abel',\n",
481
- " 'aberdeen',\n",
482
- " 'abetted',\n",
483
- " 'abey',\n",
484
- " 'abi',\n",
485
- " 'abide',\n",
486
- " 'abiding',\n",
487
- " 'abigail',\n",
488
- " 'abilities',\n",
489
- " 'ability',\n",
490
- " 'abilitya',\n",
491
- " 'abilityn',\n",
492
- " 'abit',\n",
493
- " 'able',\n",
494
- " 'abnormal',\n",
495
- " 'abolish',\n",
496
- " 'abomination',\n",
497
- " 'abominations',\n",
498
- " 'abort',\n",
499
- " 'aborted',\n",
500
- " 'abortifacients',\n",
501
- " 'abortion',\n",
502
- " 'abortionneven',\n",
503
- " 'abortions',\n",
504
- " 'abortionsnnalso',\n",
505
- " 'about',\n",
506
- " 'aboutn',\n",
507
- " 'aboutnot',\n",
508
- " 'aboutxa0u0u0u0au0u0a',\n",
509
- " 'above',\n",
510
- " 'abraham',\n",
511
- " 'abroad',\n",
512
- " 'abroadxcxa0nfacts',\n",
513
- " 'abruptly',\n",
514
- " 'abs',\n",
515
- " 'abs0lutely',\n",
516
- " 'abscam',\n",
517
- " 'absence',\n",
518
- " 'absolute',\n",
519
- " 'absolutejoke',\n",
520
- " 'absolutely',\n",
521
- " 'abstaining',\n",
522
- " 'absurd',\n",
523
- " 'absurdly',\n",
524
- " 'absurdumnatt',\n",
525
- " 'absurt',\n",
526
- " 'abundantly',\n",
527
- " 'aburrido',\n",
528
- " 'abuse',\n",
529
- " 'abused',\n",
530
- " 'abuses',\n",
531
- " 'abusing',\n",
532
- " 'abusive',\n",
533
- " 'abxcxa0',\n",
534
- " 'abxxv',\n",
535
- " 'abysmal',\n",
536
- " 'ac',\n",
537
- " 'academic',\n",
538
- " 'acc',\n",
539
- " 'accentblacks',\n",
540
- " 'accept',\n",
541
- " 'acceptable',\n",
542
- " 'accepted',\n",
543
- " 'accepting',\n",
544
- " 'access',\n",
545
- " 'accessible',\n",
546
- " 'accessories',\n",
547
- " 'accident',\n",
548
- " 'accidental',\n",
549
- " 'accidentally',\n",
550
- " 'accidents',\n",
551
- " 'accolades',\n",
552
- " 'accomplish',\n",
553
- " 'accomplished',\n",
554
- " 'accomplishednnyou',\n",
555
- " 'accomplishment',\n",
556
- " 'accomplishments',\n",
557
- " 'accordance',\n",
558
- " 'according',\n",
559
- " 'account',\n",
560
- " 'accountability',\n",
561
- " 'accountable',\n",
562
- " 'accountsnjust',\n",
563
- " 'accra',\n",
564
- " 'accumulate',\n",
565
- " 'accurate',\n",
566
- " 'accurately',\n",
567
- " 'accusation',\n",
568
- " 'accuse',\n",
569
- " 'accused',\n",
570
- " 'accuser',\n",
571
- " 'accusing',\n",
572
- " 'accustomed',\n",
573
- " 'ace',\n",
574
- " 'acha',\n",
575
- " 'acheson',\n",
576
- " 'achieve',\n",
577
- " 'achievement',\n",
578
- " 'achievements',\n",
579
- " 'achievementsbut',\n",
580
- " 'achieving',\n",
581
- " 'acid',\n",
582
- " 'aciddragonxa0xa0rourke',\n",
583
- " 'ackbar',\n",
584
- " 'acknowledge',\n",
585
- " 'acknowledged',\n",
586
- " 'acknowledging',\n",
587
- " 'acl',\n",
588
- " 'acordxe',\n",
589
- " 'acquire',\n",
590
- " 'acquired',\n",
591
- " 'acquiring',\n",
592
- " 'acquitted',\n",
593
- " 'acres',\n",
594
- " 'across',\n",
595
- " 'act',\n",
596
- " 'acted',\n",
597
- " 'acting',\n",
598
- " 'actio',\n",
599
- " 'action',\n",
600
- " 'actions',\n",
601
- " 'actitud',\n",
602
- " 'active',\n",
603
- " 'actively',\n",
604
- " 'activists',\n",
605
- " 'activistsxcxa0xcxa0',\n",
606
- " 'activities',\n",
607
- " 'activity',\n",
608
- " 'actor',\n",
609
- " 'actorif',\n",
610
- " 'actress',\n",
611
- " 'acts',\n",
612
- " 'actsxcxa0hasnt',\n",
613
- " 'actual',\n",
614
- " 'actuality',\n",
615
- " 'actually',\n",
616
- " 'actuallyxa0',\n",
617
- " 'actxa0',\n",
618
- " 'acute',\n",
619
- " 'ad',\n",
620
- " 'ada',\n",
621
- " 'adage',\n",
622
- " 'adam',\n",
623
- " 'adam1',\n",
624
- " 'adamant',\n",
625
- " 'adamawa',\n",
626
- " 'adamomarsxa0xa0dieofnvxa0the',\n",
627
- " 'adapt',\n",
628
- " 'adapted',\n",
629
- " 'adaptible',\n",
630
- " 'add',\n",
631
- " 'added',\n",
632
- " 'addict',\n",
633
- " 'addicted',\n",
634
- " 'addiction',\n",
635
- " 'addicts',\n",
636
- " 'adding',\n",
637
- " 'addition',\n",
638
- " 'additional',\n",
639
- " 'additionally',\n",
640
- " 'address',\n",
641
- " 'addresses',\n",
642
- " 'addressing',\n",
643
- " 'addressstop',\n",
644
- " 'adds',\n",
645
- " 'addy',\n",
646
- " 'adele',\n",
647
- " 'adept',\n",
648
- " 'adhere',\n",
649
- " 'adherents',\n",
650
- " 'adhocits',\n",
651
- " 'adicted',\n",
652
- " 'adjust',\n",
653
- " 'admin',\n",
654
- " 'administration',\n",
655
- " 'administrationnto',\n",
656
- " 'administrationnwould',\n",
657
- " 'administrations',\n",
658
- " 'administrative',\n",
659
- " 'administratorxa0',\n",
660
- " 'adminstration',\n",
661
- " 'admirably',\n",
662
- " 'admiral',\n",
663
- " 'admire',\n",
664
- " 'admiring',\n",
665
- " 'admit',\n",
666
- " 'admits',\n",
667
- " 'admitted',\n",
668
- " 'admittedly',\n",
669
- " 'admitting',\n",
670
- " 'adolescent',\n",
671
- " 'adolf',\n",
672
- " 'adopt',\n",
673
- " 'adopted',\n",
674
- " 'adopting',\n",
675
- " 'adoption',\n",
676
- " 'adoptions',\n",
677
- " 'adorable',\n",
678
- " 'adult',\n",
679
- " 'adulterer',\n",
680
- " 'adulterers',\n",
681
- " 'adultery',\n",
682
- " 'adults',\n",
683
- " 'adultsxcxa0',\n",
684
- " 'adultsxcxa0xcxa0most',\n",
685
- " 'adumbas',\n",
686
- " 'advancement',\n",
687
- " 'advancements',\n",
688
- " 'advantage',\n",
689
- " 'adverise',\n",
690
- " 'adversaries',\n",
691
- " 'adversary',\n",
692
- " 'advertising',\n",
693
- " 'advice',\n",
694
- " 'advicenand',\n",
695
- " 'advised',\n",
696
- " 'adviser',\n",
697
- " 'advisers',\n",
698
- " 'advisor',\n",
699
- " 'advocate',\n",
700
- " 'advocates',\n",
701
- " 'advocating',\n",
702
- " 'aes',\n",
703
- " 'afar',\n",
704
- " 'affair',\n",
705
- " 'affairs',\n",
706
- " 'affairsnnextremist',\n",
707
- " 'affect',\n",
708
- " 'affected',\n",
709
- " 'affects',\n",
710
- " 'afffects',\n",
711
- " 'affiliate',\n",
712
- " 'affiliation',\n",
713
- " 'affirming',\n",
714
- " 'affixed',\n",
715
- " 'affluent',\n",
716
- " 'affod',\n",
717
- " 'afford',\n",
718
- " 'afforded',\n",
719
- " 'affraid',\n",
720
- " 'affront',\n",
721
- " 'afgans',\n",
722
- " 'afghan',\n",
723
- " 'afghanistan',\n",
724
- " 'afiliados',\n",
725
- " 'afixing',\n",
726
- " 'afraid',\n",
727
- " 'africa',\n",
728
- " 'africaand',\n",
729
- " 'africamore',\n",
730
- " 'african',\n",
731
- " 'africans',\n",
732
- " 'africansnslavery',\n",
733
- " 'africayou',\n",
734
- " 'afrika',\n",
735
- " 'afroamerican',\n",
736
- " 'after',\n",
737
- " 'afterall',\n",
738
- " 'aftermath',\n",
739
- " 'afternoonthat',\n",
740
- " 'afterward',\n",
741
- " 'afterwardsnn',\n",
742
- " 'afterwardsnninstead',\n",
743
- " 'afucking',\n",
744
- " 'afvet11nnim',\n",
745
- " 'ag',\n",
746
- " 'again',\n",
747
- " 'again1',\n",
748
- " 'againever',\n",
749
- " 'againhe',\n",
750
- " 'againnif',\n",
751
- " 'againnn',\n",
752
- " 'againnn1',\n",
753
- " 'againnnand',\n",
754
- " 'againnnmeanwhile',\n",
755
- " 'againnnunless',\n",
756
- " 'againnnyou',\n",
757
- " 'agains',\n",
758
- " 'against',\n",
759
- " 'againstnthe',\n",
760
- " 'againtonyretardo',\n",
761
- " 'againxa0xa0',\n",
762
- " 'agbaya',\n",
763
- " 'age',\n",
764
- " 'aged',\n",
765
- " 'agencies',\n",
766
- " 'agency',\n",
767
- " 'agenda',\n",
768
- " 'agent',\n",
769
- " 'agents',\n",
770
- " 'ages',\n",
771
- " 'aggie',\n",
772
- " 'aggressive',\n",
773
- " 'aginst',\n",
774
- " 'agism',\n",
775
- " 'ago',\n",
776
- " 'agoxcxa0',\n",
777
- " 'agree',\n",
778
- " 'agreed',\n",
779
- " 'agreeing',\n",
780
- " 'agreement',\n",
781
- " 'agrees',\n",
782
- " 'agriculture',\n",
783
- " 'aguycommenting',\n",
784
- " 'agxcxa0',\n",
785
- " 'ah',\n",
786
- " 'ahahah',\n",
787
- " 'ahahahahah',\n",
788
- " 'ahead',\n",
789
- " 'aheadnnq',\n",
790
- " 'ahn',\n",
791
- " 'ahole',\n",
792
- " 'aholennnot',\n",
793
- " 'aholexa0',\n",
794
- " 'ahr1xa0xa0',\n",
795
- " 'ahs',\n",
796
- " 'aid',\n",
797
- " 'aided',\n",
798
- " 'aids',\n",
799
- " 'ail',\n",
800
- " 'aimed',\n",
801
- " 'aimless',\n",
802
- " 'aint',\n",
803
- " 'aipac',\n",
804
- " 'air',\n",
805
- " 'airbad',\n",
806
- " 'airbrush',\n",
807
- " 'airchair',\n",
808
- " 'aircraft',\n",
809
- " 'airing',\n",
810
- " 'airmix',\n",
811
- " 'airplane',\n",
812
- " 'airport',\n",
813
- " 'airwaves',\n",
814
- " 'ajumoke',\n",
815
- " 'aka',\n",
816
- " 'akiinwale',\n",
817
- " 'akinwunmiambode',\n",
818
- " 'akklaim',\n",
819
- " 'akp',\n",
820
- " 'aks',\n",
821
- " 'aktuxe1lnu011bju011xedhokritickxehomoralizujxedcxedho',\n",
822
- " 'al',\n",
823
- " 'alabama',\n",
824
- " 'alain',\n",
825
- " 'alainironu',\n",
826
- " 'alakda',\n",
827
- " 'alan',\n",
828
- " 'alarm',\n",
829
- " 'alarming',\n",
830
- " 'alasthis',\n",
831
- " 'albaxf1il',\n",
832
- " 'album',\n",
833
- " 'albumnnfor',\n",
834
- " 'albums',\n",
835
- " 'alcohol',\n",
836
- " 'alcoholic',\n",
837
- " 'alcohols',\n",
838
- " 'aldasity',\n",
839
- " 'aldickweed',\n",
840
- " 'aldri',\n",
841
- " 'ale',\n",
842
- " 'alecs',\n",
843
- " 'aleging',\n",
844
- " 'aleisterhyde',\n",
845
- " 'alejate',\n",
846
- " 'alenlemone',\n",
847
- " 'alert',\n",
848
- " 'alessandra',\n",
849
- " 'alex',\n",
850
- " 'alexander',\n",
851
- " 'alexis',\n",
852
- " 'ali',\n",
853
- " 'aliberal',\n",
854
- " 'alice',\n",
855
- " 'alien',\n",
856
- " 'alienated',\n",
857
- " 'alienates',\n",
858
- " 'alienating',\n",
859
- " 'aliens',\n",
860
- " 'aliensnnyou',\n",
861
- " 'aliensxa0to',\n",
862
- " 'alike',\n",
863
- " 'alikexa0',\n",
864
- " 'alinsky',\n",
865
- " 'alisterdognwhat',\n",
866
- " 'alive',\n",
867
- " 'alivennits',\n",
868
- " 'alivexa0',\n",
869
- " 'alkohol',\n",
870
- " 'all',\n",
871
- " 'alla',\n",
872
- " 'allah',\n",
873
- " 'allahpunditnnvia',\n",
874
- " 'allan',\n",
875
- " 'allegations',\n",
876
- " 'alleged',\n",
877
- " 'allegedly',\n",
878
- " 'allegiance',\n",
879
- " 'allen',\n",
880
- " 'allergic',\n",
881
- " 'allergies',\n",
882
- " 'allergiesxa0nni',\n",
883
- " 'alley',\n",
884
- " 'allgaybryan',\n",
885
- " 'alliance',\n",
886
- " 'alliances',\n",
887
- " 'allies',\n",
888
- " 'alligator',\n",
889
- " 'allinit1',\n",
890
- " 'allkpop',\n",
891
- " 'alll',\n",
892
- " 'allmxenna',\n",
893
- " 'allnassessments',\n",
894
- " 'allnnthis',\n",
895
- " 'allnnyour',\n",
896
- " 'allnthat',\n",
897
- " 'allnthe',\n",
898
- " 'allocated',\n",
899
- " 'allot',\n",
900
- " 'allow',\n",
901
- " 'allowed',\n",
902
- " 'allowfullscreentrue',\n",
903
- " 'allowing',\n",
904
- " 'allows',\n",
905
- " 'allowscriptaccessalways',\n",
906
- " 'allpowerful',\n",
907
- " 'allpro',\n",
908
- " 'allra',\n",
909
- " 'alls',\n",
910
- " 'allstar',\n",
911
- " 'allt',\n",
912
- " 'allu0i',\n",
913
- " 'allways',\n",
914
- " 'allxa0',\n",
915
- " 'allxa0as',\n",
916
- " 'allxa0passed',\n",
917
- " 'allxcxa0',\n",
918
- " 'allxcxa0nnthe',\n",
919
- " 'ally',\n",
920
- " 'allyxa0',\n",
921
- " 'almajiris',\n",
922
- " 'almighty',\n",
923
- " 'almost',\n",
924
- " 'alone',\n",
925
- " 'alonebitch',\n",
926
- " 'alonehell',\n",
927
- " 'alonennand',\n",
928
- " 'alonexa0',\n",
929
- " 'along',\n",
930
- " 'alongshes',\n",
931
- " 'alot',\n",
932
- " 'alotnkeeps',\n",
933
- " 'already',\n",
934
- " 'alreadynkennedy',\n",
935
- " 'alreadyxa0',\n",
936
- " 'alrightnwell',\n",
937
- " 'also',\n",
938
- " 'alsoruns',\n",
939
- " 'alsoxa0',\n",
940
- " 'alsum',\n",
941
- " 'altar',\n",
942
- " 'alter',\n",
943
- " 'alterego',\n",
944
- " 'alternate',\n",
945
- " 'alternatives',\n",
946
- " 'although',\n",
947
- " 'altogether',\n",
948
- " 'alumni',\n",
949
- " 'always',\n",
950
- " 'am',\n",
951
- " 'ama',\n",
952
- " 'amala',\n",
953
- " 'amanda',\n",
954
- " 'amateur',\n",
955
- " 'amazaballs',\n",
956
- " 'amazed',\n",
957
- " 'amazement',\n",
958
- " 'amazing',\n",
959
- " 'amazzzzing',\n",
960
- " 'ambassadornand',\n",
961
- " 'amber',\n",
962
- " 'ambition',\n",
963
- " 'ambitious',\n",
964
- " 'amd',\n",
965
- " 'amdnnnnnyou',\n",
966
- " 'amen',\n",
967
- " 'amendment',\n",
968
- " 'amendments',\n",
969
- " 'amendmentxcxa0nnroman',\n",
970
- " 'amends',\n",
971
- " 'amerians',\n",
972
- " 'america',\n",
973
- " 'americacutting',\n",
974
- " 'american',\n",
975
- " 'american0',\n",
976
- " 'americannvan',\n",
977
- " 'americans',\n",
978
- " 'americansnnif',\n",
979
- " 'americansno',\n",
980
- " 'americansxa0xa0nlol',\n",
981
- " 'americansxcxa0',\n",
982
- " 'americanyoure',\n",
983
- " 'americas',\n",
984
- " 'americaxa0',\n",
985
- " 'americaxcxa0',\n",
986
- " 'americaxex0xdnxex0xcwhat',\n",
987
- " 'amerikan',\n",
988
- " 'amerikans',\n",
989
- " 'amid',\n",
990
- " 'amir',\n",
991
- " 'ammede',\n",
992
- " 'ammunition',\n",
993
- " 'ammunitionnas',\n",
994
- " 'amnesty',\n",
995
- " 'among',\n",
996
- " 'amoral',\n",
997
- " 'amount',\n",
998
- " 'amounted',\n",
999
- " 'amounts',\n",
1000
- " 'amous',\n",
1001
- " 'amp',\n",
1002
- " 'amso',\n",
1003
- " 'amused',\n",
1004
- " 'amusement',\n",
1005
- " 'amuses',\n",
1006
- " 'amxa0',\n",
1007
- " 'amy',\n",
1008
- " 'an',\n",
1009
- " 'anachronism',\n",
1010
- " 'anal',\n",
1011
- " 'analogy',\n",
1012
- " 'analy',\n",
1013
- " 'analysis',\n",
1014
- " 'analyst',\n",
1015
- " 'anaphylactic',\n",
1016
- " 'anathema',\n",
1017
- " 'anc',\n",
1018
- " 'ancelotti',\n",
1019
- " 'ancestors',\n",
1020
- " 'ancient',\n",
1021
- " 'and',\n",
1022
- " 'andato',\n",
1023
- " 'anderson',\n",
1024
- " 'andersons',\n",
1025
- " 'andf',\n",
1026
- " 'andfuckedup',\n",
1027
- " 'andi',\n",
1028
- " 'andignorant',\n",
1029
- " 'andino',\n",
1030
- " 'andnothers',\n",
1031
- " 'andntestify',\n",
1032
- " 'andor',\n",
1033
- " 'andrea',\n",
1034
- " 'andrew',\n",
1035
- " 'android',\n",
1036
- " 'andxa0',\n",
1037
- " 'andxa0surveillance',\n",
1038
- " 'andxcxa0a',\n",
1039
- " 'andxcxa0he',\n",
1040
- " 'andxcxa0medicare',\n",
1041
- " 'andxcxa0sink',\n",
1042
- " 'andxcxa0social',\n",
1043
- " 'andxcxa0that',\n",
1044
- " 'andy',\n",
1045
- " 'andyou',\n",
1046
- " 'andys',\n",
1047
- " 'anecdotal',\n",
1048
- " 'anelka',\n",
1049
- " 'anelkas',\n",
1050
- " 'anew',\n",
1051
- " 'anf',\n",
1052
- " 'angel',\n",
1053
- " 'angeles',\n",
1054
- " 'angelic',\n",
1055
- " 'angelnffs',\n",
1056
- " 'angelos',\n",
1057
- " 'angels',\n",
1058
- " 'anger',\n",
1059
- " 'angle',\n",
1060
- " 'anglo',\n",
1061
- " 'anglos',\n",
1062
- " 'angry',\n",
1063
- " 'angrys',\n",
1064
- " 'angsty',\n",
1065
- " 'anh',\n",
1066
- " 'ani',\n",
1067
- " 'animal',\n",
1068
- " 'animals',\n",
1069
- " 'animation',\n",
1070
- " 'anime',\n",
1071
- " 'animosity',\n",
1072
- " 'animoto',\n",
1073
- " 'animus',\n",
1074
- " 'annabelle',\n",
1075
- " 'annabortion',\n",
1076
- " 'annars',\n",
1077
- " 'annat',\n",
1078
- " 'annavaram',\n",
1079
- " 'annaxa0',\n",
1080
- " 'anndan',\n",
1081
- " 'announced',\n",
1082
- " 'announcement',\n",
1083
- " 'announcements',\n",
1084
- " 'announcers',\n",
1085
- " 'annoy',\n",
1086
- " 'annoyed',\n",
1087
- " 'annoying',\n",
1088
- " 'annoys',\n",
1089
- " 'annual',\n",
1090
- " 'annul',\n",
1091
- " 'ano',\n",
1092
- " 'anointing',\n",
1093
- " 'anology',\n",
1094
- " 'anon',\n",
1095
- " 'anonymous',\n",
1096
- " 'anonymouseorg',\n",
1097
- " 'anonymously',\n",
1098
- " 'another',\n",
1099
- " 'anothers',\n",
1100
- " 'anout',\n",
1101
- " 'ans',\n",
1102
- " 'anser',\n",
1103
- " 'answer',\n",
1104
- " 'answered',\n",
1105
- " 'answering',\n",
1106
- " 'answers',\n",
1107
- " 'ant',\n",
1108
- " 'antaga',\n",
1109
- " 'antagonize',\n",
1110
- " 'anthem',\n",
1111
- " 'anthems',\n",
1112
- " 'anthony',\n",
1113
- " 'anthropophagy',\n",
1114
- " 'anti',\n",
1115
- " 'antiabortion',\n",
1116
- " 'antiamerican',\n",
1117
- " 'anticipate',\n",
1118
- " 'anticipated',\n",
1119
- " 'anticsfirst',\n",
1120
- " 'antigay',\n",
1121
- " 'antiimmigrant',\n",
1122
- " 'antilib',\n",
1123
- " 'antisemit',\n",
1124
- " 'antisemitic',\n",
1125
- " 'antitolerance',\n",
1126
- " 'antiwar',\n",
1127
- " 'antlers',\n",
1128
- " 'ants',\n",
1129
- " 'antunnaru',\n",
1130
- " 'anuofia',\n",
1131
- " 'anus',\n",
1132
- " 'anxa0nexecutivexa0privilegenclaim',\n",
1133
- " 'any',\n",
1134
- " 'anybody',\n",
1135
- " 'anycockll',\n",
1136
- " 'anyday',\n",
1137
- " 'anyhow',\n",
1138
- " 'anymore',\n",
1139
- " 'anymorenncan',\n",
1140
- " 'anymorexcxa0',\n",
1141
- " 'anynbetter',\n",
1142
- " 'anynnational',\n",
1143
- " 'anyone',\n",
1144
- " 'anyoneits',\n",
1145
- " 'anyonenhad',\n",
1146
- " 'anyones',\n",
1147
- " 'anyonr',\n",
1148
- " 'anyplacen',\n",
1149
- " 'anything',\n",
1150
- " 'anythingnboth',\n",
1151
- " 'anythingnif',\n",
1152
- " 'anythingnn',\n",
1153
- " 'anytime',\n",
1154
- " 'anytimennget',\n",
1155
- " 'anyway',\n",
1156
- " 'anywayit',\n",
1157
- " 'anyways',\n",
1158
- " 'anywhere',\n",
1159
- " 'aok',\n",
1160
- " 'apa',\n",
1161
- " 'apart',\n",
1162
- " 'apartfrom',\n",
1163
- " 'apartment',\n",
1164
- " 'ape',\n",
1165
- " 'apegary',\n",
1166
- " 'apes',\n",
1167
- " 'aphrodisiacx',\n",
1168
- " 'apnay',\n",
1169
- " 'apni',\n",
1170
- " 'apo',\n",
1171
- " 'apologetic',\n",
1172
- " 'apologies',\n",
1173
- " 'apologise',\n",
1174
- " 'apologist',\n",
1175
- " 'apologists',\n",
1176
- " 'apologize',\n",
1177
- " 'apologized',\n",
1178
- " 'apologizing',\n",
1179
- " 'apology',\n",
1180
- " 'apoplectic',\n",
1181
- " 'apostasy',\n",
1182
- " 'app',\n",
1183
- " 'appalachians',\n",
1184
- " 'appalling',\n",
1185
- " 'apparent',\n",
1186
- " 'apparently',\n",
1187
- " 'appealing',\n",
1188
- " 'appear',\n",
1189
- " 'appearance',\n",
1190
- " 'appeared',\n",
1191
- " 'appearing',\n",
1192
- " 'appears',\n",
1193
- " 'appease',\n",
1194
- " 'appeasing',\n",
1195
- " 'applauseking',\n",
1196
- " 'apple',\n",
1197
- " 'applegadget',\n",
1198
- " 'apples',\n",
1199
- " 'applicable',\n",
1200
- " 'application',\n",
1201
- " 'applied',\n",
1202
- " 'applies',\n",
1203
- " 'apply',\n",
1204
- " 'appointed',\n",
1205
- " 'appoints',\n",
1206
- " 'appreciate',\n",
1207
- " 'appreciates',\n",
1208
- " 'appreciation',\n",
1209
- " 'apprentice',\n",
1210
- " 'approaching',\n",
1211
- " 'approachingxa0',\n",
1212
- " 'appropriate',\n",
1213
- " 'approvalnobama',\n",
1214
- " 'approve',\n",
1215
- " 'approved',\n",
1216
- " 'approvel',\n",
1217
- " 'approximately',\n",
1218
- " 'apps',\n",
1219
- " 'appt',\n",
1220
- " 'april',\n",
1221
- " 'aquino',\n",
1222
- " 'aquire',\n",
1223
- " 'aquiring',\n",
1224
- " 'ar',\n",
1225
- " 'arab',\n",
1226
- " 'arabia',\n",
1227
- " 'arabs',\n",
1228
- " 'aramis',\n",
1229
- " 'arar',\n",
1230
- " 'aratsass',\n",
1231
- " 'arbetas',\n",
1232
- " 'arbitrarynnthe',\n",
1233
- " 'arborist',\n",
1234
- " 'arborists',\n",
1235
- " 'arbystyle',\n",
1236
- " 'arc',\n",
1237
- " 'arcade',\n",
1238
- " 'archangel',\n",
1239
- " 'architecture',\n",
1240
- " 'archives',\n",
1241
- " 'are',\n",
1242
- " 'area',\n",
1243
- " 'arealconservatinntill',\n",
1244
- " 'areas',\n",
1245
- " 'arefunny',\n",
1246
- " 'arehope',\n",
1247
- " 'arena',\n",
1248
- " 'arenand',\n",
1249
- " 'arennnpussyblockquotennfuck',\n",
1250
- " 'arennso',\n",
1251
- " 'arennthe',\n",
1252
- " 'arenplease',\n",
1253
- " 'arent',\n",
1254
- " 'arenxex0xt',\n",
1255
- " 'areonly',\n",
1256
- " 'aressted',\n",
1257
- " 'arestupid',\n",
1258
- " 'areufeff',\n",
1259
- " 'arewere',\n",
1260
- " 'arexa0',\n",
1261
- " 'arexa0absolutelyxa0right',\n",
1262
- " 'arexa0always',\n",
1263
- " 'arexa0consequencesxa0for',\n",
1264
- " 'arexa0not',\n",
1265
- " 'arexa0puzzling',\n",
1266
- " 'arexa0xa0there',\n",
1267
- " 'arexcxa0',\n",
1268
- " 'argentina',\n",
1269
- " 'argentine',\n",
1270
- " 'argentinean',\n",
1271
- " 'argentines',\n",
1272
- " 'argentinians',\n",
1273
- " 'argie',\n",
1274
- " 'argies',\n",
1275
- " 'argo',\n",
1276
- " 'arguaments',\n",
1277
- " 'argue',\n",
1278
- " 'argued',\n",
1279
- " 'arguement',\n",
1280
- " 'arguements',\n",
1281
- " 'arguing',\n",
1282
- " 'argument',\n",
1283
- " 'arguments',\n",
1284
- " 'aries',\n",
1285
- " 'aristocracy',\n",
1286
- " 'aritculett',\n",
1287
- " 'arizona',\n",
1288
- " 'arkannnyou',\n",
1289
- " 'arlington',\n",
1290
- " 'arm',\n",
1291
- " 'armando',\n",
1292
- " 'armies',\n",
1293
- " 'armiesnnthe',\n",
1294
- " 'armpits',\n",
1295
- " 'arms',\n",
1296
- " 'army',\n",
1297
- " 'armyin',\n",
1298
- " 'arnt',\n",
1299
- " 'arod',\n",
1300
- " 'aroud',\n",
1301
- " 'around',\n",
1302
- " 'aroundnnso',\n",
1303
- " 'arrange',\n",
1304
- " 'arrangements',\n",
1305
- " 'arrangementsnntell',\n",
1306
- " 'arrest',\n",
1307
- " 'arrested',\n",
1308
- " 'arrestedndid',\n",
1309
- " 'arrests',\n",
1310
- " 'arrive',\n",
1311
- " 'arrived',\n",
1312
- " 'arriving',\n",
1313
- " 'arrnott',\n",
1314
- " 'arrogance',\n",
1315
- " 'arrogant',\n",
1316
- " 'arrogate',\n",
1317
- " 'ars',\n",
1318
- " 'arse',\n",
1319
- " 'arseanal',\n",
1320
- " 'arsehole',\n",
1321
- " 'arsenal',\n",
1322
- " 'arsenalnxa0ni',\n",
1323
- " 'arsene',\n",
1324
- " 'art',\n",
1325
- " 'article',\n",
1326
- " 'articlennin',\n",
1327
- " 'articles',\n",
1328
- " 'articlexa0bdonald',\n",
1329
- " 'articulate',\n",
1330
- " 'articulett',\n",
1331
- " 'artificial',\n",
1332
- " 'artist',\n",
1333
- " 'artists',\n",
1334
- " 'arufeffe',\n",
1335
- " 'arum',\n",
1336
- " 'as',\n",
1337
- " 'asad',\n",
1338
- " 'asami',\n",
1339
- " 'asap',\n",
1340
- " 'asdrubal',\n",
1341
- " 'ashamed',\n",
1342
- " 'ashewo',\n",
1343
- " 'ashley',\n",
1344
- " 'ashole',\n",
1345
- " ...]"
1346
- ]
1347
- },
1348
- "execution_count": 36,
1349
- "metadata": {},
1350
- "output_type": "execute_result"
1351
- }
1352
- ],
1353
- "source": [
1354
- "cv.get_feature_names()"
1355
- ]
1356
- },
1357
- {
1358
- "cell_type": "code",
1359
- "execution_count": 38,
1360
- "metadata": {},
1361
- "outputs": [],
1362
- "source": [
1363
- "# Model building\n",
1364
- "\n",
1365
- "from sklearn.model_selection import train_test_split"
1366
- ]
1367
- },
1368
- {
1369
- "cell_type": "code",
1370
- "execution_count": 39,
1371
- "metadata": {},
1372
- "outputs": [],
1373
- "source": [
1374
- "X_train, X_test, y_train, y_test = train_test_split(X, df_y, test_size=0.3, random_state=42)"
1375
- ]
1376
- },
1377
- {
1378
- "cell_type": "code",
1379
- "execution_count": 40,
1380
- "metadata": {},
1381
- "outputs": [
1382
- {
1383
- "data": {
1384
- "text/plain": [
1385
- "<3060x18923 sparse matrix of type '<class 'numpy.int64'>'\n",
1386
- "\twith 71327 stored elements in Compressed Sparse Row format>"
1387
- ]
1388
- },
1389
- "execution_count": 40,
1390
- "metadata": {},
1391
- "output_type": "execute_result"
1392
- }
1393
- ],
1394
- "source": [
1395
- "X_train"
1396
- ]
1397
- },
1398
- {
1399
- "cell_type": "code",
1400
- "execution_count": 46,
1401
- "metadata": {},
1402
- "outputs": [
1403
- {
1404
- "data": {
1405
- "text/plain": [
1406
- "0.8147865853658537"
1407
- ]
1408
- },
1409
- "execution_count": 46,
1410
- "metadata": {},
1411
- "output_type": "execute_result"
1412
- }
1413
- ],
1414
- "source": [
1415
- "# Logistic regression: ~81% accuracy on the held-out test split\n",
1416
- "\n",
1417
- "from sklearn.linear_model import LogisticRegression\n",
1418
- "clf = LogisticRegression()\n",
1419
- "clf.fit(X_train, y_train)\n",
1420
- "clf.score(X_test, y_test)"
1421
- ]
1422
- },
1423
- {
1424
- "cell_type": "code",
1425
- "execution_count": 47,
1426
- "metadata": {},
1427
- "outputs": [],
1428
- "source": [
1429
- "# #naive bayes 77%\n",
1430
- "\n",
1431
- "# from sklearn.naive_bayes import MultinomialNB\n",
1432
- "# clf = MultinomialNB()\n",
1433
- "# clf.fit(X_train, y_train)\n",
1434
- "# clf.score(X_test, y_test)"
1435
- ]
1436
- },
1437
- {
1438
- "cell_type": "code",
1439
- "execution_count": 48,
1440
- "metadata": {},
1441
- "outputs": [
1442
- {
1443
- "data": {
1444
- "text/plain": [
1445
- "array([1, 1, 0, ..., 0, 0, 0], dtype=int64)"
1446
- ]
1447
- },
1448
- "execution_count": 48,
1449
- "metadata": {},
1450
- "output_type": "execute_result"
1451
- }
1452
- ],
1453
- "source": [
1454
- "clf.predict(X_test)"
1455
- ]
1456
- },
1457
- {
1458
- "cell_type": "code",
1459
- "execution_count": 49,
1460
- "metadata": {},
1461
- "outputs": [],
1462
- "source": [
1463
- "#sample prediction\n",
1464
- "comment = ['Hello fine boy']\n",
1465
- "vect = cv.transform(comment).toarray() "
1466
- ]
1467
- },
1468
- {
1469
- "cell_type": "code",
1470
- "execution_count": 50,
1471
- "metadata": {},
1472
- "outputs": [
1473
- {
1474
- "data": {
1475
- "text/plain": [
1476
- "array([0], dtype=int64)"
1477
- ]
1478
- },
1479
- "execution_count": 50,
1480
- "metadata": {},
1481
- "output_type": "execute_result"
1482
- }
1483
- ],
1484
- "source": [
1485
- "clf.predict(vect)"
1486
- ]
1487
- },
1488
- {
1489
- "cell_type": "code",
1490
- "execution_count": 51,
1491
- "metadata": {},
1492
- "outputs": [
1493
- {
1494
- "name": "stdout",
1495
- "output_type": "stream",
1496
- "text": [
1497
- "Not Hate Speech\n"
1498
- ]
1499
- }
1500
- ],
1501
- "source": [
1502
- "if clf.predict(vect) == 1:\n",
1503
- " print(\"Hate Speech\")\n",
1504
- "else:\n",
1505
- " print(\"Not Hate Speech\")"
1506
- ]
1507
- },
1508
- {
1509
- "cell_type": "code",
1510
- "execution_count": 53,
1511
- "metadata": {},
1512
- "outputs": [
1513
- {
1514
- "data": {
1515
- "text/plain": [
1516
- "array([1], dtype=int64)"
1517
- ]
1518
- },
1519
- "execution_count": 53,
1520
- "metadata": {},
1521
- "output_type": "execute_result"
1522
- }
1523
- ],
1524
- "source": [
1525
- "#sample prediction 2\n",
1526
- "comment2 = ['fuck you nigga']\n",
1527
- "vect2 = cv.transform(comment2).toarray()\n",
1528
- "clf.predict(vect2)"
1529
- ]
1530
- },
1531
- {
1532
- "cell_type": "code",
1533
- "execution_count": 54,
1534
- "metadata": {},
1535
- "outputs": [],
1536
- "source": [
1537
- "import pickle"
1538
- ]
1539
- },
1540
- {
1541
- "cell_type": "code",
1542
- "execution_count": 55,
1543
- "metadata": {},
1544
- "outputs": [],
1545
- "source": [
1546
- "logisticML = open(\"myFinalModel.pkl\", \"wb\")"
1547
- ]
1548
- },
1549
- {
1550
- "cell_type": "code",
1551
- "execution_count": 56,
1552
- "metadata": {},
1553
- "outputs": [],
1554
- "source": [
1555
- "pickle.dump(clf, logisticML)"
1556
- ]
1557
- },
1558
- {
1559
- "cell_type": "code",
1560
- "execution_count": 57,
1561
- "metadata": {},
1562
- "outputs": [],
1563
- "source": [
1564
- "logisticML.close()"
1565
- ]
1566
- },
1567
- {
1568
- "cell_type": "code",
1569
- "execution_count": 58,
1570
- "metadata": {},
1571
- "outputs": [],
1572
- "source": [
1573
- "\n",
1574
- "\n",
1575
- "#load the model\n",
1576
- "my_model = open(\"myFinalModel.pkl\", \"rb\")\n",
1577
- "new_model = pickle.load(my_model)"
1578
- ]
1579
- },
1580
- {
1581
- "cell_type": "code",
1582
- "execution_count": null,
1583
- "metadata": {},
1584
- "outputs": [],
1585
- "source": []
1586
- }
1587
- ],
1588
- "metadata": {
1589
- "kernelspec": {
1590
- "display_name": "Python 3",
1591
- "language": "python",
1592
- "name": "python3"
1593
- },
1594
- "language_info": {
1595
- "codemirror_mode": {
1596
- "name": "ipython",
1597
- "version": 3
1598
- },
1599
- "file_extension": ".py",
1600
- "mimetype": "text/x-python",
1601
- "name": "python",
1602
- "nbconvert_exporter": "python",
1603
- "pygments_lexer": "ipython3",
1604
- "version": "3.6.8"
1605
- }
1606
- },
1607
- "nbformat": 4,
1608
- "nbformat_minor": 4
1609
- }