Upload 8 files
Browse files- README.md +9 -13
- app.py +30 -63
- requirements.txt +5 -1
- swahili.csv +117 -0
- swahili_dataset.csv +117 -0
- tokenizer.pickle +3 -0
- toxic.h5 +3 -0
- train.ipynb +240 -0
README.md
CHANGED
@@ -1,13 +1,9 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
license: unknown
|
11 |
-
---
|
12 |
-
|
13 |
-
An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
|
|
|
1 |
+
# Hate Speech Classification System
|
2 |
+
|
3 |
+
The model takes a text comment and returns a boolean (True/False) result for each of the following labels: toxic, severe_toxic, obscene threat, insult, identity_hate.
|
4 |
+
|
5 |
+
sdk: gradio
|
6 |
+
sdk_version: 3.36.1
|
7 |
+
app_file: app.py
|
8 |
+
|
9 |
+
Check out the configuration reference at <https://huggingface.co/docs/hub/spaces-config-reference>
|
|
|
|
|
|
|
|
app.py
CHANGED
@@ -1,63 +1,30 @@
|
|
1 |
-
import
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
):
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
messages,
|
32 |
-
max_tokens=max_tokens,
|
33 |
-
stream=True,
|
34 |
-
temperature=temperature,
|
35 |
-
top_p=top_p,
|
36 |
-
):
|
37 |
-
token = message.choices[0].delta.content
|
38 |
-
|
39 |
-
response += token
|
40 |
-
yield response
|
41 |
-
|
42 |
-
"""
|
43 |
-
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
|
44 |
-
"""
|
45 |
-
demo = gr.ChatInterface(
|
46 |
-
respond,
|
47 |
-
additional_inputs=[
|
48 |
-
gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
|
49 |
-
gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
|
50 |
-
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
|
51 |
-
gr.Slider(
|
52 |
-
minimum=0.1,
|
53 |
-
maximum=1.0,
|
54 |
-
value=0.95,
|
55 |
-
step=0.05,
|
56 |
-
label="Top-p (nucleus sampling)",
|
57 |
-
),
|
58 |
-
],
|
59 |
-
)
|
60 |
-
|
61 |
-
|
62 |
-
if __name__ == "__main__":
|
63 |
-
demo.launch()
|
|
|
1 |
+
import tensorflow as tf
|
2 |
+
import gradio as gr
|
3 |
+
from gradio.components import input
|
4 |
+
import pandas as pd
|
5 |
+
import pickle
|
6 |
+
from keras.utils import pad_sequences
|
7 |
+
|
8 |
+
max_len = 200
|
9 |
+
|
10 |
+
# Load the pickled tokenizer (text -> integer sequences) and the trained Keras model
|
11 |
+
with open('tokenizer.pickle', 'rb') as handle:
|
12 |
+
tokenizer = pickle.load(handle)
|
13 |
+
model = tf.keras.models.load_model('toxic.h5')
|
14 |
+
|
15 |
+
|
16 |
+
arr=["toxic","severe_toxic","obscene threat","insult","identity_hate"]
|
17 |
+
def score_comment(comment):
    """Classify one comment and return a per-label True/False report.

    The comment is converted to an integer sequence with the module-level
    ``tokenizer``, passed through ``pad_sequences(..., maxlen=max_len)`` and
    fed to ``model.predict``. Each label in ``arr`` is paired, in order, with
    the score at the same position of the first prediction row and reported
    as ``True`` when that score is greater than 0.5.

    Args:
        comment: Text to score, as submitted through the Gradio textbox.

    Returns:
        A string with one ``"<label>: <True|False>"`` line per label, each
        terminated by a newline.
    """
    # The tokenizer and the model both work on batches, so wrap the single
    # comment in a one-element list and read row 0 of the prediction.
    sequences = tokenizer.texts_to_sequences([comment])
    inp = pad_sequences(sequences, maxlen=max_len)
    scores = model.predict(inp)[0]

    # NOTE(review): labels are matched to model outputs purely by position;
    # confirm that the model's output order and width agree with `arr`.
    return ''.join(
        '{}: {}\n'.format(label, score > 0.5)
        for label, score in zip(arr, scores)
    )
|
25 |
+
|
26 |
+
# Single-textbox UI: the submitted comment is passed to `score_comment` and
# the returned report is displayed as plain text.
# `gr.Textbox` is used directly; the `gr.inputs.Textbox` namespace is
# deprecated in Gradio 3.x.
interface = gr.Interface(
    fn=score_comment,
    inputs=gr.Textbox(lines=2, placeholder='Comment to score'),
    outputs='text',
)

# Only start the server when this file is executed as a script, so importing
# the module (e.g. to reuse `score_comment`) does not launch the UI.
if __name__ == "__main__":
    # NOTE(review): share=True requests a public tunnel link; this is
    # presumably unnecessary when hosted on Hugging Face Spaces - confirm.
    interface.launch(debug=True, share=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
@@ -1 +1,5 @@
|
|
1 |
-
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio==3.36.1
|
2 |
+
keras==2.13.1
|
3 |
+
pandas==2.0.3
|
4 |
+
Pillow==10.0.0
|
5 |
+
tensorflow==2.13.0
|
swahili.csv
ADDED
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
comment,toxic,severe_toxic,obscene threat,insult,identity_hate
|
2 |
+
Huyu msee ni malaya,1,0,0,1,0
|
3 |
+
Zakayo ni mbwa,0,0,0,1,0
|
4 |
+
Chira ni shoga,1,1,0,1,0
|
5 |
+
Akinirushia mamdomo mi ntamuua,1,1,1,0,0
|
6 |
+
Haga sana,0,0,0,1,0
|
7 |
+
Kuma ya mamako,1,1,0,1,0
|
8 |
+
Wewe ni mjinga sana!,1,0,0,1,0
|
9 |
+
Nataka nikuchape vibaya!,0,1,0,0,0
|
10 |
+
Huo ni upuuzi mtupu!,1,0,0,0,0
|
11 |
+
Wewe ni takataka!,0,0,0,0,1
|
12 |
+
Nitakuua!,0,0,1,0,0
|
13 |
+
Unakera sana!,1,0,0,1,0
|
14 |
+
Unazungumza kama mpumbavu!,1,0,0,0,0
|
15 |
+
Wewe ni mchafu!,0,0,0,1,0
|
16 |
+
Nitaharibu maisha yako!,0,0,1,0,0
|
17 |
+
Hiyo ni ujinga wa hali ya juu!,1,0,0,0,0
|
18 |
+
Wewe ni nani kuzungumza hivyo?,0,0,0,1,0
|
19 |
+
Mimi nitakumaliza!,0,1,1,0,0
|
20 |
+
Una sura mbaya sana!,0,0,0,1,0
|
21 |
+
Usinieleze mambo yako ya kijinga!,1,0,0,0,0
|
22 |
+
Wewe ni kichekesho!,0,0,0,1,0
|
23 |
+
Nitakupiga hadi utoe damu!,0,1,0,0,0
|
24 |
+
Wewe ni taka taka!,1,0,0,1,0
|
25 |
+
Ondoka hapa!,1,0,0,0,0
|
26 |
+
Nitakuvunja miguu!,0,1,1,0,0
|
27 |
+
Wewe ni mpuuzi tu!,1,0,0,1,0
|
28 |
+
Nitakuchoma moto!,0,1,1,0,0
|
29 |
+
Unafanya mambo ya kipumbavu!,1,0,0,0,0
|
30 |
+
Nakuona ni kama mavi ya mbwa!,0,0,0,1,0
|
31 |
+
Usiniletee upuuzi wako!,1,0,0,0,0
|
32 |
+
Utakufa maskini!,0,0,0,1,0
|
33 |
+
Wewe ni bure kabisa!,1,0,0,0,0
|
34 |
+
Nitakufanya ujutie kuzaliwa kwako!,0,1,1,0,0
|
35 |
+
Wewe ni kama paka wa mtaani!,0,0,0,1,0
|
36 |
+
Nitaharibu uso wako!,0,1,1,0,0
|
37 |
+
Unafikiri wewe ni nani?,1,0,0,0,0
|
38 |
+
Hakuna mtu anayekupenda!,1,0,0,0,0
|
39 |
+
Unapaswa kufa!,0,0,1,0,0
|
40 |
+
Wewe ni mnyama tu!,1,0,0,0,0
|
41 |
+
Wewe ni mjinga na kipumbavu!,1,0,0,1,0
|
42 |
+
Nitakupiga vibaya hadi ulie!,0,1,0,0,0
|
43 |
+
Usiniletee upumbavu wako!,1,0,0,0,0
|
44 |
+
Wewe ni bure kama taka!,1,0,0,1,0
|
45 |
+
Wewe ni kipande cha mbwa!,1,0,0,1,0
|
46 |
+
Nitaharibu kila kitu kwako!,0,1,1,0,0
|
47 |
+
Wewe ni mfano wa upumbavu!,1,0,0,0,0
|
48 |
+
Unakera kama mbu!,1,0,0,1,0
|
49 |
+
Wewe ni shetani!,1,0,0,0,0
|
50 |
+
Nitakutoa roho!,0,1,1,0,0
|
51 |
+
Hakuna mtu anataka kukuona!,1,0,0,0,0
|
52 |
+
Wewe ni mbwa mwitu!,1,0,0,0,0
|
53 |
+
Nitakupiga ngumi!,0,1,0,0,0
|
54 |
+
Wewe ni sumu!,1,0,0,0,0
|
55 |
+
Nitachoma nyumba yako!,0,1,1,0,0
|
56 |
+
Wewe ni mfano wa ujinga!,1,0,0,0,0
|
57 |
+
Wewe ni kama kinyesi!,1,0,0,1,0
|
58 |
+
Nitaharibu maisha yako!,0,1,1,0,0
|
59 |
+
Wewe ni bure kabisa!,1,0,0,1,0
|
60 |
+
Wewe ni kama panya!,1,0,0,0,0
|
61 |
+
Nakuona kama panya!,1,0,0,1,0
|
62 |
+
Wewe ni kipande cha mavi!,1,0,0,1,0
|
63 |
+
Wewe ni chizi!,1,0,0,0,0
|
64 |
+
Wewe ni mshenzi!,1,0,0,1,0
|
65 |
+
Nitakupiga teke!,0,1,0,0,0
|
66 |
+
Wewe ni bure kama takataka!,1,0,0,1,0
|
67 |
+
Wewe ni kama kuku wa mtaani!,1,0,0,0,0
|
68 |
+
Nitakukata kichwa!,0,1,1,0,0
|
69 |
+
Wewe ni bure kabisa kama takataka!,1,0,0,1,0
|
70 |
+
Wewe ni mwehu!,1,0,0,0,0
|
71 |
+
Nitakutupa jalalani!,0,1,1,0,0
|
72 |
+
Wewe ni kipande cha taka!,1,0,0,1,0
|
73 |
+
Wewe ni kama mbwa mwitu!,1,0,0,0,0
|
74 |
+
Wewe ni mfano wa pumbavu!,1,0,0,0,0
|
75 |
+
Nitakupiga teke hadi ufe!,0,1,0,0,0
|
76 |
+
Wewe ni mfano wa upuuzi!,1,0,0,0,0
|
77 |
+
Wewe ni kipande cha taka!,1,0,0,1,0
|
78 |
+
Wewe ni bure kama kinyesi!,1,0,0,1,0
|
79 |
+
Wewe ni mnyama!,1,0,0,0,0
|
80 |
+
Nitakupiga hadi utoe machozi!,0,1,0,0,0
|
81 |
+
Wewe ni kichekesho cha dunia!,1,0,0,1,0
|
82 |
+
Wewe ni takataka ya mtaa!,1,0,0,1,0
|
83 |
+
Nitakupiga na risasi!,0,1,1,0,0
|
84 |
+
Wewe ni mfano wa upumbavu!,1,0,0,0,0
|
85 |
+
Wewe ni kama mbwa!,1,0,0,0,0
|
86 |
+
Nitaharibu uso wako!,0,1,1,0,0
|
87 |
+
Wewe ni kama paka wa mtaani!,1,0,0,0,0
|
88 |
+
Nitakufanya ujutie kuzaliwa kwako!,0,1,1,0,0
|
89 |
+
Wewe ni kipande cha mbwa!,1,0,0,1,0
|
90 |
+
Nitakupiga vibaya hadi ulie!,0,1,0,0,0
|
91 |
+
Wewe ni kipande cha takataka!,1,0,0,1,0
|
92 |
+
Wewe ni sumu kwa jamii!,1,0,0,0,0
|
93 |
+
Nitakuchoma moto!,0,1,1,0,0
|
94 |
+
Wewe ni kichekesho cha mtaa!,1,0,0,1,0
|
95 |
+
Wewe ni kama kinyesi cha mbwa!,1,0,0,1,0
|
96 |
+
Nitakuvunja miguu!,0,1,1,0,0
|
97 |
+
Wewe ni takataka kabisa!,1,0,0,1,0
|
98 |
+
Wewe ni kama mavi ya mbwa!,1,0,0,1,0
|
99 |
+
Nitakupiga hadi utoe damu!,0,1,0,0,0
|
100 |
+
Ukiniletea mamdomo mi ntakumada,1,1,1,1,1
|
101 |
+
Madem wa Kenya ni malaya,1,1,1,1,1
|
102 |
+
Uskii jamaa anatupa haga,1,1,0,1,0
|
103 |
+
Sura kama punda,1,0,0,1,0
|
104 |
+
Manze ngeus ni nyangumi,1,1,0,1,1
|
105 |
+
Si unakaa poa aki,0,0,0,0,0
|
106 |
+
Huyo chali ni mpoa,0,0,0,0,0
|
107 |
+
Ngeus amenidrive crazy,0,0,0,0,0
|
108 |
+
Johnte ni fala achana tu na yeye,1,0,1,1,1
|
109 |
+
Nairobii kumejaa malaya,1,1,1,1,1
|
110 |
+
Ngeus wake wake ni hoe,1,1,0,1,1
|
111 |
+
Dem wake ni whore,1,1,1,1,1
|
112 |
+
Manze fuck.,0,1,0,0,0
|
113 |
+
Jamaa anapenda nginyo sana,1,1,0,1,0
|
114 |
+
Jamaa anastream gwati sana,1,0,1,1,1
|
115 |
+
Njoti yake ni kama maziwa mala,1,1,0,1,1
|
116 |
+
Niaje bro,0,0,0,0,0
|
117 |
+
Kamaa ako na ufala sana,1,0,0,1,0
|
swahili_dataset.csv
ADDED
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
comment,toxic,severe_toxic,obscene threat,insult,identity_hate
|
2 |
+
Huyu msee ni malaya,1,0,0,1,0
|
3 |
+
Zakayo ni mbwa,0,0,0,1,0
|
4 |
+
Chira ni shoga,1,1,0,1,0
|
5 |
+
Akinirushia mamdomo mi ntamuua,1,1,1,0,0
|
6 |
+
Haga sana,0,0,0,1,0
|
7 |
+
Kuma ya mamako,1,1,0,1,0
|
8 |
+
Wewe ni mjinga sana!,1,0,0,1,0
|
9 |
+
Nataka nikuchape vibaya!,0,1,0,0,0
|
10 |
+
Huo ni upuuzi mtupu!,1,0,0,0,0
|
11 |
+
Wewe ni takataka!,0,0,0,0,1
|
12 |
+
Nitakuua!,0,0,1,0,0
|
13 |
+
Unakera sana!,1,0,0,1,0
|
14 |
+
|
15 |
+
Unazungumza kama mpumbavu!,1,0,0,0,0
|
16 |
+
Wewe ni mchafu!,0,0,0,1,0
|
17 |
+
Nitaharibu maisha yako!,0,0,1,0,0
|
18 |
+
Hiyo ni ujinga wa hali ya juu!,1,0,0,0,0
|
19 |
+
Wewe ni nani kuzungumza hivyo?,0,0,0,1,0
|
20 |
+
Mimi nitakumaliza!,0,1,1,0,0
|
21 |
+
Una sura mbaya sana!,0,0,0,1,0
|
22 |
+
Usinieleze mambo yako ya kijinga!,1,0,0,0,0
|
23 |
+
Wewe ni kichekesho!,0,0,0,1,0
|
24 |
+
Nitakupiga hadi utoe damu!,0,1,0,0,0
|
25 |
+
Wewe ni taka taka!,1,0,0,1,0
|
26 |
+
Ondoka hapa!,1,0,0,0,0
|
27 |
+
Nitakuvunja miguu!,0,1,1,0,0
|
28 |
+
Wewe ni mpuuzi tu!,1,0,0,1,0
|
29 |
+
Nitakuchoma moto!,0,1,1,0,0
|
30 |
+
Unafanya mambo ya kipumbavu!,1,0,0,0,0
|
31 |
+
Nakuona ni kama mavi ya mbwa!,0,0,0,1,0
|
32 |
+
Usiniletee upuuzi wako!,1,0,0,0,0
|
33 |
+
Utakufa maskini!,0,0,0,1,0
|
34 |
+
Wewe ni bure kabisa!,1,0,0,0,0
|
35 |
+
Nitakufanya ujutie kuzaliwa kwako!,0,1,1,0,0
|
36 |
+
Wewe ni kama paka wa mtaani!,0,0,0,1,0
|
37 |
+
Nitaharibu uso wako!,0,1,1,0,0
|
38 |
+
Unafikiri wewe ni nani?,1,0,0,0,0
|
39 |
+
Hakuna mtu anayekupenda!,1,0,0,0,0
|
40 |
+
Unapaswa kufa!,0,0,1,0,0
|
41 |
+
Wewe ni mnyama tu!,1,0,0,0,0
|
42 |
+
Wewe ni mjinga na kipumbavu!,1,0,0,1,0
|
43 |
+
Nitakupiga vibaya hadi ulie!,0,1,0,0,0
|
44 |
+
Usiniletee upumbavu wako!,1,0,0,0,0
|
45 |
+
Wewe ni bure kama taka!,1,0,0,1,0
|
46 |
+
Wewe ni kipande cha mbwa!,1,0,0,1,0
|
47 |
+
Nitaharibu kila kitu kwako!,0,1,1,0,0
|
48 |
+
Wewe ni mfano wa upumbavu!,1,0,0,0,0
|
49 |
+
Unakera kama mbu!,1,0,0,1,0
|
50 |
+
Wewe ni shetani!,1,0,0,0,0
|
51 |
+
Nitakutoa roho!,0,1,1,0,0
|
52 |
+
Hakuna mtu anataka kukuona!,1,0,0,0,0
|
53 |
+
Wewe ni mbwa mwitu!,1,0,0,0,0
|
54 |
+
Nitakupiga ngumi!,0,1,0,0,0
|
55 |
+
Wewe ni sumu!,1,0,0,0,0
|
56 |
+
Nitachoma nyumba yako!,0,1,1,0,0
|
57 |
+
Wewe ni mfano wa ujinga!,1,0,0,0,0
|
58 |
+
Wewe ni kama kinyesi!,1,0,0,1,0
|
59 |
+
Nitaharibu maisha yako!,0,1,1,0,0
|
60 |
+
Wewe ni bure kabisa!,1,0,0,1,0
|
61 |
+
Wewe ni kama panya!,1,0,0,0,0
|
62 |
+
Nakuona kama panya!,1,0,0,1,0
|
63 |
+
Wewe ni kipande cha mavi!,1,0,0,1,0
|
64 |
+
Wewe ni chizi!,1,0,0,0,0
|
65 |
+
Wewe ni mshenzi!,1,0,0,1,0
|
66 |
+
Nitakupiga teke!,0,1,0,0,0
|
67 |
+
Wewe ni bure kama takataka!,1,0,0,1,0
|
68 |
+
Wewe ni kama kuku wa mtaani!,1,0,0,0,0
|
69 |
+
Nitakukata kichwa!,0,1,1,0,0
|
70 |
+
Wewe ni bure kabisa kama takataka!,1,0,0,1,0
|
71 |
+
Wewe ni mwehu!,1,0,0,0,0
|
72 |
+
Nitakutupa jalalani!,0,1,1,0,0
|
73 |
+
Wewe ni kipande cha taka!,1,0,0,1,0
|
74 |
+
Wewe ni kama mbwa mwitu!,1,0,0,0,0
|
75 |
+
Wewe ni mfano wa pumbavu!,1,0,0,0,0
|
76 |
+
Nitakupiga teke hadi ufe!,0,1,0,0,0
|
77 |
+
Wewe ni mfano wa upuuzi!,1,0,0,0,0
|
78 |
+
Wewe ni kipande cha taka!,1,0,0,1,0
|
79 |
+
Wewe ni bure kama kinyesi!,1,0,0,1,0
|
80 |
+
Wewe ni mnyama!,1,0,0,0,0
|
81 |
+
Nitakupiga hadi utoe machozi!,0,1,0,0,0
|
82 |
+
Wewe ni kichekesho cha dunia!,1,0,0,1,0
|
83 |
+
Wewe ni takataka ya mtaa!,1,0,0,1,0
|
84 |
+
Nitakupiga na risasi!,0,1,1,0,0
|
85 |
+
Wewe ni mfano wa upumbavu!,1,0,0,0,0
|
86 |
+
Wewe ni kama mbwa!,1,0,0,0,0
|
87 |
+
Nitaharibu uso wako!,0,1,1,0,0
|
88 |
+
Wewe ni kama paka wa mtaani!,1,0,0,0,0
|
89 |
+
Nitakufanya ujutie kuzaliwa kwako!,0,1,1,0,0
|
90 |
+
Wewe ni kipande cha mbwa!,1,0,0,1,0
|
91 |
+
Nitakupiga vibaya hadi ulie!,0,1,0,0,0
|
92 |
+
Wewe ni kipande cha takataka!,1,0,0,1,0
|
93 |
+
Wewe ni sumu kwa jamii!,1,0,0,0,0
|
94 |
+
Nitakuchoma moto!,0,1,1,0,0
|
95 |
+
Wewe ni kichekesho cha mtaa!,1,0,0,1,0
|
96 |
+
Wewe ni kama kinyesi cha mbwa!,1,0,0,1,0
|
97 |
+
Nitakuvunja miguu!,0,1,1,0,0
|
98 |
+
Wewe ni takataka kabisa!,1,0,0,1,0
|
99 |
+
Wewe ni kama mavi ya mbwa!,1,0,0,1,0
|
100 |
+
Nitakupiga hadi utoe damu!,0,1,0,0,0
|
101 |
+
Ukiniletea mamdomo mi ntakumada,1,1,1,1,1
|
102 |
+
Madem wa Kenya ni malaya,1,1,1,1,1
|
103 |
+
Uskii jamaa anatupa haga,1,1,0,1,0
|
104 |
+
Sura kama punda,1,0,0,1,0
|
105 |
+
Manze ngeus ni nyangumi,1,1,0,1,1
|
106 |
+
Si unakaa poa aki,0,0,0,0,0
|
107 |
+
Huyo chali ni mpoa,0,0,0,0,0
|
108 |
+
Ngeus amenidrive crazy,0,0,0,0,0
|
109 |
+
Johnte ni fala achana tu na yeye,1,0,1,1,1
|
110 |
+
Nairobii kumejaa malaya,1,1,1,1,1
|
111 |
+
Ngeus wake wake ni hoe,1,1,0,1,1
|
112 |
+
Dem wake ni whore,1,0,0,1,1
|
113 |
+
Manze fuck.,0,1,0,0,0
|
114 |
+
Jamaa anapenda nginyo sana,1,1,0,1,0
|
115 |
+
Jamaa anastream gwati sana,1,0,1,1,1
|
116 |
+
Njoti yake ni kama maziwa mala,1,1,0,1,1
|
117 |
+
|
tokenizer.pickle
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d77d70fdcb351caea5ee6d9dfbd607f61ac419b4a04ec521d84605bbc9f41165
|
3 |
+
size 7740158
|
toxic.h5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:73b4cc477172e73a10c43cc26bc3c628a71b2a2a6c145b5edf6a8ce42d4905e8
|
3 |
+
size 1816640
|
train.ipynb
ADDED
@@ -0,0 +1,240 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "markdown",
|
5 |
+
"metadata": {},
|
6 |
+
"source": [
|
7 |
+
"### Add Additional Datasets to The Model."
|
8 |
+
]
|
9 |
+
},
|
10 |
+
{
|
11 |
+
"cell_type": "code",
|
12 |
+
"execution_count": 2,
|
13 |
+
"metadata": {},
|
14 |
+
"outputs": [],
|
15 |
+
"source": [
|
16 |
+
"import pandas as pd\n",
|
17 |
+
"import numpy as np\n",
|
18 |
+
"from sklearn.model_selection import train_test_split\n",
|
19 |
+
"from keras.utils import pad_sequences\n",
|
20 |
+
"import tensorflow as tf\n",
|
21 |
+
"import pickle"
|
22 |
+
]
|
23 |
+
},
|
24 |
+
{
|
25 |
+
"cell_type": "code",
|
26 |
+
"execution_count": 13,
|
27 |
+
"metadata": {},
|
28 |
+
"outputs": [
|
29 |
+
{
|
30 |
+
"data": {
|
31 |
+
"text/html": [
|
32 |
+
"<div>\n",
|
33 |
+
"<style scoped>\n",
|
34 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
35 |
+
" vertical-align: middle;\n",
|
36 |
+
" }\n",
|
37 |
+
"\n",
|
38 |
+
" .dataframe tbody tr th {\n",
|
39 |
+
" vertical-align: top;\n",
|
40 |
+
" }\n",
|
41 |
+
"\n",
|
42 |
+
" .dataframe thead th {\n",
|
43 |
+
" text-align: right;\n",
|
44 |
+
" }\n",
|
45 |
+
"</style>\n",
|
46 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
47 |
+
" <thead>\n",
|
48 |
+
" <tr style=\"text-align: right;\">\n",
|
49 |
+
" <th></th>\n",
|
50 |
+
" <th>comment</th>\n",
|
51 |
+
" <th>toxic</th>\n",
|
52 |
+
" <th>severe_toxic</th>\n",
|
53 |
+
" <th>obscene threat</th>\n",
|
54 |
+
" <th>insult</th>\n",
|
55 |
+
" <th>identity_hate</th>\n",
|
56 |
+
" </tr>\n",
|
57 |
+
" </thead>\n",
|
58 |
+
" <tbody>\n",
|
59 |
+
" <tr>\n",
|
60 |
+
" <th>0</th>\n",
|
61 |
+
" <td>Huyu msee ni malaya</td>\n",
|
62 |
+
" <td>1</td>\n",
|
63 |
+
" <td>0</td>\n",
|
64 |
+
" <td>0</td>\n",
|
65 |
+
" <td>1.0</td>\n",
|
66 |
+
" <td>0</td>\n",
|
67 |
+
" </tr>\n",
|
68 |
+
" <tr>\n",
|
69 |
+
" <th>1</th>\n",
|
70 |
+
" <td>Zakayo ni mbwa</td>\n",
|
71 |
+
" <td>0</td>\n",
|
72 |
+
" <td>0</td>\n",
|
73 |
+
" <td>0</td>\n",
|
74 |
+
" <td>1.0</td>\n",
|
75 |
+
" <td>0</td>\n",
|
76 |
+
" </tr>\n",
|
77 |
+
" <tr>\n",
|
78 |
+
" <th>2</th>\n",
|
79 |
+
" <td>Chira ni shoga</td>\n",
|
80 |
+
" <td>1</td>\n",
|
81 |
+
" <td>1</td>\n",
|
82 |
+
" <td>0</td>\n",
|
83 |
+
" <td>1.0</td>\n",
|
84 |
+
" <td>0</td>\n",
|
85 |
+
" </tr>\n",
|
86 |
+
" <tr>\n",
|
87 |
+
" <th>3</th>\n",
|
88 |
+
" <td>Akinirushia mamdomo mi ntamuua</td>\n",
|
89 |
+
" <td>1</td>\n",
|
90 |
+
" <td>1</td>\n",
|
91 |
+
" <td>1</td>\n",
|
92 |
+
" <td>0.0</td>\n",
|
93 |
+
" <td>0</td>\n",
|
94 |
+
" </tr>\n",
|
95 |
+
" <tr>\n",
|
96 |
+
" <th>4</th>\n",
|
97 |
+
" <td>Haga sana</td>\n",
|
98 |
+
" <td>0</td>\n",
|
99 |
+
" <td>0</td>\n",
|
100 |
+
" <td>0</td>\n",
|
101 |
+
" <td>1.0</td>\n",
|
102 |
+
" <td>0</td>\n",
|
103 |
+
" </tr>\n",
|
104 |
+
" <tr>\n",
|
105 |
+
" <th>5</th>\n",
|
106 |
+
" <td>Kuma ya mamako</td>\n",
|
107 |
+
" <td>1</td>\n",
|
108 |
+
" <td>1</td>\n",
|
109 |
+
" <td>0</td>\n",
|
110 |
+
" <td>1.0</td>\n",
|
111 |
+
" <td>0</td>\n",
|
112 |
+
" </tr>\n",
|
113 |
+
" <tr>\n",
|
114 |
+
" <th>6</th>\n",
|
115 |
+
" <td>Wewe ni mjinga sana!</td>\n",
|
116 |
+
" <td>1</td>\n",
|
117 |
+
" <td>0</td>\n",
|
118 |
+
" <td>0</td>\n",
|
119 |
+
" <td>1.0</td>\n",
|
120 |
+
" <td>0</td>\n",
|
121 |
+
" </tr>\n",
|
122 |
+
" <tr>\n",
|
123 |
+
" <th>7</th>\n",
|
124 |
+
" <td>Nataka nikuchape vibaya!</td>\n",
|
125 |
+
" <td>0</td>\n",
|
126 |
+
" <td>1</td>\n",
|
127 |
+
" <td>0</td>\n",
|
128 |
+
" <td>0.0</td>\n",
|
129 |
+
" <td>0</td>\n",
|
130 |
+
" </tr>\n",
|
131 |
+
" <tr>\n",
|
132 |
+
" <th>8</th>\n",
|
133 |
+
" <td>Huo ni upuuzi mtupu!</td>\n",
|
134 |
+
" <td>1</td>\n",
|
135 |
+
" <td>0</td>\n",
|
136 |
+
" <td>0</td>\n",
|
137 |
+
" <td>0.0</td>\n",
|
138 |
+
" <td>0</td>\n",
|
139 |
+
" </tr>\n",
|
140 |
+
" <tr>\n",
|
141 |
+
" <th>9</th>\n",
|
142 |
+
" <td>Wewe ni takataka!</td>\n",
|
143 |
+
" <td>0</td>\n",
|
144 |
+
" <td>0</td>\n",
|
145 |
+
" <td>0</td>\n",
|
146 |
+
" <td>0.0</td>\n",
|
147 |
+
" <td>1</td>\n",
|
148 |
+
" </tr>\n",
|
149 |
+
" </tbody>\n",
|
150 |
+
"</table>\n",
|
151 |
+
"</div>"
|
152 |
+
],
|
153 |
+
"text/plain": [
|
154 |
+
" comment toxic severe_toxic obscene threat \\\n",
|
155 |
+
"0 Huyu msee ni malaya 1 0 0 \n",
|
156 |
+
"1 Zakayo ni mbwa 0 0 0 \n",
|
157 |
+
"2 Chira ni shoga 1 1 0 \n",
|
158 |
+
"3 Akinirushia mamdomo mi ntamuua 1 1 1 \n",
|
159 |
+
"4 Haga sana 0 0 0 \n",
|
160 |
+
"5 Kuma ya mamako 1 1 0 \n",
|
161 |
+
"6 Wewe ni mjinga sana! 1 0 0 \n",
|
162 |
+
"7 Nataka nikuchape vibaya! 0 1 0 \n",
|
163 |
+
"8 Huo ni upuuzi mtupu! 1 0 0 \n",
|
164 |
+
"9 Wewe ni takataka! 0 0 0 \n",
|
165 |
+
"\n",
|
166 |
+
" insult identity_hate \n",
|
167 |
+
"0 1.0 0 \n",
|
168 |
+
"1 1.0 0 \n",
|
169 |
+
"2 1.0 0 \n",
|
170 |
+
"3 0.0 0 \n",
|
171 |
+
"4 1.0 0 \n",
|
172 |
+
"5 1.0 0 \n",
|
173 |
+
"6 1.0 0 \n",
|
174 |
+
"7 0.0 0 \n",
|
175 |
+
"8 0.0 0 \n",
|
176 |
+
"9 0.0 1 "
|
177 |
+
]
|
178 |
+
},
|
179 |
+
"execution_count": 13,
|
180 |
+
"metadata": {},
|
181 |
+
"output_type": "execute_result"
|
182 |
+
}
|
183 |
+
],
|
184 |
+
"source": [
|
185 |
+
"dataframe = pd.read_csv('swahili.csv')\n",
|
186 |
+
"\n",
|
187 |
+
"texts = dataframe['comment'].values\n",
|
188 |
+
"labels = dataframe[['toxic', 'severe_toxic', 'obscene threat', 'insult', 'identity_hate']].values\n",
|
189 |
+
"\n",
|
190 |
+
"dataframe.head(10)"
|
191 |
+
]
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"cell_type": "code",
|
195 |
+
"execution_count": null,
|
196 |
+
"metadata": {},
|
197 |
+
"outputs": [],
|
198 |
+
"source": [
|
199 |
+
"#Preprocess and Prepare Data for Training:\n",
|
200 |
+
"max_len = 200\n",
|
201 |
+
"\n",
|
202 |
+
"# Load the tokenizer\n",
|
203 |
+
"with open('tokenizer.pickle', 'rb') as handle:\n",
|
204 |
+
" tokenizer = pickle.load(handle)\n",
|
205 |
+
"\n",
|
206 |
+
"# Pad & Tokenize Data\n",
|
207 |
+
"sequences = tokenizer.texts_to_sequences(texts)\n",
|
208 |
+
"padded_sequences = pad_sequences(sequences, maxlen=max_len)\n",
|
209 |
+
"\n",
|
210 |
+
"# Data splitting\n",
|
211 |
+
"X_train, X_val, y_train, y_val = train_test_split(padded_sequences, labels, test_size=0.2, random_state=42)\n",
|
212 |
+
"\n",
|
213 |
+
"# Create TensorFlow datasets\n",
|
214 |
+
"train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(32)\n",
|
215 |
+
"val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val)).batch(32)\n"
|
216 |
+
]
|
217 |
+
}
|
218 |
+
],
|
219 |
+
"metadata": {
|
220 |
+
"kernelspec": {
|
221 |
+
"display_name": "base",
|
222 |
+
"language": "python",
|
223 |
+
"name": "python3"
|
224 |
+
},
|
225 |
+
"language_info": {
|
226 |
+
"codemirror_mode": {
|
227 |
+
"name": "ipython",
|
228 |
+
"version": 3
|
229 |
+
},
|
230 |
+
"file_extension": ".py",
|
231 |
+
"mimetype": "text/x-python",
|
232 |
+
"name": "python",
|
233 |
+
"nbconvert_exporter": "python",
|
234 |
+
"pygments_lexer": "ipython3",
|
235 |
+
"version": "3.11.7"
|
236 |
+
}
|
237 |
+
},
|
238 |
+
"nbformat": 4,
|
239 |
+
"nbformat_minor": 2
|
240 |
+
}
|