Spaces:

HMPhuoc
/

toxic

Running

HMPhuoc committed on May 30, 2024

Commit

924cabe

1 Parent(s): 6844ad4

word tokenize

Files changed (2) hide show

app.py CHANGED Viewed

@@ -83,6 +83,7 @@ def judge(x):
   judge_result = []
   x = ud.normalize('NFKC', x)
   lstm_pred = LSTM_predict(x)
   gru_pred = GRU_predict(x)
@@ -106,6 +107,7 @@ def judgePlus(x):
   judge_result = []
   x = ud.normalize('NFKC', x)
   lstm_pred = LSTM_predict(x)
   gru_pred = GRU_predict(x)

   judge_result = []
   x = ud.normalize('NFKC', x)
+  x = word_tokenize(x, format="text")
   lstm_pred = LSTM_predict(x)
   gru_pred = GRU_predict(x)
   judge_result = []
   x = ud.normalize('NFKC', x)
+  x = word_tokenize(x, format="text")
   lstm_pred = LSTM_predict(x)
   gru_pred = GRU_predict(x)

phoBERT.py CHANGED Viewed

@@ -69,7 +69,7 @@ def tokenize(data):
   return output
 def BERT_predict(text):
-    text = word_tokenize(text)
     text = [text]
     token = tokenize(text)

   return output
 def BERT_predict(text):
     text = [text]
     token = tokenize(text)