kollera committed on
Commit
56863df
·
verified ·
1 Parent(s): 6382463

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -2
app.py CHANGED
@@ -1,17 +1,21 @@
1
  import gradio as gr
2
  from transformers import pipeline, Trainer, TrainingArguments, DistilBertForSequenceClassification, DistilBertTokenizer
3
  from datasets import load_dataset
 
4
 
5
  # Carica il dataset di spam detection da Hugging Face
6
  dataset = load_dataset("tanquangduong/spam-detection-dataset-splits")
7
 
 
 
 
8
  # Carica il tokenizer e il modello pre-addestrato
9
  tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
10
  model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased")
11
 
12
- # Tokenizza il dataset
13
  def tokenize_function(examples):
14
- return tokenizer(examples['message'], truncation=True, padding="max_length", max_length=128)
15
 
16
  tokenized_datasets = dataset.map(tokenize_function, batched=True)
17
 
 
1
  import gradio as gr
2
  from transformers import pipeline, Trainer, TrainingArguments, DistilBertForSequenceClassification, DistilBertTokenizer
3
  from datasets import load_dataset
4
+ import torch
5
 
6
  # Carica il dataset di spam detection da Hugging Face
7
  dataset = load_dataset("tanquangduong/spam-detection-dataset-splits")
8
 
9
+ # Visualizza i nomi delle colonne per verificare quale contiene il testo delle email
10
+ print(dataset['train'].column_names)
11
+
12
  # Carica il tokenizer e il modello pre-addestrato
13
  tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
14
  model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased")
15
 
16
+ # Aggiorna il nome della colonna con il nome corretto
17
  def tokenize_function(examples):
18
+ return tokenizer(examples['text'], truncation=True, padding="max_length", max_length=128)
19
 
20
  tokenized_datasets = dataset.map(tokenize_function, batched=True)
21