|
import inspect

import gradio as gr
import torch
from datasets import load_dataset
from huggingface_hub import InferenceClient
from huggingface_hub import login
from transformers import Trainer, TrainingArguments, BertForSequenceClassification, BertTokenizer
|
|
|
|
|
# Authenticate against the Hugging Face Hub before any Hub access below.
# NOTE(review): called without a token, so this presumably prompts
# interactively -- confirm that suits the deployment environment.
login()

# Corpus used for fine-tuning.
dataset = load_dataset("imdb")

# One checkpoint name drives both the tokenizer and the classifier so the two
# always agree on vocabulary; the classification head is sized for two labels.
model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)
|
|
|
|
|
def preprocess_function(examples):
    """Tokenize the ``text`` entry of a batch of dataset examples.

    Args:
        examples: Mapping with a ``"text"`` entry holding the raw input to
            encode (this function is passed to ``dataset.map`` with
            ``batched=True`` elsewhere in this file).

    Returns:
        The tokenizer's output for the texts, padded to the maximum length
        and truncated where necessary.
    """
    texts = examples["text"]
    return tokenizer(texts, padding="max_length", truncation=True)
|
|
|
|
|
# Run the tokenizer over the dataset in batches.
tokenized_datasets = dataset.map(preprocess_function, batched=True)

# The "train" split is used for fitting and the "test" split for evaluation.
train_dataset, eval_dataset = (
    tokenized_datasets["train"],
    tokenized_datasets["test"],
)
|
|
|
|
|
# The evaluation-schedule keyword was renamed from ``evaluation_strategy`` to
# ``eval_strategy`` in newer transformers releases, and the old spelling is
# rejected once removed. Pick whichever name the installed TrainingArguments
# accepts so construction works on both old and new versions.
_eval_strategy_key = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(TrainingArguments).parameters
    else "evaluation_strategy"
)

# Hyperparameters and bookkeeping for the fine-tuning run.
training_args = TrainingArguments(
    # Where checkpoints and logs are written.
    output_dir="./results",
    logging_dir="./logs",
    logging_steps=10,
    # Optimisation schedule.
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=16,
    warmup_steps=500,
    weight_decay=0.01,
    # Evaluate and checkpoint once per epoch.
    save_strategy="epoch",
    **{_eval_strategy_key: "epoch"},
)
|
|
|
|
|
# Wire the model, the hyperparameters and both dataset splits into a Trainer.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
)

# Fine-tune. This sits at module top level, so it runs whenever the file is
# executed or imported.
trainer.train()
|
|
|
|
|
# Persist the fine-tuned weights and the tokenizer (separate directories).
model.save_pretrained("./password_sniffer_model")
tokenizer.save_pretrained("./password_sniffer_tokenizer")

# Rebind the module-level names to fresh copies loaded from disk; these are
# the objects detect_passwords() uses.
model = BertForSequenceClassification.from_pretrained("./password_sniffer_model")
tokenizer = BertTokenizer.from_pretrained("./password_sniffer_tokenizer")

# NOTE(review): this client is not referenced anywhere else in the visible
# source -- confirm whether it is still needed.
client = InferenceClient("password_sniffer_model")
|
|
|
def detect_passwords(text):
    """Classify ``text`` with the fine-tuned BERT model and report the verdict.

    Args:
        text: The string to classify. It is truncated to at most 512 tokens.

    Returns:
        ``"Potential password detected."`` when the model predicts class 1,
        otherwise ``"No password detected."``.

    NOTE(review): the model in this file is fine-tuned on the ``imdb``
    dataset, so class 1 presumably reflects that dataset's label 1 rather
    than "contains a password" -- confirm the training data matches the
    intended task.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)

    # Inference only: disable gradient tracking so no autograd graph is built
    # (and kept alive) for every request.
    with torch.no_grad():
        logits = model(**inputs).logits

    # Softmax is monotonic, so the argmax of the raw logits is the same class
    # the softmax probabilities would select.
    predicted_class = torch.argmax(logits, dim=-1).item()

    if predicted_class == 1:
        return "Potential password detected."
    return "No password detected."
|
|
|
|
|
def respond(message, history=None, system_message=None, max_tokens=None, temperature=None, top_p=None):
    """Return the password-detection verdict for a user message.

    Only ``message`` is used. The remaining parameters are accepted for
    signature compatibility with existing callers and are ignored; they
    default to ``None`` so the function can be called with the message alone.

    Args:
        message: The user text to scan.
        history: Unused.
        system_message: Unused.
        max_tokens: Unused.
        temperature: Unused.
        top_p: Unused.

    Returns:
        The string produced by ``detect_passwords(message)``.
    """
    return detect_passwords(message)


def _respond_from_interface(system_message, message):
    """Adapt the two Interface inputs, in UI order, to ``respond``."""
    return respond(message, system_message=system_message)


# The Interface supplies exactly two values in the order the inputs are
# listed (system message first, user input second). Passing ``respond``
# directly would leave four required arguments missing and would scan the
# system message instead of the user input, hence the adapter.
demo = gr.Interface(
    fn=_respond_from_interface,
    inputs=[
        gr.Textbox(value="You are a password detection chatbot.", label="System message"),
        gr.Textbox(value="Hello, your password might be 12345!", label="User input"),
    ],
    outputs="text",
)


if __name__ == "__main__":
    demo.launch()
|
|