# NOTE: extraction residue (file size, git blob hashes, line-number gutter)
# removed from the top of this file — it was not valid Python.
import gradio as gr
from transformers import Trainer, TrainingArguments, BertForSequenceClassification, BertTokenizer
from datasets import load_dataset
from huggingface_hub import login
from huggingface_hub import InferenceClient
import torch
# Authenticate with Hugging Face Hub.
# NOTE(review): login() with no arguments prompts for a token interactively;
# for non-interactive runs supply a token (e.g. via the HF_TOKEN env var).
login()
# Load Dataset from Kaggle (you can change this to your specific Kaggle dataset)
# Example: Load a dataset related to password classification, or any text classification dataset
# The IMDB sentiment set is a binary-label stand-in; labels are 0/1, matching
# the num_labels=2 head below.
dataset = load_dataset("imdb") # Replace with your own dataset, e.g., Kaggle dataset
# Load Tokenizer and Model: pretrained BERT encoder plus a freshly initialized
# 2-class sequence-classification head.
model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)
# Batch tokenization step applied to every dataset record via Dataset.map.
def preprocess_function(examples):
    """Tokenize the 'text' field of a batch, padded/truncated to max length."""
    text_batch = examples['text']
    return tokenizer(text_batch, padding="max_length", truncation=True)
# Apply preprocessing to dataset.
# map(batched=True) feeds batches of examples to preprocess_function and adds
# the tokenizer's output columns (input_ids, attention_mask, ...) to each split.
tokenized_datasets = dataset.map(preprocess_function, batched=True)
# Split into training and evaluation datasets (IMDB ships "train"/"test" splits).
train_dataset = tokenized_datasets["train"]
eval_dataset = tokenized_datasets["test"]
# Define Training Arguments
training_args = TrainingArguments(
    output_dir="./results",          # output directory for checkpoints
    num_train_epochs=3,              # number of training epochs
    per_device_train_batch_size=8,   # batch size for training
    per_device_eval_batch_size=16,   # batch size for evaluation
    warmup_steps=500,                # number of warmup steps for learning rate scheduler
    weight_decay=0.01,               # strength of weight decay
    logging_dir="./logs",            # directory for storing logs
    logging_steps=10,                # log every 10 optimizer steps
    evaluation_strategy="epoch",     # evaluate each epoch
                                     # NOTE(review): this kwarg was renamed to
                                     # `eval_strategy` in newer transformers
                                     # releases — confirm the pinned version
                                     # still accepts `evaluation_strategy`.
    save_strategy="epoch",           # save model each epoch
)
# Initialize Trainer
trainer = Trainer(
    model=model,                  # the instantiated 🤗 Transformers model to be trained
    args=training_args,           # training arguments, defined above
    train_dataset=train_dataset,  # training dataset
    eval_dataset=eval_dataset,    # evaluation dataset
)
# Train the Model (fine-tunes `model` in place; checkpoints land in ./results)
trainer.train()
# Save the Model and Tokenizer to local directories.
model.save_pretrained("./password_sniffer_model")
tokenizer.save_pretrained("./password_sniffer_tokenizer")
# Load the fine-tuned model and tokenizer back from disk, rebinding the
# module-level names that detect_passwords() below reads.
model = BertForSequenceClassification.from_pretrained("./password_sniffer_model")
tokenizer = BertTokenizer.from_pretrained("./password_sniffer_tokenizer")
# Setup Hugging Face Inference Client.
# NOTE(review): InferenceClient expects a Hub repo id / endpoint URL, not a
# local directory, and `client` is never used afterwards — this looks like
# dead or incorrect code; verify intent.
client = InferenceClient("password_sniffer_model")
def detect_passwords(text):
    """
    Detect potential passwords in *text* using the fine-tuned BERT model.

    Uses the module-level ``model`` and ``tokenizer`` loaded above.

    Parameters
    ----------
    text : str
        Raw input text to classify (truncated to 512 tokens).

    Returns
    -------
    str
        Human-readable verdict string.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
    # FIX: the original ran the forward pass with autograd enabled, building a
    # computation graph that is never used — wasted memory and time at
    # inference. no_grad() disables gradient tracking; outputs are identical.
    with torch.no_grad():
        outputs = model(**inputs)
    predictions = torch.softmax(outputs.logits, dim=-1)
    predicted_class = torch.argmax(predictions, dim=-1).item()
    if predicted_class == 1:  # Assuming '1' represents potential password
        return "Potential password detected."
    return "No password detected."
# Gradio callback: classify the incoming message. The history, system-message,
# and sampling parameters are accepted only for interface compatibility with
# chat-style callers and are deliberately ignored.
def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Return the password-detection verdict string for *message*."""
    return detect_passwords(message)
# Gradio Interface.
# BUG FIX: gr.Interface passes input widgets to `fn` positionally. With the
# original `fn=respond`, the first textbox (system message) landed in
# respond()'s `message` parameter and the second textbox (the actual user
# input) landed in `history` — so the model classified the system prompt
# instead of the user's text. This adapter routes each textbox to the correct
# parameter; respond()'s own signature is unchanged.
def _gradio_adapter(system_message, message):
    """Map the two input textboxes onto respond()'s wider signature."""
    return respond(message, None, system_message, None, None, None)

demo = gr.Interface(
    fn=_gradio_adapter,
    inputs=[
        gr.Textbox(value="You are a password detection chatbot.", label="System message"),
        gr.Textbox(value="Hello, your password might be 12345!", label="User input"),
    ],
    outputs="text",
)

if __name__ == "__main__":
    demo.launch()