Spaces:

Canstralian
/

bert_password_sniffer

Running

App Files Files Community

bert_password_sniffer / app.py

Canstralian

Update app.py

41f9a4d verified 3 months ago

raw

history blame contribute delete

3.59 kB

	import os

	import gradio as gr
	import torch
	from datasets import load_dataset
	from huggingface_hub import InferenceClient
	from huggingface_hub import login
	from transformers import Trainer, TrainingArguments, BertForSequenceClassification, BertTokenizer

	# Authenticate with Hugging Face.
	# A bare login() asks for the token interactively, which cannot be answered
	# when the app runs without an interactive session. Prefer a token supplied
	# through the HF_TOKEN environment variable and only fall back to the
	# interactive prompt when none is set.
	_hf_token = os.environ.get("HF_TOKEN")
	if _hf_token:
	    login(token=_hf_token)
	else:
	    login()

	# Load the dataset by name with `datasets.load_dataset`.
	# "imdb" is a stand-in text-classification dataset; replace it with your own
	# data (e.g. a password-classification dataset exported from Kaggle).
	dataset = load_dataset("imdb")

	# Load the base tokenizer and a BERT classifier with two output labels
	model_name = "bert-base-uncased"
	tokenizer = BertTokenizer.from_pretrained(model_name)
	model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)

	# Preprocess the Dataset
	def preprocess_function(examples):
	    """Tokenize the ``text`` field of ``examples``.

	    The module-level ``tokenizer`` is called with ``padding="max_length"``
	    and ``truncation=True``; its result is returned unchanged.
	    """
	    texts = examples["text"]
	    encoded = tokenizer(texts, padding="max_length", truncation=True)
	    return encoded

	# Run the tokenizer over the dataset in batches
	tokenized_datasets = dataset.map(preprocess_function, batched=True)

	# "train" split is used for fitting, "test" split for evaluation
	train_dataset, eval_dataset = (
	    tokenized_datasets["train"],
	    tokenized_datasets["test"],
	)

	# Training hyperparameters, collected in one mapping and expanded below
	_training_config = {
	    "output_dir": "./results",  # output directory
	    "num_train_epochs": 3,  # number of training epochs
	    "per_device_train_batch_size": 8,  # batch size for training
	    "per_device_eval_batch_size": 16,  # batch size for evaluation
	    "warmup_steps": 500,  # warmup steps for the learning rate scheduler
	    "weight_decay": 0.01,  # strength of weight decay
	    "logging_dir": "./logs",  # directory for storing logs
	    "logging_steps": 10,
	    # NOTE(review): newer transformers releases appear to have renamed this
	    # option to `eval_strategy` - confirm against the installed version.
	    "evaluation_strategy": "epoch",  # evaluate each epoch
	    "save_strategy": "epoch",  # save model each epoch
	}
	training_args = TrainingArguments(**_training_config)

	# Wire the model, the arguments and both dataset splits into a Trainer
	trainer = Trainer(
	    model=model,
	    args=training_args,
	    train_dataset=train_dataset,
	    eval_dataset=eval_dataset,
	)

	# Fine-tune; this runs at module import time, before the UI is built
	trainer.train()

	# Persist the fine-tuned model and the tokenizer in separate directories
	_model_dir = "./password_sniffer_model"
	_tokenizer_dir = "./password_sniffer_tokenizer"
	model.save_pretrained(_model_dir)
	tokenizer.save_pretrained(_tokenizer_dir)

	# Rebind the module-level names to copies loaded back from those directories
	model = BertForSequenceClassification.from_pretrained(_model_dir)
	tokenizer = BertTokenizer.from_pretrained(_tokenizer_dir)

	# Hugging Face Inference Client
	# NOTE(review): `client` is not referenced by any other code visible in this
	# file - confirm whether it is still needed.
	client = InferenceClient("password_sniffer_model")

	def detect_passwords(text):
	    """
	    Classify ``text`` with the fine-tuned BERT model and report the verdict.

	    Args:
	        text: The string to classify. It is tokenized with truncation at
	            ``max_length=512``.

	    Returns:
	        "Potential password detected." when the predicted class is 1,
	        otherwise "No password detected.".
	    """
	    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)

	    # Inference only: disable gradient tracking so the forward pass does not
	    # build an autograd graph that is never used.
	    with torch.no_grad():
	        logits = model(**inputs).logits

	    # Softmax is monotonic, so the argmax of the raw logits selects the same
	    # class; the probabilities themselves are not needed here.
	    # .item() expects exactly one prediction, i.e. a single input text.
	    predicted_class = torch.argmax(logits, dim=-1).item()

	    # Assuming '1' represents potential password.
	    # NOTE(review): the meaning of label 1 depends on the dataset the model
	    # was fine-tuned on - confirm.
	    if predicted_class == 1:
	        return "Potential password detected."
	    return "No password detected."

	# Gradio Interface
	def respond(message, history=None, system_message=None, max_tokens=None, temperature=None, top_p=None):
	    """
	    Return the password-detection verdict for ``message``.

	    Only ``message`` is used. The remaining parameters are accepted so that
	    existing callers passing all six arguments keep working; they default to
	    ``None`` so the function can also be called with the message alone.

	    Args:
	        message: The user text to classify.
	        history: Unused.
	        system_message: Unused.
	        max_tokens: Unused.
	        temperature: Unused.
	        top_p: Unused.

	    Returns:
	        The string returned by ``detect_passwords(message)``.
	    """
	    return detect_passwords(message)


	def _respond_from_form(system_message, message):
	    """Adapt the two form fields, in on-screen order, to ``respond``.

	    gr.Interface passes one positional argument per input component, so the
	    callback must take exactly (system message, user input). Without this
	    adapter the system message would be classified instead of the user input.
	    """
	    return respond(message, system_message=system_message)


	demo = gr.Interface(
	    fn=_respond_from_form,
	    inputs=[
	        gr.Textbox(value="You are a password detection chatbot.", label="System message"),
	        gr.Textbox(value="Hello, your password might be 12345!", label="User input"),
	    ],
	    outputs="text",
	)

	if __name__ == "__main__":
	    demo.launch()