# BERT-based slot-filling (NLU) inference script.
import numpy as np
import torch
from transformers import BertForTokenClassification, BertTokenizerFast

# Tokenizer must match the base model used during fine-tuning; the
# classification weights come from a local training checkpoint.
tokenizer = BertTokenizerFast.from_pretrained("bert-base-multilingual-cased")
model = BertForTokenClassification.from_pretrained("./results/checkpoint-100")
model.eval()  # inference only: make sure dropout is disabled

# id -> BIO slot label, in the order the token-classification head was trained.
slot_label_map = {
    0: "O", 1: "B-project_id", 2: "I-project_id", 3: "B-reason", 4: "I-reason",
    5: "B-amount", 6: "I-amount", 7: "B-project_name", 8: "I-project_name",
    9: "B-status", 10: "I-status", 11: "B-riyals", 12: "I-riyals",
}
# BERT special tokens that carry no slot content and must not leak into values.
_SPECIAL_TOKENS = {"[CLS]", "[SEP]", "[PAD]"}


def decode_slots(tokens, predictions, slot_label_map, tok=None):
    """Decode BIO-tagged per-token predictions into a {slot_name: text} dict.

    Args:
        tokens: wordpiece tokens aligned one-to-one with ``predictions``.
        predictions: per-token label ids.
        slot_label_map: id -> label string ("O", "B-xxx", "I-xxx").
        tok: optional tokenizer providing ``convert_tokens_to_string``; when
            omitted, falls back to the module-level ``tokenizer`` (original
            behavior), so existing callers are unaffected.

    Returns:
        dict mapping slot name to its detokenized string value; if a slot
        occurs more than once, the last occurrence wins.
    """
    if tok is None:
        tok = tokenizer  # module-level tokenizer, kept for backward compatibility
    slots = {}
    current_slot = None
    current_value = []

    def _flush():
        # Commit the currently open slot, if any, to the result dict.
        if current_slot:
            slots[current_slot] = tok.convert_tokens_to_string(current_value)

    for token, pred_id in zip(tokens, predictions):
        # Skip [CLS]/[SEP]/[PAD]: padding positions mispredicted as I-tags
        # would otherwise be appended to a slot value.
        if token in _SPECIAL_TOKENS:
            continue
        label = slot_label_map[pred_id]
        if label.startswith("B-"):  # beginning of a new slot
            _flush()
            current_slot = label[2:]  # slot name without the "B-" prefix
            current_value = [token]
        elif label.startswith("I-") and current_slot == label[2:]:
            current_value.append(token)  # continuation of the open slot
        else:  # "O", or an I- tag that does not match the open slot
            _flush()
            current_slot = None
            current_value = []
    _flush()  # commit a slot still open at end of sequence
    return slots
def predict_intent_and_slots(text, model, tokenizer, slot_label_map):
    """Tag *text* with the token-classification model and extract slot values.

    Args:
        text: raw utterance to tag.
        model: a ``BertForTokenClassification``-compatible model.
        tokenizer: the tokenizer matching ``model``.
        slot_label_map: id -> BIO label string.

    Returns:
        dict with the original ``utterance``, a placeholder ``intent``
        (no intent classifier is wired up yet), and the decoded ``slots``.
    """
    encoding = tokenizer(
        text,
        truncation=True,
        padding="max_length",
        max_length=128,  # same as during training
        return_tensors="pt",
    )
    input_ids = encoding["input_ids"]
    attention_mask = encoding["attention_mask"]

    with torch.no_grad():  # inference only: skip autograd bookkeeping
        outputs = model(input_ids, attention_mask=attention_mask)

    predictions = torch.argmax(outputs.logits, dim=2).squeeze().tolist()
    tokens = tokenizer.convert_ids_to_tokens(input_ids.squeeze().tolist())

    # Keep only real (attended) positions. The original slice
    # `predictions[:len(tokens)]` was a no-op: both lists span the full
    # padded length, so padding predictions flowed into slot decoding.
    mask = attention_mask.squeeze().tolist()
    tokens = [t for t, m in zip(tokens, mask) if m]
    predictions = [p for p, m in zip(predictions, mask) if m]

    slots = decode_slots(tokens, predictions, slot_label_map)
    intent = "mock_intent"  # placeholder until an intent head exists
    # "intent" was previously computed but dropped; adding the key is
    # backward-compatible for callers that only read "slots"/"utterance".
    return {"utterance": text, "intent": intent, "slots": slots}
def get_slots(text):
    """Convenience wrapper: return only the decoded slots for *text*.

    Uses the module-level ``model``, ``tokenizer`` and ``slot_label_map``.
    """
    prediction = predict_intent_and_slots(text, model, tokenizer, slot_label_map)
    return prediction["slots"]
# Smoke test — guarded so importing this module does not run inference.
if __name__ == "__main__":
    test_text = (
        "Hey, I need to request money for a project name Abha University "
        "and id is 123 and the amount is 500 riyals"
    )
    result = predict_intent_and_slots(test_text, model, tokenizer, slot_label_map)
    print("Prediction Result:")
    print(result)