# BERT-based slot-filling (NLU) inference script.
import numpy as np
import torch
from transformers import BertForTokenClassification, BertTokenizerFast

# Tokenizer must match the base model used during fine-tuning; the
# classification weights come from a local training checkpoint.
tokenizer = BertTokenizerFast.from_pretrained("bert-base-multilingual-cased")
model = BertForTokenClassification.from_pretrained("./results/checkpoint-100")
model.eval()  # inference only: make sure dropout is disabled

# id -> BIO slot label, in the order the token-classification head was trained.
slot_label_map = {
    0: "O", 1: "B-project_id", 2: "I-project_id", 3: "B-reason", 4: "I-reason",
    5: "B-amount", 6: "I-amount", 7: "B-project_name", 8: "I-project_name",
    9: "B-status", 10: "I-status", 11: "B-riyals", 12: "I-riyals",
}
# BERT special tokens that carry no slot content and must not leak into values.
_SPECIAL_TOKENS = {"[CLS]", "[SEP]", "[PAD]"}


def decode_slots(tokens, predictions, slot_label_map, tok=None):
    """Decode BIO-tagged per-token predictions into a {slot_name: text} dict.

    Args:
        tokens: wordpiece tokens aligned one-to-one with ``predictions``.
        predictions: per-token label ids.
        slot_label_map: id -> label string ("O", "B-xxx", "I-xxx").
        tok: optional tokenizer providing ``convert_tokens_to_string``; when
            omitted, falls back to the module-level ``tokenizer`` (original
            behavior), so existing callers are unaffected.

    Returns:
        dict mapping slot name to its detokenized string value; if a slot
        occurs more than once, the last occurrence wins.
    """
    if tok is None:
        tok = tokenizer  # module-level tokenizer, kept for backward compatibility
    slots = {}
    current_slot = None
    current_value = []

    def _flush():
        # Commit the currently open slot, if any, to the result dict.
        if current_slot:
            slots[current_slot] = tok.convert_tokens_to_string(current_value)

    for token, pred_id in zip(tokens, predictions):
        # Skip [CLS]/[SEP]/[PAD]: padding positions mispredicted as I-tags
        # would otherwise be appended to a slot value.
        if token in _SPECIAL_TOKENS:
            continue
        label = slot_label_map[pred_id]
        if label.startswith("B-"):  # beginning of a new slot
            _flush()
            current_slot = label[2:]  # slot name without the "B-" prefix
            current_value = [token]
        elif label.startswith("I-") and current_slot == label[2:]:
            current_value.append(token)  # continuation of the open slot
        else:  # "O", or an I- tag that does not match the open slot
            _flush()
            current_slot = None
            current_value = []
    _flush()  # commit a slot still open at end of sequence
    return slots
def predict_intent_and_slots(text, model, tokenizer, slot_label_map):
    """Tag *text* with the token-classification model and extract slot values.

    Args:
        text: raw utterance to tag.
        model: a ``BertForTokenClassification``-compatible model.
        tokenizer: the tokenizer matching ``model``.
        slot_label_map: id -> BIO label string.

    Returns:
        dict with the original ``utterance``, a placeholder ``intent``
        (no intent classifier is wired up yet), and the decoded ``slots``.
    """
    encoding = tokenizer(
        text,
        truncation=True,
        padding="max_length",
        max_length=128,  # same as during training
        return_tensors="pt",
    )
    input_ids = encoding["input_ids"]
    attention_mask = encoding["attention_mask"]

    with torch.no_grad():  # inference only: skip autograd bookkeeping
        outputs = model(input_ids, attention_mask=attention_mask)

    predictions = torch.argmax(outputs.logits, dim=2).squeeze().tolist()
    tokens = tokenizer.convert_ids_to_tokens(input_ids.squeeze().tolist())

    # Keep only real (attended) positions. The original slice
    # `predictions[:len(tokens)]` was a no-op: both lists span the full
    # padded length, so padding predictions flowed into slot decoding.
    mask = attention_mask.squeeze().tolist()
    tokens = [t for t, m in zip(tokens, mask) if m]
    predictions = [p for p, m in zip(predictions, mask) if m]

    slots = decode_slots(tokens, predictions, slot_label_map)
    intent = "mock_intent"  # placeholder until an intent head exists
    # "intent" was previously computed but dropped; adding the key is
    # backward-compatible for callers that only read "slots"/"utterance".
    return {"utterance": text, "intent": intent, "slots": slots}
def get_slots(text):
    """Convenience wrapper: return only the decoded slots for *text*.

    Uses the module-level ``model``, ``tokenizer`` and ``slot_label_map``.
    """
    prediction = predict_intent_and_slots(text, model, tokenizer, slot_label_map)
    return prediction["slots"]
# Smoke test — guarded so importing this module does not run inference.
if __name__ == "__main__":
    test_text = (
        "Hey, I need to request money for a project name Abha University "
        "and id is 123 and the amount is 500 riyals"
    )
    result = predict_intent_and_slots(test_text, model, tokenizer, slot_label_map)
    print("Prediction Result:")
    print(result)