Angelakeke committed on
Commit
9345a63
·
verified ·
1 Parent(s): 5606661

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +94 -4
README.md CHANGED
@@ -26,13 +26,103 @@ This model also can be used to extract **Abnormality, Non-Abnormality, Anatomy,
26
  in medical radiology reports.
27
 
28
  ## Usage
29
- ```python
 
 
 
 
30
  from transformers import AutoTokenizer, AutoModelForTokenClassification
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
- ner_labels = ['B-ABNORMALITY', 'I-ABNORMALITY', 'B-NON-ABNORMALITY', 'I-NON-ABNORMALITY', 'B-DISEASE', 'I-DISEASE', 'B-NON-DISEASE', 'I-NON-DISEASE', 'B-ANATOMY', 'I-ANATOMY', 'O']
 
33
 
34
- tokenizer = AutoTokenizer.from_pretrained("Angelakeke/RaTE-NER-Deberta")
35
- model = AutoModelForTokenClassification.from_pretrained("Angelakeke/RaTE-NER-Deberta")
 
 
 
 
 
 
 
 
 
 
36
 
37
  ```
38
 
 
26
  in medical radiology reports.
27
 
28
  ## Usage
29
+
30
+ <details>
31
+ <summary> Click to expand the usage of this model. </summary>
32
+ <pre><code>
33
+
34
  from transformers import AutoTokenizer, AutoModelForTokenClassification
35
+ import torch
36
def post_process(tokenized_text, predicted_entities, tokenizer):
    """Decode BIO token labels into ``(entity_text, entity_type)`` pairs.

    Tokens and labels are paired by index; labels beyond the last token
    (e.g. predictions for padding positions) are ignored.

    Decoding rules:
      * ``B-<TYPE>`` closes any open entity and opens a new one of ``<TYPE>``.
      * ``I-<TYPE>`` extends the open entity; the entity keeps the type given
        by its opening ``B-`` tag. An ``I-`` tag with no open entity is
        ignored.
      * Any other label (e.g. ``"O"``) closes the open entity.
      * The special tokens ``"[CLS]"`` and ``"[SEP]"`` are never part of an
        entity and close the open one, whatever label was predicted for them.

    Args:
        tokenized_text: Sliceable sequence of token strings.
        predicted_entities: Sequence of label strings aligned with
            ``tokenized_text``.
        tokenizer: Object exposing ``convert_tokens_to_string(tokens)``,
            used to turn each token span back into text.

    Returns:
        List of ``(entity_text, entity_type)`` tuples in order of appearance.
    """
    special_tokens = ("[CLS]", "[SEP]")
    entity_spans = []
    start = end = entity_type = None

    for i, (token, label) in enumerate(zip(tokenized_text, predicted_entities)):
        is_special = token in special_tokens

        if not is_special and label.startswith("I-") and start is not None:
            # Continuation of the entity that is currently open.
            end = i
            continue

        # Every other situation terminates the open entity. Flushing here,
        # instead of peeking at the next label, keeps the decoder independent
        # of whatever was predicted for [SEP] or padding positions.
        if start is not None:
            entity_spans.append((start, end, entity_type))
            start = end = entity_type = None

        if not is_special and label.startswith("B-"):
            start = end = i
            entity_type = label[2:]

    # An entity may still be open if the labels ran out before a boundary.
    if start is not None:
        entity_spans.append((start, end, entity_type))

    return [
        (tokenizer.convert_tokens_to_string(tokenized_text[first:last + 1]), kind)
        for first, last, kind in entity_spans
    ]
79
+
80
def run_ner(texts, idx2label, tokenizer, model, device):
    """Run the token-classification model on ``texts`` and collect entities.

    Args:
        texts: A list of input strings. A single string is also accepted and
            treated as a one-element batch.
        idx2label: Mapping from predicted class index to label string.
        tokenizer: Tokenizer called with ``max_length=512``, ``padding=True``,
            ``truncation=True`` and ``return_tensors="pt"``; it must also
            provide ``pad_token_id``, ``convert_ids_to_tokens`` and whatever
            ``post_process`` needs.
        model: Callable taking the tokenizer output as keyword arguments and
            returning an object with a ``logits`` tensor; the argmax is taken
            over dimension 2.
        device: Device the tokenized batch is moved to before inference.

    Returns:
        A single flat list with the ``post_process`` results of every text,
        concatenated in input order.
    """
    if isinstance(texts, str):
        # Iterating a bare string would treat every character as a text.
        texts = [texts]
    if not texts:
        return []

    inputs = tokenizer(
        texts,
        max_length=512,
        padding=True,
        truncation=True,
        return_tensors="pt",
    ).to(device)

    with torch.no_grad():
        outputs = model(**inputs)
    predicted_ids = torch.argmax(outputs.logits, dim=2)

    save_pairs = []
    for input_ids, label_ids in zip(inputs["input_ids"], predicted_ids):
        # Select real tokens with the mask itself rather than slicing off a
        # prefix, so tokens and labels stay aligned whichever side the
        # tokenizer pads on.
        keep = input_ids != tokenizer.pad_token_id
        tokenized_text = tokenizer.convert_ids_to_tokens(input_ids[keep].tolist())
        predicted_entities = [idx2label[label] for label in label_ids[keep].tolist()]
        save_pairs.extend(post_process(tokenized_text, predicted_entities, tokenizer))
    return save_pairs
102
+
103
# Label inventory: a B-/I- pair per entity category, plus the outside tag.
# NOTE(review): the list order is assumed to match the model's output class
# indices; confirm against the checkpoint's config.
ner_labels = [
    'B-ABNORMALITY', 'I-ABNORMALITY',
    'B-NON-ABNORMALITY', 'I-NON-ABNORMALITY',
    'B-DISEASE', 'I-DISEASE',
    'B-NON-DISEASE', 'I-NON-DISEASE',
    'B-ANATOMY', 'I-ANATOMY',
    'O',
]
idx2label = dict(enumerate(ner_labels))

# Load the tokenizer and the token-classification model from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained('Angelakeke/RaTE-NER-Deberta')
model = AutoModelForTokenClassification.from_pretrained('Angelakeke/RaTE-NER-Deberta')

# Use the GPU when one is available, and switch the model to inference mode.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

# Example: extract the entities of one short report sentence.
texts = ['Status median sternotomy.']
save_pair = run_ner(texts, idx2label, tokenizer, model, device)
120
+
121
+ </code></pre>
122
+
123
+ </details>
124
+
125
+ ```python
126
 
127
  ```
128