Update README.md
Browse files
README.md
CHANGED
@@ -26,13 +26,103 @@ This model also can be used to extract **Abnormality, Non-Abnormality, Anatomy,
|
|
26 |
in medical radiology reports.
|
27 |
|
28 |
## Usage
|
29 |
-
|
|
|
|
|
|
|
|
|
30 |
from transformers import AutoTokenizer, AutoModelForTokenClassification
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
|
32 |
-
|
|
|
33 |
|
34 |
-
|
35 |
-
model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
|
37 |
```
|
38 |
|
|
|
26 |
in medical radiology reports.
|
27 |
|
28 |
## Usage
|
29 |
+
|
30 |
+
<details>
|
31 |
+
<summary> Click to expand a usage example for this model. </summary>
|
32 |
+
<pre><code>
|
33 |
+
|
34 |
from transformers import AutoTokenizer, AutoModelForTokenClassification
|
35 |
+
import torch
|
36 |
+
def post_process(tokenized_text, predicted_entities, tokenizer):
    """Merge per-token BIO labels into ``(entity_text, entity_type)`` pairs.

    A span opens on a ``B-<TYPE>`` label and is extended by every directly
    following ``I-`` label. It is closed by the next ``B-`` label, an ``O``
    label, a ``[CLS]``/``[SEP]`` token, or the end of the sequence. An ``I-``
    label with no open span is ignored.

    Args:
        tokenized_text: Tokens of one un-padded sequence; may include
            ``[CLS]`` / ``[SEP]`` markers.
        predicted_entities: One label string per token (``"O"``,
            ``"B-<TYPE>"`` or ``"I-<TYPE>"``). Labels beyond the length of
            ``tokenized_text`` (e.g. for padding positions) are ignored.
        tokenizer: Object providing ``convert_tokens_to_string(tokens)``.

    Returns:
        A list of ``(entity_text, entity_type)`` tuples in order of
        appearance.
    """
    special_tokens = ("[CLS]", "[SEP]")
    entity_spans = []
    start = end = entity_type = None

    # zip() stops at the shorter input, so surplus padding labels are dropped.
    for i, (token, label) in enumerate(zip(tokenized_text, predicted_entities)):
        is_special = token in special_tokens

        # An I- label on a regular token keeps the currently open span going.
        if start is not None and not is_special and label.startswith("I-"):
            end = i
            continue

        # Anything else terminates the open span. Closing here, rather than
        # peeking at the next label, means no state can go stale and the
        # last token of the sequence is treated like any other.
        if start is not None:
            entity_spans.append((start, end, entity_type))
            start = end = entity_type = None

        if not is_special and label.startswith("B-"):
            start = end = i
            entity_type = label[2:]

    # Flush a span that runs up to the very end of the sequence.
    if start is not None:
        entity_spans.append((start, end, entity_type))

    return [
        (tokenizer.convert_tokens_to_string(tokenized_text[first:last + 1]), kind)
        for first, last, kind in entity_spans
    ]
|
79 |
+
|
80 |
+
def run_ner(texts, idx2label, tokenizer, model, device):
    """Run token classification on ``texts`` and collect the entities found.

    Args:
        texts: A list of input strings; a single string is treated as a
            one-element list.
        idx2label: Mapping from predicted class index to label string.
        tokenizer: Tokenizer called on ``texts``; also used for
            ``pad_token_id``, ``convert_ids_to_tokens`` and, inside
            ``post_process``, ``convert_tokens_to_string``.
        model: Model whose output exposes ``logits``; it is called with the
            tokenizer output under ``torch.no_grad()``.
        device: Device the tokenized inputs are moved to.

    Returns:
        One flat list of ``(entity_text, entity_type)`` tuples covering all
        texts, in input order.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(texts, str):
        texts = [texts]
    if not texts:
        return []

    inputs = tokenizer(texts,
                       max_length=512,
                       padding=True,
                       truncation=True,
                       return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    # Highest-scoring class index for every token position.
    predicted_labels = torch.argmax(outputs.logits, dim=2).tolist()

    save_pairs = []
    for input_ids, label_ids in zip(inputs["input_ids"], predicted_labels):
        predicted_entities = [idx2label[label] for label in label_ids]
        # Count the non-padding ids and keep that many leading positions.
        # NOTE(review): this assumes padding is appended on the right.
        non_pad_length = (input_ids != tokenizer.pad_token_id).sum().item()
        tokenized_text = tokenizer.convert_ids_to_tokens(
            input_ids[:non_pad_length].tolist()
        )
        save_pairs.extend(
            post_process(tokenized_text, predicted_entities, tokenizer)
        )
    return save_pairs
|
102 |
+
|
103 |
+
# Label strings indexed by class id.
# NOTE(review): the order presumably has to match the label ids the
# checkpoint was trained with - confirm against the model config.
ner_labels = [
    'B-ABNORMALITY', 'I-ABNORMALITY',
    'B-NON-ABNORMALITY', 'I-NON-ABNORMALITY',
    'B-DISEASE', 'I-DISEASE',
    'B-NON-DISEASE', 'I-NON-DISEASE',
    'B-ANATOMY', 'I-ANATOMY',
    'O',
]
idx2label = dict(enumerate(ner_labels))

# Load the tokenizer and the token-classification model from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained('Angelakeke/RaTE-NER-Deberta')
model = AutoModelForTokenClassification.from_pretrained('Angelakeke/RaTE-NER-Deberta')

# Use the GPU when one is available, and switch the model to evaluation mode.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

# Example: extract entities from a single report sentence.
texts = ['Status median sternotomy.']
save_pair = run_ner(texts, idx2label, tokenizer, model, device)
|
120 |
+
|
121 |
+
</code></pre>
|
122 |
+
|
123 |
+
</details>
|
124 |
+
|
125 |
+
```python
|
126 |
|
127 |
```
|
128 |
|