Update model & tokenizer load snippet
Browse files
README.md
CHANGED
@@ -41,6 +41,66 @@ We recommend using the latest version of HF Transformers, or any `transformers>=
|
|
41 |
Below we provide a code snippet demonstrating how to load the tokenizer and model and score a candidate instruction. We strongly recommend formatting the instruction input as shown to maintain consistency with the format of the data used during training of MDCureRM. As the model outputs values normalized to the 0-1 range, we scale the output scores up to the 1-5 range for more interpretable results. The relative weighting of the fine-grained rewards may be configured as desired to obtain the final score; we reproduce the weights used in our implementation in `reward_weights` below.
|
42 |
|
43 |
```python
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
# Fetch the MDCureRM checkpoint, then move it onto the CUDA device.
model = AutoModel.from_pretrained("yale-nlp/MDCureRM")
model = model.to(torch.device("cuda"))

# Load the matching fast tokenizer and reuse its EOS token for padding.
tokenizer = AutoTokenizer.from_pretrained("yale-nlp/MDCureRM", use_fast=True)
tokenizer.pad_token = tokenizer.eos_token
|
|
|
41 |
Below we provide a code snippet demonstrating how to load the tokenizer and model and score a candidate instruction. We strongly recommend formatting the instruction input as shown to maintain consistency with the format of the data used during training of MDCureRM. As the model outputs values normalized to the 0-1 range, we scale the output scores up to the 1-5 range for more interpretable results. The relative weighting of the fine-grained rewards may be configured as desired to obtain the final score; we reproduce the weights used in our implementation in `reward_weights` below.
|
42 |
|
43 |
```python
|
44 |
+
from transformers import AutoTokenizer, AutoModel, LlamaConfig, PreTrainedModel, LlamaForSequenceClassification
|
45 |
+
import torch.nn as nn
|
46 |
+
import torch
|
47 |
+
|
48 |
+
# Log in to Hugging Face to access the Llama model
|
49 |
+
from huggingface_hub import login
|
50 |
+
login("") # HF token
|
51 |
+
|
52 |
+
class RewardModelConfig(LlamaConfig):
    """Llama configuration extended with the reward-model settings.

    Args:
        reward_dim: Output size of the regression head built by the model
            that consumes this config.
        base_model_name: Identifier passed to ``from_pretrained`` when the
            backbone is created.
        **kwargs: Forwarded unchanged to ``LlamaConfig``.
    """

    model_type = "RewardModel"

    def __init__(self, reward_dim=None, base_model_name=None, **kwargs):
        super().__init__(**kwargs)

        # Extra fields stored on the config next to the regular Llama fields.
        self.reward_dim = reward_dim
        self.base_model_name = base_model_name
|
60 |
+
|
61 |
+
class RewardModel(PreTrainedModel):
    """Frozen Llama backbone topped with a linear regression head.

    The backbone is created from ``config.base_model_name`` and all of its
    parameters have ``requires_grad`` disabled; the regression head maps the
    backbone's hidden size to ``config.reward_dim`` reward values.
    """

    config_class = RewardModelConfig

    def __init__(self, config):
        super().__init__(config)

        # use .base_model to keep only the backbone (removes the head on top)
        self.BASE_MODEL = self.create_base_model().base_model

        # regression head for reward prediction
        hidden_size = self.BASE_MODEL.config.hidden_size
        self.regression_head = nn.Linear(hidden_size, config.reward_dim)

    def create_base_model(self):
        """Load the backbone named in the config and freeze its parameters.

        Returns:
            The ``LlamaForSequenceClassification`` instance, configured to pad
            with its EOS token id and to return hidden states.
        """
        backbone_name = self.config.base_model_name

        # use sequence classification model for consistency with
        # https://huggingface.co/sfairXC/FsfairX-LLaMA3-RM-v0.1
        backbone = LlamaForSequenceClassification.from_pretrained(
            backbone_name,
            config=LlamaConfig.from_pretrained(backbone_name),
        )
        backbone.config.pad_token_id = backbone.config.eos_token_id
        # forward() reads ``outputs.hidden_states``, so they must be returned.
        backbone.config.output_hidden_states = True

        # Freeze every backbone parameter.
        backbone.requires_grad_(False)

        return backbone

    def forward(self, input_ids, attention_mask=None, rewards=None, **kwargs):
        """Predict reward values for a batch of token sequences.

        Args:
            input_ids: Token ids passed to the backbone.
            attention_mask: Optional attention mask passed to the backbone.
            rewards: Accepted for interface compatibility; not used here.
            **kwargs: Forwarded unchanged to the backbone.

        Returns:
            The regression-head output computed from the hidden state at the
            final sequence position of each batch element.
        """
        backbone_out = self.BASE_MODEL(input_ids, attention_mask=attention_mask, **kwargs)

        # Hidden states of the last layer.
        final_layer = backbone_out.hidden_states[-1]

        # NOTE(review): index -1 is the final *position*, whatever the
        # attention mask says - with right-padded batches this would be a
        # padding token; confirm inputs are unpadded or left-padded.
        return self.regression_head(final_layer[:, -1, :])

    def prepare_inputs_for_generation(self, *args, **kwargs):
        """Delegate to the backbone's ``prepare_inputs_for_generation``."""
        return self.BASE_MODEL.prepare_inputs_for_generation(*args, **kwargs)
|
103 |
+
|
104 |
# Load the checkpoint through the RewardModel class defined above. AutoModel has
# no mapping for the custom "RewardModel" model type, so it cannot resolve this
# architecture; calling from_pretrained on the class itself uses RewardModelConfig
# (via config_class) and restores both the backbone and the regression head.
model = RewardModel.from_pretrained("yale-nlp/MDCureRM").to(torch.device("cuda"))

# Load the matching fast tokenizer and reuse its EOS token for padding.
tokenizer = AutoTokenizer.from_pretrained("yale-nlp/MDCureRM", use_fast=True)
tokenizer.pad_token = tokenizer.eos_token
|