pybeebee committed
Commit 2c1cff5 · verified · 1 Parent(s): 89f6471

Update model & tokenizer load snippet

Files changed (1)
  1. README.md +60 -0
README.md CHANGED
@@ -41,6 +41,66 @@ We recommend using the latest version of HF Transformers, or any `transformers>=
Below we provide a code snippet demonstrating how to load the tokenizer and model and score a candidate instruction. We strongly recommend formatting the instruction input as shown, to maintain consistency with the format of the data used during training of MDCureRM. As the model outputs values normalized to the 0-1 range, we scale the output scores up to the 1-5 range for more interpretable results. The relative weighting of fine-grained rewards may be configured as desired to obtain the final score; we reproduce the weights used in our implementation in `reward_weights` below.

```python
+ from transformers import AutoTokenizer, AutoModel, LlamaConfig, PreTrainedModel, LlamaForSequenceClassification
+ import torch.nn as nn
+ import torch
+
+ # Login to HF to access LLAMA model
+ from huggingface_hub import login
+ login("") # HF token
+
+ class RewardModelConfig(LlamaConfig):
+     model_type = "RewardModel"
+
+     def __init__(self, reward_dim=None, base_model_name=None, **kwargs):
+         super().__init__(**kwargs)
+
+         self.reward_dim = reward_dim
+         self.base_model_name = base_model_name
+
+ class RewardModel(PreTrainedModel):
+     config_class = RewardModelConfig
+
+     def create_base_model(self):
+
+         # use sequence classification model for consistency with https://huggingface.co/sfairXC/FsfairX-LLaMA3-RM-v0.1
+         BACKBONE_MODEL = LlamaForSequenceClassification.from_pretrained(
+             self.config.base_model_name,
+             config=LlamaConfig.from_pretrained(self.config.base_model_name),
+         )
+         BACKBONE_MODEL.config.pad_token_id = BACKBONE_MODEL.config.eos_token_id
+         BACKBONE_MODEL.config.output_hidden_states = True
+
+         for param in BACKBONE_MODEL.parameters():
+             param.requires_grad = False
+
+         return BACKBONE_MODEL
+
+     def __init__(self, config):
+         super(RewardModel, self).__init__(config)
+
+         # use .base_model to remove lm_head
+         self.BASE_MODEL = self.create_base_model().base_model
+
+         # regression head for reward prediction
+         self.regression_head = nn.Linear(self.BASE_MODEL.config.hidden_size, config.reward_dim)
+
+     def forward(self, input_ids, attention_mask=None, rewards=None, **kwargs):
+
+         # forward pass through the base model
+         outputs = self.BASE_MODEL(input_ids, attention_mask=attention_mask, **kwargs)
+
+         hidden_states = outputs.hidden_states[-1]
+
+         # access hidden state corresponding to the last token in each sequence across the batch
+         last_token_hidden_state = hidden_states[:, -1, :]
+         reward_predictions = self.regression_head(last_token_hidden_state)
+
+         return reward_predictions
+
+     def prepare_inputs_for_generation(self, *args, **kwargs):
+         return self.BASE_MODEL.prepare_inputs_for_generation(*args, **kwargs)
+
model = AutoModel.from_pretrained("yale-nlp/MDCureRM").to(torch.device("cuda"))
tokenizer = AutoTokenizer.from_pretrained("yale-nlp/MDCureRM", use_fast=True)
tokenizer.pad_token = tokenizer.eos_token
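
For reference, below is a minimal sketch (not part of the commit) of how the loaded `model` and `tokenizer` might be used to score a candidate instruction. The prompt template, the number of fine-grained reward dimensions, and the `reward_weights` values are documented in the full README; the `score_instruction` helper, the placeholder prompt, and the equal weights shown here are hypothetical stand-ins.

```python
import torch

def score_instruction(prompt_text, reward_weights):
    # hypothetical helper: tokenize the formatted instruction input
    inputs = tokenizer(prompt_text, return_tensors="pt", truncation=True).to(model.device)
    with torch.no_grad():
        # RewardModel.forward returns one normalized (0-1) score per fine-grained criterion
        raw_scores = model(inputs["input_ids"], attention_mask=inputs["attention_mask"])
    # rescale scores from the 0-1 range to the more interpretable 1-5 range
    scores = raw_scores.squeeze(0) * 4.0 + 1.0
    # combine fine-grained rewards into a final score; a weighted average is
    # assumed here -- substitute the weighting given in the README as desired
    weights = torch.tensor(reward_weights, device=scores.device)
    return (scores * weights).sum() / weights.sum()

# hypothetical equal weights; replace with the `reward_weights` from the README
final_score = score_instruction(
    "<instruction input, formatted as shown in the README>",
    [1.0] * model.config.reward_dim,
)
print(f"final score (1-5 scale): {final_score.item():.2f}")
```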