Crystalcareai
/

Quiet-Mistral

@@ -1098,18 +1098,27 @@ class QuietForCausalLM(QuietPreTrainedModel):
         self.remove_negative_rewards = True
         self.post_init()
     def _generate_thoughts(self, hidden_states, max_length):
         batch_size = hidden_states.size(0)
         thought_ids = torch.zeros((batch_size, self.config.max_thoughts, max_length), dtype=torch.long, device=hidden_states.device)
         thought_embeddings = []
-        # Create an instance of QuietForCausalLM using the current model's configuration
-        causal_lm_model = QuietForCausalLM(self.config)
-        causal_lm_model.eval()  # Set the model to evaluation mode
         for i in range(self.config.max_thoughts):
             thought_input_ids = torch.zeros((batch_size, 1), dtype=torch.long, device=hidden_states.device)
-            thought_outputs = causal_lm_model.generate(
                 input_ids=thought_input_ids,
                 max_length=max_length,
                 do_sample=True,
@@ -1124,21 +1133,6 @@ class QuietForCausalLM(QuietPreTrainedModel):
         thought_embeddings = torch.stack(thought_embeddings, dim=1)
         return thought_ids, thought_embeddings
-    def calculate_policy_loss(self, thoughts, rewards):
-        thought_log_probs = []
-        for thought in thoughts:
-            thought_log_prob = self.lm_head(thought).log_softmax(dim=-1)
-            thought_log_probs.append(thought_log_prob)
-        thought_log_probs = torch.stack(thought_log_probs, dim=1)  # (batch_size, num_thoughts, seq_length, vocab_size)
-        thought_probs = torch.exp(thought_log_probs)
-        policy_loss = -torch.mean(thought_log_probs * rewards.unsqueeze(-1).unsqueeze(-1))
-        return policy_loss
     def get_input_embeddings(self):
         return self.model.embed_tokens
@@ -1214,13 +1208,11 @@ class QuietForCausalLM(QuietPreTrainedModel):
             use_cache=use_cache,
             output_attentions=output_attentions,
             output_hidden_states=output_hidden_states,
-            return_dict=True,  # Set return_dict=True
         )
         hidden_states = outputs.last_hidden_state
         logits = self.lm_head(hidden_states)
         thought_ids, thought_embeddings = self._generate_thoughts(hidden_states, max_length=self.config.thought_length)
         thought_hidden_states = self.model(inputs_embeds=thought_embeddings).last_hidden_state
@@ -1230,7 +1222,7 @@ class QuietForCausalLM(QuietPreTrainedModel):
         # Mix base and thought logits
         mixed_logits = logits.unsqueeze(1) + self.mixing_head(thought_logits)
         mixed_logits = mixed_logits.view(-1, mixed_logits.size(-1))
         loss = None
         if labels is not None:
             # Shift so that tokens < n predict n

         self.remove_negative_rewards = True
         self.post_init()
+    def calculate_policy_loss(self, thoughts, rewards):
+        thought_log_probs = []
+        for thought in thoughts:
+            thought_log_prob = self.lm_head(thought).log_softmax(dim=-1)
+            thought_log_probs.append(thought_log_prob)
+        thought_log_probs = torch.stack(thought_log_probs, dim=1)  # (batch_size, num_thoughts, seq_length, vocab_size)
+        thought_probs = torch.exp(thought_log_probs)
+        policy_loss = -torch.mean(thought_log_probs * rewards.unsqueeze(-1).unsqueeze(-1))
+        return policy_loss
     def _generate_thoughts(self, hidden_states, max_length):
         batch_size = hidden_states.size(0)
         thought_ids = torch.zeros((batch_size, self.config.max_thoughts, max_length), dtype=torch.long, device=hidden_states.device)
         thought_embeddings = []
         for i in range(self.config.max_thoughts):
             thought_input_ids = torch.zeros((batch_size, 1), dtype=torch.long, device=hidden_states.device)
+            thought_outputs = self.generate(
                 input_ids=thought_input_ids,
                 max_length=max_length,
                 do_sample=True,
         thought_embeddings = torch.stack(thought_embeddings, dim=1)
         return thought_ids, thought_embeddings
     def get_input_embeddings(self):
         return self.model.embed_tokens
             use_cache=use_cache,
             output_attentions=output_attentions,
             output_hidden_states=output_hidden_states,
+            return_dict=True,
         )
         hidden_states = outputs.last_hidden_state
         logits = self.lm_head(hidden_states)
         thought_ids, thought_embeddings = self._generate_thoughts(hidden_states, max_length=self.config.thought_length)
         thought_hidden_states = self.model(inputs_embeds=thought_embeddings).last_hidden_state
         # Mix base and thought logits
         mixed_logits = logits.unsqueeze(1) + self.mixing_head(thought_logits)
         mixed_logits = mixed_logits.view(-1, mixed_logits.size(-1))
         loss = None
         if labels is not None:
             # Shift so that tokens < n predict n