IDEA-FinAI
/

chartmoe

Image-Text-to-Text

feature-extraction

Model card Files and versions Community

Coobiw committed on Feb 13

Commit

8040fe5

·

verified ·

1 Parent(s): dafc80f

add flash-attn support

Files changed (1) hide show

configuration_chartmoe.py +5 -0

configuration_chartmoe.py CHANGED Viewed

@@ -53,6 +53,7 @@ class ChartMoEConfig(PretrainedConfig):
         rope_scaling=None,
         num_experts=4,
         num_selected=2,
         **kwargs,
     ):
         self.num_experts = num_experts
@@ -77,6 +78,10 @@ class ChartMoEConfig(PretrainedConfig):
         self.rope_theta = rope_theta
         self.rope_scaling = rope_scaling
         self._rope_scaling_validation()
         super().__init__(
             pad_token_id=pad_token_id,
             bos_token_id=bos_token_id,

         rope_scaling=None,
         num_experts=4,
         num_selected=2,
+        attn_implementation=None,
         **kwargs,
     ):
         self.num_experts = num_experts
         self.rope_theta = rope_theta
         self.rope_scaling = rope_scaling
         self._rope_scaling_validation()
+        self.attn_implementation = attn_implementation
+        if self.attn_implementation is None:
+            self.attn_implementation = "eager"
         super().__init__(
             pad_token_id=pad_token_id,
             bos_token_id=bos_token_id,