Spaces:
Running
Running
Commit
·
77dc150
1
Parent(s):
d3e19f4
load_8bit=True
Browse files
third_party/VideoLLaMA2/videollama2/model/__init__.py
CHANGED
|
@@ -52,7 +52,7 @@ VLLMConfigs = {
|
|
| 52 |
|
| 53 |
|
| 54 |
|
| 55 |
-
def load_pretrained_model(model_path, model_base, model_name, load_8bit=False, load_4bit=False, device_map="auto", device="cuda", use_flash_attn=False, **kwargs):
|
| 56 |
print("00000000000000000000000000: ", device, use_flash_attn)
|
| 57 |
if 'token' in kwargs:
|
| 58 |
token = kwargs['token']
|
|
@@ -76,8 +76,8 @@ def load_pretrained_model(model_path, model_base, model_name, load_8bit=False, l
|
|
| 76 |
bnb_4bit_quant_type='nf4'
|
| 77 |
)
|
| 78 |
else:
|
| 79 |
-
|
| 80 |
-
kwargs['torch_dtype'] = torch.bfloat16
|
| 81 |
|
| 82 |
if use_flash_attn:
|
| 83 |
kwargs['attn_implementation'] = 'flash_attention_2'
|
|
|
|
| 52 |
|
| 53 |
|
| 54 |
|
| 55 |
+
def load_pretrained_model(model_path, model_base, model_name, load_8bit=True, load_4bit=False, device_map="auto", device="cuda", use_flash_attn=False, **kwargs):
|
| 56 |
print("00000000000000000000000000: ", device, use_flash_attn)
|
| 57 |
if 'token' in kwargs:
|
| 58 |
token = kwargs['token']
|
|
|
|
| 76 |
bnb_4bit_quant_type='nf4'
|
| 77 |
)
|
| 78 |
else:
|
| 79 |
+
kwargs['torch_dtype'] = torch.float16
|
| 80 |
+
# kwargs['torch_dtype'] = torch.bfloat16
|
| 81 |
|
| 82 |
if use_flash_attn:
|
| 83 |
kwargs['attn_implementation'] = 'flash_attention_2'
|