Update inference/infer.py
inference/infer.py (CHANGED, +2 -7)
@@ -68,10 +68,8 @@ os.makedirs(stage1_output_dir, exist_ok=True)
 os.makedirs(stage2_output_dir, exist_ok=True)
 
 # load tokenizer and model
-
+device = torch.device(f"cuda:{cuda_idx}" if torch.cuda.is_available() else "cpu")
 
-# Check if CUDA is available
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # Now you can use `device` to move your tensors or models to the GPU (if available)
 print(f"Using device: {device}")
 
@@ -80,10 +78,7 @@ model = AutoModelForCausalLM.from_pretrained(
     stage1_model,
     torch_dtype=torch.bfloat16,
     attn_implementation="flash_attention_2", # To enable flashattn, you have to install flash-attn
-)
-# to device, if gpu is available
-model.to(device)
-model.eval()
+).to(device).eval()
 
 codectool = CodecManipulator("xcodec", 0, 1)
 codectool_stage2 = CodecManipulator("xcodec", 0, 8)
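
For readers who want the change in context rather than as a diff, here is a minimal, self-contained sketch of the resulting pattern. The argparse plumbing is hypothetical: the real infer.py defines its own CLI, so --cuda_idx and --stage1_model here only stand in for wherever the script actually gets those values.

import argparse

import torch
from transformers import AutoModelForCausalLM

# Hypothetical CLI plumbing; infer.py has its own argument definitions.
parser = argparse.ArgumentParser()
parser.add_argument("--cuda_idx", type=int, default=0)
parser.add_argument("--stage1_model", type=str, required=True)
args = parser.parse_args()

# Pin a specific GPU instead of the bare "cuda" default device.
device = torch.device(f"cuda:{args.cuda_idx}" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# from_pretrained(), .to(), and .eval() each return the model object,
# so the three calls collapse into one chained expression, as in the diff.
model = AutoModelForCausalLM.from_pretrained(
    args.stage1_model,
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",  # requires the flash-attn package
).to(device).eval()

The chained form behaves the same as the removed three-statement version, since .to() and .eval() both return the model. The substantive fix in this commit is the device string: a bare "cuda" always resolves to the current default GPU, while f"cuda:{cuda_idx}" honors the index the caller passed in, which matters on multi-GPU machines.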