Spaces:
				
			
			
	
			
			
		Paused
		
	
	
	
			
			
	
	
	
	
		
		
		Paused
		
	Update inference/infer.py
Browse files
- inference/infer.py +2 -7
    	
        inference/infer.py
    CHANGED
    
    | @@ -68,10 +68,8 @@ os.makedirs(stage1_output_dir, exist_ok=True) | |
| 68 | 
             
            os.makedirs(stage2_output_dir, exist_ok=True)
         | 
| 69 |  | 
| 70 | 
             
            # load tokenizer and model
         | 
| 71 | 
            -
             | 
| 72 |  | 
| 73 | 
            -
            # Check if CUDA is available
         | 
| 74 | 
            -
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         | 
| 75 | 
             
            # Now you can use `device` to move your tensors or models to the GPU (if available)
         | 
| 76 | 
             
            print(f"Using device: {device}")
         | 
| 77 |  | 
| @@ -80,10 +78,7 @@ model = AutoModelForCausalLM.from_pretrained( | |
| 80 | 
             
                stage1_model, 
         | 
| 81 | 
             
                torch_dtype=torch.bfloat16,
         | 
| 82 | 
             
                attn_implementation="flash_attention_2", # To enable flashattn, you have to install flash-attn
         | 
| 83 | 
            -
                )
         | 
| 84 | 
            -
            # to device, if gpu is available
         | 
| 85 | 
            -
            model.to(device)
         | 
| 86 | 
            -
            model.eval()
         | 
| 87 |  | 
| 88 | 
             
            codectool = CodecManipulator("xcodec", 0, 1)
         | 
| 89 | 
             
            codectool_stage2 = CodecManipulator("xcodec", 0, 8)
         | 
|  | |
| 68 | 
             
            os.makedirs(stage2_output_dir, exist_ok=True)
         | 
| 69 |  | 
| 70 | 
             
            # load tokenizer and model
         | 
| 71 | 
            +
            device = torch.device(f"cuda:{cuda_idx}" if torch.cuda.is_available() else "cpu")
         | 
| 72 |  | 
|  | |
|  | |
| 73 | 
             
            # Now you can use `device` to move your tensors or models to the GPU (if available)
         | 
| 74 | 
             
            print(f"Using device: {device}")
         | 
| 75 |  | 
|  | |
| 78 | 
             
                stage1_model, 
         | 
| 79 | 
             
                torch_dtype=torch.bfloat16,
         | 
| 80 | 
             
                attn_implementation="flash_attention_2", # To enable flashattn, you have to install flash-attn
         | 
| 81 | 
            +
                ).to(device).eval()
         | 
|  | |
|  | |
|  | |
| 82 |  | 
| 83 | 
             
            codectool = CodecManipulator("xcodec", 0, 1)
         | 
| 84 | 
             
            codectool_stage2 = CodecManipulator("xcodec", 0, 8)
         | 
