Spaces:
				
			
			
	
			
			
		Sleeping
		
	
	
	
			
			
	
	
	
	
		
		
		Sleeping
		
	Update app.py
Browse files
    	
        app.py
    CHANGED
    
    | @@ -49,7 +49,7 @@ device = ( | |
| 49 | 
             
            if device == "cuda":
         | 
| 50 | 
             
                dtype = torch.float16
         | 
| 51 | 
             
            elif device == "cpu":
         | 
| 52 | 
            -
                dtype = torch.bfloat16
         | 
| 53 | 
             
            else:
         | 
| 54 | 
             
                dtype = torch.float32
         | 
| 55 |  | 
| @@ -61,7 +61,7 @@ print(f"Using device: {device}, dtype: {dtype}") | |
| 61 | 
             
            pipe = pipeline(
         | 
| 62 | 
             
                "automatic-speech-recognition",
         | 
| 63 | 
             
                model="openai/whisper-large-v3-turbo",
         | 
| 64 | 
            -
                torch_dtype=torch.bfloat16,
         | 
| 65 | 
             
                device=device,
         | 
| 66 | 
             
            )
         | 
| 67 | 
             
            #vocos = Vocos.from_pretrained("charactr/vocos-mel-24khz")
         | 
| @@ -242,7 +242,7 @@ def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence, | |
| 242 | 
             
                    generated_waves.append(generated_wave)
         | 
| 243 | 
             
             #       spectrograms.append(generated_mel_spec[0].cpu().numpy())
         | 
| 244 | 
             
            # Ensure generated_mel_spec is in a compatible dtype (e.g., float32) before passing it to numpy
         | 
| 245 | 
            -
                    generated_mel_spec = generated_mel_spec.to(dtype=torch.float32)  # Convert to float32 if it's in bfloat16
         | 
| 246 |  | 
| 247 | 
             
            # Proceed with the rest of your operations
         | 
| 248 | 
             
                    spectrograms.append(generated_mel_spec[0].cpu().numpy())
         | 
|  | |
| 49 | 
             
            if device == "cuda":
         | 
| 50 | 
             
                dtype = torch.float16
         | 
| 51 | 
             
            elif device == "cpu":
         | 
| 52 | 
            +
                dtype = torch.float32
         | 
| 53 | 
             
            else:
         | 
| 54 | 
             
                dtype = torch.float32
         | 
| 55 |  | 
|  | |
| 61 | 
             
            pipe = pipeline(
         | 
| 62 | 
             
                "automatic-speech-recognition",
         | 
| 63 | 
             
                model="openai/whisper-large-v3-turbo",
         | 
| 64 | 
            +
                torch_dtype=torch.float32,
         | 
| 65 | 
             
                device=device,
         | 
| 66 | 
             
            )
         | 
| 67 | 
             
            #vocos = Vocos.from_pretrained("charactr/vocos-mel-24khz")
         | 
|  | |
| 242 | 
             
                    generated_waves.append(generated_wave)
         | 
| 243 | 
             
             #       spectrograms.append(generated_mel_spec[0].cpu().numpy())
         | 
| 244 | 
             
            # Ensure generated_mel_spec is in a compatible dtype (e.g., float32) before passing it to numpy
         | 
| 245 | 
            +
            #        generated_mel_spec = generated_mel_spec.to(dtype=torch.float32)  # Convert to float32 if it's in bfloat16
         | 
| 246 |  | 
| 247 | 
             
            # Proceed with the rest of your operations
         | 
| 248 | 
             
                    spectrograms.append(generated_mel_spec[0].cpu().numpy())
         |