Update infer/utils_infer.py
infer/utils_infer.py CHANGED (+6 -6)
@@ -116,7 +116,7 @@ def load_vocoder(vocoder_name="vocos", is_local=False, local_path="", device=dev
         vocoder.load_state_dict(state_dict)

         # Convert vocoder to bfloat16 if using a compatible device
-        vocoder = vocoder.eval().to(device).to(torch.
+        vocoder = vocoder.eval().to(device).to(torch.float32)

     elif vocoder_name == "bigvgan":
         try:
@@ -132,7 +132,7 @@ def load_vocoder(vocoder_name="vocos", is_local=False, local_path="", device=dev
             vocoder = bigvgan.BigVGAN.from_pretrained(local_path, use_cuda_kernel=False)

         vocoder.remove_weight_norm()
-        vocoder = vocoder.eval().to(device).to(torch.
+        vocoder = vocoder.eval().to(device).to(torch.float32)  # Convert to bfloat16

     return vocoder

@@ -147,7 +147,7 @@ def initialize_asr_pipeline(device: str = device, dtype=None):
     if "cuda" in device and torch.cuda.get_device_properties(device).major >= 6:
         dtype = torch.float16
     elif "cpu" in device:
-        dtype = torch.
+        dtype = torch.float32
     else:
         dtype = torch.float32

@@ -185,7 +185,7 @@ def load_checkpoint(model, ckpt_path, device: str, dtype=None, use_ema=True):
     if "cuda" in device and torch.cuda.get_device_properties(device).major >= 6:
         dtype = torch.float16
     elif "cpu" in device:
-        dtype = torch.
+        dtype = torch.float32
     else:
         dtype = torch.float32

@@ -265,7 +265,7 @@ def load_model(
         vocab_char_map=vocab_char_map,
     ).to(device)

-    dtype = torch.
+    dtype = torch.float32 if mel_spec_type == "bigvgan" else None
     model = load_checkpoint(model, ckpt_path, device, dtype=dtype, use_ema=use_ema)

     return model
@@ -471,7 +471,7 @@ def infer_batch_process(
                 sway_sampling_coef=sway_sampling_coef,
             )

-            generated = generated.to(torch.
+            generated = generated.to(torch.float32)
             generated = generated[:, ref_audio_len:, :]
             generated_mel_spec = generated.permute(0, 2, 1)
             if mel_spec_type == "vocos":