Spaces:
Running
on
Zero
Running
on
Zero
liuyang
committed on
Commit
·
5a14daf
1
Parent(s):
5b655f4
Update waveform handling in WhisperTranscriber to maintain channel dimension during embedding calculations. Adjust comments for clarity on input shape requirements.
Browse files
app.py
CHANGED
|
@@ -385,8 +385,8 @@ class WhisperTranscriber:
|
|
| 385 |
speaker_embeddings = {}
|
| 386 |
try:
|
| 387 |
embedder = self._load_embedder()
|
| 388 |
-
# waveform
|
| 389 |
-
emb = embedder({"waveform": waveform
|
| 390 |
speaker_embeddings["SPEAKER_00"] = emb.squeeze().tolist()
|
| 391 |
except Exception:
|
| 392 |
pass
|
|
@@ -431,7 +431,8 @@ class WhisperTranscriber:
|
|
| 431 |
start_sample = int(float(turn.start) * sample_rate)
|
| 432 |
end_sample = int(float(turn.end) * sample_rate)
|
| 433 |
if end_sample > start_sample:
|
| 434 |
-
|
|
|
|
| 435 |
emb = embedder({"waveform": seg_wav, "sample_rate": sample_rate})
|
| 436 |
spk_to_embs[speaker].append(emb.squeeze())
|
| 437 |
# average
|
|
|
|
| 385 |
speaker_embeddings = {}
|
| 386 |
try:
|
| 387 |
embedder = self._load_embedder()
|
| 388 |
+
# Provide waveform as (channel, time)
|
| 389 |
+
emb = embedder({"waveform": waveform, "sample_rate": sample_rate})
|
| 390 |
speaker_embeddings["SPEAKER_00"] = emb.squeeze().tolist()
|
| 391 |
except Exception:
|
| 392 |
pass
|
|
|
|
| 431 |
start_sample = int(float(turn.start) * sample_rate)
|
| 432 |
end_sample = int(float(turn.end) * sample_rate)
|
| 433 |
if end_sample > start_sample:
|
| 434 |
+
# Keep channel dimension: (channel, time)
|
| 435 |
+
seg_wav = waveform[:, start_sample:end_sample].contiguous()
|
| 436 |
emb = embedder({"waveform": seg_wav, "sample_rate": sample_rate})
|
| 437 |
spk_to_embs[speaker].append(emb.squeeze())
|
| 438 |
# average
|