AlexHung29629 committed on
Commit
8769079
·
verified ·
1 Parent(s): c1a531e

Update ultravox_processing.py

Browse files
Files changed (1) hide show
  1. ultravox_processing.py +2 -2
ultravox_processing.py CHANGED
@@ -171,14 +171,14 @@ class UltravoxProcessor(transformers.ProcessorMixin):
171
 
172
  start_idx = len(
173
  self.tokenizer.encode(
174
- t[: t.index(self.audio_placeholder)],
175
  add_special_tokens=False,
176
  )
177
  )
178
  data["audio_token_start_idx"].append(start_idx)
179
 
180
  # Replace the audio placeholder with the audio token.
181
- # e.g. "Transcribe\n<|audio|>" -> "Transcribe </s></s></s></s></s></s></s></s>"
182
  # where the number of </s> is the number of audio frames.
183
  t = t.replace(
184
  self.audio_placeholder,
 
171
 
172
  start_idx = len(
173
  self.tokenizer.encode(
174
+ t.split(self.audio_placeholder)[0],
175
  add_special_tokens=False,
176
  )
177
  )
178
  data["audio_token_start_idx"].append(start_idx)
179
 
180
  # Replace the audio placeholder with the audio token.
181
+ # e.g. "Transcribe <|audio|>" -> "Transcribe </s></s></s></s></s></s></s></s>"
182
  # where the number of </s> is the number of audio frames.
183
  t = t.replace(
184
  self.audio_placeholder,