Update onnx/builder.py
onnx/builder.py  +2 -2
@@ -118,8 +118,8 @@ def build_vision(args):
 def build_speech(args):
     # Speech file:
     prompt = f"{user_prompt}<|audio_1|>\n<|audio_2|>\nWhat are the stories that these audios come from?{prompt_suffix}{assistant_prompt}"
-    audio1 = soundfile.read(os.path.join(args.input, "examples", "
-    audio2 = soundfile.read(os.path.join(args.input, "examples", "
+    audio1 = soundfile.read(os.path.join(args.input, "examples", "what_is_the_traffic_sign_in_the_image.wav"))
+    audio2 = soundfile.read(os.path.join(args.input, "examples", "what_is_shown_in_this_image.wav"))
     inputs = processor(prompt, audios=[audio1, audio2], return_tensors="pt").to(args.execution_provider.replace("dml", "cuda"))
     inputs["input_audio_embeds"] = inputs["input_audio_embeds"].to(args.precision)
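For context on what the updated lines feed into `processor`: `soundfile.read` returns a `(data, sample_rate)` tuple, and `build_speech` passes those tuples through unchanged in the `audios` list. The `.replace("dml", "cuda")` on the next line is presumably there because PyTorch has no "dml" device string, so inputs are staged on CUDA even when the target execution provider is DirectML. Below is a minimal sketch of just the loading step, assuming the two `.wav` files named in the diff exist locally; `examples_dir` is a placeholder for the builder's `os.path.join(args.input, "examples")`:

```python
import os

import soundfile

examples_dir = "examples"  # placeholder for os.path.join(args.input, "examples")

# soundfile.read returns a (numpy_array, sample_rate) tuple; this is the
# exact shape that build_speech places into audios=[audio1, audio2].
audio1 = soundfile.read(os.path.join(examples_dir, "what_is_the_traffic_sign_in_the_image.wav"))
audio2 = soundfile.read(os.path.join(examples_dir, "what_is_shown_in_this_image.wav"))

for name, (data, sample_rate) in (("audio1", audio1), ("audio2", audio2)):
    print(f"{name}: {data.shape[0] / sample_rate:.2f}s at {sample_rate} Hz")
```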