Audio-Text-to-Text
Transformers
Safetensors
qwen2_audio
text2text-generation
Inference Endpoints
jimbozhang committed on
Commit
cefa606
·
verified ·
1 Parent(s): b74b317

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +2 -1
README.md CHANGED
@@ -30,7 +30,7 @@ wav_path = "test-mini-audios/3fe64f3d-282c-4bc8-a753-68f8f6c35652.wav" # from M
30
  waveform, _ = torchaudio.load(wav_path) # 16KHz
31
  audios = [waveform[0].numpy()]
32
 
33
- # Make prompt
34
  question = "Based on the given audio, identify the source of the speaking voice."
35
  options = ["Man", "Woman", "Child", "Robot"]
36
  prompt = f"{question} Please choose the answer from the following options: {str(options)}. Output the final answer in <answer> </answer>."
@@ -40,6 +40,7 @@ message = [
40
  {"type": "text", "text": prompt}
41
  ]}
42
  ]
 
43
 
44
  # Process
45
  inputs = processor(text=texts, audios=audios, sampling_rate=16000, return_tensors="pt", padding=True).to(model.device)
 
30
  waveform, _ = torchaudio.load(wav_path) # 16KHz
31
  audios = [waveform[0].numpy()]
32
 
33
+ # Make prompt text
34
  question = "Based on the given audio, identify the source of the speaking voice."
35
  options = ["Man", "Woman", "Child", "Robot"]
36
  prompt = f"{question} Please choose the answer from the following options: {str(options)}. Output the final answer in <answer> </answer>."
 
40
  {"type": "text", "text": prompt}
41
  ]}
42
  ]
43
+ texts = processor.apply_chat_template(message, add_generation_prompt=True, tokenize=False)
44
 
45
  # Process
46
  inputs = processor(text=texts, audios=audios, sampling_rate=16000, return_tensors="pt", padding=True).to(model.device)