# Copyright (2024) Tsinghua University, Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Model settings: component paths, freeze flags, Q-Former windowing, LoRA and
# prompt options. All keys below are children of `model`.
model:
  # paths
  # NOTE(review): these are relative paths/identifiers; how they are resolved
  # (working directory vs. hub lookup) depends on the loader — confirm.
  llama_path: "DeepSeek-R1-Distill-Qwen-1.5B/"
  whisper_path: "distil-whisper/distil-large-v3/"
  beats_path: "BEATs_iter3_plus_AS2M_finetuned_on_AS2M_cpt2.pt"

  # presumably a checkpoint to load on top of the components above — verify
  ckpt: "tiny_all_tasks_319.pth"

  freeze_whisper: True
  freeze_beats: True

  # window-level Q-Former
  use_speech_Qformer: True
  freeze_speech_QFormer: False
  window_level_Qformer: True
  num_speech_query_token: 1
  # window length and stride are equal here; by the key names these are in
  # seconds — TODO confirm
  second_per_window: 0.333333
  second_stride: 0.333333

  # empty string here; presumably means "no separate projection weights" — verify
  speech_llama_proj_model: ""
  freeze_speech_llama_proj: False

  # LoRA
  lora: True
  lora_rank: 8
  lora_alpha: 32
  lora_dropout: 0.1

  multi_prompt: True
  # Double-quoted YAML scalar: "\n" is parsed as a real newline character.
  # "{}" looks like a format placeholder filled in by the consumer — confirm.
  prompt_template: "USER: {}\nASSISTANT:"
  prompt_path: "prompts/train_prompt.json"
  test_prompt_path: "prompts/test_prompt.json"
  max_txt_len: 300
  end_sym: "</s>"
# Text-generation settings. All keys below are children of `generate`.
# NOTE(review): the key names match common decoding keyword arguments
# (beam search / sampling); confirm how the consumer forwards them.
generate:
  max_new_tokens: 200
  num_beams: 4
  # Sampling is disabled; temperature/top_p below presumably only take effect
  # when do_sample is True — verify against the generation backend.
  do_sample: False
  min_length: 1
  temperature: 1.0
  top_p: 0.9
  repetition_penalty: 1.0
  length_penalty: 1.0