# Copyright (2024) Tsinghua University, Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Model settings: weight/checkpoint locations, freeze flags, Q-Former,
# LoRA and prompt options.
model:
  # paths
  # NOTE(review): all four locations are relative; presumably resolved
  # against the working directory or a model hub — confirm with the loader.
  llama_path: "DeepSeek-R1-Distill-Qwen-1.5B/"
  whisper_path: "distil-whisper/distil-large-v3/"
  beats_path: "BEATs_iter3_plus_AS2M_finetuned_on_AS2M_cpt2.pt"

  ckpt: "tiny_all_tasks_319.pth"

  freeze_whisper: true
  freeze_beats: true

  # window-level Q-Former
  use_speech_Qformer: true
  freeze_speech_QFormer: false
  window_level_Qformer: true
  num_speech_query_token: 1
  # Window and stride are set to the same value.
  # NOTE(review): presumably both are in seconds (per the key names), which
  # would give non-overlapping windows — confirm against the consumer.
  second_per_window: 0.333333
  second_stride: 0.333333

  # Left empty here.
  # NOTE(review): presumably means "no separate projection weights" — confirm.
  speech_llama_proj_model: ""
  freeze_speech_llama_proj: false

  # LoRA
  lora: true
  lora_rank: 8
  lora_alpha: 32
  lora_dropout: 0.1

  multi_prompt: true
  # Double-quoted on purpose: "\n" must be read as a newline escape, and the
  # quotes also protect the ": " sequence inside the value.
  prompt_template: "USER: {}\nASSISTANT:"
  prompt_path: "prompts/train_prompt.json"
  test_prompt_path: "prompts/test_prompt.json"
  max_txt_len: 300
  # NOTE(review): end symbol is an empty string here — confirm this is
  # intended and not a stripped token.
  end_sym: ""

# Text-generation settings.
# NOTE(review): placed at top level as a sibling of `model`; confirm the
# consumer does not expect it nested under `model`.
generate:
  max_new_tokens: 200
  num_beams: 4
  do_sample: false
  min_length: 1
  temperature: 1.0
  top_p: 0.9
  repetition_penalty: 1.0
  length_penalty: 1.0