Commit 4103281 · 1 parent: 70ba9c2
[feat] tell user that button doesn't work

Files changed:
- axolotl-config.md (+411 -0)
- src/axolotl_ui/app.py (+6 -2)
axolotl-config.md
CHANGED
@@ -0,0 +1,411 @@
# This is the huggingface model that contains *.pt, *.safetensors, or *.bin files
# This can also be a relative path to a model on disk
base_model: ./llama-7b-hf
# You can specify an ignore pattern if the model repo contains more than 1 model type (*.pt, etc)
base_model_ignore_patterns:
# If the base_model repo on hf hub doesn't include configuration .json files,
# you can set that here, or leave this empty to default to base_model
base_model_config: ./llama-7b-hf
# You can specify to choose a specific model revision from huggingface hub
model_revision:
# Optional tokenizer configuration override in case you want to use a different tokenizer
# than the one defined in the base model
tokenizer_config:
# If you want to specify the type of model to load, AutoModelForCausalLM is a good choice too
model_type: AutoModelForCausalLM
# Corresponding tokenizer for the model, AutoTokenizer is a good choice
tokenizer_type: AutoTokenizer
# Trust remote code for untrusted source
trust_remote_code:
# use_fast option for tokenizer loading from_pretrained, defaults to True
tokenizer_use_fast:
# Whether to use the legacy tokenizer setting, defaults to True
tokenizer_legacy:
# Resize the model embeddings when new tokens are added to multiples of 32
# This is reported to improve training speed on some models
resize_token_embeddings_to_32x:

# Used to identify which architecture the base model is derived from
is_falcon_derived_model:
is_llama_derived_model:
# Please note that if you set this to true, `padding_side` will be set to "left" by default
is_mistral_derived_model:
is_qwen_derived_model:

# optional overrides to the base model configuration
model_config:
  # RoPE Scaling https://github.com/huggingface/transformers/pull/24653
  rope_scaling:
    type: # linear | dynamic
    factor: # float

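As a quick illustration of the nesting above (values are placeholders, not a recommendation), a linear RoPE-scaling override intended to stretch a 2048-token model to roughly 4096 tokens might look like:

```yaml
# Illustrative only: linear RoPE scaling with a 2x factor
model_config:
  rope_scaling:
    type: linear
    factor: 2.0
```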
# optional overrides to the bnb 4bit quantization configuration
# https://huggingface.co/docs/transformers/main/main_classes/quantization#transformers.BitsAndBytesConfig
bnb_config_kwargs:
  # These are default values
  llm_int8_has_fp16_weight: false
  bnb_4bit_quant_type: nf4
  bnb_4bit_use_double_quant: true


# Whether you are training a 4-bit GPTQ quantized model
gptq: true
gptq_groupsize: 128 # group size
gptq_model_v1: false # v1 or v2

# This will attempt to quantize the model down to 8 bits and use adam 8 bit optimizer
load_in_8bit: true
# Use bitsandbytes 4 bit
load_in_4bit:

# Use CUDA bf16
bf16: true # bool or 'full' for `bf16_full_eval`. require >=ampere
# Use CUDA fp16
fp16: true
# Use CUDA tf32
tf32: true # require >=ampere

# No AMP (automatic mixed precision)
bfloat16: true # require >=ampere
float16: true

# Limit the memory for all available GPUs to this amount (if an integer, expressed in gigabytes); default: unset
gpu_memory_limit: 20GiB
# Do the LoRA/PEFT loading on CPU -- this is required if the base model is so large it takes up most or all of the available GPU VRAM, e.g. during a model and LoRA merge
lora_on_cpu: true

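Note that `bf16`/`fp16` and `load_in_8bit`/`load_in_4bit` are generally either/or choices; the values above enumerate the options rather than describe one coherent run. As a hedged sketch, a 4-bit setup on an Ampere-or-newer GPU might select:

```yaml
# Illustrative sketch: bf16 compute with 4-bit base weights (typically paired with adapter: qlora, see below)
bf16: true
fp16: false
tf32: true
load_in_8bit: false
load_in_4bit: true
```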
# A list of one or more datasets to finetune the model with
datasets:
  # HuggingFace dataset repo | s3://,gs:// path | "json" for local dataset, make sure to fill data_files
  - path: vicgalle/alpaca-gpt4
    # The type of prompt to use for training. [alpaca, sharegpt, gpteacher, oasst, reflection]
    type: alpaca # format | format:<prompt_style> (chat/instruct) | <prompt_strategies>.load_<load_fn>
    ds_type: # Optional[str] (json|arrow|parquet|text|csv) defines the datatype when path is a file
    data_files: # Optional[str] path to source data files
    shards: # Optional[int] number of shards to split data into
    name: # Optional[str] name of dataset configuration to load
    train_on_split: train # Optional[str] name of dataset split to load from

    # Optional[str] fastchat conversation type, only used with type: sharegpt
    conversation: # Options (see Conversation 'name'): https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py
    field_human: # Optional[str]. Human key to use for conversation.
    field_model: # Optional[str]. Assistant key to use for conversation.

  # Custom user instruction prompt
  - path: repo
    type:
      # The below are defaults. Only set what's needed if you use a different column name.
      system_prompt: ""
      system_format: "{system}"
      field_system: system
      field_instruction: instruction
      field_input: input
      field_output: output

      # Customizable to be single line or multi-line
      # Use {instruction}/{input} as key to be replaced
      # 'format' can include {input}
      format: |-
        User: {instruction} {input}
        Assistant:
      # 'no_input_format' cannot include {input}
      no_input_format: "{instruction} "

      # For `completion` datasets only, uses the provided field instead of `text` column
      field:

+
# A list of one or more datasets to eval the model with.
|
118 |
+
# You can use either test_datasets, or val_set_size, but not both.
|
119 |
+
test_datasets:
|
120 |
+
- path: /workspace/data/eval.jsonl
|
121 |
+
ds_type: json
|
122 |
+
# You need to specify a split. For "json" datasets the default split is called "train".
|
123 |
+
split: train
|
124 |
+
type: completion
|
125 |
+
data_files:
|
126 |
+
- /workspace/data/eval.jsonl
|
127 |
+
|
128 |
+
# use RL training: dpo, ipo, kto_pair
|
129 |
+
rl:
|
130 |
+
|
131 |
+
# Saves the desired chat template to the tokenizer_config.json for easier inferencing
|
132 |
+
# Currently supports chatml and inst (mistral/mixtral)
|
133 |
+
chat_template: chatml
|
134 |
+
# Changes the default system message
|
135 |
+
default_system_message: You are a helpful assistant. Please give a long and detailed answer. # Currently only supports chatml.
|
136 |
+
# Axolotl attempts to save the dataset as an arrow after packing the data together so
|
137 |
+
# subsequent training attempts load faster, relative path
|
138 |
+
dataset_prepared_path: data/last_run_prepared
|
139 |
+
# Push prepared dataset to hub
|
140 |
+
push_dataset_to_hub: # repo path
|
141 |
+
# The maximum number of processes to use while preprocessing your input dataset. This defaults to `os.cpu_count()`
|
142 |
+
# if not set.
|
143 |
+
dataset_processes: # defaults to os.cpu_count() if not set
|
144 |
+
# Keep dataset in memory while preprocessing
|
145 |
+
# Only needed if cached dataset is taking too much storage
|
146 |
+
dataset_keep_in_memory:
|
147 |
+
# push checkpoints to hub
|
148 |
+
hub_model_id: # repo path to push finetuned model
|
149 |
+
# how to push checkpoints to hub
|
150 |
+
# https://huggingface.co/docs/transformers/v4.31.0/en/main_classes/trainer#transformers.TrainingArguments.hub_strategy
|
151 |
+
hub_strategy:
|
152 |
+
# Whether to use hf `use_auth_token` for loading datasets. Useful for fetching private datasets
|
153 |
+
# Required to be true when used in combination with `push_dataset_to_hub`
|
154 |
+
hf_use_auth_token: # boolean
|
155 |
+
# How much of the dataset to set aside as evaluation. 1 = 100%, 0.50 = 50%, etc. 0 for no eval.
|
156 |
+
val_set_size: 0.04
|
157 |
+
# Num shards for whole dataset
|
158 |
+
dataset_shard_num:
|
159 |
+
# Index of shard to use for whole dataset
|
160 |
+
dataset_shard_idx:
|
161 |
+
|
162 |
+
# The maximum length of an input to train with, this should typically be less than 2048
|
163 |
+
# as most models have a token/context limit of 2048
|
164 |
+
sequence_len: 2048
|
165 |
+
# Pad inputs so each step uses constant sized buffers
|
166 |
+
# This will reduce memory fragmentation and may prevent OOMs, by re-using memory more efficiently
|
167 |
+
pad_to_sequence_len:
|
168 |
+
# Use efficient multi-packing with block diagonal attention and per sequence position_ids. Recommend set to 'true'
|
169 |
+
sample_packing:
|
170 |
+
# Set to 'false' if getting errors during eval with sample_packing on.
|
171 |
+
eval_sample_packing:
|
172 |
+
# You can set these packing optimizations AFTER starting a training at least once.
|
173 |
+
# The trainer will provide recommended values for these values.
|
174 |
+
sample_packing_eff_est:
|
175 |
+
total_num_tokens:
|
176 |
+
|
177 |
+
# Passed through to transformers when loading the model when launched without accelerate
|
178 |
+
# Use `sequential` when training w/ model parallelism to limit memory
|
179 |
+
device_map:
|
180 |
+
# Defines the max memory usage per gpu on the system. Passed through to transformers when loading the model.
|
181 |
+
max_memory:
|
182 |
+
|
183 |
+
# If you want to use 'lora' or 'qlora' or leave blank to train all parameters in original model
|
184 |
+
adapter: lora
|
185 |
+
# If you already have a lora model trained that you want to load, put that here.
|
186 |
+
# This means after training, if you want to test the model, you should set this to the value of `output_dir`.
|
187 |
+
# Note that if you merge an adapter to the base model, a new subdirectory `merged` will be created under the `output_dir`.
|
188 |
+
lora_model_dir:
|
189 |
+
|
190 |
+
# LoRA hyperparameters
|
191 |
+
# For more details about the following options, see:
|
192 |
+
# https://www.anyscale.com/blog/fine-tuning-llms-lora-or-full-parameter-an-in-depth-analysis-with-llama-2
|
193 |
+
lora_r: 8
|
194 |
+
lora_alpha: 16
|
195 |
+
lora_dropout: 0.05
|
196 |
+
lora_target_modules:
|
197 |
+
- q_proj
|
198 |
+
- v_proj
|
199 |
+
# - k_proj
|
200 |
+
# - o_proj
|
201 |
+
# - gate_proj
|
202 |
+
# - down_proj
|
203 |
+
# - up_proj
|
204 |
+
lora_target_linear: # If true, will target all linear modules
|
205 |
+
peft_layers_to_transform: # The layer indices to transform, otherwise, apply to all layers
|
206 |
+
|
207 |
+
# If you added new tokens to the tokenizer, you may need to save some LoRA modules because they need to know the new tokens.
|
208 |
+
# For LLaMA and Mistral, you need to save `embed_tokens` and `lm_head`. It may vary for other models.
|
209 |
+
# `embed_tokens` converts tokens to embeddings, and `lm_head` converts embeddings to token probabilities.
|
210 |
+
# https://github.com/huggingface/peft/issues/334#issuecomment-1561727994
|
211 |
+
lora_modules_to_save:
|
212 |
+
# - embed_tokens
|
213 |
+
# - lm_head
|
214 |
+
|
215 |
+
lora_fan_in_fan_out: false
|
216 |
+
|
217 |
+
peft:
|
218 |
+
# Configuration options for loftq initialization for LoRA
|
219 |
+
# https://huggingface.co/docs/peft/developer_guides/quantization#loftq-initialization
|
220 |
+
loftq_config:
|
221 |
+
loftq_bits: # typically 4 bits
|
222 |
+
|
223 |
+
# ReLoRA configuration
|
224 |
+
# Must use either 'lora' or 'qlora' adapter, and does not support fsdp or deepspeed
|
225 |
+
relora_steps: # Number of steps per ReLoRA restart
|
226 |
+
relora_warmup_steps: # Number of per-restart warmup steps
|
227 |
+
relora_cpu_offload: # True to perform lora weight merges on cpu during restarts, for modest gpu memory savings
|
228 |
+
|
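As a sketch of how the adapter block combines with 4-bit loading (illustrative values, not tuned recommendations), a QLoRA-style setup could look like:

```yaml
# Illustrative QLoRA sketch: 4-bit base weights, LoRA applied to all linear layers
load_in_4bit: true
adapter: qlora
lora_r: 32
lora_alpha: 16
lora_dropout: 0.05
lora_target_linear: true
```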
# wandb configuration if you're using it
# Make sure your `WANDB_API_KEY` environment variable is set (recommended) or you login to wandb with `wandb login`.
wandb_mode: # "offline" to save run metadata locally and not sync to the server, "disabled" to turn off wandb
wandb_project: # Your wandb project name
wandb_entity: # A wandb Team name if using a Team
wandb_watch:
wandb_name: # Set the name of your wandb run
wandb_run_id: # Set the ID of your wandb run
wandb_log_model: # "checkpoint" to log model to wandb Artifacts every `save_steps` or "end" to log only at the end of training

# mlflow configuration if you're using it
mlflow_tracking_uri: # URI to mlflow
mlflow_experiment_name: # Your experiment name

# Where to save the full-finetuned model to
output_dir: ./completed-model

# Whether to use torch.compile and which backend to use
torch_compile: # bool
torch_compile_backend: # Optional[str]

# Training hyperparameters

# If greater than 1, the optimizer update is deferred and gradients are accumulated for the given number of steps.
gradient_accumulation_steps: 1
# The number of samples to include in each batch. This is the number of samples sent to each GPU.
micro_batch_size: 2
eval_batch_size:
num_epochs: 4
warmup_steps: 100 # cannot use with warmup_ratio
warmup_ratio: 0.05 # cannot use with warmup_steps
learning_rate: 0.00003
lr_quadratic_warmup:
logging_steps:
eval_steps: # Leave empty to eval at each epoch, integers for every N steps, decimal for fraction of total steps
evals_per_epoch: # number of times per epoch to run evals, mutually exclusive with eval_steps
save_strategy: # Set to `no` to skip checkpoint saves
save_steps: # Leave empty to save at each epoch
saves_per_epoch: # number of times per epoch to save a checkpoint, mutually exclusive with save_steps
save_total_limit: # Checkpoints saved at a time
# Maximum number of iterations to train for. It precedes num_epochs, which means that
# if both are set, num_epochs will not be guaranteed.
# e.g., when 1 epoch is 1000 steps => `num_epochs: 2` and `max_steps: 100` will train for 100 steps
max_steps:

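For a rough sense of scale: the effective global batch size is usually micro_batch_size x gradient_accumulation_steps x number of GPUs, so under that assumption the sketch below yields 2 x 8 x 4 = 64 samples per optimizer update:

```yaml
# Illustrative: on 4 GPUs this gives an effective batch size of 2 * 8 * 4 = 64
micro_batch_size: 2
gradient_accumulation_steps: 8
```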
eval_table_size: # Approximate number of predictions sent to wandb depending on batch size. Enabled above 0. Default is 0
eval_table_max_new_tokens: # Total number of tokens generated for predictions sent to wandb. Default is 128

loss_watchdog_threshold: # High loss value, indicating the learning has broken down (a good estimate is ~2 times the loss at the start of training)
loss_watchdog_patience: # Number of high-loss steps in a row before the trainer aborts (default: 3)

# Save model as safetensors (require safetensors package)
save_safetensors:

# Whether to mask out or include the human's prompt from the training labels
train_on_inputs: false
# Group similarly sized data to minimize padding.
# May be slower to start, as it must download and sort the entire dataset.
# Note that training loss may have an oscillating pattern with this enabled.
group_by_length: false

# Whether to use gradient checkpointing https://huggingface.co/docs/transformers/v4.18.0/en/performance#gradient-checkpointing
gradient_checkpointing: false
# additional kwargs to pass to the trainer for gradient checkpointing
# gradient_checkpointing_kwargs:
#   use_reentrant: false

# Stop training after this many evaluation losses have increased in a row
# https://huggingface.co/transformers/v4.2.2/_modules/transformers/trainer_callback.html#EarlyStoppingCallback
early_stopping_patience: 3

# Specify a scheduler and kwargs to use with the optimizer
lr_scheduler: # 'one_cycle' | 'log_sweep' | empty for cosine
lr_scheduler_kwargs:
cosine_min_lr_ratio: # decay lr to some percentage of the peak lr, e.g. cosine_min_lr_ratio=0.1 for 10% of peak lr

# For one_cycle optim
lr_div_factor: # Learning rate div factor

# For log_sweep optim
log_sweep_min_lr:
log_sweep_max_lr:

# Specify optimizer
# Valid values are driven by the Transformers OptimizerNames class, see:
# https://github.com/huggingface/transformers/blob/95b374952dc27d8511541d6f5a4e22c9ec11fb24/src/transformers/training_args.py#L134
#
# Note that not all optimizers may be available in your environment, ex: 'adamw_anyprecision' is part of
# torchdistx, 'adamw_bnb_8bit' is part of bnb.optim.Adam8bit, etc. When in doubt, it is recommended to start with the optimizer used
# in the examples/ for your model and fine-tuning use case.
#
# Valid values for 'optimizer' include:
# - adamw_hf
# - adamw_torch
# - adamw_torch_fused
# - adamw_torch_xla
# - adamw_apex_fused
# - adafactor
# - adamw_anyprecision
# - sgd
# - adagrad
# - adamw_bnb_8bit
# - lion_8bit
# - lion_32bit
# - paged_adamw_32bit
# - paged_adamw_8bit
# - paged_lion_32bit
# - paged_lion_8bit
optimizer:
# Specify weight decay
weight_decay:
# adamw hyperparams
adam_beta1:
adam_beta2:
adam_epsilon:
# Gradient clipping max norm
max_grad_norm:

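Putting the scheduler and optimizer settings together, one plausible (but by no means canonical) pairing is sketched below; the specific values are illustrative:

```yaml
# Illustrative pairing: 8-bit AdamW with cosine decay and a short warmup
optimizer: adamw_bnb_8bit
lr_scheduler: cosine
learning_rate: 0.0002
warmup_steps: 100
weight_decay: 0.01
```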
# Augmentation techniques
# NEFT https://arxiv.org/abs/2310.05914, set this to a number (paper default is 5) to add noise to embeddings
# currently only supported on Llama and Mistral
neftune_noise_alpha:

# Whether to use BetterTransformers
flash_optimum:
# Whether to use xformers attention patch https://github.com/facebookresearch/xformers:
xformers_attention:
# Whether to use flash attention patch https://github.com/Dao-AILab/flash-attention:
flash_attention:
flash_attn_cross_entropy: # Whether to use flash-attention cross entropy implementation - advanced use only
flash_attn_rms_norm: # Whether to use flash-attention rms norm implementation - advanced use only
flash_attn_fuse_qkv: # Whether to fuse QKV into a single operation
flash_attn_fuse_mlp: # Whether to fuse part of the MLP into a single operation
# Whether to use scaled-dot-product attention
# https://pytorch.org/docs/stable/generated/torch.nn.functional.scaled_dot_product_attention.html
sdp_attention:
# Shifted-sparse attention (only llama) - https://arxiv.org/pdf/2309.12307.pdf
s2_attention:
# Resume from a specific checkpoint dir
resume_from_checkpoint:
# If resume_from_checkpoint isn't set and you simply want it to start where it left off.
# Be careful with this being turned on between different models.
auto_resume_from_checkpoints: false

# Don't mess with this, it's here for accelerate and torchrun
local_rank:

# Add or change special tokens.
# If you add tokens here, you don't need to add them to the `tokens` list.
special_tokens:
  # bos_token: "<s>"
  # eos_token: "</s>"
  # unk_token: "<unk>"

# Add extra tokens.
tokens:

# FSDP
fsdp:
fsdp_config:

# Deepspeed config path. e.g., deepspeed_configs/zero3.json
deepspeed:

# Advanced DDP Arguments
ddp_timeout:
ddp_bucket_cap_mb:
ddp_broadcast_buffers:

# Path to torch distx for optim 'adamw_anyprecision'
torchdistx_path:

# Set to HF dataset for type: 'completion' for streaming instead of pre-tokenize
pretraining_dataset:

# Debug mode
debug:

# Seed
seed:

# Allow overwriting yml config values from the cli
strict:
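
Finally, a rough end-to-end sketch that pulls a handful of these options into a single minimal config; the paths, dataset, and hyperparameters are placeholders taken from the examples above, not recommendations:

```yaml
# Minimal illustrative config: 8-bit LoRA fine-tune of a local Llama-style checkpoint
base_model: ./llama-7b-hf
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer

load_in_8bit: true
adapter: lora
lora_r: 8
lora_alpha: 16
lora_dropout: 0.05
lora_target_modules:
  - q_proj
  - v_proj

datasets:
  - path: vicgalle/alpaca-gpt4
    type: alpaca
val_set_size: 0.04
sequence_len: 2048

micro_batch_size: 2
gradient_accumulation_steps: 1
num_epochs: 4
learning_rate: 0.00003
optimizer: adamw_bnb_8bit
lr_scheduler: cosine
bf16: true
gradient_checkpointing: true
output_dir: ./completed-model
```
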
src/axolotl_ui/app.py
CHANGED
@@ -1,6 +1,6 @@
 from pathlib import Path
 
-from shiny import App, Inputs, Outputs, Session, ui
+from shiny import App, Inputs, Outputs, Session, ui, reactive
 import shinyswatch
 from htmltools import HTML
 
@@ -103,7 +103,11 @@ app_ui = ui.page_fillable(
 
 
 def server(input: Inputs, output: Outputs, session: Session):
-
+    @reactive.Effect
+    @reactive.event(input.create_space)
+    def _():
+        ui.notification_show("This is not yet implemented.", type="warning")
+
 
 
 app = App(