Trained a rank-8 LoRA fine-tuned model; saved the train and validation datasets to disk
Browse files
- fine-tuned-model-8-diff/checkpoint-236/README.md +202 -0
- fine-tuned-model-8-diff/checkpoint-236/adapter_config.json +39 -0
- fine-tuned-model-8-diff/checkpoint-236/adapter_model.safetensors +3 -0
- fine-tuned-model-8-diff/checkpoint-236/optimizer.pt +3 -0
- fine-tuned-model-8-diff/checkpoint-236/rng_state.pth +3 -0
- fine-tuned-model-8-diff/checkpoint-236/scheduler.pt +3 -0
- fine-tuned-model-8-diff/checkpoint-236/special_tokens_map.json +26 -0
- fine-tuned-model-8-diff/checkpoint-236/tokenizer.json +0 -0
- fine-tuned-model-8-diff/checkpoint-236/tokenizer_config.json +206 -0
- fine-tuned-model-8-diff/checkpoint-236/trainer_state.json +103 -0
- fine-tuned-model-8-diff/checkpoint-236/training_args.bin +3 -0
- fine-tuned-model-8-diff/checkpoint-290/README.md +202 -0
- fine-tuned-model-8-diff/checkpoint-290/adapter_config.json +39 -0
- fine-tuned-model-8-diff/checkpoint-290/adapter_model.safetensors +3 -0
- fine-tuned-model-8-diff/checkpoint-290/optimizer.pt +3 -0
- fine-tuned-model-8-diff/checkpoint-290/rng_state.pth +3 -0
- fine-tuned-model-8-diff/checkpoint-290/scheduler.pt +3 -0
- fine-tuned-model-8-diff/checkpoint-290/special_tokens_map.json +26 -0
- fine-tuned-model-8-diff/checkpoint-290/tokenizer.json +0 -0
- fine-tuned-model-8-diff/checkpoint-290/tokenizer_config.json +206 -0
- fine-tuned-model-8-diff/checkpoint-290/trainer_state.json +118 -0
- fine-tuned-model-8-diff/checkpoint-290/training_args.bin +3 -0
- fine-tuned-model-8-diff/config.json +48 -0
- fine-tuned-model-8-diff/generation_config.json +6 -0
- fine-tuned-model-8-diff/model.safetensors +3 -0
- fine-tuned-model-8-diff/runs/Apr07_12-28-50_DESKTOP-SMJC97K/events.out.tfevents.1744054130.DESKTOP-SMJC97K.14268.0 +3 -0
- fine-tuned-model-8-diff/runs/Apr07_12-37-48_DESKTOP-SMJC97K/events.out.tfevents.1744054670.DESKTOP-SMJC97K.14268.1 +3 -0
- fine-tuned-model-8-diff/runs/Apr07_12-48-05_DESKTOP-SMJC97K/events.out.tfevents.1744055285.DESKTOP-SMJC97K.21244.0 +3 -0
- fine-tuned-model-8-diff/special_tokens_map.json +26 -0
- fine-tuned-model-8-diff/tokenizer.json +0 -0
- fine-tuned-model-8-diff/tokenizer_config.json +206 -0
- finetune_model.ipynb +185 -83
- train.hf/data-00000-of-00001.arrow +3 -0
- train.hf/dataset_info.json +29 -0
- train.hf/state.json +13 -0
- val.hf/data-00000-of-00001.arrow +3 -0
- val.hf/dataset_info.json +29 -0
- val.hf/state.json +13 -0
    	
        fine-tuned-model-8-diff/checkpoint-236/README.md
    ADDED
    
    | @@ -0,0 +1,202 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            ---
         | 
| 2 | 
            +
            base_model: ./deepseek-coder-1.3b-instruct
         | 
| 3 | 
            +
            library_name: peft
         | 
| 4 | 
            +
            ---
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            # Model Card for Model ID
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            <!-- Provide a quick summary of what the model is/does. -->
         | 
| 9 | 
            +
             | 
| 10 | 
            +
             | 
| 11 | 
            +
             | 
| 12 | 
            +
            ## Model Details
         | 
| 13 | 
            +
             | 
| 14 | 
            +
            ### Model Description
         | 
| 15 | 
            +
             | 
| 16 | 
            +
            <!-- Provide a longer summary of what this model is. -->
         | 
| 17 | 
            +
             | 
| 18 | 
            +
             | 
| 19 | 
            +
             | 
| 20 | 
            +
            - **Developed by:** [More Information Needed]
         | 
| 21 | 
            +
            - **Funded by [optional]:** [More Information Needed]
         | 
| 22 | 
            +
            - **Shared by [optional]:** [More Information Needed]
         | 
| 23 | 
            +
            - **Model type:** [More Information Needed]
         | 
| 24 | 
            +
            - **Language(s) (NLP):** [More Information Needed]
         | 
| 25 | 
            +
            - **License:** [More Information Needed]
         | 
| 26 | 
            +
            - **Finetuned from model [optional]:** [More Information Needed]
         | 
| 27 | 
            +
             | 
| 28 | 
            +
            ### Model Sources [optional]
         | 
| 29 | 
            +
             | 
| 30 | 
            +
            <!-- Provide the basic links for the model. -->
         | 
| 31 | 
            +
             | 
| 32 | 
            +
            - **Repository:** [More Information Needed]
         | 
| 33 | 
            +
            - **Paper [optional]:** [More Information Needed]
         | 
| 34 | 
            +
            - **Demo [optional]:** [More Information Needed]
         | 
| 35 | 
            +
             | 
| 36 | 
            +
            ## Uses
         | 
| 37 | 
            +
             | 
| 38 | 
            +
            <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
         | 
| 39 | 
            +
             | 
| 40 | 
            +
            ### Direct Use
         | 
| 41 | 
            +
             | 
| 42 | 
            +
            <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
         | 
| 43 | 
            +
             | 
| 44 | 
            +
            [More Information Needed]
         | 
| 45 | 
            +
             | 
| 46 | 
            +
            ### Downstream Use [optional]
         | 
| 47 | 
            +
             | 
| 48 | 
            +
            <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
         | 
| 49 | 
            +
             | 
| 50 | 
            +
            [More Information Needed]
         | 
| 51 | 
            +
             | 
| 52 | 
            +
            ### Out-of-Scope Use
         | 
| 53 | 
            +
             | 
| 54 | 
            +
            <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
         | 
| 55 | 
            +
             | 
| 56 | 
            +
            [More Information Needed]
         | 
| 57 | 
            +
             | 
| 58 | 
            +
            ## Bias, Risks, and Limitations
         | 
| 59 | 
            +
             | 
| 60 | 
            +
            <!-- This section is meant to convey both technical and sociotechnical limitations. -->
         | 
| 61 | 
            +
             | 
| 62 | 
            +
            [More Information Needed]
         | 
| 63 | 
            +
             | 
| 64 | 
            +
            ### Recommendations
         | 
| 65 | 
            +
             | 
| 66 | 
            +
            <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
         | 
| 67 | 
            +
             | 
| 68 | 
            +
            Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
         | 
| 69 | 
            +
             | 
| 70 | 
            +
            ## How to Get Started with the Model
         | 
| 71 | 
            +
             | 
| 72 | 
            +
            Use the code below to get started with the model.
         | 
| 73 | 
            +
             | 
| 74 | 
            +
            [More Information Needed]
         | 
| 75 | 
            +
             | 
| 76 | 
            +
            ## Training Details
         | 
| 77 | 
            +
             | 
| 78 | 
            +
            ### Training Data
         | 
| 79 | 
            +
             | 
| 80 | 
            +
            <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
         | 
| 81 | 
            +
             | 
| 82 | 
            +
            [More Information Needed]
         | 
| 83 | 
            +
             | 
| 84 | 
            +
            ### Training Procedure
         | 
| 85 | 
            +
             | 
| 86 | 
            +
            <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
         | 
| 87 | 
            +
             | 
| 88 | 
            +
            #### Preprocessing [optional]
         | 
| 89 | 
            +
             | 
| 90 | 
            +
            [More Information Needed]
         | 
| 91 | 
            +
             | 
| 92 | 
            +
             | 
| 93 | 
            +
            #### Training Hyperparameters
         | 
| 94 | 
            +
             | 
| 95 | 
            +
            - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
         | 
| 96 | 
            +
             | 
| 97 | 
            +
            #### Speeds, Sizes, Times [optional]
         | 
| 98 | 
            +
             | 
| 99 | 
            +
            <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
         | 
| 100 | 
            +
             | 
| 101 | 
            +
            [More Information Needed]
         | 
| 102 | 
            +
             | 
| 103 | 
            +
            ## Evaluation
         | 
| 104 | 
            +
             | 
| 105 | 
            +
            <!-- This section describes the evaluation protocols and provides the results. -->
         | 
| 106 | 
            +
             | 
| 107 | 
            +
            ### Testing Data, Factors & Metrics
         | 
| 108 | 
            +
             | 
| 109 | 
            +
            #### Testing Data
         | 
| 110 | 
            +
             | 
| 111 | 
            +
            <!-- This should link to a Dataset Card if possible. -->
         | 
| 112 | 
            +
             | 
| 113 | 
            +
            [More Information Needed]
         | 
| 114 | 
            +
             | 
| 115 | 
            +
            #### Factors
         | 
| 116 | 
            +
             | 
| 117 | 
            +
            <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
         | 
| 118 | 
            +
             | 
| 119 | 
            +
            [More Information Needed]
         | 
| 120 | 
            +
             | 
| 121 | 
            +
            #### Metrics
         | 
| 122 | 
            +
             | 
| 123 | 
            +
            <!-- These are the evaluation metrics being used, ideally with a description of why. -->
         | 
| 124 | 
            +
             | 
| 125 | 
            +
            [More Information Needed]
         | 
| 126 | 
            +
             | 
| 127 | 
            +
            ### Results
         | 
| 128 | 
            +
             | 
| 129 | 
            +
            [More Information Needed]
         | 
| 130 | 
            +
             | 
| 131 | 
            +
            #### Summary
         | 
| 132 | 
            +
             | 
| 133 | 
            +
             | 
| 134 | 
            +
             | 
| 135 | 
            +
            ## Model Examination [optional]
         | 
| 136 | 
            +
             | 
| 137 | 
            +
            <!-- Relevant interpretability work for the model goes here -->
         | 
| 138 | 
            +
             | 
| 139 | 
            +
            [More Information Needed]
         | 
| 140 | 
            +
             | 
| 141 | 
            +
            ## Environmental Impact
         | 
| 142 | 
            +
             | 
| 143 | 
            +
            <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
         | 
| 144 | 
            +
             | 
| 145 | 
            +
            Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
         | 
| 146 | 
            +
             | 
| 147 | 
            +
            - **Hardware Type:** [More Information Needed]
         | 
| 148 | 
            +
            - **Hours used:** [More Information Needed]
         | 
| 149 | 
            +
            - **Cloud Provider:** [More Information Needed]
         | 
| 150 | 
            +
            - **Compute Region:** [More Information Needed]
         | 
| 151 | 
            +
            - **Carbon Emitted:** [More Information Needed]
         | 
| 152 | 
            +
             | 
| 153 | 
            +
            ## Technical Specifications [optional]
         | 
| 154 | 
            +
             | 
| 155 | 
            +
            ### Model Architecture and Objective
         | 
| 156 | 
            +
             | 
| 157 | 
            +
            [More Information Needed]
         | 
| 158 | 
            +
             | 
| 159 | 
            +
            ### Compute Infrastructure
         | 
| 160 | 
            +
             | 
| 161 | 
            +
            [More Information Needed]
         | 
| 162 | 
            +
             | 
| 163 | 
            +
            #### Hardware
         | 
| 164 | 
            +
             | 
| 165 | 
            +
            [More Information Needed]
         | 
| 166 | 
            +
             | 
| 167 | 
            +
            #### Software
         | 
| 168 | 
            +
             | 
| 169 | 
            +
            [More Information Needed]
         | 
| 170 | 
            +
             | 
| 171 | 
            +
            ## Citation [optional]
         | 
| 172 | 
            +
             | 
| 173 | 
            +
            <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
         | 
| 174 | 
            +
             | 
| 175 | 
            +
            **BibTeX:**
         | 
| 176 | 
            +
             | 
| 177 | 
            +
            [More Information Needed]
         | 
| 178 | 
            +
             | 
| 179 | 
            +
            **APA:**
         | 
| 180 | 
            +
             | 
| 181 | 
            +
            [More Information Needed]
         | 
| 182 | 
            +
             | 
| 183 | 
            +
            ## Glossary [optional]
         | 
| 184 | 
            +
             | 
| 185 | 
            +
            <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
         | 
| 186 | 
            +
             | 
| 187 | 
            +
            [More Information Needed]
         | 
| 188 | 
            +
             | 
| 189 | 
            +
            ## More Information [optional]
         | 
| 190 | 
            +
             | 
| 191 | 
            +
            [More Information Needed]
         | 
| 192 | 
            +
             | 
| 193 | 
            +
            ## Model Card Authors [optional]
         | 
| 194 | 
            +
             | 
| 195 | 
            +
            [More Information Needed]
         | 
| 196 | 
            +
             | 
| 197 | 
            +
            ## Model Card Contact
         | 
| 198 | 
            +
             | 
| 199 | 
            +
            [More Information Needed]
         | 
| 200 | 
            +
            ### Framework versions
         | 
| 201 | 
            +
             | 
| 202 | 
            +
            - PEFT 0.15.1
         | 
    	
        fine-tuned-model-8-diff/checkpoint-236/adapter_config.json
    ADDED
    
    | @@ -0,0 +1,39 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "alpha_pattern": {},
         | 
| 3 | 
            +
              "auto_mapping": null,
         | 
| 4 | 
            +
              "base_model_name_or_path": "./deepseek-coder-1.3b-instruct",
         | 
| 5 | 
            +
              "bias": "none",
         | 
| 6 | 
            +
              "corda_config": null,
         | 
| 7 | 
            +
              "eva_config": null,
         | 
| 8 | 
            +
              "exclude_modules": null,
         | 
| 9 | 
            +
              "fan_in_fan_out": false,
         | 
| 10 | 
            +
              "inference_mode": true,
         | 
| 11 | 
            +
              "init_lora_weights": true,
         | 
| 12 | 
            +
              "layer_replication": null,
         | 
| 13 | 
            +
              "layers_pattern": null,
         | 
| 14 | 
            +
              "layers_to_transform": null,
         | 
| 15 | 
            +
              "loftq_config": {},
         | 
| 16 | 
            +
              "lora_alpha": 16,
         | 
| 17 | 
            +
              "lora_bias": false,
         | 
| 18 | 
            +
              "lora_dropout": 0.0,
         | 
| 19 | 
            +
              "megatron_config": null,
         | 
| 20 | 
            +
              "megatron_core": "megatron.core",
         | 
| 21 | 
            +
              "modules_to_save": null,
         | 
| 22 | 
            +
              "peft_type": "LORA",
         | 
| 23 | 
            +
              "r": 8,
         | 
| 24 | 
            +
              "rank_pattern": {},
         | 
| 25 | 
            +
              "revision": null,
         | 
| 26 | 
            +
              "target_modules": [
         | 
| 27 | 
            +
                "down_proj",
         | 
| 28 | 
            +
                "up_proj",
         | 
| 29 | 
            +
                "q_proj",
         | 
| 30 | 
            +
                "k_proj",
         | 
| 31 | 
            +
                "v_proj",
         | 
| 32 | 
            +
                "gate_proj",
         | 
| 33 | 
            +
                "o_proj"
         | 
| 34 | 
            +
              ],
         | 
| 35 | 
            +
              "task_type": "CAUSAL_LM",
         | 
| 36 | 
            +
              "trainable_token_indices": null,
         | 
| 37 | 
            +
              "use_dora": false,
         | 
| 38 | 
            +
              "use_rslora": false
         | 
| 39 | 
            +
            }
         | 
    	
        fine-tuned-model-8-diff/checkpoint-236/adapter_model.safetensors
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:2960b053781abde9adf0f7fd24ff3034e0d46cabd373ef3998661cffaf289621
         | 
| 3 | 
            +
            size 292359512
         | 
    	
        fine-tuned-model-8-diff/checkpoint-236/optimizer.pt
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:a7cfffc88492450fca8c1189f0eaa5b34b4a7ac1ab3f8a4b6c4eba7cae328d81
         | 
| 3 | 
            +
            size 60247362
         | 
    	
        fine-tuned-model-8-diff/checkpoint-236/rng_state.pth
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:ff39c56748cc5e85691d62bfd6a79e60ab92c1f59b698543eba03446c493cd5c
         | 
| 3 | 
            +
            size 14244
         | 
    	
        fine-tuned-model-8-diff/checkpoint-236/scheduler.pt
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:467bdec2256dc387dcffd097dcd6b36f5d2302a72af9989678f2dab1c975a245
         | 
| 3 | 
            +
            size 1064
         | 
    	
        fine-tuned-model-8-diff/checkpoint-236/special_tokens_map.json
    ADDED
    
    | @@ -0,0 +1,26 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "additional_special_tokens": [
         | 
| 3 | 
            +
                {
         | 
| 4 | 
            +
                  "content": "<|endofsql|>",
         | 
| 5 | 
            +
                  "lstrip": false,
         | 
| 6 | 
            +
                  "normalized": false,
         | 
| 7 | 
            +
                  "rstrip": false,
         | 
| 8 | 
            +
                  "single_word": false
         | 
| 9 | 
            +
                }
         | 
| 10 | 
            +
              ],
         | 
| 11 | 
            +
              "bos_token": {
         | 
| 12 | 
            +
                "content": "<|begin▁of▁sentence|>",
         | 
| 13 | 
            +
                "lstrip": false,
         | 
| 14 | 
            +
                "normalized": true,
         | 
| 15 | 
            +
                "rstrip": false,
         | 
| 16 | 
            +
                "single_word": false
         | 
| 17 | 
            +
              },
         | 
| 18 | 
            +
              "eos_token": "<|endofsql|>",
         | 
| 19 | 
            +
              "pad_token": {
         | 
| 20 | 
            +
                "content": "<|end▁of▁sentence|>",
         | 
| 21 | 
            +
                "lstrip": false,
         | 
| 22 | 
            +
                "normalized": true,
         | 
| 23 | 
            +
                "rstrip": false,
         | 
| 24 | 
            +
                "single_word": false
         | 
| 25 | 
            +
              }
         | 
| 26 | 
            +
            }
         | 
    	
        fine-tuned-model-8-diff/checkpoint-236/tokenizer.json
    ADDED
    
    | The diff for this file is too large to render. 
		See raw diff | 
|  | 
    	
        fine-tuned-model-8-diff/checkpoint-236/tokenizer_config.json
    ADDED
    
    | @@ -0,0 +1,206 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "add_bos_token": true,
         | 
| 3 | 
            +
              "add_eos_token": false,
         | 
| 4 | 
            +
              "add_prefix_space": null,
         | 
| 5 | 
            +
              "added_tokens_decoder": {
         | 
| 6 | 
            +
                "32000": {
         | 
| 7 | 
            +
                  "content": "õ",
         | 
| 8 | 
            +
                  "lstrip": false,
         | 
| 9 | 
            +
                  "normalized": true,
         | 
| 10 | 
            +
                  "rstrip": false,
         | 
| 11 | 
            +
                  "single_word": false,
         | 
| 12 | 
            +
                  "special": false
         | 
| 13 | 
            +
                },
         | 
| 14 | 
            +
                "32001": {
         | 
| 15 | 
            +
                  "content": "÷",
         | 
| 16 | 
            +
                  "lstrip": false,
         | 
| 17 | 
            +
                  "normalized": true,
         | 
| 18 | 
            +
                  "rstrip": false,
         | 
| 19 | 
            +
                  "single_word": false,
         | 
| 20 | 
            +
                  "special": false
         | 
| 21 | 
            +
                },
         | 
| 22 | 
            +
                "32002": {
         | 
| 23 | 
            +
                  "content": "Á",
         | 
| 24 | 
            +
                  "lstrip": false,
         | 
| 25 | 
            +
                  "normalized": true,
         | 
| 26 | 
            +
                  "rstrip": false,
         | 
| 27 | 
            +
                  "single_word": false,
         | 
| 28 | 
            +
                  "special": false
         | 
| 29 | 
            +
                },
         | 
| 30 | 
            +
                "32003": {
         | 
| 31 | 
            +
                  "content": "ý",
         | 
| 32 | 
            +
                  "lstrip": false,
         | 
| 33 | 
            +
                  "normalized": true,
         | 
| 34 | 
            +
                  "rstrip": false,
         | 
| 35 | 
            +
                  "single_word": false,
         | 
| 36 | 
            +
                  "special": false
         | 
| 37 | 
            +
                },
         | 
| 38 | 
            +
                "32004": {
         | 
| 39 | 
            +
                  "content": "À",
         | 
| 40 | 
            +
                  "lstrip": false,
         | 
| 41 | 
            +
                  "normalized": true,
         | 
| 42 | 
            +
                  "rstrip": false,
         | 
| 43 | 
            +
                  "single_word": false,
         | 
| 44 | 
            +
                  "special": false
         | 
| 45 | 
            +
                },
         | 
| 46 | 
            +
                "32005": {
         | 
| 47 | 
            +
                  "content": "ÿ",
         | 
| 48 | 
            +
                  "lstrip": false,
         | 
| 49 | 
            +
                  "normalized": true,
         | 
| 50 | 
            +
                  "rstrip": false,
         | 
| 51 | 
            +
                  "single_word": false,
         | 
| 52 | 
            +
                  "special": false
         | 
| 53 | 
            +
                },
         | 
| 54 | 
            +
                "32006": {
         | 
| 55 | 
            +
                  "content": "ø",
         | 
| 56 | 
            +
                  "lstrip": false,
         | 
| 57 | 
            +
                  "normalized": true,
         | 
| 58 | 
            +
                  "rstrip": false,
         | 
| 59 | 
            +
                  "single_word": false,
         | 
| 60 | 
            +
                  "special": false
         | 
| 61 | 
            +
                },
         | 
| 62 | 
            +
                "32007": {
         | 
| 63 | 
            +
                  "content": "ú",
         | 
| 64 | 
            +
                  "lstrip": false,
         | 
| 65 | 
            +
                  "normalized": true,
         | 
| 66 | 
            +
                  "rstrip": false,
         | 
| 67 | 
            +
                  "single_word": false,
         | 
| 68 | 
            +
                  "special": false
         | 
| 69 | 
            +
                },
         | 
| 70 | 
            +
                "32008": {
         | 
| 71 | 
            +
                  "content": "þ",
         | 
| 72 | 
            +
                  "lstrip": false,
         | 
| 73 | 
            +
                  "normalized": true,
         | 
| 74 | 
            +
                  "rstrip": false,
         | 
| 75 | 
            +
                  "single_word": false,
         | 
| 76 | 
            +
                  "special": false
         | 
| 77 | 
            +
                },
         | 
| 78 | 
            +
                "32009": {
         | 
| 79 | 
            +
                  "content": "ü",
         | 
| 80 | 
            +
                  "lstrip": false,
         | 
| 81 | 
            +
                  "normalized": true,
         | 
| 82 | 
            +
                  "rstrip": false,
         | 
| 83 | 
            +
                  "single_word": false,
         | 
| 84 | 
            +
                  "special": false
         | 
| 85 | 
            +
                },
         | 
| 86 | 
            +
                "32010": {
         | 
| 87 | 
            +
                  "content": "ù",
         | 
| 88 | 
            +
                  "lstrip": false,
         | 
| 89 | 
            +
                  "normalized": true,
         | 
| 90 | 
            +
                  "rstrip": false,
         | 
| 91 | 
            +
                  "single_word": false,
         | 
| 92 | 
            +
                  "special": false
         | 
| 93 | 
            +
                },
         | 
| 94 | 
            +
                "32011": {
         | 
| 95 | 
            +
                  "content": "ö",
         | 
| 96 | 
            +
                  "lstrip": false,
         | 
| 97 | 
            +
                  "normalized": true,
         | 
| 98 | 
            +
                  "rstrip": false,
         | 
| 99 | 
            +
                  "single_word": false,
         | 
| 100 | 
            +
                  "special": false
         | 
| 101 | 
            +
                },
         | 
| 102 | 
            +
                "32012": {
         | 
| 103 | 
            +
                  "content": "û",
         | 
| 104 | 
            +
                  "lstrip": false,
         | 
| 105 | 
            +
                  "normalized": true,
         | 
| 106 | 
            +
                  "rstrip": false,
         | 
| 107 | 
            +
                  "single_word": false,
         | 
| 108 | 
            +
                  "special": false
         | 
| 109 | 
            +
                },
         | 
| 110 | 
            +
                "32013": {
         | 
| 111 | 
            +
                  "content": "<|begin▁of▁sentence|>",
         | 
| 112 | 
            +
                  "lstrip": false,
         | 
| 113 | 
            +
                  "normalized": true,
         | 
| 114 | 
            +
                  "rstrip": false,
         | 
| 115 | 
            +
                  "single_word": false,
         | 
| 116 | 
            +
                  "special": true
         | 
| 117 | 
            +
                },
         | 
| 118 | 
            +
                "32014": {
         | 
| 119 | 
            +
                  "content": "<|end▁of▁sentence|>",
         | 
| 120 | 
            +
                  "lstrip": false,
         | 
| 121 | 
            +
                  "normalized": true,
         | 
| 122 | 
            +
                  "rstrip": false,
         | 
| 123 | 
            +
                  "single_word": false,
         | 
| 124 | 
            +
                  "special": true
         | 
| 125 | 
            +
                },
         | 
| 126 | 
            +
                "32015": {
         | 
| 127 | 
            +
                  "content": "<|fim▁hole|>",
         | 
| 128 | 
            +
                  "lstrip": false,
         | 
| 129 | 
            +
                  "normalized": true,
         | 
| 130 | 
            +
                  "rstrip": false,
         | 
| 131 | 
            +
                  "single_word": false,
         | 
| 132 | 
            +
                  "special": false
         | 
| 133 | 
            +
                },
         | 
| 134 | 
            +
                "32016": {
         | 
| 135 | 
            +
                  "content": "<|fim▁begin|>",
         | 
| 136 | 
            +
                  "lstrip": false,
         | 
| 137 | 
            +
                  "normalized": true,
         | 
| 138 | 
            +
                  "rstrip": false,
         | 
| 139 | 
            +
                  "single_word": false,
         | 
| 140 | 
            +
                  "special": false
         | 
| 141 | 
            +
                },
         | 
| 142 | 
            +
                "32017": {
         | 
| 143 | 
            +
                  "content": "<|fim▁end|>",
         | 
| 144 | 
            +
                  "lstrip": false,
         | 
| 145 | 
            +
                  "normalized": true,
         | 
| 146 | 
            +
                  "rstrip": false,
         | 
| 147 | 
            +
                  "single_word": false,
         | 
| 148 | 
            +
                  "special": false
         | 
| 149 | 
            +
                },
         | 
| 150 | 
            +
                "32018": {
         | 
| 151 | 
            +
                  "content": "<pad>",
         | 
| 152 | 
            +
                  "lstrip": false,
         | 
| 153 | 
            +
                  "normalized": true,
         | 
| 154 | 
            +
                  "rstrip": false,
         | 
| 155 | 
            +
                  "single_word": false,
         | 
| 156 | 
            +
                  "special": false
         | 
| 157 | 
            +
                },
         | 
| 158 | 
            +
                "32019": {
         | 
| 159 | 
            +
                  "content": "<|User|>",
         | 
| 160 | 
            +
                  "lstrip": false,
         | 
| 161 | 
            +
                  "normalized": true,
         | 
| 162 | 
            +
                  "rstrip": false,
         | 
| 163 | 
            +
                  "single_word": false,
         | 
| 164 | 
            +
                  "special": false
         | 
| 165 | 
            +
                },
         | 
| 166 | 
            +
                "32020": {
         | 
| 167 | 
            +
                  "content": "<|Assistant|>",
         | 
| 168 | 
            +
                  "lstrip": false,
         | 
| 169 | 
            +
                  "normalized": true,
         | 
| 170 | 
            +
                  "rstrip": false,
         | 
| 171 | 
            +
                  "single_word": false,
         | 
| 172 | 
            +
                  "special": false
         | 
| 173 | 
            +
                },
         | 
| 174 | 
            +
                "32021": {
         | 
| 175 | 
            +
                  "content": "<|EOT|>",
         | 
| 176 | 
            +
                  "lstrip": false,
         | 
| 177 | 
            +
                  "normalized": true,
         | 
| 178 | 
            +
                  "rstrip": false,
         | 
| 179 | 
            +
                  "single_word": false,
         | 
| 180 | 
            +
                  "special": true
         | 
| 181 | 
            +
                },
         | 
| 182 | 
            +
                "32022": {
         | 
| 183 | 
            +
                  "content": "<|endofsql|>",
         | 
| 184 | 
            +
                  "lstrip": false,
         | 
| 185 | 
            +
                  "normalized": false,
         | 
| 186 | 
            +
                  "rstrip": false,
         | 
| 187 | 
            +
                  "single_word": false,
         | 
| 188 | 
            +
                  "special": true
         | 
| 189 | 
            +
                }
         | 
| 190 | 
            +
              },
         | 
| 191 | 
            +
              "additional_special_tokens": [
         | 
| 192 | 
            +
                "<|endofsql|>"
         | 
| 193 | 
            +
              ],
         | 
| 194 | 
            +
              "bos_token": "<|begin▁of▁sentence|>",
         | 
| 195 | 
            +
              "chat_template": "{% if not add_generation_prompt is defined %}\n{% set add_generation_prompt = false %}\n{% endif %}\n{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n    {%- if message['role'] == 'system' -%}\n        {%- set ns.found = true -%}\n    {%- endif -%}\n{%- endfor -%}\n{{bos_token}}{%- if not ns.found -%}\n{{'You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer\\n'}}\n{%- endif %}\n{%- for message in messages %}\n    {%- if message['role'] == 'system' %}\n{{ message['content'] }}\n    {%- else %}\n        {%- if message['role'] == 'user' %}\n{{'### Instruction:\\n' + message['content'] + '\\n'}}\n        {%- else %}\n{{'### Response:\\n' + message['content'] + '\\n<|EOT|>\\n'}}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{% if add_generation_prompt %}\n{{'### Response:'}}\n{% endif %}",
         | 
| 196 | 
            +
              "clean_up_tokenization_spaces": false,
         | 
| 197 | 
            +
              "eos_token": "<|endofsql|>",
         | 
| 198 | 
            +
              "extra_special_tokens": {},
         | 
| 199 | 
            +
              "legacy": true,
         | 
| 200 | 
            +
              "model_max_length": 16384,
         | 
| 201 | 
            +
              "pad_token": "<|end▁of▁sentence|>",
         | 
| 202 | 
            +
              "sp_model_kwargs": {},
         | 
| 203 | 
            +
              "tokenizer_class": "LlamaTokenizerFast",
         | 
| 204 | 
            +
              "unk_token": null,
         | 
| 205 | 
            +
              "use_default_system_prompt": false
         | 
| 206 | 
            +
            }
         | 
    	
        fine-tuned-model-8-diff/checkpoint-236/trainer_state.json
    ADDED
    
    | @@ -0,0 +1,103 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "best_global_step": 236,
         | 
| 3 | 
            +
              "best_metric": 0.15796616673469543,
         | 
| 4 | 
            +
              "best_model_checkpoint": "./fine-tuned-model-8-diff\\checkpoint-236",
         | 
| 5 | 
            +
              "epoch": 4.0,
         | 
| 6 | 
            +
              "eval_steps": 500,
         | 
| 7 | 
            +
              "global_step": 236,
         | 
| 8 | 
            +
              "is_hyper_param_search": false,
         | 
| 9 | 
            +
              "is_local_process_zero": true,
         | 
| 10 | 
            +
              "is_world_process_zero": true,
         | 
| 11 | 
            +
              "log_history": [
         | 
| 12 | 
            +
                {
         | 
| 13 | 
            +
                  "epoch": 0.8519701810436635,
         | 
| 14 | 
            +
                  "grad_norm": 0.3280849754810333,
         | 
| 15 | 
            +
                  "learning_rate": 3.310344827586207e-05,
         | 
| 16 | 
            +
                  "loss": 0.7606,
         | 
| 17 | 
            +
                  "step": 50
         | 
| 18 | 
            +
                },
         | 
| 19 | 
            +
                {
         | 
| 20 | 
            +
                  "epoch": 1.0,
         | 
| 21 | 
            +
                  "eval_loss": 0.24083571135997772,
         | 
| 22 | 
            +
                  "eval_runtime": 209.8235,
         | 
| 23 | 
            +
                  "eval_samples_per_second": 0.5,
         | 
| 24 | 
            +
                  "eval_steps_per_second": 0.5,
         | 
| 25 | 
            +
                  "step": 59
         | 
| 26 | 
            +
                },
         | 
| 27 | 
            +
                {
         | 
| 28 | 
            +
                  "epoch": 1.698615548455804,
         | 
| 29 | 
            +
                  "grad_norm": 0.17877578735351562,
         | 
| 30 | 
            +
                  "learning_rate": 2.620689655172414e-05,
         | 
| 31 | 
            +
                  "loss": 0.2316,
         | 
| 32 | 
            +
                  "step": 100
         | 
| 33 | 
            +
                },
         | 
| 34 | 
            +
                {
         | 
| 35 | 
            +
                  "epoch": 2.0,
         | 
| 36 | 
            +
                  "eval_loss": 0.16867588460445404,
         | 
| 37 | 
            +
                  "eval_runtime": 210.4202,
         | 
| 38 | 
            +
                  "eval_samples_per_second": 0.499,
         | 
| 39 | 
            +
                  "eval_steps_per_second": 0.499,
         | 
| 40 | 
            +
                  "step": 118
         | 
| 41 | 
            +
                },
         | 
| 42 | 
            +
                {
         | 
| 43 | 
            +
                  "epoch": 2.545260915867945,
         | 
| 44 | 
            +
                  "grad_norm": 0.27560967206954956,
         | 
| 45 | 
            +
                  "learning_rate": 1.931034482758621e-05,
         | 
| 46 | 
            +
                  "loss": 0.1695,
         | 
| 47 | 
            +
                  "step": 150
         | 
| 48 | 
            +
                },
         | 
| 49 | 
            +
                {
         | 
| 50 | 
            +
                  "epoch": 3.0,
         | 
| 51 | 
            +
                  "eval_loss": 0.16012603044509888,
         | 
| 52 | 
            +
                  "eval_runtime": 209.4479,
         | 
| 53 | 
            +
                  "eval_samples_per_second": 0.501,
         | 
| 54 | 
            +
                  "eval_steps_per_second": 0.501,
         | 
| 55 | 
            +
                  "step": 177
         | 
| 56 | 
            +
                },
         | 
| 57 | 
            +
                {
         | 
| 58 | 
            +
                  "epoch": 3.3919062832800853,
         | 
| 59 | 
            +
                  "grad_norm": 0.21748089790344238,
         | 
| 60 | 
            +
                  "learning_rate": 1.2413793103448277e-05,
         | 
| 61 | 
            +
                  "loss": 0.1571,
         | 
| 62 | 
            +
                  "step": 200
         | 
| 63 | 
            +
                },
         | 
| 64 | 
            +
                {
         | 
| 65 | 
            +
                  "epoch": 4.0,
         | 
| 66 | 
            +
                  "eval_loss": 0.15796616673469543,
         | 
| 67 | 
            +
                  "eval_runtime": 210.9531,
         | 
| 68 | 
            +
                  "eval_samples_per_second": 0.498,
         | 
| 69 | 
            +
                  "eval_steps_per_second": 0.498,
         | 
| 70 | 
            +
                  "step": 236
         | 
| 71 | 
            +
                }
         | 
| 72 | 
            +
              ],
         | 
| 73 | 
            +
              "logging_steps": 50,
         | 
| 74 | 
            +
              "max_steps": 290,
         | 
| 75 | 
            +
              "num_input_tokens_seen": 0,
         | 
| 76 | 
            +
              "num_train_epochs": 5,
         | 
| 77 | 
            +
              "save_steps": 500,
         | 
| 78 | 
            +
              "stateful_callbacks": {
         | 
| 79 | 
            +
                "EarlyStoppingCallback": {
         | 
| 80 | 
            +
                  "args": {
         | 
| 81 | 
            +
                    "early_stopping_patience": 2,
         | 
| 82 | 
            +
                    "early_stopping_threshold": 0.0
         | 
| 83 | 
            +
                  },
         | 
| 84 | 
            +
                  "attributes": {
         | 
| 85 | 
            +
                    "early_stopping_patience_counter": 0
         | 
| 86 | 
            +
                  }
         | 
| 87 | 
            +
                },
         | 
| 88 | 
            +
                "TrainerControl": {
         | 
| 89 | 
            +
                  "args": {
         | 
| 90 | 
            +
                    "should_epoch_stop": false,
         | 
| 91 | 
            +
                    "should_evaluate": false,
         | 
| 92 | 
            +
                    "should_log": false,
         | 
| 93 | 
            +
                    "should_save": true,
         | 
| 94 | 
            +
                    "should_training_stop": false
         | 
| 95 | 
            +
                  },
         | 
| 96 | 
            +
                  "attributes": {}
         | 
| 97 | 
            +
                }
         | 
| 98 | 
            +
              },
         | 
| 99 | 
            +
              "total_flos": 9.15666871886807e+16,
         | 
| 100 | 
            +
              "train_batch_size": 1,
         | 
| 101 | 
            +
              "trial_name": null,
         | 
| 102 | 
            +
              "trial_params": null
         | 
| 103 | 
            +
            }
         | 
    	
        fine-tuned-model-8-diff/checkpoint-236/training_args.bin
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:bfbf933697a46f684c9469b3a12c6d925023fb9367017748f0863dd726e69bd6
         | 
| 3 | 
            +
            size 5368
         | 
    	
        fine-tuned-model-8-diff/checkpoint-290/README.md
    ADDED
    
    | @@ -0,0 +1,202 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            ---
         | 
| 2 | 
            +
            base_model: ./deepseek-coder-1.3b-instruct
         | 
| 3 | 
            +
            library_name: peft
         | 
| 4 | 
            +
            ---
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            # Model Card for Model ID
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            <!-- Provide a quick summary of what the model is/does. -->
         | 
| 9 | 
            +
             | 
| 10 | 
            +
             | 
| 11 | 
            +
             | 
| 12 | 
            +
            ## Model Details
         | 
| 13 | 
            +
             | 
| 14 | 
            +
            ### Model Description
         | 
| 15 | 
            +
             | 
| 16 | 
            +
            <!-- Provide a longer summary of what this model is. -->
         | 
| 17 | 
            +
             | 
| 18 | 
            +
             | 
| 19 | 
            +
             | 
| 20 | 
            +
            - **Developed by:** [More Information Needed]
         | 
| 21 | 
            +
            - **Funded by [optional]:** [More Information Needed]
         | 
| 22 | 
            +
            - **Shared by [optional]:** [More Information Needed]
         | 
| 23 | 
            +
            - **Model type:** [More Information Needed]
         | 
| 24 | 
            +
            - **Language(s) (NLP):** [More Information Needed]
         | 
| 25 | 
            +
            - **License:** [More Information Needed]
         | 
| 26 | 
            +
            - **Finetuned from model [optional]:** [More Information Needed]
         | 
| 27 | 
            +
             | 
| 28 | 
            +
            ### Model Sources [optional]
         | 
| 29 | 
            +
             | 
| 30 | 
            +
            <!-- Provide the basic links for the model. -->
         | 
| 31 | 
            +
             | 
| 32 | 
            +
            - **Repository:** [More Information Needed]
         | 
| 33 | 
            +
            - **Paper [optional]:** [More Information Needed]
         | 
| 34 | 
            +
            - **Demo [optional]:** [More Information Needed]
         | 
| 35 | 
            +
             | 
| 36 | 
            +
            ## Uses
         | 
| 37 | 
            +
             | 
| 38 | 
            +
            <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
         | 
| 39 | 
            +
             | 
| 40 | 
            +
            ### Direct Use
         | 
| 41 | 
            +
             | 
| 42 | 
            +
            <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
         | 
| 43 | 
            +
             | 
| 44 | 
            +
            [More Information Needed]
         | 
| 45 | 
            +
             | 
| 46 | 
            +
            ### Downstream Use [optional]
         | 
| 47 | 
            +
             | 
| 48 | 
            +
            <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
         | 
| 49 | 
            +
             | 
| 50 | 
            +
            [More Information Needed]
         | 
| 51 | 
            +
             | 
| 52 | 
            +
            ### Out-of-Scope Use
         | 
| 53 | 
            +
             | 
| 54 | 
            +
            <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
         | 
| 55 | 
            +
             | 
| 56 | 
            +
            [More Information Needed]
         | 
| 57 | 
            +
             | 
| 58 | 
            +
            ## Bias, Risks, and Limitations
         | 
| 59 | 
            +
             | 
| 60 | 
            +
            <!-- This section is meant to convey both technical and sociotechnical limitations. -->
         | 
| 61 | 
            +
             | 
| 62 | 
            +
            [More Information Needed]
         | 
| 63 | 
            +
             | 
| 64 | 
            +
            ### Recommendations
         | 
| 65 | 
            +
             | 
| 66 | 
            +
            <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
         | 
| 67 | 
            +
             | 
| 68 | 
            +
            Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
         | 
| 69 | 
            +
             | 
| 70 | 
            +
            ## How to Get Started with the Model
         | 
| 71 | 
            +
             | 
| 72 | 
            +
            Use the code below to get started with the model.
         | 
| 73 | 
            +
             | 
| 74 | 
            +
            [More Information Needed]
         | 
| 75 | 
            +
             | 
| 76 | 
            +
            ## Training Details
         | 
| 77 | 
            +
             | 
| 78 | 
            +
            ### Training Data
         | 
| 79 | 
            +
             | 
| 80 | 
            +
            <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
         | 
| 81 | 
            +
             | 
| 82 | 
            +
            [More Information Needed]
         | 
| 83 | 
            +
             | 
| 84 | 
            +
            ### Training Procedure
         | 
| 85 | 
            +
             | 
| 86 | 
            +
            <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
         | 
| 87 | 
            +
             | 
| 88 | 
            +
            #### Preprocessing [optional]
         | 
| 89 | 
            +
             | 
| 90 | 
            +
            [More Information Needed]
         | 
| 91 | 
            +
             | 
| 92 | 
            +
             | 
| 93 | 
            +
            #### Training Hyperparameters
         | 
| 94 | 
            +
             | 
| 95 | 
            +
            - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
         | 
| 96 | 
            +
             | 
| 97 | 
            +
            #### Speeds, Sizes, Times [optional]
         | 
| 98 | 
            +
             | 
| 99 | 
            +
            <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
         | 
| 100 | 
            +
             | 
| 101 | 
            +
            [More Information Needed]
         | 
| 102 | 
            +
             | 
| 103 | 
            +
            ## Evaluation
         | 
| 104 | 
            +
             | 
| 105 | 
            +
            <!-- This section describes the evaluation protocols and provides the results. -->
         | 
| 106 | 
            +
             | 
| 107 | 
            +
            ### Testing Data, Factors & Metrics
         | 
| 108 | 
            +
             | 
| 109 | 
            +
            #### Testing Data
         | 
| 110 | 
            +
             | 
| 111 | 
            +
            <!-- This should link to a Dataset Card if possible. -->
         | 
| 112 | 
            +
             | 
| 113 | 
            +
            [More Information Needed]
         | 
| 114 | 
            +
             | 
| 115 | 
            +
            #### Factors
         | 
| 116 | 
            +
             | 
| 117 | 
            +
            <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
         | 
| 118 | 
            +
             | 
| 119 | 
            +
            [More Information Needed]
         | 
| 120 | 
            +
             | 
| 121 | 
            +
            #### Metrics
         | 
| 122 | 
            +
             | 
| 123 | 
            +
            <!-- These are the evaluation metrics being used, ideally with a description of why. -->
         | 
| 124 | 
            +
             | 
| 125 | 
            +
            [More Information Needed]
         | 
| 126 | 
            +
             | 
| 127 | 
            +
            ### Results
         | 
| 128 | 
            +
             | 
| 129 | 
            +
            [More Information Needed]
         | 
| 130 | 
            +
             | 
| 131 | 
            +
            #### Summary
         | 
| 132 | 
            +
             | 
| 133 | 
            +
             | 
| 134 | 
            +
             | 
| 135 | 
            +
            ## Model Examination [optional]
         | 
| 136 | 
            +
             | 
| 137 | 
            +
            <!-- Relevant interpretability work for the model goes here -->
         | 
| 138 | 
            +
             | 
| 139 | 
            +
            [More Information Needed]
         | 
| 140 | 
            +
             | 
| 141 | 
            +
            ## Environmental Impact
         | 
| 142 | 
            +
             | 
| 143 | 
            +
            <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
         | 
| 144 | 
            +
             | 
| 145 | 
            +
            Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
         | 
| 146 | 
            +
             | 
| 147 | 
            +
            - **Hardware Type:** [More Information Needed]
         | 
| 148 | 
            +
            - **Hours used:** [More Information Needed]
         | 
| 149 | 
            +
            - **Cloud Provider:** [More Information Needed]
         | 
| 150 | 
            +
            - **Compute Region:** [More Information Needed]
         | 
| 151 | 
            +
            - **Carbon Emitted:** [More Information Needed]
         | 
| 152 | 
            +
             | 
| 153 | 
            +
            ## Technical Specifications [optional]
         | 
| 154 | 
            +
             | 
| 155 | 
            +
            ### Model Architecture and Objective
         | 
| 156 | 
            +
             | 
| 157 | 
            +
            [More Information Needed]
         | 
| 158 | 
            +
             | 
| 159 | 
            +
            ### Compute Infrastructure
         | 
| 160 | 
            +
             | 
| 161 | 
            +
            [More Information Needed]
         | 
| 162 | 
            +
             | 
| 163 | 
            +
            #### Hardware
         | 
| 164 | 
            +
             | 
| 165 | 
            +
            [More Information Needed]
         | 
| 166 | 
            +
             | 
| 167 | 
            +
            #### Software
         | 
| 168 | 
            +
             | 
| 169 | 
            +
            [More Information Needed]
         | 
| 170 | 
            +
             | 
| 171 | 
            +
            ## Citation [optional]
         | 
| 172 | 
            +
             | 
| 173 | 
            +
            <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
         | 
| 174 | 
            +
             | 
| 175 | 
            +
            **BibTeX:**
         | 
| 176 | 
            +
             | 
| 177 | 
            +
            [More Information Needed]
         | 
| 178 | 
            +
             | 
| 179 | 
            +
            **APA:**
         | 
| 180 | 
            +
             | 
| 181 | 
            +
            [More Information Needed]
         | 
| 182 | 
            +
             | 
| 183 | 
            +
            ## Glossary [optional]
         | 
| 184 | 
            +
             | 
| 185 | 
            +
            <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
         | 
| 186 | 
            +
             | 
| 187 | 
            +
            [More Information Needed]
         | 
| 188 | 
            +
             | 
| 189 | 
            +
            ## More Information [optional]
         | 
| 190 | 
            +
             | 
| 191 | 
            +
            [More Information Needed]
         | 
| 192 | 
            +
             | 
| 193 | 
            +
            ## Model Card Authors [optional]
         | 
| 194 | 
            +
             | 
| 195 | 
            +
            [More Information Needed]
         | 
| 196 | 
            +
             | 
| 197 | 
            +
            ## Model Card Contact
         | 
| 198 | 
            +
             | 
| 199 | 
            +
            [More Information Needed]
         | 
| 200 | 
            +
            ### Framework versions
         | 
| 201 | 
            +
             | 
| 202 | 
            +
            - PEFT 0.15.1
         | 
    	
        fine-tuned-model-8-diff/checkpoint-290/adapter_config.json
    ADDED
    
    | @@ -0,0 +1,39 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "alpha_pattern": {},
         | 
| 3 | 
            +
              "auto_mapping": null,
         | 
| 4 | 
            +
              "base_model_name_or_path": "./deepseek-coder-1.3b-instruct",
         | 
| 5 | 
            +
              "bias": "none",
         | 
| 6 | 
            +
              "corda_config": null,
         | 
| 7 | 
            +
              "eva_config": null,
         | 
| 8 | 
            +
              "exclude_modules": null,
         | 
| 9 | 
            +
              "fan_in_fan_out": false,
         | 
| 10 | 
            +
              "inference_mode": true,
         | 
| 11 | 
            +
              "init_lora_weights": true,
         | 
| 12 | 
            +
              "layer_replication": null,
         | 
| 13 | 
            +
              "layers_pattern": null,
         | 
| 14 | 
            +
              "layers_to_transform": null,
         | 
| 15 | 
            +
              "loftq_config": {},
         | 
| 16 | 
            +
              "lora_alpha": 16,
         | 
| 17 | 
            +
              "lora_bias": false,
         | 
| 18 | 
            +
              "lora_dropout": 0.0,
         | 
| 19 | 
            +
              "megatron_config": null,
         | 
| 20 | 
            +
              "megatron_core": "megatron.core",
         | 
| 21 | 
            +
              "modules_to_save": null,
         | 
| 22 | 
            +
              "peft_type": "LORA",
         | 
| 23 | 
            +
              "r": 8,
         | 
| 24 | 
            +
              "rank_pattern": {},
         | 
| 25 | 
            +
              "revision": null,
         | 
| 26 | 
            +
              "target_modules": [
         | 
| 27 | 
            +
                "down_proj",
         | 
| 28 | 
            +
                "up_proj",
         | 
| 29 | 
            +
                "q_proj",
         | 
| 30 | 
            +
                "k_proj",
         | 
| 31 | 
            +
                "v_proj",
         | 
| 32 | 
            +
                "gate_proj",
         | 
| 33 | 
            +
                "o_proj"
         | 
| 34 | 
            +
              ],
         | 
| 35 | 
            +
              "task_type": "CAUSAL_LM",
         | 
| 36 | 
            +
              "trainable_token_indices": null,
         | 
| 37 | 
            +
              "use_dora": false,
         | 
| 38 | 
            +
              "use_rslora": false
         | 
| 39 | 
            +
            }
         | 
    	
        fine-tuned-model-8-diff/checkpoint-290/adapter_model.safetensors
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:6f713f001cee7f2fe95eecab5f99012e7939fd25828bafff4b5bf9abf84805d8
         | 
| 3 | 
            +
            size 292359512
         | 
    	
        fine-tuned-model-8-diff/checkpoint-290/optimizer.pt
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:5f03d21205c5a7b514800ea39ff48c8a07439f4b8dabfe9dde17c2d67e2928de
         | 
| 3 | 
            +
            size 60247362
         | 
    	
        fine-tuned-model-8-diff/checkpoint-290/rng_state.pth
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:96a595d40e3c39aebb5dc745b8a38deeea1f8e27f9a436252d7190ee10a8ca1b
         | 
| 3 | 
            +
            size 14244
         | 
    	
        fine-tuned-model-8-diff/checkpoint-290/scheduler.pt
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:856089f63129062a09e04388e8e69637e1c99f4a786c2b36545b20fb72bd6f45
         | 
| 3 | 
            +
            size 1064
         | 
    	
        fine-tuned-model-8-diff/checkpoint-290/special_tokens_map.json
    ADDED
    
    | @@ -0,0 +1,26 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "additional_special_tokens": [
         | 
| 3 | 
            +
                {
         | 
| 4 | 
            +
                  "content": "<|endofsql|>",
         | 
| 5 | 
            +
                  "lstrip": false,
         | 
| 6 | 
            +
                  "normalized": false,
         | 
| 7 | 
            +
                  "rstrip": false,
         | 
| 8 | 
            +
                  "single_word": false
         | 
| 9 | 
            +
                }
         | 
| 10 | 
            +
              ],
         | 
| 11 | 
            +
              "bos_token": {
         | 
| 12 | 
            +
                "content": "<|begin▁of▁sentence|>",
         | 
| 13 | 
            +
                "lstrip": false,
         | 
| 14 | 
            +
                "normalized": true,
         | 
| 15 | 
            +
                "rstrip": false,
         | 
| 16 | 
            +
                "single_word": false
         | 
| 17 | 
            +
              },
         | 
| 18 | 
            +
              "eos_token": "<|endofsql|>",
         | 
| 19 | 
            +
              "pad_token": {
         | 
| 20 | 
            +
                "content": "<|end▁of▁sentence|>",
         | 
| 21 | 
            +
                "lstrip": false,
         | 
| 22 | 
            +
                "normalized": true,
         | 
| 23 | 
            +
                "rstrip": false,
         | 
| 24 | 
            +
                "single_word": false
         | 
| 25 | 
            +
              }
         | 
| 26 | 
            +
            }
         | 
    	
        fine-tuned-model-8-diff/checkpoint-290/tokenizer.json
    ADDED
    
    | The diff for this file is too large to render. 
		See raw diff | 
|  | 
    	
        fine-tuned-model-8-diff/checkpoint-290/tokenizer_config.json
    ADDED
    
    | @@ -0,0 +1,206 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "add_bos_token": true,
         | 
| 3 | 
            +
              "add_eos_token": false,
         | 
| 4 | 
            +
              "add_prefix_space": null,
         | 
| 5 | 
            +
              "added_tokens_decoder": {
         | 
| 6 | 
            +
                "32000": {
         | 
| 7 | 
            +
                  "content": "õ",
         | 
| 8 | 
            +
                  "lstrip": false,
         | 
| 9 | 
            +
                  "normalized": true,
         | 
| 10 | 
            +
                  "rstrip": false,
         | 
| 11 | 
            +
                  "single_word": false,
         | 
| 12 | 
            +
                  "special": false
         | 
| 13 | 
            +
                },
         | 
| 14 | 
            +
                "32001": {
         | 
| 15 | 
            +
                  "content": "÷",
         | 
| 16 | 
            +
                  "lstrip": false,
         | 
| 17 | 
            +
                  "normalized": true,
         | 
| 18 | 
            +
                  "rstrip": false,
         | 
| 19 | 
            +
                  "single_word": false,
         | 
| 20 | 
            +
                  "special": false
         | 
| 21 | 
            +
                },
         | 
| 22 | 
            +
                "32002": {
         | 
| 23 | 
            +
                  "content": "Á",
         | 
| 24 | 
            +
                  "lstrip": false,
         | 
| 25 | 
            +
                  "normalized": true,
         | 
| 26 | 
            +
                  "rstrip": false,
         | 
| 27 | 
            +
                  "single_word": false,
         | 
| 28 | 
            +
                  "special": false
         | 
| 29 | 
            +
                },
         | 
| 30 | 
            +
                "32003": {
         | 
| 31 | 
            +
                  "content": "ý",
         | 
| 32 | 
            +
                  "lstrip": false,
         | 
| 33 | 
            +
                  "normalized": true,
         | 
| 34 | 
            +
                  "rstrip": false,
         | 
| 35 | 
            +
                  "single_word": false,
         | 
| 36 | 
            +
                  "special": false
         | 
| 37 | 
            +
                },
         | 
| 38 | 
            +
                "32004": {
         | 
| 39 | 
            +
                  "content": "À",
         | 
| 40 | 
            +
                  "lstrip": false,
         | 
| 41 | 
            +
                  "normalized": true,
         | 
| 42 | 
            +
                  "rstrip": false,
         | 
| 43 | 
            +
                  "single_word": false,
         | 
| 44 | 
            +
                  "special": false
         | 
| 45 | 
            +
                },
         | 
| 46 | 
            +
                "32005": {
         | 
| 47 | 
            +
                  "content": "ÿ",
         | 
| 48 | 
            +
                  "lstrip": false,
         | 
| 49 | 
            +
                  "normalized": true,
         | 
| 50 | 
            +
                  "rstrip": false,
         | 
| 51 | 
            +
                  "single_word": false,
         | 
| 52 | 
            +
                  "special": false
         | 
| 53 | 
            +
                },
         | 
| 54 | 
            +
                "32006": {
         | 
| 55 | 
            +
                  "content": "ø",
         | 
| 56 | 
            +
                  "lstrip": false,
         | 
| 57 | 
            +
                  "normalized": true,
         | 
| 58 | 
            +
                  "rstrip": false,
         | 
| 59 | 
            +
                  "single_word": false,
         | 
| 60 | 
            +
                  "special": false
         | 
| 61 | 
            +
                },
         | 
| 62 | 
            +
                "32007": {
         | 
| 63 | 
            +
                  "content": "ú",
         | 
| 64 | 
            +
                  "lstrip": false,
         | 
| 65 | 
            +
                  "normalized": true,
         | 
| 66 | 
            +
                  "rstrip": false,
         | 
| 67 | 
            +
                  "single_word": false,
         | 
| 68 | 
            +
                  "special": false
         | 
| 69 | 
            +
                },
         | 
| 70 | 
            +
                "32008": {
         | 
| 71 | 
            +
                  "content": "þ",
         | 
| 72 | 
            +
                  "lstrip": false,
         | 
| 73 | 
            +
                  "normalized": true,
         | 
| 74 | 
            +
                  "rstrip": false,
         | 
| 75 | 
            +
                  "single_word": false,
         | 
| 76 | 
            +
                  "special": false
         | 
| 77 | 
            +
                },
         | 
| 78 | 
            +
                "32009": {
         | 
| 79 | 
            +
                  "content": "ü",
         | 
| 80 | 
            +
                  "lstrip": false,
         | 
| 81 | 
            +
                  "normalized": true,
         | 
| 82 | 
            +
                  "rstrip": false,
         | 
| 83 | 
            +
                  "single_word": false,
         | 
| 84 | 
            +
                  "special": false
         | 
| 85 | 
            +
                },
         | 
| 86 | 
            +
                "32010": {
         | 
| 87 | 
            +
                  "content": "ù",
         | 
| 88 | 
            +
                  "lstrip": false,
         | 
| 89 | 
            +
                  "normalized": true,
         | 
| 90 | 
            +
                  "rstrip": false,
         | 
| 91 | 
            +
                  "single_word": false,
         | 
| 92 | 
            +
                  "special": false
         | 
| 93 | 
            +
                },
         | 
| 94 | 
            +
                "32011": {
         | 
| 95 | 
            +
                  "content": "ö",
         | 
| 96 | 
            +
                  "lstrip": false,
         | 
| 97 | 
            +
                  "normalized": true,
         | 
| 98 | 
            +
                  "rstrip": false,
         | 
| 99 | 
            +
                  "single_word": false,
         | 
| 100 | 
            +
                  "special": false
         | 
| 101 | 
            +
                },
         | 
| 102 | 
            +
                "32012": {
         | 
| 103 | 
            +
                  "content": "û",
         | 
| 104 | 
            +
                  "lstrip": false,
         | 
| 105 | 
            +
                  "normalized": true,
         | 
| 106 | 
            +
                  "rstrip": false,
         | 
| 107 | 
            +
                  "single_word": false,
         | 
| 108 | 
            +
                  "special": false
         | 
| 109 | 
            +
                },
         | 
| 110 | 
            +
                "32013": {
         | 
| 111 | 
            +
                  "content": "<|begin▁of▁sentence|>",
         | 
| 112 | 
            +
                  "lstrip": false,
         | 
| 113 | 
            +
                  "normalized": true,
         | 
| 114 | 
            +
                  "rstrip": false,
         | 
| 115 | 
            +
                  "single_word": false,
         | 
| 116 | 
            +
                  "special": true
         | 
| 117 | 
            +
                },
         | 
| 118 | 
            +
                "32014": {
         | 
| 119 | 
            +
                  "content": "<|end▁of▁sentence|>",
         | 
| 120 | 
            +
                  "lstrip": false,
         | 
| 121 | 
            +
                  "normalized": true,
         | 
| 122 | 
            +
                  "rstrip": false,
         | 
| 123 | 
            +
                  "single_word": false,
         | 
| 124 | 
            +
                  "special": true
         | 
| 125 | 
            +
                },
         | 
| 126 | 
            +
                "32015": {
         | 
| 127 | 
            +
                  "content": "<|fim▁hole|>",
         | 
| 128 | 
            +
                  "lstrip": false,
         | 
| 129 | 
            +
                  "normalized": true,
         | 
| 130 | 
            +
                  "rstrip": false,
         | 
| 131 | 
            +
                  "single_word": false,
         | 
| 132 | 
            +
                  "special": false
         | 
| 133 | 
            +
                },
         | 
| 134 | 
            +
                "32016": {
         | 
| 135 | 
            +
                  "content": "<|fim▁begin|>",
         | 
| 136 | 
            +
                  "lstrip": false,
         | 
| 137 | 
            +
                  "normalized": true,
         | 
| 138 | 
            +
                  "rstrip": false,
         | 
| 139 | 
            +
                  "single_word": false,
         | 
| 140 | 
            +
                  "special": false
         | 
| 141 | 
            +
                },
         | 
| 142 | 
            +
                "32017": {
         | 
| 143 | 
            +
                  "content": "<|fim▁end|>",
         | 
| 144 | 
            +
                  "lstrip": false,
         | 
| 145 | 
            +
                  "normalized": true,
         | 
| 146 | 
            +
                  "rstrip": false,
         | 
| 147 | 
            +
                  "single_word": false,
         | 
| 148 | 
            +
                  "special": false
         | 
| 149 | 
            +
                },
         | 
| 150 | 
            +
                "32018": {
         | 
| 151 | 
            +
                  "content": "<pad>",
         | 
| 152 | 
            +
                  "lstrip": false,
         | 
| 153 | 
            +
                  "normalized": true,
         | 
| 154 | 
            +
                  "rstrip": false,
         | 
| 155 | 
            +
                  "single_word": false,
         | 
| 156 | 
            +
                  "special": false
         | 
| 157 | 
            +
                },
         | 
| 158 | 
            +
                "32019": {
         | 
| 159 | 
            +
                  "content": "<|User|>",
         | 
| 160 | 
            +
                  "lstrip": false,
         | 
| 161 | 
            +
                  "normalized": true,
         | 
| 162 | 
            +
                  "rstrip": false,
         | 
| 163 | 
            +
                  "single_word": false,
         | 
| 164 | 
            +
                  "special": false
         | 
| 165 | 
            +
                },
         | 
| 166 | 
            +
                "32020": {
         | 
| 167 | 
            +
                  "content": "<|Assistant|>",
         | 
| 168 | 
            +
                  "lstrip": false,
         | 
| 169 | 
            +
                  "normalized": true,
         | 
| 170 | 
            +
                  "rstrip": false,
         | 
| 171 | 
            +
                  "single_word": false,
         | 
| 172 | 
            +
                  "special": false
         | 
| 173 | 
            +
                },
         | 
| 174 | 
            +
                "32021": {
         | 
| 175 | 
            +
                  "content": "<|EOT|>",
         | 
| 176 | 
            +
                  "lstrip": false,
         | 
| 177 | 
            +
                  "normalized": true,
         | 
| 178 | 
            +
                  "rstrip": false,
         | 
| 179 | 
            +
                  "single_word": false,
         | 
| 180 | 
            +
                  "special": true
         | 
| 181 | 
            +
                },
         | 
| 182 | 
            +
                "32022": {
         | 
| 183 | 
            +
                  "content": "<|endofsql|>",
         | 
| 184 | 
            +
                  "lstrip": false,
         | 
| 185 | 
            +
                  "normalized": false,
         | 
| 186 | 
            +
                  "rstrip": false,
         | 
| 187 | 
            +
                  "single_word": false,
         | 
| 188 | 
            +
                  "special": true
         | 
| 189 | 
            +
                }
         | 
| 190 | 
            +
              },
         | 
| 191 | 
            +
              "additional_special_tokens": [
         | 
| 192 | 
            +
                "<|endofsql|>"
         | 
| 193 | 
            +
              ],
         | 
| 194 | 
            +
              "bos_token": "<|begin▁of▁sentence|>",
         | 
| 195 | 
            +
              "chat_template": "{% if not add_generation_prompt is defined %}\n{% set add_generation_prompt = false %}\n{% endif %}\n{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n    {%- if message['role'] == 'system' -%}\n        {%- set ns.found = true -%}\n    {%- endif -%}\n{%- endfor -%}\n{{bos_token}}{%- if not ns.found -%}\n{{'You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer\\n'}}\n{%- endif %}\n{%- for message in messages %}\n    {%- if message['role'] == 'system' %}\n{{ message['content'] }}\n    {%- else %}\n        {%- if message['role'] == 'user' %}\n{{'### Instruction:\\n' + message['content'] + '\\n'}}\n        {%- else %}\n{{'### Response:\\n' + message['content'] + '\\n<|EOT|>\\n'}}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{% if add_generation_prompt %}\n{{'### Response:'}}\n{% endif %}",
         | 
| 196 | 
            +
              "clean_up_tokenization_spaces": false,
         | 
| 197 | 
            +
              "eos_token": "<|endofsql|>",
         | 
| 198 | 
            +
              "extra_special_tokens": {},
         | 
| 199 | 
            +
              "legacy": true,
         | 
| 200 | 
            +
              "model_max_length": 16384,
         | 
| 201 | 
            +
              "pad_token": "<|end▁of▁sentence|>",
         | 
| 202 | 
            +
              "sp_model_kwargs": {},
         | 
| 203 | 
            +
              "tokenizer_class": "LlamaTokenizerFast",
         | 
| 204 | 
            +
              "unk_token": null,
         | 
| 205 | 
            +
              "use_default_system_prompt": false
         | 
| 206 | 
            +
            }
         | 
    	
        fine-tuned-model-8-diff/checkpoint-290/trainer_state.json
    ADDED
    
    | @@ -0,0 +1,118 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "best_global_step": 290,
         | 
| 3 | 
            +
              "best_metric": 0.1572711169719696,
         | 
| 4 | 
            +
              "best_model_checkpoint": "./fine-tuned-model-8-diff\\checkpoint-290",
         | 
| 5 | 
            +
              "epoch": 4.920127795527156,
         | 
| 6 | 
            +
              "eval_steps": 500,
         | 
| 7 | 
            +
              "global_step": 290,
         | 
| 8 | 
            +
              "is_hyper_param_search": false,
         | 
| 9 | 
            +
              "is_local_process_zero": true,
         | 
| 10 | 
            +
              "is_world_process_zero": true,
         | 
| 11 | 
            +
              "log_history": [
         | 
| 12 | 
            +
                {
         | 
| 13 | 
            +
                  "epoch": 0.8519701810436635,
         | 
| 14 | 
            +
                  "grad_norm": 0.3280849754810333,
         | 
| 15 | 
            +
                  "learning_rate": 3.310344827586207e-05,
         | 
| 16 | 
            +
                  "loss": 0.7606,
         | 
| 17 | 
            +
                  "step": 50
         | 
| 18 | 
            +
                },
         | 
| 19 | 
            +
                {
         | 
| 20 | 
            +
                  "epoch": 1.0,
         | 
| 21 | 
            +
                  "eval_loss": 0.24083571135997772,
         | 
| 22 | 
            +
                  "eval_runtime": 209.8235,
         | 
| 23 | 
            +
                  "eval_samples_per_second": 0.5,
         | 
| 24 | 
            +
                  "eval_steps_per_second": 0.5,
         | 
| 25 | 
            +
                  "step": 59
         | 
| 26 | 
            +
                },
         | 
| 27 | 
            +
                {
         | 
| 28 | 
            +
                  "epoch": 1.698615548455804,
         | 
| 29 | 
            +
                  "grad_norm": 0.17877578735351562,
         | 
| 30 | 
            +
                  "learning_rate": 2.620689655172414e-05,
         | 
| 31 | 
            +
                  "loss": 0.2316,
         | 
| 32 | 
            +
                  "step": 100
         | 
| 33 | 
            +
                },
         | 
| 34 | 
            +
                {
         | 
| 35 | 
            +
                  "epoch": 2.0,
         | 
| 36 | 
            +
                  "eval_loss": 0.16867588460445404,
         | 
| 37 | 
            +
                  "eval_runtime": 210.4202,
         | 
| 38 | 
            +
                  "eval_samples_per_second": 0.499,
         | 
| 39 | 
            +
                  "eval_steps_per_second": 0.499,
         | 
| 40 | 
            +
                  "step": 118
         | 
| 41 | 
            +
                },
         | 
| 42 | 
            +
                {
         | 
| 43 | 
            +
                  "epoch": 2.545260915867945,
         | 
| 44 | 
            +
                  "grad_norm": 0.27560967206954956,
         | 
| 45 | 
            +
                  "learning_rate": 1.931034482758621e-05,
         | 
| 46 | 
            +
                  "loss": 0.1695,
         | 
| 47 | 
            +
                  "step": 150
         | 
| 48 | 
            +
                },
         | 
| 49 | 
            +
                {
         | 
| 50 | 
            +
                  "epoch": 3.0,
         | 
| 51 | 
            +
                  "eval_loss": 0.16012603044509888,
         | 
| 52 | 
            +
                  "eval_runtime": 209.4479,
         | 
| 53 | 
            +
                  "eval_samples_per_second": 0.501,
         | 
| 54 | 
            +
                  "eval_steps_per_second": 0.501,
         | 
| 55 | 
            +
                  "step": 177
         | 
| 56 | 
            +
                },
         | 
| 57 | 
            +
                {
         | 
| 58 | 
            +
                  "epoch": 3.3919062832800853,
         | 
| 59 | 
            +
                  "grad_norm": 0.21748089790344238,
         | 
| 60 | 
            +
                  "learning_rate": 1.2413793103448277e-05,
         | 
| 61 | 
            +
                  "loss": 0.1571,
         | 
| 62 | 
            +
                  "step": 200
         | 
| 63 | 
            +
                },
         | 
| 64 | 
            +
                {
         | 
| 65 | 
            +
                  "epoch": 4.0,
         | 
| 66 | 
            +
                  "eval_loss": 0.15796616673469543,
         | 
| 67 | 
            +
                  "eval_runtime": 210.9531,
         | 
| 68 | 
            +
                  "eval_samples_per_second": 0.498,
         | 
| 69 | 
            +
                  "eval_steps_per_second": 0.498,
         | 
| 70 | 
            +
                  "step": 236
         | 
| 71 | 
            +
                },
         | 
| 72 | 
            +
                {
         | 
| 73 | 
            +
                  "epoch": 4.238551650692226,
         | 
| 74 | 
            +
                  "grad_norm": 0.20169177651405334,
         | 
| 75 | 
            +
                  "learning_rate": 5.517241379310345e-06,
         | 
| 76 | 
            +
                  "loss": 0.1471,
         | 
| 77 | 
            +
                  "step": 250
         | 
| 78 | 
            +
                },
         | 
| 79 | 
            +
                {
         | 
| 80 | 
            +
                  "epoch": 4.920127795527156,
         | 
| 81 | 
            +
                  "eval_loss": 0.1572711169719696,
         | 
| 82 | 
            +
                  "eval_runtime": 215.9464,
         | 
| 83 | 
            +
                  "eval_samples_per_second": 0.486,
         | 
| 84 | 
            +
                  "eval_steps_per_second": 0.486,
         | 
| 85 | 
            +
                  "step": 290
         | 
| 86 | 
            +
                }
         | 
| 87 | 
            +
              ],
         | 
| 88 | 
            +
              "logging_steps": 50,
         | 
| 89 | 
            +
              "max_steps": 290,
         | 
| 90 | 
            +
              "num_input_tokens_seen": 0,
         | 
| 91 | 
            +
              "num_train_epochs": 5,
         | 
| 92 | 
            +
              "save_steps": 500,
         | 
| 93 | 
            +
              "stateful_callbacks": {
         | 
| 94 | 
            +
                "EarlyStoppingCallback": {
         | 
| 95 | 
            +
                  "args": {
         | 
| 96 | 
            +
                    "early_stopping_patience": 2,
         | 
| 97 | 
            +
                    "early_stopping_threshold": 0.0
         | 
| 98 | 
            +
                  },
         | 
| 99 | 
            +
                  "attributes": {
         | 
| 100 | 
            +
                    "early_stopping_patience_counter": 0
         | 
| 101 | 
            +
                  }
         | 
| 102 | 
            +
                },
         | 
| 103 | 
            +
                "TrainerControl": {
         | 
| 104 | 
            +
                  "args": {
         | 
| 105 | 
            +
                    "should_epoch_stop": false,
         | 
| 106 | 
            +
                    "should_evaluate": false,
         | 
| 107 | 
            +
                    "should_log": false,
         | 
| 108 | 
            +
                    "should_save": true,
         | 
| 109 | 
            +
                    "should_training_stop": true
         | 
| 110 | 
            +
                  },
         | 
| 111 | 
            +
                  "attributes": {}
         | 
| 112 | 
            +
                }
         | 
| 113 | 
            +
              },
         | 
| 114 | 
            +
              "total_flos": 1.1262995069534208e+17,
         | 
| 115 | 
            +
              "train_batch_size": 1,
         | 
| 116 | 
            +
              "trial_name": null,
         | 
| 117 | 
            +
              "trial_params": null
         | 
| 118 | 
            +
            }
         | 
    	
        fine-tuned-model-8-diff/checkpoint-290/training_args.bin
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:bfbf933697a46f684c9469b3a12c6d925023fb9367017748f0863dd726e69bd6
         | 
| 3 | 
            +
            size 5368
         | 
    	
        fine-tuned-model-8-diff/config.json
    ADDED
    
    | @@ -0,0 +1,48 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "architectures": [
         | 
| 3 | 
            +
                "LlamaForCausalLM"
         | 
| 4 | 
            +
              ],
         | 
| 5 | 
            +
              "attention_bias": false,
         | 
| 6 | 
            +
              "attention_dropout": 0.0,
         | 
| 7 | 
            +
              "bos_token_id": 32013,
         | 
| 8 | 
            +
              "eos_token_id": 32021,
         | 
| 9 | 
            +
              "head_dim": 128,
         | 
| 10 | 
            +
              "hidden_act": "silu",
         | 
| 11 | 
            +
              "hidden_size": 2048,
         | 
| 12 | 
            +
              "initializer_range": 0.02,
         | 
| 13 | 
            +
              "intermediate_size": 5504,
         | 
| 14 | 
            +
              "max_position_embeddings": 16384,
         | 
| 15 | 
            +
              "mlp_bias": false,
         | 
| 16 | 
            +
              "model_type": "llama",
         | 
| 17 | 
            +
              "num_attention_heads": 16,
         | 
| 18 | 
            +
              "num_hidden_layers": 24,
         | 
| 19 | 
            +
              "num_key_value_heads": 16,
         | 
| 20 | 
            +
              "pretraining_tp": 1,
         | 
| 21 | 
            +
              "quantization_config": {
         | 
| 22 | 
            +
                "_load_in_4bit": false,
         | 
| 23 | 
            +
                "_load_in_8bit": true,
         | 
| 24 | 
            +
                "bnb_4bit_compute_dtype": "float32",
         | 
| 25 | 
            +
                "bnb_4bit_quant_storage": "uint8",
         | 
| 26 | 
            +
                "bnb_4bit_quant_type": "fp4",
         | 
| 27 | 
            +
                "bnb_4bit_use_double_quant": false,
         | 
| 28 | 
            +
                "llm_int8_enable_fp32_cpu_offload": false,
         | 
| 29 | 
            +
                "llm_int8_has_fp16_weight": false,
         | 
| 30 | 
            +
                "llm_int8_skip_modules": null,
         | 
| 31 | 
            +
                "llm_int8_threshold": 6.0,
         | 
| 32 | 
            +
                "load_in_4bit": false,
         | 
| 33 | 
            +
                "load_in_8bit": true,
         | 
| 34 | 
            +
                "quant_method": "bitsandbytes"
         | 
| 35 | 
            +
              },
         | 
| 36 | 
            +
              "rms_norm_eps": 1e-06,
         | 
| 37 | 
            +
              "rope_scaling": {
         | 
| 38 | 
            +
                "factor": 4.0,
         | 
| 39 | 
            +
                "rope_type": "linear",
         | 
| 40 | 
            +
                "type": "linear"
         | 
| 41 | 
            +
              },
         | 
| 42 | 
            +
              "rope_theta": 100000,
         | 
| 43 | 
            +
              "tie_word_embeddings": false,
         | 
| 44 | 
            +
              "torch_dtype": "float16",
         | 
| 45 | 
            +
              "transformers_version": "4.50.3",
         | 
| 46 | 
            +
              "use_cache": true,
         | 
| 47 | 
            +
              "vocab_size": 32023
         | 
| 48 | 
            +
            }
         | 
    	
        fine-tuned-model-8-diff/generation_config.json
    ADDED
    
    | @@ -0,0 +1,6 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "_from_model_config": true,
         | 
| 3 | 
            +
              "bos_token_id": 32013,
         | 
| 4 | 
            +
              "eos_token_id": 32021,
         | 
| 5 | 
            +
              "transformers_version": "4.50.3"
         | 
| 6 | 
            +
            }
         | 
    	
        fine-tuned-model-8-diff/model.safetensors
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:35e898f2cc274a3643faba76c1f7cf67c4b0dff32de783b651ae16eab211a0f5
         | 
| 3 | 
            +
            size 1478884408
         | 
    	
        fine-tuned-model-8-diff/runs/Apr07_12-28-50_DESKTOP-SMJC97K/events.out.tfevents.1744054130.DESKTOP-SMJC97K.14268.0
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:223a711057133f1c2144ec1f82748ba969e8d3108045f30347d8e7a84af00766
         | 
| 3 | 
            +
            size 5684
         | 
    	
        fine-tuned-model-8-diff/runs/Apr07_12-37-48_DESKTOP-SMJC97K/events.out.tfevents.1744054670.DESKTOP-SMJC97K.14268.1
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:19d02090dc8e7bf509e22fd48cf486d8d2043cfb97bf80ed75c7987dca31f743
         | 
| 3 | 
            +
            size 5683
         | 
    	
        fine-tuned-model-8-diff/runs/Apr07_12-48-05_DESKTOP-SMJC97K/events.out.tfevents.1744055285.DESKTOP-SMJC97K.21244.0
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:b117af2bf31245e5e24208cbecbb079a3585cb2f1827a4d34e9229f1018d0bbb
         | 
| 3 | 
            +
            size 8429
         | 
    	
        fine-tuned-model-8-diff/special_tokens_map.json
    ADDED
    
    | @@ -0,0 +1,26 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "additional_special_tokens": [
         | 
| 3 | 
            +
                {
         | 
| 4 | 
            +
                  "content": "<|endofsql|>",
         | 
| 5 | 
            +
                  "lstrip": false,
         | 
| 6 | 
            +
                  "normalized": false,
         | 
| 7 | 
            +
                  "rstrip": false,
         | 
| 8 | 
            +
                  "single_word": false
         | 
| 9 | 
            +
                }
         | 
| 10 | 
            +
              ],
         | 
| 11 | 
            +
              "bos_token": {
         | 
| 12 | 
            +
                "content": "<|begin▁of▁sentence|>",
         | 
| 13 | 
            +
                "lstrip": false,
         | 
| 14 | 
            +
                "normalized": true,
         | 
| 15 | 
            +
                "rstrip": false,
         | 
| 16 | 
            +
                "single_word": false
         | 
| 17 | 
            +
              },
         | 
| 18 | 
            +
              "eos_token": "<|endofsql|>",
         | 
| 19 | 
            +
              "pad_token": {
         | 
| 20 | 
            +
                "content": "<|end▁of▁sentence|>",
         | 
| 21 | 
            +
                "lstrip": false,
         | 
| 22 | 
            +
                "normalized": true,
         | 
| 23 | 
            +
                "rstrip": false,
         | 
| 24 | 
            +
                "single_word": false
         | 
| 25 | 
            +
              }
         | 
| 26 | 
            +
            }
         | 
    	
        fine-tuned-model-8-diff/tokenizer.json
    ADDED
    
    | The diff for this file is too large to render. 
		See raw diff | 
|  | 
    	
        fine-tuned-model-8-diff/tokenizer_config.json
    ADDED
    
    | @@ -0,0 +1,206 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "add_bos_token": true,
         | 
| 3 | 
            +
              "add_eos_token": false,
         | 
| 4 | 
            +
              "add_prefix_space": null,
         | 
| 5 | 
            +
              "added_tokens_decoder": {
         | 
| 6 | 
            +
                "32000": {
         | 
| 7 | 
            +
                  "content": "õ",
         | 
| 8 | 
            +
                  "lstrip": false,
         | 
| 9 | 
            +
                  "normalized": true,
         | 
| 10 | 
            +
                  "rstrip": false,
         | 
| 11 | 
            +
                  "single_word": false,
         | 
| 12 | 
            +
                  "special": false
         | 
| 13 | 
            +
                },
         | 
| 14 | 
            +
                "32001": {
         | 
| 15 | 
            +
                  "content": "÷",
         | 
| 16 | 
            +
                  "lstrip": false,
         | 
| 17 | 
            +
                  "normalized": true,
         | 
| 18 | 
            +
                  "rstrip": false,
         | 
| 19 | 
            +
                  "single_word": false,
         | 
| 20 | 
            +
                  "special": false
         | 
| 21 | 
            +
                },
         | 
| 22 | 
            +
                "32002": {
         | 
| 23 | 
            +
                  "content": "Á",
         | 
| 24 | 
            +
                  "lstrip": false,
         | 
| 25 | 
            +
                  "normalized": true,
         | 
| 26 | 
            +
                  "rstrip": false,
         | 
| 27 | 
            +
                  "single_word": false,
         | 
| 28 | 
            +
                  "special": false
         | 
| 29 | 
            +
                },
         | 
| 30 | 
            +
                "32003": {
         | 
| 31 | 
            +
                  "content": "ý",
         | 
| 32 | 
            +
                  "lstrip": false,
         | 
| 33 | 
            +
                  "normalized": true,
         | 
| 34 | 
            +
                  "rstrip": false,
         | 
| 35 | 
            +
                  "single_word": false,
         | 
| 36 | 
            +
                  "special": false
         | 
| 37 | 
            +
                },
         | 
| 38 | 
            +
                "32004": {
         | 
| 39 | 
            +
                  "content": "À",
         | 
| 40 | 
            +
                  "lstrip": false,
         | 
| 41 | 
            +
                  "normalized": true,
         | 
| 42 | 
            +
                  "rstrip": false,
         | 
| 43 | 
            +
                  "single_word": false,
         | 
| 44 | 
            +
                  "special": false
         | 
| 45 | 
            +
                },
         | 
| 46 | 
            +
                "32005": {
         | 
| 47 | 
            +
                  "content": "ÿ",
         | 
| 48 | 
            +
                  "lstrip": false,
         | 
| 49 | 
            +
                  "normalized": true,
         | 
| 50 | 
            +
                  "rstrip": false,
         | 
| 51 | 
            +
                  "single_word": false,
         | 
| 52 | 
            +
                  "special": false
         | 
| 53 | 
            +
                },
         | 
| 54 | 
            +
                "32006": {
         | 
| 55 | 
            +
                  "content": "ø",
         | 
| 56 | 
            +
                  "lstrip": false,
         | 
| 57 | 
            +
                  "normalized": true,
         | 
| 58 | 
            +
                  "rstrip": false,
         | 
| 59 | 
            +
                  "single_word": false,
         | 
| 60 | 
            +
                  "special": false
         | 
| 61 | 
            +
                },
         | 
| 62 | 
            +
                "32007": {
         | 
| 63 | 
            +
                  "content": "ú",
         | 
| 64 | 
            +
                  "lstrip": false,
         | 
| 65 | 
            +
                  "normalized": true,
         | 
| 66 | 
            +
                  "rstrip": false,
         | 
| 67 | 
            +
                  "single_word": false,
         | 
| 68 | 
            +
                  "special": false
         | 
| 69 | 
            +
                },
         | 
| 70 | 
            +
                "32008": {
         | 
| 71 | 
            +
                  "content": "þ",
         | 
| 72 | 
            +
                  "lstrip": false,
         | 
| 73 | 
            +
                  "normalized": true,
         | 
| 74 | 
            +
                  "rstrip": false,
         | 
| 75 | 
            +
                  "single_word": false,
         | 
| 76 | 
            +
                  "special": false
         | 
| 77 | 
            +
                },
         | 
| 78 | 
            +
                "32009": {
         | 
| 79 | 
            +
                  "content": "ü",
         | 
| 80 | 
            +
                  "lstrip": false,
         | 
| 81 | 
            +
                  "normalized": true,
         | 
| 82 | 
            +
                  "rstrip": false,
         | 
| 83 | 
            +
                  "single_word": false,
         | 
| 84 | 
            +
                  "special": false
         | 
| 85 | 
            +
                },
         | 
| 86 | 
            +
                "32010": {
         | 
| 87 | 
            +
                  "content": "ù",
         | 
| 88 | 
            +
                  "lstrip": false,
         | 
| 89 | 
            +
                  "normalized": true,
         | 
| 90 | 
            +
                  "rstrip": false,
         | 
| 91 | 
            +
                  "single_word": false,
         | 
| 92 | 
            +
                  "special": false
         | 
| 93 | 
            +
                },
         | 
| 94 | 
            +
                "32011": {
         | 
| 95 | 
            +
                  "content": "ö",
         | 
| 96 | 
            +
                  "lstrip": false,
         | 
| 97 | 
            +
                  "normalized": true,
         | 
| 98 | 
            +
                  "rstrip": false,
         | 
| 99 | 
            +
                  "single_word": false,
         | 
| 100 | 
            +
                  "special": false
         | 
| 101 | 
            +
                },
         | 
| 102 | 
            +
                "32012": {
         | 
| 103 | 
            +
                  "content": "û",
         | 
| 104 | 
            +
                  "lstrip": false,
         | 
| 105 | 
            +
                  "normalized": true,
         | 
| 106 | 
            +
                  "rstrip": false,
         | 
| 107 | 
            +
                  "single_word": false,
         | 
| 108 | 
            +
                  "special": false
         | 
| 109 | 
            +
                },
         | 
| 110 | 
            +
                "32013": {
         | 
| 111 | 
            +
                  "content": "<|begin▁of▁sentence|>",
         | 
| 112 | 
            +
                  "lstrip": false,
         | 
| 113 | 
            +
                  "normalized": true,
         | 
| 114 | 
            +
                  "rstrip": false,
         | 
| 115 | 
            +
                  "single_word": false,
         | 
| 116 | 
            +
                  "special": true
         | 
| 117 | 
            +
                },
         | 
| 118 | 
            +
                "32014": {
         | 
| 119 | 
            +
                  "content": "<|end▁of▁sentence|>",
         | 
| 120 | 
            +
                  "lstrip": false,
         | 
| 121 | 
            +
                  "normalized": true,
         | 
| 122 | 
            +
                  "rstrip": false,
         | 
| 123 | 
            +
                  "single_word": false,
         | 
| 124 | 
            +
                  "special": true
         | 
| 125 | 
            +
                },
         | 
| 126 | 
            +
                "32015": {
         | 
| 127 | 
            +
                  "content": "<|fim▁hole|>",
         | 
| 128 | 
            +
                  "lstrip": false,
         | 
| 129 | 
            +
                  "normalized": true,
         | 
| 130 | 
            +
                  "rstrip": false,
         | 
| 131 | 
            +
                  "single_word": false,
         | 
| 132 | 
            +
                  "special": false
         | 
| 133 | 
            +
                },
         | 
| 134 | 
            +
                "32016": {
         | 
| 135 | 
            +
                  "content": "<|fim▁begin|>",
         | 
| 136 | 
            +
                  "lstrip": false,
         | 
| 137 | 
            +
                  "normalized": true,
         | 
| 138 | 
            +
                  "rstrip": false,
         | 
| 139 | 
            +
                  "single_word": false,
         | 
| 140 | 
            +
                  "special": false
         | 
| 141 | 
            +
                },
         | 
| 142 | 
            +
                "32017": {
         | 
| 143 | 
            +
                  "content": "<|fim▁end|>",
         | 
| 144 | 
            +
                  "lstrip": false,
         | 
| 145 | 
            +
                  "normalized": true,
         | 
| 146 | 
            +
                  "rstrip": false,
         | 
| 147 | 
            +
                  "single_word": false,
         | 
| 148 | 
            +
                  "special": false
         | 
| 149 | 
            +
                },
         | 
| 150 | 
            +
                "32018": {
         | 
| 151 | 
            +
                  "content": "<pad>",
         | 
| 152 | 
            +
                  "lstrip": false,
         | 
| 153 | 
            +
                  "normalized": true,
         | 
| 154 | 
            +
                  "rstrip": false,
         | 
| 155 | 
            +
                  "single_word": false,
         | 
| 156 | 
            +
                  "special": false
         | 
| 157 | 
            +
                },
         | 
| 158 | 
            +
                "32019": {
         | 
| 159 | 
            +
                  "content": "<|User|>",
         | 
| 160 | 
            +
                  "lstrip": false,
         | 
| 161 | 
            +
                  "normalized": true,
         | 
| 162 | 
            +
                  "rstrip": false,
         | 
| 163 | 
            +
                  "single_word": false,
         | 
| 164 | 
            +
                  "special": false
         | 
| 165 | 
            +
                },
         | 
| 166 | 
            +
                "32020": {
         | 
| 167 | 
            +
                  "content": "<|Assistant|>",
         | 
| 168 | 
            +
                  "lstrip": false,
         | 
| 169 | 
            +
                  "normalized": true,
         | 
| 170 | 
            +
                  "rstrip": false,
         | 
| 171 | 
            +
                  "single_word": false,
         | 
| 172 | 
            +
                  "special": false
         | 
| 173 | 
            +
                },
         | 
| 174 | 
            +
                "32021": {
         | 
| 175 | 
            +
                  "content": "<|EOT|>",
         | 
| 176 | 
            +
                  "lstrip": false,
         | 
| 177 | 
            +
                  "normalized": true,
         | 
| 178 | 
            +
                  "rstrip": false,
         | 
| 179 | 
            +
                  "single_word": false,
         | 
| 180 | 
            +
                  "special": true
         | 
| 181 | 
            +
                },
         | 
| 182 | 
            +
                "32022": {
         | 
| 183 | 
            +
                  "content": "<|endofsql|>",
         | 
| 184 | 
            +
                  "lstrip": false,
         | 
| 185 | 
            +
                  "normalized": false,
         | 
| 186 | 
            +
                  "rstrip": false,
         | 
| 187 | 
            +
                  "single_word": false,
         | 
| 188 | 
            +
                  "special": true
         | 
| 189 | 
            +
                }
         | 
| 190 | 
            +
              },
         | 
| 191 | 
            +
              "additional_special_tokens": [
         | 
| 192 | 
            +
                "<|endofsql|>"
         | 
| 193 | 
            +
              ],
         | 
| 194 | 
            +
              "bos_token": "<|begin▁of▁sentence|>",
         | 
| 195 | 
            +
              "chat_template": "{% if not add_generation_prompt is defined %}\n{% set add_generation_prompt = false %}\n{% endif %}\n{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n    {%- if message['role'] == 'system' -%}\n        {%- set ns.found = true -%}\n    {%- endif -%}\n{%- endfor -%}\n{{bos_token}}{%- if not ns.found -%}\n{{'You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer\\n'}}\n{%- endif %}\n{%- for message in messages %}\n    {%- if message['role'] == 'system' %}\n{{ message['content'] }}\n    {%- else %}\n        {%- if message['role'] == 'user' %}\n{{'### Instruction:\\n' + message['content'] + '\\n'}}\n        {%- else %}\n{{'### Response:\\n' + message['content'] + '\\n<|EOT|>\\n'}}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{% if add_generation_prompt %}\n{{'### Response:'}}\n{% endif %}",
         | 
| 196 | 
            +
              "clean_up_tokenization_spaces": false,
         | 
| 197 | 
            +
              "eos_token": "<|endofsql|>",
         | 
| 198 | 
            +
              "extra_special_tokens": {},
         | 
| 199 | 
            +
              "legacy": true,
         | 
| 200 | 
            +
              "model_max_length": 16384,
         | 
| 201 | 
            +
              "pad_token": "<|end▁of▁sentence|>",
         | 
| 202 | 
            +
              "sp_model_kwargs": {},
         | 
| 203 | 
            +
              "tokenizer_class": "LlamaTokenizerFast",
         | 
| 204 | 
            +
              "unk_token": null,
         | 
| 205 | 
            +
              "use_default_system_prompt": false
         | 
| 206 | 
            +
            }
         | 
    	
        finetune_model.ipynb
    CHANGED
    
    | @@ -220,14 +220,30 @@ | |
| 220 | 
             
              },
         | 
| 221 | 
             
              {
         | 
| 222 | 
             
               "cell_type": "code",
         | 
| 223 | 
            -
               "execution_count":  | 
| 224 | 
             
               "metadata": {},
         | 
| 225 | 
             
               "outputs": [
         | 
| 226 | 
             
                {
         | 
| 227 | 
             
                 "name": "stderr",
         | 
| 228 | 
             
                 "output_type": "stream",
         | 
| 229 | 
             
                 "text": [
         | 
| 230 | 
            -
                  " | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 231 | 
             
                  "  df = df.applymap(lambda x: re.sub(r'\\s+', ' ', x) if isinstance(x, str) else x)\n"
         | 
| 232 | 
             
                 ]
         | 
| 233 | 
             
                },
         | 
| @@ -251,15 +267,14 @@ | |
| 251 | 
             
                  "4  SELECT AVG(ast_home) FROM game WHERE team_abbr...           26.51355662  \n",
         | 
| 252 | 
             
                  "adding!\n",
         | 
| 253 | 
             
                  "32022\n",
         | 
| 254 | 
            -
                  "32023\n" | 
| 255 | 
            -
                  "Max: 3156 | 95th percentile: 3002.85\n"
         | 
| 256 | 
             
                 ]
         | 
| 257 | 
             
                },
         | 
| 258 | 
             
                {
         | 
| 259 | 
             
                 "name": "stderr",
         | 
| 260 | 
             
                 "output_type": "stream",
         | 
| 261 | 
             
                 "text": [
         | 
| 262 | 
            -
                  "Map: 100%|██████████| 1044/1044 [ | 
| 263 | 
             
                 ]
         | 
| 264 | 
             
                },
         | 
| 265 | 
             
                {
         | 
| @@ -268,7 +283,7 @@ | |
| 268 | 
             
                 "text": [
         | 
| 269 | 
             
                  "939\n",
         | 
| 270 | 
             
                  "105\n",
         | 
| 271 | 
            -
                  "0\n"
         | 
| 272 | 
             
                 ]
         | 
| 273 | 
             
                },
         | 
| 274 | 
             
                {
         | 
| @@ -332,67 +347,42 @@ | |
| 332 | 
             
                "print(len(tokenizer)) \n",
         | 
| 333 | 
             
                "\n",
         | 
| 334 | 
             
                "tokenizer.truncation_side = \"left\"\n",
         | 
| 335 | 
            -
                "tokenizer.pad_token = tokenizer.eos_token\n",
         | 
| 336 | 
            -
                "model.generation_config.pad_token_id = tokenizer.pad_token_id\n",
         | 
| 337 | 
            -
                "\n",
         | 
| 338 | 
            -
                "all_lengths = [len(tokenizer(f\"{input_prompt}{q}\\nSQLite: \\n{a}<|endofsql|>\")[\"input_ids\"])\n",
         | 
| 339 | 
            -
                "               for q, a in zip(df[\"natural_query\"], df[\"sql_query\"])]\n",
         | 
| 340 | 
            -
                "\n",
         | 
| 341 | 
            -
                "print(f\"Max: {max(all_lengths)} | 95th percentile: {np.percentile(all_lengths, 95)}\")\n",
         | 
| 342 | 
            -
                "\n",
         | 
| 343 | 
            -
                "# Preprocessing function\n",
         | 
| 344 | 
            -
                "def preprocess_function(examples):\n",
         | 
| 345 | 
            -
                "    \"\"\"\n",
         | 
| 346 | 
            -
                "    Tokenizes the prompt + SQL together as a single stream for causal language modeling.\n",
         | 
| 347 | 
            -
                "    Masks out the prompt portion from the loss.\n",
         | 
| 348 | 
            -
                "    \"\"\"\n",
         | 
| 349 | 
            -
                "    special_token = \"<|endofsql|>\"\n",
         | 
| 350 | 
            -
                "\n",
         | 
| 351 | 
            -
                "    prompt_texts = [\n",
         | 
| 352 | 
            -
                "        f\"{input_prompt}{natural_query}\\nSQLite: \\n{sql_query}{special_token}\"\n",
         | 
| 353 | 
            -
                "        for natural_query, sql_query in zip(examples[\"natural_query\"], examples[\"sql_query\"])\n",
         | 
| 354 | 
            -
                "    ]\n",
         | 
| 355 | 
            -
                "\n",
         | 
| 356 | 
            -
                "    # Tokenize everything in one shot\n",
         | 
| 357 | 
            -
                "    inputs = tokenizer(prompt_texts, truncation=True, padding=True, max_length=3156)\n",
         | 
| 358 | 
            -
                "    input_ids = inputs[\"input_ids\"]\n",
         | 
| 359 | 
            -
                "    labels = []\n",
         | 
| 360 | 
            -
                "\n",
         | 
| 361 | 
            -
                "    for i, input_id in enumerate(input_ids):\n",
         | 
| 362 | 
            -
                "        # Tokenize prompt portion (everything before the SQL query)\n",
         | 
| 363 | 
            -
                "        prompt_only = f\"{input_prompt}{examples['natural_query'][i]}\\nSQLite: \\n\"\n",
         | 
| 364 | 
            -
                "        prompt_ids = tokenizer(prompt_only, truncation=True, padding=True, max_length=3156)[\"input_ids\"]\n",
         | 
| 365 | 
             
                "\n",
         | 
| 366 | 
            -
                " | 
| 367 | 
            -
                " | 
|  | |
|  | |
| 368 | 
             
                "\n",
         | 
| 369 | 
            -
                " | 
| 370 | 
            -
                "        label[:len(prompt_ids)] = [-100] * len(prompt_ids)\n",
         | 
| 371 | 
            -
                "\n",
         | 
| 372 | 
            -
                "        # Sanity check: All label tokens must be valid or -100\n",
         | 
| 373 | 
            -
                "        for token in label:\n",
         | 
| 374 | 
            -
                "            assert token == -100 or (0 <= token < len(tokenizer)), f\"Invalid token ID {token}\"\n",
         | 
| 375 | 
            -
                "\n",
         | 
| 376 | 
            -
                "        labels.append(label)\n",
         | 
| 377 | 
            -
                "\n",
         | 
| 378 | 
            -
                "    inputs[\"labels\"] = labels\n",
         | 
| 379 | 
            -
                "    return inputs\n",
         | 
| 380 | 
            -
                "    \"\"\"\n",
         | 
| 381 | 
             
                "    tokenized = tokenizer(\n",
         | 
| 382 | 
            -
                "         | 
| 383 | 
            -
                "        padding=\"max_length\",\n",
         | 
| 384 | 
             
                "        truncation=True,\n",
         | 
| 385 | 
            -
                "        max_length | 
|  | |
| 386 | 
             
                "    )\n",
         | 
| 387 | 
             
                "\n",
         | 
| 388 | 
            -
                "     | 
|  | |
|  | |
|  | |
|  | |
|  | |
| 389 | 
             
                "    return tokenized\n",
         | 
| 390 | 
            -
                "    \"\"\"\n",
         | 
| 391 | 
            -
                "# Convert to Hugging Face Dataset\n",
         | 
| 392 | 
            -
                "dataset = Dataset.from_pandas(df)\n",
         | 
| 393 | 
             
                "\n",
         | 
| 394 | 
            -
                "#  | 
| 395 | 
            -
                " | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 396 | 
             
                "\n",
         | 
| 397 | 
             
                "# Split into train/validation\n",
         | 
| 398 | 
             
                "split = int(0.9 * len(tokenized_dataset))  # 90% train, 10% validation\n",
         | 
| @@ -402,7 +392,7 @@ | |
| 402 | 
             
                "print(len(train_dataset))\n",
         | 
| 403 | 
             
                "print(len(val_dataset))\n",
         | 
| 404 | 
             
                "\n",
         | 
| 405 | 
            -
                "for v in  | 
| 406 | 
             
                "    print(v)\n",
         | 
| 407 | 
             
                "    break"
         | 
| 408 | 
             
               ]
         | 
| @@ -416,7 +406,7 @@ | |
| 416 | 
             
              },
         | 
| 417 | 
             
              {
         | 
| 418 | 
             
               "cell_type": "code",
         | 
| 419 | 
            -
               "execution_count":  | 
| 420 | 
             
               "metadata": {},
         | 
| 421 | 
             
               "outputs": [
         | 
| 422 | 
             
                {
         | 
| @@ -461,7 +451,7 @@ | |
| 461 | 
             
              },
         | 
| 462 | 
             
              {
         | 
| 463 | 
             
               "cell_type": "code",
         | 
| 464 | 
            -
               "execution_count":  | 
| 465 | 
             
               "metadata": {},
         | 
| 466 | 
             
               "outputs": [
         | 
| 467 | 
             
                {
         | 
| @@ -470,7 +460,7 @@ | |
| 470 | 
             
                 "text": [
         | 
| 471 | 
             
                  "c:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\transformers\\training_args.py:1611: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n",
         | 
| 472 | 
             
                  "  warnings.warn(\n",
         | 
| 473 | 
            -
                  "C:\\Users\\Dean\\AppData\\Local\\Temp\\ | 
| 474 | 
             
                  "  trainer = Trainer(\n",
         | 
| 475 | 
             
                  "No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.\n"
         | 
| 476 | 
             
                 ]
         | 
| @@ -478,13 +468,13 @@ | |
| 478 | 
             
               ],
         | 
| 479 | 
             
               "source": [
         | 
| 480 | 
             
                "training_args = TrainingArguments(\n",
         | 
| 481 | 
            -
                "    output_dir=\"./fine-tuned-model-8\",\n",
         | 
| 482 | 
             
                "    evaluation_strategy=\"epoch\",  # Evaluate at the end of each epoch\n",
         | 
| 483 | 
             
                "    save_strategy=\"epoch\",  # Save model every epoch\n",
         | 
| 484 | 
             
                "    per_device_train_batch_size=1,  # LoRA allows higher batch size\n",
         | 
| 485 | 
             
                "    per_device_eval_batch_size=1,\n",
         | 
| 486 | 
             
                "    gradient_accumulation_steps=16,\n",
         | 
| 487 | 
            -
                "    num_train_epochs= | 
| 488 | 
             
                "    learning_rate=4e-5,  # Higher LR since we're only training LoRA layers\n",
         | 
| 489 | 
             
                "    weight_decay=0.01,\n",
         | 
| 490 | 
             
                "    logging_steps=50,  # Print loss every 50 steps\n",
         | 
| @@ -516,13 +506,99 @@ | |
| 516 | 
             
              },
         | 
| 517 | 
             
              {
         | 
| 518 | 
             
               "cell_type": "code",
         | 
| 519 | 
            -
               "execution_count":  | 
| 520 | 
             
               "metadata": {},
         | 
| 521 | 
             
               "outputs": [
         | 
| 522 | 
             
                {
         | 
| 523 | 
             
                 "name": "stderr",
         | 
| 524 | 
             
                 "output_type": "stream",
         | 
| 525 | 
             
                 "text": [
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 526 | 
             
                  "c:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\peft\\tuners\\lora\\bnb.py:85: UserWarning: Merge lora module to 8-bit linear may get different generations due to rounding errors.\n",
         | 
| 527 | 
             
                  "  warnings.warn(\n"
         | 
| 528 | 
             
                 ]
         | 
| @@ -530,24 +606,24 @@ | |
| 530 | 
             
                {
         | 
| 531 | 
             
                 "data": {
         | 
| 532 | 
             
                  "text/plain": [
         | 
| 533 | 
            -
                   "('./fine-tuned-model-8\\\\tokenizer_config.json',\n",
         | 
| 534 | 
            -
                   " './fine-tuned-model-8\\\\special_tokens_map.json',\n",
         | 
| 535 | 
            -
                   " './fine-tuned-model-8\\\\tokenizer.json')"
         | 
| 536 | 
             
                  ]
         | 
| 537 | 
             
                 },
         | 
| 538 | 
            -
                 "execution_count":  | 
| 539 | 
             
                 "metadata": {},
         | 
| 540 | 
             
                 "output_type": "execute_result"
         | 
| 541 | 
             
                }
         | 
| 542 | 
             
               ],
         | 
| 543 | 
             
               "source": [
         | 
| 544 | 
             
                "# Run training\n",
         | 
| 545 | 
            -
                " | 
| 546 | 
             
                "\n",
         | 
| 547 | 
             
                "# Merge LoRA adapters with the base model before saving\n",
         | 
| 548 | 
             
                "model = model.merge_and_unload()\n",
         | 
| 549 | 
            -
                "model.save_pretrained(\"./fine-tuned-model-8\")\n",
         | 
| 550 | 
            -
                "tokenizer.save_pretrained(\"./fine-tuned-model-8\")"
         | 
| 551 | 
             
               ]
         | 
| 552 | 
             
              },
         | 
| 553 | 
             
              {
         | 
| @@ -559,13 +635,15 @@ | |
| 559 | 
             
              },
         | 
| 560 | 
             
              {
         | 
| 561 | 
             
               "cell_type": "code",
         | 
| 562 | 
            -
               "execution_count":  | 
| 563 | 
             
               "metadata": {},
         | 
| 564 | 
             
               "outputs": [
         | 
| 565 | 
             
                {
         | 
| 566 | 
             
                 "name": "stderr",
         | 
| 567 | 
             
                 "output_type": "stream",
         | 
| 568 | 
             
                 "text": [
         | 
|  | |
|  | |
| 569 | 
             
                  "The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
         | 
| 570 | 
             
                  "c:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\bitsandbytes\\autograd\\_functions.py:315: UserWarning: MatMul8bitLt: inputs will be cast from torch.bfloat16 to float16 during quantization\n",
         | 
| 571 | 
             
                  "  warnings.warn(f\"MatMul8bitLt: inputs will be cast from {A.dtype} to float16 during quantization\")\n"
         | 
| @@ -582,22 +660,20 @@ | |
| 582 | 
             
                  "\n",
         | 
| 583 | 
             
                  "Explanation: The AVG() function is used to calculate the average of a set of values. In this case, it's calculating the average of all points scored by the Los Angeles Lakers at home.\n",
         | 
| 584 | 
             
                  "\n",
         | 
| 585 | 
            -
                  "Note: The query assumes that the pts_home  | 
| 586 | 
             
                  "\n",
         | 
| 587 | 
            -
                  " | 
| 588 | 
            -
                  "How many points to the Los Angeles Lakers average at home?\n",
         | 
| 589 | 
             
                  "\n",
         | 
| 590 | 
            -
                  " | 
| 591 | 
            -
                  "\n",
         | 
| 592 | 
            -
                  "Explanation: The AVG() function is used to calculate the average of a set of values. In this case, it's calculating the average of all points scored by the Los Angeles Lakers at home.\n",
         | 
| 593 | 
             
                  "\n",
         | 
| 594 | 
            -
                  " | 
|  | |
| 595 | 
             
                 ]
         | 
| 596 | 
             
                }
         | 
| 597 | 
             
               ],
         | 
| 598 | 
             
               "source": [
         | 
| 599 | 
            -
                "model = AutoModelForCausalLM.from_pretrained(\"./fine-tuned-model-8\", torch_dtype=torch.bfloat16, device_map=device)\n",
         | 
| 600 | 
            -
                "tokenizer = AutoTokenizer.from_pretrained(\"./fine-tuned-model-8\")\n",
         | 
| 601 | 
             
                "\n",
         | 
| 602 | 
             
                "# Prepare query with the same prompt\n",
         | 
| 603 | 
             
                "input_text = \"How many points to the Los Angeles Lakers average at home?\"\n",
         | 
| @@ -614,6 +690,32 @@ | |
| 614 | 
             
                "\n",
         | 
| 615 | 
             
                "print(\"Generated SQL:\", query_output)"
         | 
| 616 | 
             
               ]
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 617 | 
             
              }
         | 
| 618 | 
             
             ],
         | 
| 619 | 
             
             "metadata": {
         | 
|  | |
| 220 | 
             
              },
         | 
| 221 | 
             
              {
         | 
| 222 | 
             
               "cell_type": "code",
         | 
| 223 | 
            +
               "execution_count": 2,
         | 
| 224 | 
             
               "metadata": {},
         | 
| 225 | 
             
               "outputs": [
         | 
| 226 | 
             
                {
         | 
| 227 | 
             
                 "name": "stderr",
         | 
| 228 | 
             
                 "output_type": "stream",
         | 
| 229 | 
             
                 "text": [
         | 
| 230 | 
            +
                  "c:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
         | 
| 231 | 
            +
                  "  from .autonotebook import tqdm as notebook_tqdm\n"
         | 
| 232 | 
            +
                 ]
         | 
| 233 | 
            +
                },
         | 
| 234 | 
            +
                {
         | 
| 235 | 
            +
                 "name": "stdout",
         | 
| 236 | 
            +
                 "output_type": "stream",
         | 
| 237 | 
            +
                 "text": [
         | 
| 238 | 
            +
                  "WARNING:tensorflow:From c:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\tf_keras\\src\\losses.py:2976: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.\n",
         | 
| 239 | 
            +
                  "\n"
         | 
| 240 | 
            +
                 ]
         | 
| 241 | 
            +
                },
         | 
| 242 | 
            +
                {
         | 
| 243 | 
            +
                 "name": "stderr",
         | 
| 244 | 
            +
                 "output_type": "stream",
         | 
| 245 | 
            +
                 "text": [
         | 
| 246 | 
            +
                  "C:\\Users\\Dean\\AppData\\Local\\Temp\\ipykernel_21244\\3393038659.py:14: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n",
         | 
| 247 | 
             
                  "  df = df.applymap(lambda x: re.sub(r'\\s+', ' ', x) if isinstance(x, str) else x)\n"
         | 
| 248 | 
             
                 ]
         | 
| 249 | 
             
                },
         | 
|  | |
| 267 | 
             
                  "4  SELECT AVG(ast_home) FROM game WHERE team_abbr...           26.51355662  \n",
         | 
| 268 | 
             
                  "adding!\n",
         | 
| 269 | 
             
                  "32022\n",
         | 
| 270 | 
            +
                  "32023\n"
         | 
|  | |
| 271 | 
             
                 ]
         | 
| 272 | 
             
                },
         | 
| 273 | 
             
                {
         | 
| 274 | 
             
                 "name": "stderr",
         | 
| 275 | 
             
                 "output_type": "stream",
         | 
| 276 | 
             
                 "text": [
         | 
| 277 | 
            +
                  "Map: 100%|██████████| 1044/1044 [00:22<00:00, 47.37 examples/s]"
         | 
| 278 | 
             
                 ]
         | 
| 279 | 
             
                },
         | 
| 280 | 
             
                {
         | 
|  | |
| 283 | 
             
                 "text": [
         | 
| 284 | 
             
                  "939\n",
         | 
| 285 | 
             
                  "105\n",
         | 
| 286 | 
            +
                  "{'input_ids': [32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32013, 2042, 417, 274, 20926, 20391, 344, 6145, 1267, 3881, 4694, 12780, 878, 4629, 5975, 547, 12780, 13, 185, 14108, 11593, 732, 285, 2066, 11767, 715, 185, 185, 21598, 6922, 185, 50, 577, 379, 1748, 782, 461, 8443, 9474, 13, 185, 13403, 11866, 15787, 5787, 7449, 30862, 440, 21598, 1, 334, 185, 207, 440, 304, 1, 323, 13532, 24590, 14356, 11, 730, 1585, 1198, 2710, 21411, 327, 254, 2547, 185, 207, 440, 9875, 62, 1523, 1, 323, 13532, 11, 3137, 1585, 11417, 6270, 1208, 280, 254, 2547, 334, 68, 13, 70, 1787, 440, 43, 378, 14204, 412, 9961, 2456, 185, 207, 440, 356, 26321, 335, 1, 323, 13532, 11, 436, 
1585, 15545, 2942, 2547, 1208, 334, 68, 13, 70, 1787, 440, 43, 1743, 2456, 185, 207, 440, 77, 767, 1523, 1, 323, 13532, 11, 655, 1585, 15389, 326, 1219, 25229, 1523, 327, 254, 2547, 334, 68, 13, 70, 1787, 440, 43, 9961, 2456, 185, 207, 440, 23861, 1, 323, 13532, 11, 1044, 1585, 5174, 1064, 254, 2547, 317, 2842, 185, 207, 440, 4968, 1, 323, 13532, 11, 4885, 1585, 4734, 1064, 254, 2547, 317, 6288, 185, 207, 440, 5456, 62, 10246, 271, 1, 5878, 1743, 294, 1585, 10971, 254, 2547, 438, 8143, 185, 477, 185, 185, 14641, 6922, 185, 29133, 9339, 13024, 327, 1317, 461, 8443, 2612, 11, 2837, 1712, 285, 2292, 2547, 3779, 13, 185, 13403, 11866, 15787, 5787, 7449, 30862, 440, 14641, 1, 334, 185, 207, 440, 21810, 62, 304, 1, 323, 13532, 11, 655, 1585, 23825, 21411, 11, 31131, 372, 440, 17, 19393, 19393, 1, 334, 68, 13, 70, 1787, 440, 17, 16, 24, 22, 15, 1, 327, 254, 207, 16, 24, 22, 15, 4314, 8, 185, 207, 440, 21598, 62, 304, 62, 5816, 1, 323, 13532, 11, 294, 1585, 4982, 280, 254, 1712, 2547, 334, 3101, 3238, 440, 304, 1, 279, 2547, 2365, 8, 185, 207, 440, 21598, 62, 356, 26321, 335, 62, 5816, 1, 323, 13532, 11, 1585, 4196, 26321, 335, 280, 254, 1712, 2547, 185, 207, 440, 21598, 62, 1523, 62, 5816, 1, 323, 13532, 11, 1032, 1585, 11417, 1208, 280, 254, 1712, 2547, 185, 207, 440, 14641, 62, 304, 1, 323, 13532, 24590, 14356, 11, 207, 1585, 1198, 2710, 21411, 327, 254, 2612, 185, 207, 440, 14641, 62, 1984, 1, 323, 10920, 1428, 17483, 11, 1032, 1585, 9312, 254, 2612, 438, 7226, 334, 19393, 19393, 12, 8213, 12, 7127, 4797, 8, 185, 207, 440, 10108, 393, 62, 5816, 1, 323, 13532, 11, 294, 1585, 23772, 393, 4283, 2837, 25999, 334, 68, 13, 70, 1787, 440, 43, 1743, 7617, 13, 380, 2951, 2456, 185, 207, 440, 13443, 62, 5816, 1, 323, 13532, 11, 3462, 1585, 440, 54, 1, 562, 254, 1712, 2547, 2103, 11, 440, 43, 1, 562, 653, 4726, 185, 207, 440, 1513, 1, 3379, 4463, 18924, 11, 4885, 1585, 19090, 4054, 7226, 279, 254, 2612, 185, 207, 440, 17982, 76, 62, 5816, 1, 5878, 1743, 11, 2481, 1585, 11739, 
9054, 1396, 457, 254, 1712, 2547, 185, 207, 440, 69, 2417, 62, 5816, 1, 5878, 1743, 11, 2481, 1585, 11739, 9054, 18012, 457, 254, 1712, 2547, 185, 207, 440, 17982, 62, 79, 296, 62, 5816, 1, 5878, 1743, 11, 1574, 1585, 11739, 6206, 14986, 280, 254, 1712, 2547, 185, 207, 440, 17982, 18, 76, 62, 5816, 1, 5878, 1743, 11, 655, 1585, 14910, 12, 3772, 2010, 9054, 1396, 457, 254, 1712, 2547, 185, 207, 440, 17982, 18, 64, 62, 5816, 1, 5878, 1743, 11, 655, 1585, 14910, 12, 3772, 15343, 457, 254, 1712, 2547, 185, 207, 440, 17982, 18, 62, 79, 296, 62, 5816, 1, 5878, 1743, 11, 294, 1585, 14910, 12, 3772, 2010, 6206, 14986, 280, 254, 1712, 2547, 185, 207, 440, 659, 76, 62, 5816, 1, 5878, 1743, 11, 2481, 1585, 7264, 8474, 1396, 457, 254, 1712, 2547, 185, 207, 440, 659, 64, 62, 5816, 1, 5878, 1743, 11, 2481, 1585, 7264, 8474, 18012, 457, 254, 1712, 2547, 185, 207, 440, 659, 62, 79, 296, 62, 5816, 1, 5878, 1743, 11, 1574, 1585, 7264, 5245, 14986, 280, 254, 1712, 2547, 185, 207, 440, 419, 65, 62, 5816, 1, 5878, 1743, 11, 655, 1585, 6050, 4630, 11435, 5740, 457, 254, 1712, 2547, 185, 207, 440, 67, 248, 65, 62, 5816, 1, 5878, 1743, 11, 655, 1585, 5855, 4630, 11435, 5740, 457, 254, 1712, 2547, 185, 207, 440, 248, 65, 62, 5816, 1, 5878, 1743, 11, 2481, 1585, 19090, 11435, 5740, 457, 254, 1712, 2547, 185, 207, 440, 537, 62, 5816, 1, 5878, 1743, 11, 2481, 1585, 3512, 1923, 457, 254, 1712, 2547, 185, 207, 440, 292, 75, 62, 5816, 1, 5878, 1743, 11, 2481, 1585, 3725, 909, 457, 254, 1712, 2547, 185, 207, 440, 1638, 74, 62, 5816, 1, 5878, 1743, 11, 2481, 1585, 380, 19722, 457, 254, 1712, 2547, 185, 207, 440, 577, 85, 62, 5816, 1, 5878, 1743, 11, 2481, 1585, 13974, 17396, 457, 254, 1712, 2547, 185, 207, 440, 26862, 62, 5816, 1, 5878, 1743, 11, 3462, 1585, 20414, 3931, 2724, 457, 254, 1712, 2547, 185, 207, 440, 462, 82, 62, 5816, 1, 5878, 1743, 11, 2481, 1585, 19090, 3472, 18605, 457, 254, 1712, 2547, 185, 207, 440, 13289, 62, 10646, 62, 5816, 1, 3379, 4463, 18924, 11, 243, 1585, 14751, 14, 
10646, 14026, 327, 254, 1712, 2547, 185, 207, 440, 12986, 62, 16647, 62, 5816, 1, 3379, 4463, 18924, 11, 1585, 2325, 278, 980, 3192, 3905, 317, 2315, 334, 16, 405, 7589, 11, 207, 15, 405, 2357, 8, 185, 207, 440, 21598, 62, 304, 62, 11507, 1, 323, 13532, 11, 294, 1585, 4982, 280, 254, 2292, 2547, 185, 207, 440, 21598, 62, 356, 26321, 335, 62, 11507, 1, 323, 13532, 11, 1585, 4196, 26321, 335, 280, 254, 2292, 2547, 185, 207, 440, 21598, 62, 1523, 62, 11507, 1, 323, 13532, 11, 1032, 1585, 11417, 1208, 280, 254, 2292, 2547, 185, 207, 440, 10108, 393, 62, 11507, 1, 323, 13532, 11, 294, 1585, 23772, 393, 4283, 473, 254, 2292, 2547, 486, 82, 12422, 185, 207, 440, 13443, 62, 11507, 1, 323, 13532, 11, 3462, 1585, 440, 54, 1, 562, 254, 2292, 2547, 2103, 11, 440, 43, 1, 562, 653, 4726, 185, 207, 440, 17982, 76, 62, 11507, 1, 5878, 1743, 11, 2481, 1585, 11739, 9054, 1396, 457, 254, 2292, 2547, 185, 207, 440, 69, 2417, 62, 11507, 1, 5878, 1743, 11, 2481, 1585, 11739, 9054, 18012, 457, 254, 2292, 2547, 185, 207, 440, 17982, 62, 79, 296, 62, 11507, 1, 5878, 1743, 11, 1574, 1585, 11739, 6206, 14986, 280, 254, 2292, 2547, 185, 207, 440, 17982, 18, 76, 62, 11507, 1, 5878, 1743, 11, 655, 1585, 14910, 12, 3772, 2010, 9054, 1396, 457, 254, 2292, 2547, 185, 207, 440, 17982, 18, 64, 62, 11507, 1, 5878, 1743, 11, 655, 1585, 14910, 12, 3772, 15343, 457, 254, 2292, 2547, 185, 207, 440, 17982, 18, 62, 79, 296, 62, 11507, 1, 5878, 1743, 11, 294, 1585, 14910, 12, 3772, 2010, 6206, 14986, 280, 254, 2292, 2547, 185, 207, 440, 659, 76, 62, 11507, 1, 5878, 1743, 11, 2481, 1585, 7264, 8474, 1396, 457, 254, 2292, 2547, 185, 207, 440, 659, 64, 62, 11507, 1, 5878, 1743, 11, 2481, 1585, 7264, 8474, 18012, 457, 254, 2292, 2547, 185, 207, 440, 659, 62, 79, 296, 62, 11507, 1, 5878, 1743, 11, 1574, 1585, 7264, 5245, 14986, 280, 254, 2292, 2547, 185, 207, 440, 419, 65, 62, 11507, 1, 5878, 1743, 11, 655, 1585, 6050, 4630, 11435, 5740, 457, 254, 2292, 2547, 185, 207, 440, 67, 248, 65, 62, 11507, 1, 5878, 1743, 
11, 655, 1585, 5855, 4630, 11435, 5740, 457, 254, 2292, 2547, 185, 207, 440, 248, 65, 62, 11507, 1, 5878, 1743, 11, 2481, 1585, 19090, 11435, 5740, 457, 254, 2292, 2547, 185, 207, 440, 537, 62, 11507, 1, 5878, 1743, 11, 2481, 1585, 3512, 1923, 457, 254, 2292, 2547, 185, 207, 440, 292, 75, 62, 11507, 1, 5878, 1743, 11, 2481, 1585, 3725, 909, 457, 254, 2292, 2547, 185, 207, 440, 1638, 74, 62, 11507, 1, 5878, 1743, 11, 2481, 1585, 380, 19722, 457, 254, 2292, 2547, 185, 207, 440, 577, 85, 62, 11507, 1, 5878, 1743, 11, 2481, 1585, 13974, 17396, 457, 254, 2292, 2547, 185, 207, 440, 26862, 62, 11507, 1, 5878, 1743, 11, 3462, 1585, 20414, 3931, 2724, 457, 254, 2292, 2547, 185, 207, 440, 462, 82, 62, 11507, 1, 5878, 1743, 11, 2481, 1585, 19090, 3472, 18605, 457, 254, 2292, 2547, 185, 207, 440, 13289, 62, 10646, 62, 11507, 1, 3379, 4463, 18924, 11, 243, 1585, 14751, 14, 10646, 14026, 327, 254, 2292, 2547, 185, 207, 440, 12986, 62, 16647, 62, 11507, 1, 3379, 4463, 18924, 11, 1585, 2325, 278, 980, 3192, 3905, 317, 2315, 334, 16, 405, 7589, 11, 207, 15, 405, 2357, 8, 185, 207, 440, 21810, 62, 2139, 1, 323, 13532, 3137, 1585, 3980, 996, 4314, 409, 1530, 23836, 185, 477, 185, 185, 1156, 62, 16204, 6922, 185, 50, 577, 379, 4577, 13024, 11, 12144, 276, 254, 2612, 2365, 3752, 2612, 62, 304, 13, 185, 13403, 11866, 15787, 5787, 7449, 30862, 440, 1156, 62, 16204, 1, 334, 185, 207, 440, 14641, 62, 304, 1, 323, 13532, 11, 2481, 1585, 1198, 2710, 2612, 21411, 11, 12050, 1975, 3812, 473, 2612, 2365, 185, 207, 440, 275, 6006, 62, 304, 1, 323, 13532, 11, 3137, 1585, 13040, 21411, 185, 207, 440, 21598, 62, 304, 62, 5816, 1, 323, 13532, 11, 436, 1585, 7161, 2547, 21411, 185, 207, 440, 21598, 62, 356, 26321, 335, 62, 5816, 1, 323, 13532, 11, 1585, 7161, 2547, 31593, 335, 185, 207, 440, 21598, 62, 23861, 62, 5816, 1, 323, 13532, 11, 730, 1585, 7161, 2547, 3775, 185, 207, 440, 462, 82, 62, 79, 2994, 62, 5816, 1, 3379, 4463, 18924, 11, 243, 1585, 11119, 82, 279, 254, 7416, 457, 254, 1712, 2547, 
185, 207, 440, 462, 82, 62, 17, 425, 62, 358, 645, 62, 5816, 1, 3379, 4463, 18924, 11, 1585, 11419, 5504, 3472, 457, 254, 1712, 2547, 185, 207, 440, 462, 82, 62, 19837, 62, 5816, 1, 3379, 4463, 18924, 11, 730, 1585, 19654, 2963, 3472, 457, 254, 1712, 2547, 185, 207, 440, 17819, 370, 62, 30953, 62, 5816, 1, 3379, 4463, 18924, 5200, 412, 1139, 370, 2012, 457, 254, 1712, 2547, 185, 207, 440, 30953, 62, 22054, 1, 3379, 4463, 18924, 11, 251, 1585, 11988, 280, 2012, 4177, 207, 185, 207, 440, 2969, 62, 83, 1050, 1, 3379, 4463, 18924, 11, 1032, 1585, 11988, 280, 2591, 254, 8129, 438, 16538, 185, 207, 440, 21598, 62, 788, 17396, 62, 5816, 1, 3379, 4463, 18924, 11, 1585, 7161, 2547, 1936, 17396, 185, 207, 440, 11695, 62, 788, 17396, 62, 5816, 1, 3379, 4463, 18924, 11, 1585, 19090, 1936, 17396, 457, 254, 1712, 2547, 185, 207, 440, 21598, 62, 248, 65, 5740, 62, 5816, 1, 3379, 4463, 18924, 11, 1585, 7161, 2547, 11435, 5740, 185, 207, 440, 462, 82, 62, 2959, 62, 577, 62, 5816, 1, 3379, 4463, 18924, 11, 207, 1585, 11119, 82, 838, 1936, 17396, 457, 254, 1712, 2547, 185, 207, 440, 21598, 62, 304, 62, 11507, 1, 323, 13532, 11, 436, 1585, 338, 1406, 2547, 21411, 185, 207, 440, 21598, 62, 356, 26321, 335, 62, 11507, 1, 323, 13532, 11, 207, 1585, 338, 1406, 2547, 31593, 335, 185, 207, 440, 462, 82, 62, 79, 2994, 62, 11507, 1, 3379, 4463, 18924, 11, 243, 1585, 11119, 82, 279, 254, 7416, 457, 254, 2292, 2547, 185, 207, 440, 462, 82, 62, 17, 425, 62, 358, 645, 62, 11507, 1, 3379, 4463, 18924, 11, 1585, 11419, 5504, 3472, 457, 254, 2292, 2547, 185, 207, 440, 462, 82, 62, 19837, 62, 11507, 1, 3379, 4463, 18924, 11, 730, 1585, 19654, 2963, 3472, 457, 254, 2292, 2547, 185, 207, 440, 17819, 370, 62, 30953, 62, 11507, 1, 3379, 4463, 18924, 5200, 412, 1139, 370, 2012, 457, 254, 2292, 2547, 185, 207, 440, 21598, 62, 788, 17396, 62, 11507, 1, 3379, 4463, 18924, 11, 1585, 338, 1406, 2547, 1936, 17396, 185, 207, 440, 11695, 62, 788, 17396, 62, 11507, 1, 3379, 4463, 18924, 11, 1585, 19090, 1936, 
17396, 457, 254, 2292, 2547, 185, 207, 440, 21598, 62, 248, 65, 5740, 62, 11507, 1, 3379, 4463, 18924, 11, 1585, 338, 1406, 2547, 11435, 5740, 185, 207, 440, 462, 82, 62, 2959, 62, 577, 62, 11507, 1, 3379, 4463, 18924, 243, 1585, 11119, 82, 838, 1936, 17396, 457, 254, 2292, 2547, 185, 477, 185, 185, 185, 28440, 9715, 9843, 185, 769, 254, 8803, 818, 2664, 4301, 11, 885, 254, 2192, 2547, 4761, 540, 330, 1219, 11, 545, 279, 254, 12780, 340, 970, 931, 254, 2192, 2547, 4761, 409, 254, 31593, 715, 13, 207, 185, 546, 2192, 2547, 4761, 482, 330, 1219, 365, 254, 2612, 2365, 11, 1470, 254, 31593, 715, 1020, 330, 1219, 365, 254, 746, 62, 16204, 2365, 13, 185, 27298, 653, 417, 14843, 457, 254, 939, 3188, 279, 254, 1884, 1517, 25, 185, 185, 3554, 75, 9568, 12499, 705, 91, 1392, 43, 185, 33, 11885, 339, 3467, 959, 91, 33, 2951, 185, 34, 28412, 339, 22281, 4961, 91, 34, 1535, 185, 4843, 24270, 19871, 9017, 91, 45, 5080, 185, 1915, 10595, 21915, 82, 91, 3388, 40, 185, 35, 20315, 6658, 329, 6388, 91, 35, 1743, 185, 23559, 329, 461, 905, 16806, 91, 35, 1732, 185, 25884, 255, 4734, 6370, 25546, 91, 11096, 54, 185, 39, 264, 7664, 10602, 1542, 91, 11317, 52, 185, 43, 378, 14204, 1854, 515, 6474, 91, 43, 2585, 185, 43, 378, 14204, 412, 9961, 91, 43, 1743, 185, 44, 20452, 31410, 91, 44, 7183, 185, 26389, 86, 1766, 25032, 380, 14450, 91, 44, 4470, 185, 7729, 21603, 10389, 696, 86, 313, 1596, 91, 19293, 185, 22120, 541, 18679, 461, 1542, 91, 33, 42, 45, 185, 4843, 4420, 716, 5072, 705, 91, 25399, 42, 185, 3161, 30075, 24247, 91, 1692, 43, 185, 3283, 4659, 12167, 407, 91, 13547, 185, 24374, 19362, 207, 22, 21, 407, 91, 11914, 40, 185, 47, 1389, 23218, 324, 4103, 91, 11914, 55, 185, 9915, 1561, 27258, 380, 1419, 89, 407, 91, 47, 1692, 185, 50, 19524, 28899, 23646, 91, 50, 2585, 185, 23920, 16924, 2566, 2750, 91, 50, 3146, 185, 17917, 25062, 5174, 24022, 91, 9516, 34, 185, 25869, 16466, 432, 1870, 710, 91, 51, 1692, 185, 52, 23667, 565, 10534, 91, 3219, 32, 185, 44, 4522, 262, 452, 368, 4877, 
9123, 91, 30695, 185, 54, 7599, 422, 529, 2539, 91, 54, 3146, 185, 7983, 22852, 375, 382, 875, 91, 35, 2421, 185, 8061, 23106, 31538, 1542, 91, 3388, 32, 185, 185, 5995, 2881, 15143, 185, 9138, 2547, 62, 1523, 62, 5816, 285, 2547, 62, 1523, 62, 11507, 276, 4168, 9474, 276, 254, 2612, 2365, 13, 7310, 2547, 62, 356, 26321, 335, 62, 5816, 285, 2547, 62, 356, 26321, 335, 2292, 276, 4168, 9474, 276, 254, 746, 62, 16204, 2365, 13, 185, 185, 1889, 6226, 457, 4314, 11, 931, 4314, 62, 304, 405, 651, 17, 19393, 19393, 6683, 185, 185, 15013, 25, 2147, 748, 13024, 473, 207, 17, 15, 15, 20, 11, 931, 245, 6158, 833, 25, 4314, 62, 304, 405, 651, 17, 17, 15, 15, 20, 6683, 2147, 748, 13024, 473, 207, 16, 24, 22, 17, 11, 931, 245, 6158, 833, 25, 4314, 62, 304, 405, 440, 17, 16, 24, 22, 17, 2770, 2147, 748, 13024, 473, 207, 17, 15, 16, 20, 11, 931, 245, 6158, 833, 25, 4314, 62, 304, 405, 440, 17, 17, 15, 16, 20, 2770, 185, 185, 2269, 18912, 12780, 967, 7688, 10115, 285, 4934, 20976, 29980, 13, 185, 185, 15013, 10481, 10413, 6074, 285, 5975, 547, 3130, 7486, 185, 4397, 25, 185, 1, 2628, 317, 254, 1093, 3472, 254, 10851, 14204, 412, 9961, 463, 2634, 18605, 429, 1712, 1956, 185, 6231, 547, 25, 185, 7507, 21234, 7, 462, 82, 62, 5816, 8, 7432, 2612, 11294, 2547, 62, 1523, 62, 5816, 405, 651, 43, 378, 14204, 412, 9961, 4057, 185, 185, 4397, 25, 185, 1, 15575, 9474, 417, 6288, 279, 254, 1967, 280, 8700, 1956, 185, 6231, 547, 25, 185, 7507, 2192, 62, 1523, 7432, 2547, 11294, 1967, 405, 651, 9517, 351, 8092, 4057, 185, 185, 4397, 25, 185, 1, 15575, 2547, 658, 254, 7495, 1594, 280, 2547, 1936, 17396, 279, 274, 2292, 2612, 1956, 185, 6231, 547, 25, 185, 7507, 2547, 62, 356, 26321, 335, 62, 11507, 7432, 746, 62, 16204, 25554, 9784, 2547, 62, 788, 17396, 62, 11507, 22187, 34, 29731, 207, 16, 26, 185, 185, 4397, 25, 185, 1, 15575, 9474, 773, 16316, 1321, 207, 16, 24, 22, 24, 1956, 185, 6231, 547, 25, 185, 7507, 2192, 62, 1523, 7432, 2547, 11294, 1008, 62, 10246, 271, 8086, 16, 24, 22, 24, 26, 185, 
185, 4397, 25, 185, 1, 13000, 254, 13164, 339, 3467, 959, 8402, 1712, 14009, 7037, 279, 254, 207, 17, 15, 15, 23, 4314, 876, 185, 6231, 547, 25, 185, 7507, 21234, 7, 462, 82, 62, 5816, 567, 265, 1267, 62, 11507, 8, 4958, 10919, 62, 7541, 7432, 2612, 11294, 2547, 62, 1523, 62, 5816, 405, 651, 33, 11885, 339, 3467, 959, 6, 5584, 4314, 62, 304, 405, 651, 17, 17, 15, 15, 23, 4057, 185, 185, 7605, 387, 885, 254, 5975, 547, 5151, 3651, 3250, 457, 5975, 547, 25, 285, 637, 746, 2422, 11, 533, 441, 2816, 274, 11543, 280, 254, 5151, 13, 4195, 8297, 274, 5975, 547, 5151, 327, 254, 1884, 2664, 3092, 13, 17858, 25, 185, 2808, 1311, 3212, 3472, 1213, 254, 11738, 21915, 82, 8129, 2310, 254, 207, 16, 24, 24, 21, 4314, 30, 185, 6231, 547, 25, 185, 7507, 20861, 7, 462, 82, 8, 4958, 3212, 62, 12168, 7432, 334, 11789, 265, 1267, 62, 5816, 4958, 265, 1267, 7432, 2612, 11294, 2547, 62, 356, 26321, 335, 62, 5816, 405, 651, 3388, 40, 6, 5584, 4314, 62, 304, 405, 651, 17, 16, 24, 24, 21, 6, 8763, 2738, 14177, 11789, 265, 1267, 62, 11507, 4958, 265, 1267, 7432, 2612, 11294, 2547, 62, 356, 26321, 335, 62, 11507, 405, 651, 3388, 40, 6, 5584, 4314, 62, 304, 405, 651, 17, 16, 24, 24, 21, 6, 4363, 32022], 'attention_mask': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': [-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 62, 16204, 25554, 9784, 2547, 62, 788, 17396, 62, 
11507, 22187, 34, 29731, 207, 16, 26, 185, 185, 4397, 25, 185, 1, 15575, 9474, 773, 16316, 1321, 207, 16, 24, 22, 24, 1956, 185, 6231, 547, 25, 185, 7507, 2192, 62, 1523, 7432, 2547, 11294, 1008, 62, 10246, 271, 8086, 16, 24, 22, 24, 26, 185, 185, 4397, 25, 185, 1, 13000, 254, 13164, 339, 3467, 959, 8402, 1712, 14009, 7037, 279, 254, 207, 17, 15, 15, 23, 4314, 876, 185, 6231, 547, 25, 185, 7507, 21234, 7, 462, 82, 62, 5816, 567, 265, 1267, 62, 11507, 8, 4958, 10919, 62, 7541, 7432, 2612, 11294, 2547, 62, 1523, 62, 5816, 405, 651, 33, 11885, 339, 3467, 959, 6, 5584, 4314, 62, 304, 405, 651, 17, 17, 15, 15, 23, 4057, 185, 185, 7605, 387, 885, 254, 5975, 547, 5151, 3651, 3250, 457, 5975, 547, 25, 285, 637, 746, 2422, 11, 533, 441, 2816, 274, 11543, 280, 254, 5151, 13, 4195, 8297, 274, 5975, 547, 5151, 327, 254, 1884, 2664, 3092, 13, 17858, 25, 185, 2808, 1311, 3212, 3472, 1213, 254, 11738, 21915, 82, 8129, 2310, 254, 207, 16, 24, 24, 21, 4314, 30, 185, 6231, 547, 25, 185, 7507, 20861, 7, 462, 82, 8, 4958, 3212, 62, 12168, 7432, 334, 11789, 265, 1267, 62, 5816, 4958, 265, 1267, 7432, 2612, 11294, 2547, 62, 356, 26321, 335, 62, 5816, 405, 651, 3388, 40, 6, 5584, 4314, 62, 304, 405, 651, 17, 16, 24, 24, 21, 6, 8763, 2738, 14177, 11789, 265, 1267, 62, 11507, 4958, 265, 1267, 7432, 2612, 11294, 2547, 62, 356, 26321, 335, 62, 11507, 405, 651, 3388, 40, 6, 5584, 4314, 62, 304, 405, 651, 17, 16, 24, 24, 21, 6, 4363, 32022]}\n"
         | 
| 287 | 
             
                 ]
         | 
| 288 | 
             
                },
         | 
| 289 | 
             
                {
         | 
|  | |
| 347 | 
             
                "print(len(tokenizer)) \n",
         | 
| 348 | 
             
                "\n",
         | 
| 349 | 
             
                "tokenizer.truncation_side = \"left\"\n",
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 350 | 
             
                "\n",
         | 
| 351 | 
            +
                "def format_deepseek_chat(example, tokenizer, special_token=\"<|endofsql|>\"):\n",
         | 
| 352 | 
            +
                "    # Manually build the prompt as one flat string\n",
         | 
| 353 | 
            +
                "    prompt = f\"{input_prompt}{example['natural_query']}\\n\"\n",
         | 
| 354 | 
            +
                "    completion = f\"SQLite:\\n{example['sql_query']}{special_token}\"\n",
         | 
| 355 | 
             
                "\n",
         | 
| 356 | 
            +
                "    full_text = prompt + completion\n",
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 357 | 
             
                "    tokenized = tokenizer(\n",
         | 
| 358 | 
            +
                "        full_text,\n",
         | 
|  | |
| 359 | 
             
                "        truncation=True,\n",
         | 
| 360 | 
            +
                "        padding=\"max_length\",\n",
         | 
| 361 | 
            +
                "        max_length=3156,  # or whatever your model can handle\n",
         | 
| 362 | 
             
                "    )\n",
         | 
| 363 | 
             
                "\n",
         | 
| 364 | 
            +
                "    # Mask out prompt tokens in the labels\n",
         | 
| 365 | 
            +
                "    prompt_len = len(tokenizer(prompt, truncation=True)[\"input_ids\"])\n",
         | 
| 366 | 
            +
                "    labels = tokenized[\"input_ids\"][:]\n",
         | 
| 367 | 
            +
                "    labels[:prompt_len] = [-100] * prompt_len\n",
         | 
| 368 | 
            +
                "    tokenized[\"labels\"] = labels\n",
         | 
| 369 | 
            +
                "\n",
         | 
| 370 | 
             
                "    return tokenized\n",
         | 
|  | |
|  | |
|  | |
| 371 | 
             
                "\n",
         | 
| 372 | 
            +
                "# Build dataset dict\n",
         | 
| 373 | 
            +
                "dataset_dict = {\n",
         | 
| 374 | 
            +
                "    \"natural_query\": df[\"natural_query\"].tolist(),\n",
         | 
| 375 | 
            +
                "    \"sql_query\": df[\"sql_query\"].tolist(),\n",
         | 
| 376 | 
            +
                "}\n",
         | 
| 377 | 
            +
                "\n",
         | 
| 378 | 
            +
                "# Create HuggingFace Dataset\n",
         | 
| 379 | 
            +
                "dataset = Dataset.from_dict(dataset_dict)\n",
         | 
| 380 | 
            +
                "\n",
         | 
| 381 | 
            +
                "# Apply formatting\n",
         | 
| 382 | 
            +
                "tokenized_dataset = dataset.map(\n",
         | 
| 383 | 
            +
                "    lambda x: format_deepseek_chat(x, tokenizer),\n",
         | 
| 384 | 
            +
                "    remove_columns=[\"natural_query\", \"sql_query\"]\n",
         | 
| 385 | 
            +
                ")\n",
         | 
| 386 | 
             
                "\n",
         | 
| 387 | 
             
                "# Split into train/validation\n",
         | 
| 388 | 
             
                "split = int(0.9 * len(tokenized_dataset))  # 90% train, 10% validation\n",
         | 
|  | |
| 392 | 
             
                "print(len(train_dataset))\n",
         | 
| 393 | 
             
                "print(len(val_dataset))\n",
         | 
| 394 | 
             
                "\n",
         | 
| 395 | 
            +
                "for v in val_dataset:\n",
         | 
| 396 | 
             
                "    print(v)\n",
         | 
| 397 | 
             
                "    break"
         | 
| 398 | 
             
               ]
         | 
|  | |
| 406 | 
             
              },
         | 
| 407 | 
             
              {
         | 
| 408 | 
             
               "cell_type": "code",
         | 
| 409 | 
            +
               "execution_count": 3,
         | 
| 410 | 
             
               "metadata": {},
         | 
| 411 | 
             
               "outputs": [
         | 
| 412 | 
             
                {
         | 
|  | |
| 451 | 
             
              },
         | 
| 452 | 
             
              {
         | 
| 453 | 
             
               "cell_type": "code",
         | 
| 454 | 
            +
               "execution_count": 4,
         | 
| 455 | 
             
               "metadata": {},
         | 
| 456 | 
             
               "outputs": [
         | 
| 457 | 
             
                {
         | 
|  | |
| 460 | 
             
                 "text": [
         | 
| 461 | 
             
                  "c:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\transformers\\training_args.py:1611: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n",
         | 
| 462 | 
             
                  "  warnings.warn(\n",
         | 
| 463 | 
            +
                  "C:\\Users\\Dean\\AppData\\Local\\Temp\\ipykernel_21244\\719275035.py:21: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `Trainer.__init__`. Use `processing_class` instead.\n",
         | 
| 464 | 
             
                  "  trainer = Trainer(\n",
         | 
| 465 | 
             
                  "No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.\n"
         | 
| 466 | 
             
                 ]
         | 
|  | |
| 468 | 
             
               ],
         | 
| 469 | 
             
               "source": [
         | 
| 470 | 
             
                "training_args = TrainingArguments(\n",
         | 
| 471 | 
            +
                "    output_dir=\"./fine-tuned-model-8-diff\",\n",
         | 
| 472 | 
             
                "    evaluation_strategy=\"epoch\",  # Evaluate at the end of each epoch\n",
         | 
| 473 | 
             
                "    save_strategy=\"epoch\",  # Save model every epoch\n",
         | 
| 474 | 
             
                "    per_device_train_batch_size=1,  # LoRA allows higher batch size\n",
         | 
| 475 | 
             
                "    per_device_eval_batch_size=1,\n",
         | 
| 476 | 
             
                "    gradient_accumulation_steps=16,\n",
         | 
| 477 | 
            +
                "    num_train_epochs=5,  # Increase if needed\n",
         | 
| 478 | 
             
                "    learning_rate=4e-5,  # Higher LR since we're only training LoRA layers\n",
         | 
| 479 | 
             
                "    weight_decay=0.01,\n",
         | 
| 480 | 
             
                "    logging_steps=50,  # Print loss every 50 steps\n",
         | 
|  | |
| 506 | 
             
              },
         | 
| 507 | 
             
              {
         | 
| 508 | 
             
               "cell_type": "code",
         | 
| 509 | 
            +
               "execution_count": 5,
         | 
| 510 | 
             
               "metadata": {},
         | 
| 511 | 
             
               "outputs": [
         | 
| 512 | 
             
                {
         | 
| 513 | 
             
                 "name": "stderr",
         | 
| 514 | 
             
                 "output_type": "stream",
         | 
| 515 | 
             
                 "text": [
         | 
| 516 | 
            +
                  "c:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\transformers\\integrations\\sdpa_attention.py:54: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at C:\\actions-runner\\_work\\pytorch\\pytorch\\builder\\windows\\pytorch\\aten\\src\\ATen\\native\\transformers\\cuda\\sdp_utils.cpp:555.)\n",
         | 
| 517 | 
            +
                  "  attn_output = torch.nn.functional.scaled_dot_product_attention(\n",
         | 
| 518 | 
            +
                  "c:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\bitsandbytes\\autograd\\_functions.py:315: UserWarning: MatMul8bitLt: inputs will be cast from torch.bfloat16 to float16 during quantization\n",
         | 
| 519 | 
            +
                  "  warnings.warn(f\"MatMul8bitLt: inputs will be cast from {A.dtype} to float16 during quantization\")\n",
         | 
| 520 | 
            +
                  "c:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\bitsandbytes\\autograd\\_functions.py:315: UserWarning: MatMul8bitLt: inputs will be cast from torch.float32 to float16 during quantization\n",
         | 
| 521 | 
            +
                  "  warnings.warn(f\"MatMul8bitLt: inputs will be cast from {A.dtype} to float16 during quantization\")\n"
         | 
| 522 | 
            +
                 ]
         | 
| 523 | 
            +
                },
         | 
| 524 | 
            +
                {
         | 
| 525 | 
            +
                 "data": {
         | 
| 526 | 
            +
                  "text/html": [
         | 
| 527 | 
            +
                   "\n",
         | 
| 528 | 
            +
                   "    <div>\n",
         | 
| 529 | 
            +
                   "      \n",
         | 
| 530 | 
            +
                   "      <progress value='290' max='290' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
         | 
| 531 | 
            +
                   "      [290/290 22:37:10, Epoch 4/5]\n",
         | 
| 532 | 
            +
                   "    </div>\n",
         | 
| 533 | 
            +
                   "    <table border=\"1\" class=\"dataframe\">\n",
         | 
| 534 | 
            +
                   "  <thead>\n",
         | 
| 535 | 
            +
                   " <tr style=\"text-align: left;\">\n",
         | 
| 536 | 
            +
                   "      <th>Epoch</th>\n",
         | 
| 537 | 
            +
                   "      <th>Training Loss</th>\n",
         | 
| 538 | 
            +
                   "      <th>Validation Loss</th>\n",
         | 
| 539 | 
            +
                   "    </tr>\n",
         | 
| 540 | 
            +
                   "  </thead>\n",
         | 
| 541 | 
            +
                   "  <tbody>\n",
         | 
| 542 | 
            +
                   "    <tr>\n",
         | 
| 543 | 
            +
                   "      <td>1</td>\n",
         | 
| 544 | 
            +
                   "      <td>0.760600</td>\n",
         | 
| 545 | 
            +
                   "      <td>0.240836</td>\n",
         | 
| 546 | 
            +
                   "    </tr>\n",
         | 
| 547 | 
            +
                   "    <tr>\n",
         | 
| 548 | 
            +
                   "      <td>2</td>\n",
         | 
| 549 | 
            +
                   "      <td>0.231600</td>\n",
         | 
| 550 | 
            +
                   "      <td>0.168676</td>\n",
         | 
| 551 | 
            +
                   "    </tr>\n",
         | 
| 552 | 
            +
                   "    <tr>\n",
         | 
| 553 | 
            +
                   "      <td>3</td>\n",
         | 
| 554 | 
            +
                   "      <td>0.169500</td>\n",
         | 
| 555 | 
            +
                   "      <td>0.160126</td>\n",
         | 
| 556 | 
            +
                   "    </tr>\n",
         | 
| 557 | 
            +
                   "    <tr>\n",
         | 
| 558 | 
            +
                   "      <td>4</td>\n",
         | 
| 559 | 
            +
                   "      <td>0.147100</td>\n",
         | 
| 560 | 
            +
                   "      <td>0.157271</td>\n",
         | 
| 561 | 
            +
                   "    </tr>\n",
         | 
| 562 | 
            +
                   "  </tbody>\n",
         | 
| 563 | 
            +
                   "</table><p>"
         | 
| 564 | 
            +
                  ],
         | 
| 565 | 
            +
                  "text/plain": [
         | 
| 566 | 
            +
                   "<IPython.core.display.HTML object>"
         | 
| 567 | 
            +
                  ]
         | 
| 568 | 
            +
                 },
         | 
| 569 | 
            +
                 "metadata": {},
         | 
| 570 | 
            +
                 "output_type": "display_data"
         | 
| 571 | 
            +
                },
         | 
| 572 | 
            +
                {
         | 
| 573 | 
            +
                 "name": "stderr",
         | 
| 574 | 
            +
                 "output_type": "stream",
         | 
| 575 | 
            +
                 "text": [
         | 
| 576 | 
            +
                  "c:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\peft\\utils\\save_and_load.py:250: UserWarning: Setting `save_embedding_layers` to `True` as the embedding layer has been resized during finetuning.\n",
         | 
| 577 | 
            +
                  "  warnings.warn(\n",
         | 
| 578 | 
            +
                  "c:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\bitsandbytes\\autograd\\_functions.py:315: UserWarning: MatMul8bitLt: inputs will be cast from torch.bfloat16 to float16 during quantization\n",
         | 
| 579 | 
            +
                  "  warnings.warn(f\"MatMul8bitLt: inputs will be cast from {A.dtype} to float16 during quantization\")\n",
         | 
| 580 | 
            +
                  "c:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\bitsandbytes\\autograd\\_functions.py:315: UserWarning: MatMul8bitLt: inputs will be cast from torch.float32 to float16 during quantization\n",
         | 
| 581 | 
            +
                  "  warnings.warn(f\"MatMul8bitLt: inputs will be cast from {A.dtype} to float16 during quantization\")\n",
         | 
| 582 | 
            +
                  "c:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\peft\\utils\\save_and_load.py:250: UserWarning: Setting `save_embedding_layers` to `True` as the embedding layer has been resized during finetuning.\n",
         | 
| 583 | 
            +
                  "  warnings.warn(\n",
         | 
| 584 | 
            +
                  "c:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\bitsandbytes\\autograd\\_functions.py:315: UserWarning: MatMul8bitLt: inputs will be cast from torch.bfloat16 to float16 during quantization\n",
         | 
| 585 | 
            +
                  "  warnings.warn(f\"MatMul8bitLt: inputs will be cast from {A.dtype} to float16 during quantization\")\n",
         | 
| 586 | 
            +
                  "c:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\bitsandbytes\\autograd\\_functions.py:315: UserWarning: MatMul8bitLt: inputs will be cast from torch.float32 to float16 during quantization\n",
         | 
| 587 | 
            +
                  "  warnings.warn(f\"MatMul8bitLt: inputs will be cast from {A.dtype} to float16 during quantization\")\n",
         | 
| 588 | 
            +
                  "c:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\peft\\utils\\save_and_load.py:250: UserWarning: Setting `save_embedding_layers` to `True` as the embedding layer has been resized during finetuning.\n",
         | 
| 589 | 
            +
                  "  warnings.warn(\n",
         | 
| 590 | 
            +
                  "c:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\bitsandbytes\\autograd\\_functions.py:315: UserWarning: MatMul8bitLt: inputs will be cast from torch.bfloat16 to float16 during quantization\n",
         | 
| 591 | 
            +
                  "  warnings.warn(f\"MatMul8bitLt: inputs will be cast from {A.dtype} to float16 during quantization\")\n",
         | 
| 592 | 
            +
                  "c:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\bitsandbytes\\autograd\\_functions.py:315: UserWarning: MatMul8bitLt: inputs will be cast from torch.float32 to float16 during quantization\n",
         | 
| 593 | 
            +
                  "  warnings.warn(f\"MatMul8bitLt: inputs will be cast from {A.dtype} to float16 during quantization\")\n",
         | 
| 594 | 
            +
                  "c:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\peft\\utils\\save_and_load.py:250: UserWarning: Setting `save_embedding_layers` to `True` as the embedding layer has been resized during finetuning.\n",
         | 
| 595 | 
            +
                  "  warnings.warn(\n",
         | 
| 596 | 
            +
                  "c:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\bitsandbytes\\autograd\\_functions.py:315: UserWarning: MatMul8bitLt: inputs will be cast from torch.bfloat16 to float16 during quantization\n",
         | 
| 597 | 
            +
                  "  warnings.warn(f\"MatMul8bitLt: inputs will be cast from {A.dtype} to float16 during quantization\")\n",
         | 
| 598 | 
            +
                  "c:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\bitsandbytes\\autograd\\_functions.py:315: UserWarning: MatMul8bitLt: inputs will be cast from torch.float32 to float16 during quantization\n",
         | 
| 599 | 
            +
                  "  warnings.warn(f\"MatMul8bitLt: inputs will be cast from {A.dtype} to float16 during quantization\")\n",
         | 
| 600 | 
            +
                  "c:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\peft\\utils\\save_and_load.py:250: UserWarning: Setting `save_embedding_layers` to `True` as the embedding layer has been resized during finetuning.\n",
         | 
| 601 | 
            +
                  "  warnings.warn(\n",
         | 
| 602 | 
             
                  "c:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\peft\\tuners\\lora\\bnb.py:85: UserWarning: Merge lora module to 8-bit linear may get different generations due to rounding errors.\n",
         | 
| 603 | 
             
                  "  warnings.warn(\n"
         | 
| 604 | 
             
                 ]
         | 
|  | |
| 606 | 
             
                {
         | 
| 607 | 
             
                 "data": {
         | 
| 608 | 
             
                  "text/plain": [
         | 
| 609 | 
            +
                   "('./fine-tuned-model-8-diff\\\\tokenizer_config.json',\n",
         | 
| 610 | 
            +
                   " './fine-tuned-model-8-diff\\\\special_tokens_map.json',\n",
         | 
| 611 | 
            +
                   " './fine-tuned-model-8-diff\\\\tokenizer.json')"
         | 
| 612 | 
             
                  ]
         | 
| 613 | 
             
                 },
         | 
| 614 | 
            +
                 "execution_count": 5,
         | 
| 615 | 
             
                 "metadata": {},
         | 
| 616 | 
             
                 "output_type": "execute_result"
         | 
| 617 | 
             
                }
         | 
| 618 | 
             
               ],
         | 
| 619 | 
             
               "source": [
         | 
| 620 | 
             
                "# Run training\n",
         | 
| 621 | 
            +
                "trainer.train()\n",
         | 
| 622 | 
             
                "\n",
         | 
| 623 | 
             
                "# Merge LoRA adapters with the base model before saving\n",
         | 
| 624 | 
             
                "model = model.merge_and_unload()\n",
         | 
| 625 | 
            +
                "model.save_pretrained(\"./fine-tuned-model-8-diff\")\n",
         | 
| 626 | 
            +
                "tokenizer.save_pretrained(\"./fine-tuned-model-8-diff\")"
         | 
| 627 | 
             
               ]
         | 
| 628 | 
             
              },
         | 
| 629 | 
             
              {
         | 
|  | |
| 635 | 
             
              },
         | 
| 636 | 
             
              {
         | 
| 637 | 
             
               "cell_type": "code",
         | 
| 638 | 
            +
               "execution_count": 6,
         | 
| 639 | 
             
               "metadata": {},
         | 
| 640 | 
             
               "outputs": [
         | 
| 641 | 
             
                {
         | 
| 642 | 
             
                 "name": "stderr",
         | 
| 643 | 
             
                 "output_type": "stream",
         | 
| 644 | 
             
                 "text": [
         | 
| 645 | 
            +
                  "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
         | 
| 646 | 
            +
                  "Setting `pad_token_id` to `eos_token_id`:32022 for open-end generation.\n",
         | 
| 647 | 
             
                  "The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
         | 
| 648 | 
             
                  "c:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\bitsandbytes\\autograd\\_functions.py:315: UserWarning: MatMul8bitLt: inputs will be cast from torch.bfloat16 to float16 during quantization\n",
         | 
| 649 | 
             
                  "  warnings.warn(f\"MatMul8bitLt: inputs will be cast from {A.dtype} to float16 during quantization\")\n"
         | 
|  | |
| 660 | 
             
                  "\n",
         | 
| 661 | 
             
                  "Explanation: The AVG() function is used to calculate the average of a set of values. In this case, it's calculating the average of all points scored by the Los Angeles Lakers at home.\n",
         | 
| 662 | 
             
                  "\n",
         | 
| 663 | 
            +
                  "Note: The query assumes that the pts_home column in the game table represents the total points scored by the home team. If the column name is different, you'll need to adjust the query accordingly.\n",
         | 
| 664 | 
             
                  "\n",
         | 
| 665 | 
            +
                  "Also, this query does not take into account the season_id filter, which is a requirement in the original question. If you want to include the season filter, you'll need to adjust the query accordingly.\n",
         | 
|  | |
| 666 | 
             
                  "\n",
         | 
| 667 | 
            +
                  "For example, if you want to find the average points scored by the Los Angeles Lakers in the 2008 season, you would use:\n",
         | 
|  | |
|  | |
| 668 | 
             
                  "\n",
         | 
| 669 | 
            +
                  "SQLite:\n",
         | 
| 670 | 
            +
                  "SELECT AVG(pts_home) FROM game WHERE team_name_home = 'Los Angeles Lakers' AND season_\n"
         | 
| 671 | 
             
                 ]
         | 
| 672 | 
             
                }
         | 
| 673 | 
             
               ],
         | 
| 674 | 
             
               "source": [
         | 
| 675 | 
            +
                "model = AutoModelForCausalLM.from_pretrained(\"./fine-tuned-model-8-diff\", torch_dtype=torch.bfloat16, device_map=device)\n",
         | 
| 676 | 
            +
                "tokenizer = AutoTokenizer.from_pretrained(\"./fine-tuned-model-8-diff\")\n",
         | 
| 677 | 
             
                "\n",
         | 
| 678 | 
             
                "# Prepare query with the same prompt\n",
         | 
| 679 | 
             
                "input_text = \"How many points to the Los Angeles Lakers average at home?\"\n",
         | 
|  | |
| 690 | 
             
                "\n",
         | 
| 691 | 
             
                "print(\"Generated SQL:\", query_output)"
         | 
| 692 | 
             
               ]
         | 
| 693 | 
            +
              },
         | 
| 694 | 
            +
              {
         | 
| 695 | 
            +
               "cell_type": "markdown",
         | 
| 696 | 
            +
               "metadata": {},
         | 
| 697 | 
            +
               "source": [
         | 
| 698 | 
            +
                "## Save train and validation sets to disk"
         | 
| 699 | 
            +
               ]
         | 
| 700 | 
            +
              },
         | 
| 701 | 
            +
              {
         | 
| 702 | 
            +
               "cell_type": "code",
         | 
| 703 | 
            +
               "execution_count": 7,
         | 
| 704 | 
            +
               "metadata": {},
         | 
| 705 | 
            +
               "outputs": [
         | 
| 706 | 
            +
                {
         | 
| 707 | 
            +
                 "name": "stderr",
         | 
| 708 | 
            +
                 "output_type": "stream",
         | 
| 709 | 
            +
                 "text": [
         | 
| 710 | 
            +
                  "Saving the dataset (1/1 shards): 100%|██████████| 939/939 [00:00<00:00, 18233.32 examples/s]\n",
         | 
| 711 | 
            +
                  "Saving the dataset (1/1 shards): 100%|██████████| 105/105 [00:00<00:00, 11667.82 examples/s]\n"
         | 
| 712 | 
            +
                 ]
         | 
| 713 | 
            +
                }
         | 
| 714 | 
            +
               ],
         | 
| 715 | 
            +
               "source": [
         | 
| 716 | 
            +
                "train_dataset.save_to_disk(\"train.hf\")\n",
         | 
| 717 | 
            +
                "val_dataset.save_to_disk(\"val.hf\")"
         | 
| 718 | 
            +
               ]
         | 
| 719 | 
             
              }
         | 
| 720 | 
             
             ],
         | 
| 721 | 
             
             "metadata": {
         | 
    	
        train.hf/data-00000-of-00001.arrow
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:9dbea7350b9ce541dbc1abb69cc6383d1990f425dbf0e0a6300107f0f395dad0
         | 
| 3 | 
            +
            size 38537696
         | 
    	
        train.hf/dataset_info.json
    ADDED
    
    | @@ -0,0 +1,29 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "citation": "",
         | 
| 3 | 
            +
              "description": "",
         | 
| 4 | 
            +
              "features": {
         | 
| 5 | 
            +
                "input_ids": {
         | 
| 6 | 
            +
                  "feature": {
         | 
| 7 | 
            +
                    "dtype": "int32",
         | 
| 8 | 
            +
                    "_type": "Value"
         | 
| 9 | 
            +
                  },
         | 
| 10 | 
            +
                  "_type": "Sequence"
         | 
| 11 | 
            +
                },
         | 
| 12 | 
            +
                "attention_mask": {
         | 
| 13 | 
            +
                  "feature": {
         | 
| 14 | 
            +
                    "dtype": "int8",
         | 
| 15 | 
            +
                    "_type": "Value"
         | 
| 16 | 
            +
                  },
         | 
| 17 | 
            +
                  "_type": "Sequence"
         | 
| 18 | 
            +
                },
         | 
| 19 | 
            +
                "labels": {
         | 
| 20 | 
            +
                  "feature": {
         | 
| 21 | 
            +
                    "dtype": "int64",
         | 
| 22 | 
            +
                    "_type": "Value"
         | 
| 23 | 
            +
                  },
         | 
| 24 | 
            +
                  "_type": "Sequence"
         | 
| 25 | 
            +
                }
         | 
| 26 | 
            +
              },
         | 
| 27 | 
            +
              "homepage": "",
         | 
| 28 | 
            +
              "license": ""
         | 
| 29 | 
            +
            }
         | 
    	
        train.hf/state.json
    ADDED
    
    | @@ -0,0 +1,13 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "_data_files": [
         | 
| 3 | 
            +
                {
         | 
| 4 | 
            +
                  "filename": "data-00000-of-00001.arrow"
         | 
| 5 | 
            +
                }
         | 
| 6 | 
            +
              ],
         | 
| 7 | 
            +
              "_fingerprint": "3046619c9c688db0",
         | 
| 8 | 
            +
              "_format_columns": null,
         | 
| 9 | 
            +
              "_format_kwargs": {},
         | 
| 10 | 
            +
              "_format_type": null,
         | 
| 11 | 
            +
              "_output_all_columns": false,
         | 
| 12 | 
            +
              "_split": null
         | 
| 13 | 
            +
            }
         | 
    	
        val.hf/data-00000-of-00001.arrow
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:6ad8997fa6cd065bb65982a1aa7004a7decbb8a6bfeb85aae7aeadd49c292629
         | 
| 3 | 
            +
            size 4310336
         | 
    	
        val.hf/dataset_info.json
    ADDED
    
    | @@ -0,0 +1,29 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "citation": "",
         | 
| 3 | 
            +
              "description": "",
         | 
| 4 | 
            +
              "features": {
         | 
| 5 | 
            +
                "input_ids": {
         | 
| 6 | 
            +
                  "feature": {
         | 
| 7 | 
            +
                    "dtype": "int32",
         | 
| 8 | 
            +
                    "_type": "Value"
         | 
| 9 | 
            +
                  },
         | 
| 10 | 
            +
                  "_type": "Sequence"
         | 
| 11 | 
            +
                },
         | 
| 12 | 
            +
                "attention_mask": {
         | 
| 13 | 
            +
                  "feature": {
         | 
| 14 | 
            +
                    "dtype": "int8",
         | 
| 15 | 
            +
                    "_type": "Value"
         | 
| 16 | 
            +
                  },
         | 
| 17 | 
            +
                  "_type": "Sequence"
         | 
| 18 | 
            +
                },
         | 
| 19 | 
            +
                "labels": {
         | 
| 20 | 
            +
                  "feature": {
         | 
| 21 | 
            +
                    "dtype": "int64",
         | 
| 22 | 
            +
                    "_type": "Value"
         | 
| 23 | 
            +
                  },
         | 
| 24 | 
            +
                  "_type": "Sequence"
         | 
| 25 | 
            +
                }
         | 
| 26 | 
            +
              },
         | 
| 27 | 
            +
              "homepage": "",
         | 
| 28 | 
            +
              "license": ""
         | 
| 29 | 
            +
            }
         | 
    	
        val.hf/state.json
    ADDED
    
    | @@ -0,0 +1,13 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "_data_files": [
         | 
| 3 | 
            +
                {
         | 
| 4 | 
            +
                  "filename": "data-00000-of-00001.arrow"
         | 
| 5 | 
            +
                }
         | 
| 6 | 
            +
              ],
         | 
| 7 | 
            +
              "_fingerprint": "a61ff11400c67304",
         | 
| 8 | 
            +
              "_format_columns": null,
         | 
| 9 | 
            +
              "_format_kwargs": {},
         | 
| 10 | 
            +
              "_format_type": null,
         | 
| 11 | 
            +
              "_output_all_columns": false,
         | 
| 12 | 
            +
              "_split": null
         | 
| 13 | 
            +
            }
         | 

