p1gm1 committed on
Commit
327c462
·
1 Parent(s): 57667ec

End of training

Browse files
README.md CHANGED
@@ -1,6 +1,5 @@
1
  ---
2
- license: apache-2.0
3
- base_model: HURIDOCS/mt5-small-spanish-es
4
  tags:
5
  - generated_from_trainer
6
  metrics:
@@ -15,14 +14,14 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # summary_naty_model
17
 
18
- This model is a fine-tuned version of [HURIDOCS/mt5-small-spanish-es](https://huggingface.co/HURIDOCS/mt5-small-spanish-es) on the None dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 2.0827
21
- - Rouge1: 0.2147
22
- - Rouge2: 0.0844
23
- - Rougel: 0.1768
24
- - Rougelsum: 0.1768
25
- - Gen Len: 19.0
26
 
27
  ## Model description
28
 
@@ -42,8 +41,8 @@ More information needed
42
 
43
  The following hyperparameters were used during training:
44
  - learning_rate: 2e-05
45
- - train_batch_size: 8
46
- - eval_batch_size: 8
47
  - seed: 42
48
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
49
  - lr_scheduler_type: linear
@@ -54,17 +53,17 @@ The following hyperparameters were used during training:
54
 
55
  | Training Loss | Epoch | Step | Validation Loss | Rouge1 | Rouge2 | Rougel | Rougelsum | Gen Len |
56
  |:-------------:|:-----:|:----:|:---------------:|:------:|:------:|:------:|:---------:|:-------:|
57
- | No log | 1.0 | 64 | 2.7216 | 0.1506 | 0.0456 | 0.1219 | 0.1218 | 18.0703 |
58
- | No log | 2.0 | 128 | 2.4531 | 0.1719 | 0.0615 | 0.1433 | 0.1431 | 18.6719 |
59
- | No log | 3.0 | 192 | 2.3374 | 0.1871 | 0.0657 | 0.1508 | 0.151 | 18.8906 |
60
- | No log | 4.0 | 256 | 2.2510 | 0.1995 | 0.0702 | 0.1598 | 0.16 | 19.0 |
61
- | No log | 5.0 | 320 | 2.1939 | 0.2033 | 0.0739 | 0.1639 | 0.1639 | 19.0 |
62
- | No log | 6.0 | 384 | 2.1539 | 0.2063 | 0.0778 | 0.1679 | 0.1678 | 19.0 |
63
- | No log | 7.0 | 448 | 2.1271 | 0.2117 | 0.0823 | 0.1733 | 0.1735 | 19.0 |
64
- | 3.0108 | 8.0 | 512 | 2.1053 | 0.2113 | 0.083 | 0.1744 | 0.1746 | 19.0 |
65
- | 3.0108 | 9.0 | 576 | 2.0935 | 0.2144 | 0.0836 | 0.1761 | 0.176 | 19.0 |
66
- | 3.0108 | 10.0 | 640 | 2.0855 | 0.2156 | 0.0859 | 0.1774 | 0.1774 | 19.0 |
67
- | 3.0108 | 11.0 | 704 | 2.0827 | 0.2147 | 0.0844 | 0.1768 | 0.1768 | 19.0 |
68
 
69
 
70
  ### Framework versions
 
1
  ---
2
+ base_model: mrm8488/bart-legal-base-es
 
3
  tags:
4
  - generated_from_trainer
5
  metrics:
 
14
 
15
  # summary_naty_model
16
 
17
+ This model is a fine-tuned version of [mrm8488/bart-legal-base-es](https://huggingface.co/mrm8488/bart-legal-base-es) on an unspecified dataset.
18
  It achieves the following results on the evaluation set:
19
+ - Loss: 2.7235
20
+ - Rouge1: 0.2139
21
+ - Rouge2: 0.1064
22
+ - Rougel: 0.1798
23
+ - Rougelsum: 0.1802
24
+ - Gen Len: 20.0
25
 
26
  ## Model description
27
 
 
41
 
42
  The following hyperparameters were used during training:
43
  - learning_rate: 2e-05
44
+ - train_batch_size: 20
45
+ - eval_batch_size: 14
46
  - seed: 42
47
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
48
  - lr_scheduler_type: linear
 
53
 
54
  | Training Loss | Epoch | Step | Validation Loss | Rouge1 | Rouge2 | Rougel | Rougelsum | Gen Len |
55
  |:-------------:|:-----:|:----:|:---------------:|:------:|:------:|:------:|:---------:|:-------:|
56
+ | No log | 1.0 | 26 | 3.1779 | 0.2079 | 0.0817 | 0.1695 | 0.1697 | 20.0 |
57
+ | No log | 2.0 | 52 | 3.0521 | 0.2127 | 0.0875 | 0.1722 | 0.1725 | 20.0 |
58
+ | No log | 3.0 | 78 | 2.9548 | 0.2176 | 0.094 | 0.1742 | 0.1746 | 20.0 |
59
+ | No log | 4.0 | 104 | 2.8885 | 0.2191 | 0.0987 | 0.1761 | 0.1768 | 20.0 |
60
+ | No log | 5.0 | 130 | 2.8342 | 0.2176 | 0.1021 | 0.1767 | 0.177 | 20.0 |
61
+ | No log | 6.0 | 156 | 2.8070 | 0.2175 | 0.1042 | 0.1773 | 0.1776 | 20.0 |
62
+ | No log | 7.0 | 182 | 2.7686 | 0.2157 | 0.1044 | 0.1776 | 0.1781 | 20.0 |
63
+ | No log | 8.0 | 208 | 2.7491 | 0.2154 | 0.1038 | 0.1779 | 0.178 | 20.0 |
64
+ | No log | 9.0 | 234 | 2.7387 | 0.2133 | 0.1059 | 0.1771 | 0.1778 | 20.0 |
65
+ | No log | 10.0 | 260 | 2.7247 | 0.2131 | 0.1039 | 0.1781 | 0.1783 | 20.0 |
66
+ | No log | 11.0 | 286 | 2.7235 | 0.2139 | 0.1064 | 0.1798 | 0.1802 | 20.0 |
67
 
68
 
69
  ### Framework versions
config.json CHANGED
@@ -1,32 +1,75 @@
1
  {
2
- "_name_or_path": "HURIDOCS/mt5-small-spanish-es",
 
 
 
 
3
  "architectures": [
4
- "MT5ForConditionalGeneration"
5
  ],
 
 
 
6
  "classifier_dropout": 0.0,
7
- "d_ff": 1024,
8
- "d_kv": 64,
9
- "d_model": 512,
10
- "decoder_start_token_id": 0,
11
- "dense_act_fn": "gelu_new",
12
- "dropout_rate": 0.1,
13
- "eos_token_id": 1,
14
- "feed_forward_proj": "gated-gelu",
15
- "initializer_factor": 1.0,
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  "is_encoder_decoder": true,
17
- "is_gated_act": true,
18
- "layer_norm_epsilon": 1e-06,
19
- "model_type": "mt5",
20
- "num_decoder_layers": 8,
21
- "num_heads": 6,
22
- "num_layers": 8,
23
- "pad_token_id": 0,
24
- "relative_attention_max_distance": 128,
25
- "relative_attention_num_buckets": 32,
26
- "tie_word_embeddings": false,
27
- "tokenizer_class": "T5Tokenizer",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  "torch_dtype": "float32",
29
  "transformers_version": "4.35.2",
30
  "use_cache": true,
31
- "vocab_size": 250100
32
  }
 
1
  {
2
+ "_name_or_path": "mrm8488/bart-legal-base-es",
3
+ "activation_dropout": 0.1,
4
+ "activation_function": "gelu",
5
+ "add_bias_logits": false,
6
+ "add_final_layer_norm": false,
7
  "architectures": [
8
+ "BartForConditionalGeneration"
9
  ],
10
+ "attention_dropout": 0.1,
11
+ "bos_token_id": 0,
12
+ "classif_dropout": 0.1,
13
  "classifier_dropout": 0.0,
14
+ "d_model": 768,
15
+ "decoder_attention_heads": 12,
16
+ "decoder_ffn_dim": 3072,
17
+ "decoder_layerdrop": 0.0,
18
+ "decoder_layers": 6,
19
+ "decoder_start_token_id": 2,
20
+ "dropout": 0.1,
21
+ "early_stopping": true,
22
+ "encoder_attention_heads": 12,
23
+ "encoder_ffn_dim": 3072,
24
+ "encoder_layerdrop": 0.0,
25
+ "encoder_layers": 6,
26
+ "eos_token_id": 2,
27
+ "forced_bos_token_id": 0,
28
+ "forced_eos_token_id": 2,
29
+ "gradient_checkpointing": false,
30
+ "id2label": {
31
+ "0": "LABEL_0",
32
+ "1": "LABEL_1",
33
+ "2": "LABEL_2"
34
+ },
35
+ "init_std": 0.02,
36
  "is_encoder_decoder": true,
37
+ "label2id": {
38
+ "LABEL_0": 0,
39
+ "LABEL_1": 1,
40
+ "LABEL_2": 2
41
+ },
42
+ "max_position_embeddings": 1024,
43
+ "model_type": "bart",
44
+ "no_repeat_ngram_size": 3,
45
+ "normalize_before": false,
46
+ "normalize_embedding": true,
47
+ "num_beams": 4,
48
+ "num_hidden_layers": 6,
49
+ "pad_token_id": 1,
50
+ "scale_embedding": false,
51
+ "task_specific_params": {
52
+ "summarization": {
53
+ "length_penalty": 1.0,
54
+ "max_length": 128,
55
+ "min_length": 12,
56
+ "num_beams": 4
57
+ },
58
+ "summarization_cnn": {
59
+ "length_penalty": 2.0,
60
+ "max_length": 142,
61
+ "min_length": 56,
62
+ "num_beams": 4
63
+ },
64
+ "summarization_xsum": {
65
+ "length_penalty": 1.0,
66
+ "max_length": 62,
67
+ "min_length": 11,
68
+ "num_beams": 6
69
+ }
70
+ },
71
  "torch_dtype": "float32",
72
  "transformers_version": "4.35.2",
73
  "use_cache": true,
74
+ "vocab_size": 52000
75
  }
generation_config.json CHANGED
@@ -1,6 +1,12 @@
1
  {
2
- "decoder_start_token_id": 0,
3
- "eos_token_id": 1,
4
- "pad_token_id": 0,
 
 
 
 
 
 
5
  "transformers_version": "4.35.2"
6
  }
 
1
  {
2
+ "bos_token_id": 0,
3
+ "decoder_start_token_id": 2,
4
+ "early_stopping": true,
5
+ "eos_token_id": 2,
6
+ "forced_bos_token_id": 0,
7
+ "forced_eos_token_id": 2,
8
+ "no_repeat_ngram_size": 3,
9
+ "num_beams": 4,
10
+ "pad_token_id": 1,
11
  "transformers_version": "4.35.2"
12
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c4644603cba36587959d802f99780c40b50aa68ebbd16342fbfa97878588fbc
3
- size 1200680360
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c822a515a1657236bfeca127232f4cbfe48981877cc098dfaa24e789816d85c
3
+ size 563249480
runs/Nov17_17-16-17_46bbe7f1dda0/events.out.tfevents.1700241378.46bbe7f1dda0.22459.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2ae80f643ac5bbcc965f6a6e6679c753e1a0af6c5ff5d0dfdba4814af549638
3
+ size 5470
runs/Nov17_17-16-50_46bbe7f1dda0/events.out.tfevents.1700241411.46bbe7f1dda0.22459.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1bd7b4ceb871e6312bf052e940908165a57e48c8ad517089fc96358683bb823
3
+ size 5472
runs/Nov17_17-17-08_46bbe7f1dda0/events.out.tfevents.1700241429.46bbe7f1dda0.22459.5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:adeaff1647dc01795788aecd079a726d454f8c4853ec6d8ac3666d8ebe837127
3
+ size 5472
runs/Nov17_17-17-32_46bbe7f1dda0/events.out.tfevents.1700241453.46bbe7f1dda0.22459.6 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4fe3d4cc8aa9ddb453bb37c87a381a5388c2a35e2a206c4bfb81dce6ff6cb72
3
+ size 11561
special_tokens_map.json CHANGED
@@ -1,5 +1,15 @@
1
  {
 
 
2
  "eos_token": "</s>",
 
 
 
 
 
 
 
3
  "pad_token": "<pad>",
 
4
  "unk_token": "<unk>"
5
  }
 
1
  {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
  "eos_token": "</s>",
5
+ "mask_token": {
6
+ "content": "<mask>",
7
+ "lstrip": true,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
  "pad_token": "<pad>",
13
+ "sep_token": "</s>",
14
  "unk_token": "<unk>"
15
  }
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c00dd03b7b29fa0ca79bd6b2ac2a9575b3175486939f4c3429a27812e2830bbb
3
- size 16315311
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f76f7e898026f34191baf39e6baedefc81ea6756a7f6cb1ae4cecab7187cbab3
3
+ size 2331241
tokenizer_config.json CHANGED
@@ -1,7 +1,8 @@
1
  {
 
2
  "added_tokens_decoder": {
3
  "0": {
4
- "content": "<pad>",
5
  "lstrip": false,
6
  "normalized": false,
7
  "rstrip": false,
@@ -9,7 +10,7 @@
9
  "special": true
10
  },
11
  "1": {
12
- "content": "</s>",
13
  "lstrip": false,
14
  "normalized": false,
15
  "rstrip": false,
@@ -17,21 +18,40 @@
17
  "special": true
18
  },
19
  "2": {
 
 
 
 
 
 
 
 
20
  "content": "<unk>",
21
  "lstrip": false,
22
  "normalized": false,
23
  "rstrip": false,
24
  "single_word": false,
25
  "special": true
 
 
 
 
 
 
 
 
26
  }
27
  },
28
- "additional_special_tokens": [],
29
  "clean_up_tokenization_spaces": true,
 
30
  "eos_token": "</s>",
31
- "extra_ids": 0,
 
32
  "model_max_length": 1000000000000000019884624838656,
33
  "pad_token": "<pad>",
34
- "sp_model_kwargs": {},
35
- "tokenizer_class": "T5Tokenizer",
 
36
  "unk_token": "<unk>"
37
  }
 
1
  {
2
+ "add_prefix_space": false,
3
  "added_tokens_decoder": {
4
  "0": {
5
+ "content": "<s>",
6
  "lstrip": false,
7
  "normalized": false,
8
  "rstrip": false,
 
10
  "special": true
11
  },
12
  "1": {
13
+ "content": "<pad>",
14
  "lstrip": false,
15
  "normalized": false,
16
  "rstrip": false,
 
18
  "special": true
19
  },
20
  "2": {
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
  "content": "<unk>",
30
  "lstrip": false,
31
  "normalized": false,
32
  "rstrip": false,
33
  "single_word": false,
34
  "special": true
35
+ },
36
+ "4": {
37
+ "content": "<mask>",
38
+ "lstrip": true,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
  }
44
  },
45
+ "bos_token": "<s>",
46
  "clean_up_tokenization_spaces": true,
47
+ "cls_token": "<s>",
48
  "eos_token": "</s>",
49
+ "errors": "replace",
50
+ "mask_token": "<mask>",
51
  "model_max_length": 1000000000000000019884624838656,
52
  "pad_token": "<pad>",
53
+ "sep_token": "</s>",
54
+ "tokenizer_class": "BartTokenizer",
55
+ "trim_offsets": true,
56
  "unk_token": "<unk>"
57
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c900bd072af3e4d868e8275ec571f98dcbdb20138c7185c62bc1541f0a2d91d6
3
  size 4728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fc63fb70f0ef3eae3ec1364c9d40638f984658c9032972b22ce538b4b3e0c41
3
  size 4728