mHossain committed
Commit 424bf40 · 1 Parent(s): 82fddf3

End of training

README.md ADDED
@@ -0,0 +1,67 @@
+ ---
+ base_model: csebuetnlp/mT5_m2m_crossSum
+ tags:
+ - generated_from_trainer
+ metrics:
+ - rouge
+ model-index:
+ - name: en_bn_summarize_v7
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # en_bn_summarize_v7
+
+ This model is a fine-tuned version of [csebuetnlp/mT5_m2m_crossSum](https://huggingface.co/csebuetnlp/mT5_m2m_crossSum) on an unknown dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: nan
+ - Rouge1: 0.0
+ - Rouge2: 0.0
+ - Rougel: 0.0
+ - Rougelsum: 0.0
+ - Gen Len: 28.882
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 2e-05
+ - train_batch_size: 4
+ - eval_batch_size: 4
+ - seed: 42
+ - gradient_accumulation_steps: 2
+ - total_train_batch_size: 8
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - lr_scheduler_warmup_steps: 5000
+ - num_epochs: 2
+
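The hyperparameters listed above map directly onto the standard `transformers` training arguments. A minimal, hedged sketch of how such a run might be configured is given below; the output directory and the `predict_with_generate` flag are assumptions, since the card does not record them.

```python
# Hedged sketch: reconstructing the training setup from the hyperparameters in the
# card above. Output directory and predict_with_generate are assumptions.
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    Seq2SeqTrainingArguments,
)

base_model = "csebuetnlp/mT5_m2m_crossSum"
tokenizer = AutoTokenizer.from_pretrained(base_model)
model = AutoModelForSeq2SeqLM.from_pretrained(base_model)

training_args = Seq2SeqTrainingArguments(
    output_dir="en_bn_summarize_v7",       # assumed; matches the model name
    learning_rate=2e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=2,          # effective train batch size: 4 * 2 = 8
    warmup_steps=5000,
    num_train_epochs=2,
    lr_scheduler_type="linear",
    seed=42,
    predict_with_generate=True,             # assumed; needed to compute ROUGE at eval time
)
```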
54
+ ### Training results
55
+
56
+ | Training Loss | Epoch | Step | Validation Loss | Rouge1 | Rouge2 | Rougel | Rougelsum | Gen Len |
57
+ |:-------------:|:-----:|:----:|:---------------:|:------:|:------:|:------:|:---------:|:-------:|
58
+ | No log | 1.0 | 154 | nan | 0.0 | 0.0 | 0.0 | 0.0 | 28.882 |
59
+ | No log | 2.0 | 308 | nan | 0.0 | 0.0 | 0.0 | 0.0 | 28.882 |
60
+
61
+
62
+ ### Framework versions
63
+
64
+ - Transformers 4.33.1
65
+ - Pytorch 2.0.1+cu118
66
+ - Datasets 2.14.5
67
+ - Tokenizers 0.13.3
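The auto-generated card ends without a usage example. The sketch below shows one plausible way to run the checkpoint for English-to-Bengali summarization, following the upstream mT5_m2m_crossSum convention of passing the target language's sentinel token as `decoder_start_token_id`. The repository id `mHossain/en_bn_summarize_v7` is an assumption (committer plus model name); the Bengali sentinel token comes from the `langid_map` in `config.json` further down.

```python
# Minimal inference sketch; assumptions are flagged in the comments.
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

repo_id = "mHossain/en_bn_summarize_v7"    # assumed repo id: committer + model name
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForSeq2SeqLM.from_pretrained(repo_id)

article = "Scientists have discovered a new species of frog in the rainforests of Borneo."
inputs = tokenizer(article, return_tensors="pt", truncation=True, max_length=512)

# The cross-lingual base model selects the output language by starting decoding
# with that language's sentinel token ("▁<extra_id_57>" for Bengali, per the
# langid_map in config.json).
bengali_token_id = tokenizer.convert_tokens_to_ids("▁<extra_id_57>")

summary_ids = model.generate(
    **inputs,
    decoder_start_token_id=bengali_token_id,
    num_beams=4,
    length_penalty=0.6,
    max_length=84,
    no_repeat_ngram_size=2,                 # mirrors the upstream card's generation call
)
print(tokenizer.decode(summary_ids[0], skip_special_tokens=True))
```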
config.json ADDED
@@ -0,0 +1,220 @@
+ {
+   "_name_or_path": "csebuetnlp/mT5_m2m_crossSum",
+   "architectures": [
+     "MT5ForConditionalGeneration"
+   ],
+   "classifier_dropout": 0.0,
+   "d_ff": 2048,
+   "d_kv": 64,
+   "d_model": 768,
+   "decoder_start_token_id": 250030,
+   "dense_act_fn": "gelu_new",
+   "dropout_rate": 0.1,
+   "eos_token_id": 1,
+   "feed_forward_proj": "gated-gelu",
+   "initializer_factor": 1.0,
+   "is_encoder_decoder": true,
+   "is_gated_act": true,
+   "layer_norm_epsilon": 1e-06,
+   "length_penalty": 0.6,
+   "max_length": 84,
+   "model_type": "mt5",
+   "num_beams": 4,
+   "num_decoder_layers": 12,
+   "num_heads": 12,
+   "num_layers": 12,
+   "output_past": true,
+   "pad_token_id": 0,
+   "relative_attention_max_distance": 128,
+   "relative_attention_num_buckets": 32,
+   "task_specific_params": {
+     "langid_map": {
+       "amharic": [
+         35,
+         "\u2581<extra_id_64>"
+       ],
+       "arabic": [
+         4,
+         "\u2581<extra_id_95>"
+       ],
+       "azerbaijani": [
+         7,
+         "\u2581<extra_id_92>"
+       ],
+       "bengali": [
+         42,
+         "\u2581<extra_id_57>"
+       ],
+       "burmese": [
+         33,
+         "\u2581<extra_id_66>"
+       ],
+       "chinese_simplified": [
+         40,
+         "\u2581<extra_id_59>"
+       ],
+       "chinese_traditional": [
+         44,
+         "\u2581<extra_id_55>"
+       ],
+       "english": [
+         30,
+         "\u2581<extra_id_69>"
+       ],
+       "french": [
+         10,
+         "\u2581<extra_id_89>"
+       ],
+       "gujarati": [
+         27,
+         "\u2581<extra_id_72>"
+       ],
+       "hausa": [
+         43,
+         "\u2581<extra_id_56>"
+       ],
+       "hindi": [
+         21,
+         "\u2581<extra_id_78>"
+       ],
+       "igbo": [
+         9,
+         "\u2581<extra_id_90>"
+       ],
+       "indonesian": [
+         1,
+         "\u2581<extra_id_98>"
+       ],
+       "japanese": [
+         37,
+         "\u2581<extra_id_62>"
+       ],
+       "kirundi": [
+         0,
+         "\u2581<extra_id_99>"
+       ],
+       "korean": [
+         29,
+         "\u2581<extra_id_70>"
+       ],
+       "kyrgyz": [
+         5,
+         "\u2581<extra_id_94>"
+       ],
+       "marathi": [
+         13,
+         "\u2581<extra_id_86>"
+       ],
+       "nepali": [
+         20,
+         "\u2581<extra_id_79>"
+       ],
+       "oromo": [
+         41,
+         "\u2581<extra_id_58>"
+       ],
+       "pashto": [
+         34,
+         "\u2581<extra_id_65>"
+       ],
+       "persian": [
+         23,
+         "\u2581<extra_id_76>"
+       ],
+       "pidgin": [
+         14,
+         "\u2581<extra_id_85>"
+       ],
+       "portuguese": [
+         39,
+         "\u2581<extra_id_60>"
+       ],
+       "punjabi": [
+         17,
+         "\u2581<extra_id_82>"
+       ],
+       "russian": [
+         36,
+         "\u2581<extra_id_63>"
+       ],
+       "scottish_gaelic": [
+         24,
+         "\u2581<extra_id_75>"
+       ],
+       "serbian_cyrillic": [
+         28,
+         "\u2581<extra_id_71>"
+       ],
+       "serbian_latin": [
+         11,
+         "\u2581<extra_id_88>"
+       ],
+       "sinhala": [
+         31,
+         "\u2581<extra_id_68>"
+       ],
+       "somali": [
+         19,
+         "\u2581<extra_id_80>"
+       ],
+       "spanish": [
+         3,
+         "\u2581<extra_id_96>"
+       ],
+       "swahili": [
+         18,
+         "\u2581<extra_id_81>"
+       ],
+       "tamil": [
+         32,
+         "\u2581<extra_id_67>"
+       ],
+       "telugu": [
+         22,
+         "\u2581<extra_id_77>"
+       ],
+       "thai": [
+         6,
+         "\u2581<extra_id_93>"
+       ],
+       "tigrinya": [
+         16,
+         "\u2581<extra_id_83>"
+       ],
+       "turkish": [
+         15,
+         "\u2581<extra_id_84>"
+       ],
+       "ukrainian": [
+         2,
+         "\u2581<extra_id_97>"
+       ],
+       "urdu": [
+         38,
+         "\u2581<extra_id_61>"
+       ],
+       "uzbek": [
+         8,
+         "\u2581<extra_id_91>"
+       ],
+       "vietnamese": [
+         12,
+         "\u2581<extra_id_87>"
+       ],
+       "welsh": [
+         26,
+         "\u2581<extra_id_73>"
+       ],
+       "yoruba": [
+         25,
+         "\u2581<extra_id_74>"
+       ]
+     }
+   },
+   "tie_word_embeddings": false,
+   "tokenizer_class": "T5Tokenizer",
+   "torch_dtype": "float32",
+   "transformers_version": "4.33.1",
+   "use_cache": true,
+   "vocab_size": 250112
+ }
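The `task_specific_params.langid_map` block above is what makes this checkpoint cross-lingual: each language maps to a pair of an index and a sentinel token, and the sentinel token's id is used to start decoding in that language. A short sketch of resolving the Bengali entry from a loaded config follows; the repository id is again an assumption.

```python
# Sketch: resolving a target language to its decoder-start token id via the
# langid_map stored in config.json. Repo id is an assumption.
from transformers import AutoConfig, AutoTokenizer

repo_id = "mHossain/en_bn_summarize_v7"    # assumed
config = AutoConfig.from_pretrained(repo_id)
tokenizer = AutoTokenizer.from_pretrained(repo_id)

langid_map = config.task_specific_params["langid_map"]
index, lang_token = langid_map["bengali"]  # [42, "▁<extra_id_57>"] per the config above
lang_token_id = tokenizer.convert_tokens_to_ids(lang_token)
print(lang_token, lang_token_id)
```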
generation_config.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "decoder_start_token_id": 250030,
+   "eos_token_id": 1,
+   "length_penalty": 0.6,
+   "max_length": 84,
+   "num_beams": 4,
+   "pad_token_id": 0,
+   "transformers_version": "4.33.1"
+ }
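These decoding defaults (4-beam search, length penalty 0.6, maximum length 84) are picked up by `model.generate()` automatically. To inspect or override them explicitly, something along these lines should work (repository id assumed):

```python
# Sketch: inspecting and overriding the shipped generation defaults. Repo id assumed.
from transformers import GenerationConfig

gen_config = GenerationConfig.from_pretrained("mHossain/en_bn_summarize_v7")
print(gen_config.num_beams, gen_config.length_penalty, gen_config.max_length)  # 4 0.6 84

gen_config.max_length = 128  # example override for longer summaries
```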
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b22fda21a0f5312002c077099ee460840f75ceb3842dd28483009c4984d96e1a
+ size 2329702581
special_tokens_map.json ADDED
@@ -0,0 +1,52 @@
+ {
+   "additional_special_tokens": [
+     "▁<extra_id_64>",
+     "▁<extra_id_95>",
+     "▁<extra_id_92>",
+     "▁<extra_id_57>",
+     "▁<extra_id_66>",
+     "▁<extra_id_59>",
+     "▁<extra_id_55>",
+     "▁<extra_id_69>",
+     "▁<extra_id_89>",
+     "▁<extra_id_72>",
+     "▁<extra_id_56>",
+     "▁<extra_id_78>",
+     "▁<extra_id_90>",
+     "▁<extra_id_98>",
+     "▁<extra_id_62>",
+     "▁<extra_id_99>",
+     "▁<extra_id_70>",
+     "▁<extra_id_94>",
+     "▁<extra_id_86>",
+     "▁<extra_id_79>",
+     "▁<extra_id_58>",
+     "▁<extra_id_65>",
+     "▁<extra_id_76>",
+     "▁<extra_id_85>",
+     "▁<extra_id_60>",
+     "▁<extra_id_82>",
+     "▁<extra_id_63>",
+     "▁<extra_id_75>",
+     "▁<extra_id_71>",
+     "▁<extra_id_88>",
+     "▁<extra_id_68>",
+     "▁<extra_id_80>",
+     "▁<extra_id_96>",
+     "▁<extra_id_81>",
+     "▁<extra_id_67>",
+     "▁<extra_id_77>",
+     "▁<extra_id_93>",
+     "▁<extra_id_83>",
+     "▁<extra_id_84>",
+     "▁<extra_id_97>",
+     "▁<extra_id_61>",
+     "▁<extra_id_91>",
+     "▁<extra_id_87>",
+     "▁<extra_id_73>",
+     "▁<extra_id_74>"
+   ],
+   "eos_token": "</s>",
+   "pad_token": "<pad>",
+   "unk_token": "<unk>"
+ }
spiece.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ef78f86560d809067d12bac6c09f19a462cb3af3f54d2b8acbba26e1433125d6
+ size 4309802
tokenizer_config.json ADDED
@@ -0,0 +1,13 @@
+ {
+   "additional_special_tokens": null,
+   "clean_up_tokenization_spaces": true,
+   "eos_token": "</s>",
+   "extra_ids": 0,
+   "legacy": true,
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": "<pad>",
+   "sp_model_kwargs": {},
+   "tokenizer_class": "T5Tokenizer",
+   "tokenizer_file": null,
+   "unk_token": "<unk>"
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2940ff1a2e61dd26c316012f9a5c7fa15f3424fea400d13a6a7f9b5a2b588837
+ size 4155