bgk committed on
Commit
12f0d75
·
1 Parent(s): 97b5a05

Upload 8 files

Browse files
config.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "loodos/albert-base-turkish-uncased",
3
+ "architectures": [
4
+ "AlbertForTokenClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 2,
8
+ "classifier_dropout_prob": 0.1,
9
+ "down_scale_factor": 1,
10
+ "embedding_size": 128,
11
+ "eos_token_id": 3,
12
+ "gap_size": 0,
13
+ "hidden_act": "gelu",
14
+ "hidden_dropout_prob": 0.1,
15
+ "hidden_size": 768,
16
+ "id2label": {
17
+ "0": "B-YEMEK",
18
+ "1": "O",
19
+ "2": "B-ICECEK_BUYUKLUK",
20
+ "3": "B-ICECEK",
21
+ "4": "B-ADRES",
22
+ "5": "B-ODEME",
23
+ "6": "B-YEMEK_CESIT",
24
+ "7": "B-YEMEK_ADET",
25
+ "8": "B-ICECEK_ICERIK",
26
+ "9": "I-ADRES",
27
+ "10": "I-YEMEK",
28
+ "11": "B-YEMEK_BUYUKLUK",
29
+ "12": "B-YEMEK_ICERIK",
30
+ "13": "I-YEMEK_ICERIK",
31
+ "14": "I-ICECEK",
32
+ "15": "I-YEMEK_BUYUKLUK",
33
+ "16": "I-YEMEK_CESIT",
34
+ "17": "B-ICECEK_ADET",
35
+ "18": "I-ODEME",
36
+ "19": "I-YEMEK_ADET"
37
+ },
38
+ "initializer_range": 0.02,
39
+ "inner_group_num": 1,
40
+ "intermediate_size": 3072,
41
+ "label2id": {
42
+ "B-ADRES": 4,
43
+ "B-ICECEK": 3,
44
+ "B-ICECEK_ADET": 17,
45
+ "B-ICECEK_BUYUKLUK": 2,
46
+ "B-ICECEK_ICERIK": 8,
47
+ "B-ODEME": 5,
48
+ "B-YEMEK": 0,
49
+ "B-YEMEK_ADET": 7,
50
+ "B-YEMEK_BUYUKLUK": 11,
51
+ "B-YEMEK_CESIT": 6,
52
+ "B-YEMEK_ICERIK": 12,
53
+ "I-ADRES": 9,
54
+ "I-ICECEK": 14,
55
+ "I-ODEME": 18,
56
+ "I-YEMEK": 10,
57
+ "I-YEMEK_ADET": 19,
58
+ "I-YEMEK_BUYUKLUK": 15,
59
+ "I-YEMEK_CESIT": 16,
60
+ "I-YEMEK_ICERIK": 13,
61
+ "O": 1
62
+ },
63
+ "layer_norm_eps": 1e-12,
64
+ "max_position_embeddings": 512,
65
+ "model_type": "albert",
66
+ "net_structure_type": 0,
67
+ "num_attention_heads": 12,
68
+ "num_hidden_groups": 1,
69
+ "num_hidden_layers": 12,
70
+ "num_memory_blocks": 0,
71
+ "pad_token_id": 0,
72
+ "position_embedding_type": "absolute",
73
+ "torch_dtype": "float32",
74
+ "transformers_version": "4.27.4",
75
+ "type_vocab_size": 2,
76
+ "vocab_size": 32000
77
+ }
eval_results.txt ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ precision recall f1-score support
2
+
3
+ ADRES 0.9983 1.0000 0.9991 579
4
+ ICECEK 0.9926 1.0000 0.9963 135
5
+ ICECEK_ADET 0.9672 0.9833 0.9752 60
6
+ ICECEK_BUYUKLUK 1.0000 1.0000 1.0000 21
7
+ ICECEK_ICERIK 1.0000 1.0000 1.0000 10
8
+ ODEME 0.9914 0.9931 0.9923 583
9
+ YEMEK 0.8685 0.9189 0.8930 740
10
+ YEMEK_ADET 0.9790 0.9894 0.9842 283
11
+ YEMEK_BUYUKLUK 0.9077 0.9008 0.9042 131
12
+ YEMEK_CESIT 0.8557 0.8592 0.8574 483
13
+ YEMEK_ICERIK 0.7482 0.6887 0.7172 151
14
+
15
+ micro avg 0.9269 0.9383 0.9326 3176
16
+ macro avg 0.9371 0.9394 0.9381 3176
17
+ weighted avg 0.9269 0.9383 0.9324 3176
18
+
19
+ eval_loss = 0.1106654754251502
20
+ f1_score = 0.9325614144891252
21
+ precision = 0.926905132192846
22
+ recall = 0.9382871536523929
model_args.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"adafactor_beta1": null, "adafactor_clip_threshold": 1.0, "adafactor_decay_rate": -0.8, "adafactor_eps": [1e-30, 0.001], "adafactor_relative_step": true, "adafactor_scale_parameter": true, "adafactor_warmup_init": true, "adam_betas": [0.9, 0.999], "adam_epsilon": 1e-08, "best_model_dir": "outputs/best_model", "cache_dir": "cache_dir/", "config": {}, "cosine_schedule_num_cycles": 0.5, "custom_layer_parameters": [], "custom_parameter_groups": [], "dataloader_num_workers": 0, "do_lower_case": false, "dynamic_quantize": false, "early_stopping_consider_epochs": false, "early_stopping_delta": 0, "early_stopping_metric": "eval_loss", "early_stopping_metric_minimize": true, "early_stopping_patience": 3, "encoding": null, "eval_batch_size": 8, "evaluate_during_training": false, "evaluate_during_training_silent": true, "evaluate_during_training_steps": 2000, "evaluate_during_training_verbose": false, "evaluate_each_epoch": true, "fp16": true, "gradient_accumulation_steps": 1, "learning_rate": 3e-05, "local_rank": -1, "logging_steps": 50, "loss_type": null, "loss_args": {}, "manual_seed": null, "max_grad_norm": 1.0, "max_seq_length": 128, "model_name": "loodos/albert-base-turkish-uncased", "model_type": "albert", "multiprocessing_chunksize": -1, "n_gpu": 1, "no_cache": false, "no_save": false, "not_saved_args": [], "num_train_epochs": 5, "optimizer": "AdamW", "output_dir": "outputs/", "overwrite_output_dir": true, "polynomial_decay_schedule_lr_end": 1e-07, "polynomial_decay_schedule_power": 1.0, "process_count": 1, "quantized_model": false, "reprocess_input_data": true, "save_best_model": true, "save_eval_checkpoints": true, "save_model_every_epoch": true, "save_optimizer_and_scheduler": true, "save_steps": 2000, "scheduler": "linear_schedule_with_warmup", "silent": false, "skip_special_tokens": true, "tensorboard_dir": null, "thread_count": null, "tokenizer_name": null, "tokenizer_type": null, "train_batch_size": 4, "train_custom_parameters_only": false, 
"use_cached_eval_features": false, "use_early_stopping": false, "use_hf_datasets": false, "use_multiprocessing": true, "use_multiprocessing_for_evaluation": true, "wandb_kwargs": {}, "wandb_project": null, "warmup_ratio": 0.06, "warmup_steps": 223, "weight_decay": 0.0, "model_class": "NERModel", "classification_report": true, "labels_list": ["B-YEMEK", "O", "B-ICECEK_BUYUKLUK", "B-ICECEK", "B-ADRES", "B-ODEME", "B-YEMEK_CESIT", "B-YEMEK_ADET", "B-ICECEK_ICERIK", "I-ADRES", "I-YEMEK", "B-YEMEK_BUYUKLUK", "B-YEMEK_ICERIK", "I-YEMEK_ICERIK", "I-ICECEK", "I-YEMEK_BUYUKLUK", "I-YEMEK_CESIT", "B-ICECEK_ADET", "I-ODEME", "I-YEMEK_ADET"], "lazy_loading": false, "lazy_loading_start_line": 0, "onnx": false, "special_tokens_list": []}
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:350d013addd74cf77b153391f10ef435f4a8b605e3bd463b03465b3d7e937789
3
+ size 45470591
special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "<pad>",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": "<unk>"
9
+ }
spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0906192fc8a73500782d11318813c4554fb0aeeb2ded6899b69839f09958ee99
3
+ size 862541
tokenizer_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "do_lower_case": false,
5
+ "eos_token": "[SEP]",
6
+ "keep_accents": true,
7
+ "mask_token": {
8
+ "__type": "AddedToken",
9
+ "content": "[MASK]",
10
+ "lstrip": true,
11
+ "normalized": false,
12
+ "rstrip": false,
13
+ "single_word": false
14
+ },
15
+ "model_max_length": 512,
16
+ "pad_token": "<pad>",
17
+ "remove_space": true,
18
+ "sep_token": "[SEP]",
19
+ "sp_model_kwargs": {},
20
+ "special_tokens_map_file": "/root/.cache/huggingface/hub/models--loodos--albert-base-turkish-uncased/snapshots/3275004703c3ea35b5dcde5b684b707d32e5a69e/special_tokens_map.json",
21
+ "tokenizer_class": "AlbertTokenizer",
22
+ "unk_token": "<unk>"
23
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2ed257d9191b79c6c33901827369ae662118782febb345d94bb0c5ec88b78de
3
+ size 3323