minhdang14902 committed on
Commit
eff7084
·
verified ·
1 Parent(s): 00ae892

Upload model PhoBert 10/7/24

Browse files
QA_Legal_updated.json ADDED
The diff for this file is too large to render. See raw diff
 
added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "<mask>": 64000
3
+ }
bpe.codes ADDED
The diff for this file is too large to render. See raw diff
 
config.json ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "vinai/phobert-base",
3
+ "architectures": [
4
+ "RobertaForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "gradient_checkpointing": false,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "id2label": {
15
+ "0": "Th\u1ea9m_quy\u1ec1n_tuy\u1ec3n,_qu",
16
+ "1": "Ng\u01b0\u1eddi_lao_\u0111\u1ed9ng_v\u00e0_do",
17
+ "2": "C\u00e1c_quy_\u0111\u1ecbnh_v\u1ec1_tuy\u1ec3",
18
+ "3": "V\u0103n_ph\u00f2ng_\u0111\u1ea1i_di\u1ec7n_c",
19
+ "4": "Tr\u01b0\u1eddng_h\u1ee3p_gi\u1ea3i_th\u1ec3",
20
+ "5": "H\u1ee3p_\u0111\u1ed3ng_lao_\u0111\u1ed9ng_c\u00f3",
21
+ "6": "Doanh_nghi\u1ec7p_ph\u1ea3i_cu",
22
+ "7": "H\u00e0nh_vi_ng\u01b0\u1eddi_s\u1eed_d\u1ee5n",
23
+ "8": "Ng\u01b0\u1eddi_s\u1eed_d\u1ee5ng_lao_\u0111\u1ed9",
24
+ "9": "Doanh_nghi\u1ec7p_c\u00f3_\u0111\u01b0\u1ee3c",
25
+ "10": "Doanh_nghi\u1ec7p_ph\u1ea3i_l\u00e0",
26
+ "11": "intent_12_Th\u1ea9m_quy\u1ec1n_giao_k\u1ebft",
27
+ "12": "Gi\u00e1m_\u0111\u1ed1c_nh\u00e2n_s\u1ef1_c\u00f3",
28
+ "13": "Ng\u01b0\u1eddi_lao_\u0111\u1ed9ng_c\u00f3_th",
29
+ "14": "N\u1ebfu_ng\u01b0\u1eddi_lao_\u0111\u1ed9ng_k",
30
+ "15": "C\u00e1c_tr\u01b0\u1eddng_h\u1ee3p_t\u1ea1m_h",
31
+ "16": "M\u1ed9t_ng\u01b0\u1eddi_lao_\u0111\u1ed9ng_c",
32
+ "17": "C\u00f3_c\u00e1c_lo\u1ea1i_h\u1ee3p_\u0111\u1ed3ng",
33
+ "18": "Khi_c\u00f3_nhu_c\u1ea7u_s\u1eed_d\u1ee5",
34
+ "19": "H\u1ee3p_\u0111\u1ed3ng_lao_\u0111\u1ed9ng_kh",
35
+ "20": "C\u00e1c_ch\u1ebf_\u0111\u1ed9_v\u1ec1_l\u01b0\u01a1ng,",
36
+ "21": "Khi_h\u1ee3p_\u0111\u1ed3ng_lao_\u0111\u1ed9n",
37
+ "22": "Th\u1eddi_h\u1ea1n_t\u1ea1m_ho\u00e3n_h\u1ee3",
38
+ "23": "C\u00f3_\u0111\u01b0\u1ee3c_k\u00fd_nhi\u1ec1u_h\u1ee3p",
39
+ "24": "H\u1ee3p_\u0111\u1ed3ng_lao_\u0111\u1ed9ng_b\u1eaf",
40
+ "25": "Ph\u1ee5_l\u1ee5c_H\u1ee3p_\u0111\u1ed3ng_lao",
41
+ "26": "Ph\u1ee5_l\u1ee5c_h\u1ee3p_\u0111\u1ed3ng_c\u1ea7n",
42
+ "27": "N\u1ebfu_c\u00f3_n\u1ed9i_dung_kh\u00e1c",
43
+ "28": "Hi\u1ec7u_l\u1ef1c_c\u1ee7a_h\u1ee3p_\u0111\u1ed3n",
44
+ "29": "Th\u1eddi_h\u1ea1n_b\u00e1o_tr\u01b0\u1edbc_t",
45
+ "30": "NSDL\u0110_c\u00f3_th\u1ec3_k\u00fd_ph\u1ee5"
46
+ },
47
+ "initializer_range": 0.02,
48
+ "intermediate_size": 3072,
49
+ "label2id": {
50
+ "C\u00e1c_ch\u1ebf_\u0111\u1ed9_v\u1ec1_l\u01b0\u01a1ng,": 20,
51
+ "C\u00e1c_quy_\u0111\u1ecbnh_v\u1ec1_tuy\u1ec3": 2,
52
+ "C\u00e1c_tr\u01b0\u1eddng_h\u1ee3p_t\u1ea1m_h": 15,
53
+ "C\u00f3_c\u00e1c_lo\u1ea1i_h\u1ee3p_\u0111\u1ed3ng": 17,
54
+ "C\u00f3_\u0111\u01b0\u1ee3c_k\u00fd_nhi\u1ec1u_h\u1ee3p": 23,
55
+ "Doanh_nghi\u1ec7p_c\u00f3_\u0111\u01b0\u1ee3c": 9,
56
+ "Doanh_nghi\u1ec7p_ph\u1ea3i_cu": 6,
57
+ "Doanh_nghi\u1ec7p_ph\u1ea3i_l\u00e0": 10,
58
+ "Gi\u00e1m_\u0111\u1ed1c_nh\u00e2n_s\u1ef1_c\u00f3": 12,
59
+ "Hi\u1ec7u_l\u1ef1c_c\u1ee7a_h\u1ee3p_\u0111\u1ed3n": 28,
60
+ "H\u00e0nh_vi_ng\u01b0\u1eddi_s\u1eed_d\u1ee5n": 7,
61
+ "H\u1ee3p_\u0111\u1ed3ng_lao_\u0111\u1ed9ng_b\u1eaf": 24,
62
+ "H\u1ee3p_\u0111\u1ed3ng_lao_\u0111\u1ed9ng_c\u00f3": 5,
63
+ "H\u1ee3p_\u0111\u1ed3ng_lao_\u0111\u1ed9ng_kh": 19,
64
+ "Khi_c\u00f3_nhu_c\u1ea7u_s\u1eed_d\u1ee5": 18,
65
+ "Khi_h\u1ee3p_\u0111\u1ed3ng_lao_\u0111\u1ed9n": 21,
66
+ "M\u1ed9t_ng\u01b0\u1eddi_lao_\u0111\u1ed9ng_c": 16,
67
+ "NSDL\u0110_c\u00f3_th\u1ec3_k\u00fd_ph\u1ee5": 30,
68
+ "Ng\u01b0\u1eddi_lao_\u0111\u1ed9ng_c\u00f3_th": 13,
69
+ "Ng\u01b0\u1eddi_lao_\u0111\u1ed9ng_v\u00e0_do": 1,
70
+ "Ng\u01b0\u1eddi_s\u1eed_d\u1ee5ng_lao_\u0111\u1ed9": 8,
71
+ "N\u1ebfu_c\u00f3_n\u1ed9i_dung_kh\u00e1c": 27,
72
+ "N\u1ebfu_ng\u01b0\u1eddi_lao_\u0111\u1ed9ng_k": 14,
73
+ "Ph\u1ee5_l\u1ee5c_H\u1ee3p_\u0111\u1ed3ng_lao": 25,
74
+ "Ph\u1ee5_l\u1ee5c_h\u1ee3p_\u0111\u1ed3ng_c\u1ea7n": 26,
75
+ "Th\u1ea9m_quy\u1ec1n_tuy\u1ec3n,_qu": 0,
76
+ "Th\u1eddi_h\u1ea1n_b\u00e1o_tr\u01b0\u1edbc_t": 29,
77
+ "Th\u1eddi_h\u1ea1n_t\u1ea1m_ho\u00e3n_h\u1ee3": 22,
78
+ "Tr\u01b0\u1eddng_h\u1ee3p_gi\u1ea3i_th\u1ec3": 4,
79
+ "V\u0103n_ph\u00f2ng_\u0111\u1ea1i_di\u1ec7n_c": 3,
80
+ "intent_12_Th\u1ea9m_quy\u1ec1n_giao_k\u1ebft": 11
81
+ },
82
+ "layer_norm_eps": 1e-05,
83
+ "max_position_embeddings": 258,
84
+ "model_type": "roberta",
85
+ "num_attention_heads": 12,
86
+ "num_hidden_layers": 12,
87
+ "pad_token_id": 1,
88
+ "position_embedding_type": "absolute",
89
+ "problem_type": "single_label_classification",
90
+ "tokenizer_class": "PhobertTokenizer",
91
+ "torch_dtype": "float32",
92
+ "transformers_version": "4.30.1",
93
+ "type_vocab_size": 1,
94
+ "use_cache": true,
95
+ "vocab_size": 64001
96
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a244b641283ecdac7b4a49ec0a1df37814257960d3bdb8fd7b4ac5f69b3836ba
3
+ size 540157426
special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": "<mask>",
6
+ "pad_token": "<pad>",
7
+ "sep_token": "</s>",
8
+ "unk_token": "<unk>"
9
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "clean_up_tokenization_spaces": true,
4
+ "cls_token": "<s>",
5
+ "eos_token": "</s>",
6
+ "mask_token": "<mask>",
7
+ "model_max_length": 256,
8
+ "pad_token": "<pad>",
9
+ "sep_token": "</s>",
10
+ "tokenizer_class": "PhobertTokenizer",
11
+ "unk_token": "<unk>"
12
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c17cb9d3fbcd131f7283f89aa21c97fcfc28d6a2d470eb9a78deb09d9879de3
3
+ size 4344
vocab.txt ADDED
The diff for this file is too large to render. See raw diff