minhdang14902 committed (verified)
Commit: aca2eee
1 Parent(s): 9f50415

Upload 9 files

data/config.json CHANGED
@@ -1,7 +1,7 @@
 {
-  "_name_or_path": "bhavikardeshna/xlm-roberta-base-vietnamese",
+  "_name_or_path": "vinai/phobert-base",
   "architectures": [
-    "XLMRobertaForQuestionAnswering"
+    "RobertaForSequenceClassification"
   ],
   "attention_probs_dropout_prob": 0.1,
   "bos_token_id": 0,
@@ -11,21 +11,86 @@
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
+  "id2label": {
+    "0": "Th\u1ea9m_quy\u1ec1n_tuy\u1ec3n,_qu",
+    "1": "Ng\u01b0\u1eddi_lao_\u0111\u1ed9ng_v\u00e0_do",
+    "2": "C\u00e1c_quy_\u0111\u1ecbnh_v\u1ec1_tuy\u1ec3",
+    "3": "V\u0103n_ph\u00f2ng_\u0111\u1ea1i_di\u1ec7n_c",
+    "4": "Tr\u01b0\u1eddng_h\u1ee3p_gi\u1ea3i_th\u1ec3",
+    "5": "H\u1ee3p_\u0111\u1ed3ng_lao_\u0111\u1ed9ng_c\u00f3",
+    "6": "Doanh_nghi\u1ec7p_ph\u1ea3i_cu",
+    "7": "H\u00e0nh_vi_ng\u01b0\u1eddi_s\u1eed_d\u1ee5n",
+    "8": "Ng\u01b0\u1eddi_s\u1eed_d\u1ee5ng_lao_\u0111\u1ed9",
+    "9": "Doanh_nghi\u1ec7p_c\u00f3_\u0111\u01b0\u1ee3c",
+    "10": "Doanh_nghi\u1ec7p_ph\u1ea3i_l\u00e0",
+    "11": "intent_12_Th\u1ea9m_quy\u1ec1n_giao_k\u1ebft",
+    "12": "Gi\u00e1m_\u0111\u1ed1c_nh\u00e2n_s\u1ef1_c\u00f3",
+    "13": "Ng\u01b0\u1eddi_lao_\u0111\u1ed9ng_c\u00f3_th",
+    "14": "N\u1ebfu_ng\u01b0\u1eddi_lao_\u0111\u1ed9ng_k",
+    "15": "C\u00e1c_tr\u01b0\u1eddng_h\u1ee3p_t\u1ea1m_h",
+    "16": "M\u1ed9t_ng\u01b0\u1eddi_lao_\u0111\u1ed9ng_c",
+    "17": "C\u00f3_c\u00e1c_lo\u1ea1i_h\u1ee3p_\u0111\u1ed3ng",
+    "18": "Khi_c\u00f3_nhu_c\u1ea7u_s\u1eed_d\u1ee5",
+    "19": "H\u1ee3p_\u0111\u1ed3ng_lao_\u0111\u1ed9ng_kh",
+    "20": "C\u00e1c_ch\u1ebf_\u0111\u1ed9_v\u1ec1_l\u01b0\u01a1ng,",
+    "21": "Khi_h\u1ee3p_\u0111\u1ed3ng_lao_\u0111\u1ed9n",
+    "22": "Th\u1eddi_h\u1ea1n_t\u1ea1m_ho\u00e3n_h\u1ee3",
+    "23": "C\u00f3_\u0111\u01b0\u1ee3c_k\u00fd_nhi\u1ec1u_h\u1ee3p",
+    "24": "H\u1ee3p_\u0111\u1ed3ng_lao_\u0111\u1ed9ng_b\u1eaf",
+    "25": "Ph\u1ee5_l\u1ee5c_H\u1ee3p_\u0111\u1ed3ng_lao",
+    "26": "Ph\u1ee5_l\u1ee5c_h\u1ee3p_\u0111\u1ed3ng_c\u1ea7n",
+    "27": "N\u1ebfu_c\u00f3_n\u1ed9i_dung_kh\u00e1c",
+    "28": "Hi\u1ec7u_l\u1ef1c_c\u1ee7a_h\u1ee3p_\u0111\u1ed3n",
+    "29": "Th\u1eddi_h\u1ea1n_b\u00e1o_tr\u01b0\u1edbc_t",
+    "30": "NSDL\u0110_c\u00f3_th\u1ec3_k\u00fd_ph\u1ee5"
+  },
   "initializer_range": 0.02,
   "intermediate_size": 3072,
-  "language": "english",
+  "label2id": {
+    "C\u00e1c_ch\u1ebf_\u0111\u1ed9_v\u1ec1_l\u01b0\u01a1ng,": 20,
+    "C\u00e1c_quy_\u0111\u1ecbnh_v\u1ec1_tuy\u1ec3": 2,
+    "C\u00e1c_tr\u01b0\u1eddng_h\u1ee3p_t\u1ea1m_h": 15,
+    "C\u00f3_c\u00e1c_lo\u1ea1i_h\u1ee3p_\u0111\u1ed3ng": 17,
+    "C\u00f3_\u0111\u01b0\u1ee3c_k\u00fd_nhi\u1ec1u_h\u1ee3p": 23,
+    "Doanh_nghi\u1ec7p_c\u00f3_\u0111\u01b0\u1ee3c": 9,
+    "Doanh_nghi\u1ec7p_ph\u1ea3i_cu": 6,
+    "Doanh_nghi\u1ec7p_ph\u1ea3i_l\u00e0": 10,
+    "Gi\u00e1m_\u0111\u1ed1c_nh\u00e2n_s\u1ef1_c\u00f3": 12,
+    "Hi\u1ec7u_l\u1ef1c_c\u1ee7a_h\u1ee3p_\u0111\u1ed3n": 28,
+    "H\u00e0nh_vi_ng\u01b0\u1eddi_s\u1eed_d\u1ee5n": 7,
+    "H\u1ee3p_\u0111\u1ed3ng_lao_\u0111\u1ed9ng_b\u1eaf": 24,
+    "H\u1ee3p_\u0111\u1ed3ng_lao_\u0111\u1ed9ng_c\u00f3": 5,
+    "H\u1ee3p_\u0111\u1ed3ng_lao_\u0111\u1ed9ng_kh": 19,
+    "Khi_c\u00f3_nhu_c\u1ea7u_s\u1eed_d\u1ee5": 18,
+    "Khi_h\u1ee3p_\u0111\u1ed3ng_lao_\u0111\u1ed9n": 21,
+    "M\u1ed9t_ng\u01b0\u1eddi_lao_\u0111\u1ed9ng_c": 16,
+    "NSDL\u0110_c\u00f3_th\u1ec3_k\u00fd_ph\u1ee5": 30,
+    "Ng\u01b0\u1eddi_lao_\u0111\u1ed9ng_c\u00f3_th": 13,
+    "Ng\u01b0\u1eddi_lao_\u0111\u1ed9ng_v\u00e0_do": 1,
+    "Ng\u01b0\u1eddi_s\u1eed_d\u1ee5ng_lao_\u0111\u1ed9": 8,
+    "N\u1ebfu_c\u00f3_n\u1ed9i_dung_kh\u00e1c": 27,
+    "N\u1ebfu_ng\u01b0\u1eddi_lao_\u0111\u1ed9ng_k": 14,
+    "Ph\u1ee5_l\u1ee5c_H\u1ee3p_\u0111\u1ed3ng_lao": 25,
+    "Ph\u1ee5_l\u1ee5c_h\u1ee3p_\u0111\u1ed3ng_c\u1ea7n": 26,
+    "Th\u1ea9m_quy\u1ec1n_tuy\u1ec3n,_qu": 0,
+    "Th\u1eddi_h\u1ea1n_b\u00e1o_tr\u01b0\u1edbc_t": 29,
+    "Th\u1eddi_h\u1ea1n_t\u1ea1m_ho\u00e3n_h\u1ee3": 22,
+    "Tr\u01b0\u1eddng_h\u1ee3p_gi\u1ea3i_th\u1ec3": 4,
+    "V\u0103n_ph\u00f2ng_\u0111\u1ea1i_di\u1ec7n_c": 3,
+    "intent_12_Th\u1ea9m_quy\u1ec1n_giao_k\u1ebft": 11
+  },
   "layer_norm_eps": 1e-05,
-  "max_position_embeddings": 514,
-  "model_type": "xlm-roberta",
-  "name": "XLMRoberta",
+  "max_position_embeddings": 258,
+  "model_type": "roberta",
   "num_attention_heads": 12,
   "num_hidden_layers": 12,
-  "output_past": true,
   "pad_token_id": 1,
   "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "tokenizer_class": "PhobertTokenizer",
   "torch_dtype": "float32",
   "transformers_version": "4.30.1",
   "type_vocab_size": 1,
   "use_cache": true,
-  "vocab_size": 250002
+  "vocab_size": 64001
 }
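
Note on the config change above: the checkpoint switches from a Vietnamese XLM-RoBERTa question-answering model to vinai/phobert-base fine-tuned as a single-label intent classifier with 31 labels. A minimal, hedged sketch of how such a config is typically consumed with the transformers library (the local "data/" path is an assumption for illustration, not part of the commit):

    from transformers import AutoConfig, AutoModelForSequenceClassification

    # Assumption: config.json and pytorch_model.bin from this commit sit in ./data
    config = AutoConfig.from_pretrained("data")
    model = AutoModelForSequenceClassification.from_pretrained("data", config=config)

    print(config.model_type)      # "roberta"
    print(config.problem_type)    # "single_label_classification"
    print(len(config.id2label))   # 31 intent labels
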
data/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a3e1c7209cd9cbaa524adce4c46e717cfb7675d98f0c91e56892a1041c0797a4
-size 1109888810
+oid sha256:a244b641283ecdac7b4a49ec0a1df37814257960d3bdb8fd7b4ac5f69b3836ba
+size 540157426
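
The weights file is stored as a Git LFS pointer; the "oid" and "size" fields identify the actual binary. A hedged sketch for checking a downloaded data/pytorch_model.bin against those fields (paths assumed for illustration):

    import hashlib
    from pathlib import Path

    # Assumption: the LFS object has been pulled to ./data/pytorch_model.bin
    path = Path("data/pytorch_model.bin")

    sha = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
            sha.update(chunk)

    print(path.stat().st_size)  # expected: 540157426 (the new "size" field)
    print(sha.hexdigest())      # expected: the new "oid" sha256 shown above
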
data/tokenizer_config.json CHANGED
@@ -2,21 +2,11 @@
   "bos_token": "<s>",
   "clean_up_tokenization_spaces": true,
   "cls_token": "<s>",
-  "do_lower_case": false,
   "eos_token": "</s>",
-  "full_tokenizer_file": null,
-  "mask_token": {
-    "__type": "AddedToken",
-    "content": "<mask>",
-    "lstrip": true,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "model_max_length": 512,
+  "mask_token": "<mask>",
+  "model_max_length": 256,
   "pad_token": "<pad>",
   "sep_token": "</s>",
-  "sp_model_kwargs": {},
-  "tokenizer_class": "XLMRobertaTokenizer",
+  "tokenizer_class": "PhobertTokenizer",
   "unk_token": "<unk>"
 }
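
The tokenizer config now resolves to PhobertTokenizer with model_max_length 256, matching PhoBERT's sequence limit (max_position_embeddings of 258 in the new config, with two positions reserved by the RoBERTa padding-offset convention; that reading is an inference, not stated in the commit). A short usage sketch, again assuming the uploaded files sit in a local "data/" directory:

    from transformers import AutoTokenizer

    # Assumption: the tokenizer files from this commit (vocab, BPE codes, configs) sit in ./data
    tokenizer = AutoTokenizer.from_pretrained("data")  # should resolve to PhobertTokenizer

    # Arbitrary example question ("What types of labor contracts are there?"),
    # truncated to the declared model_max_length of 256 tokens.
    enc = tokenizer("Hợp đồng lao động có những loại nào?",
                    truncation=True, max_length=256, return_tensors="pt")
    print(enc["input_ids"].shape)
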