Diar committed
Commit · c7aa77f
1 Parent(s): f9ca82a
lm1
Browse files
- lm1/language_model_config.json → language_model_config.json +0 -1
- lm2/language_model.bin +0 -3
- lm2/language_model_config.json +0 -34
- passage/tokenizer_config.json +0 -1
- prediction_head_0.bin +0 -3
- prediction_head_0_config.json +0 -1
- processor_config.json +0 -1
- lm1/language_model.bin → pytorch_model.bin +0 -0
- query/special_tokens_map.json +0 -1
- query/vocab.txt +0 -0
- passage/special_tokens_map.json → special_tokens_map.json +0 -0
- query/tokenizer_config.json → tokenizer_config.json +0 -0
- passage/vocab.txt → vocab.txt +0 -0
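Note: taken together, these changes flatten a FARM/Haystack-style DPR checkpoint (separate lm1/lm2 language models and query/passage tokenizer folders) into a single-encoder layout at the repository root. A minimal sketch of how the resulting files could be loaded, assuming a local copy of the flattened repo; the path is hypothetical and strict=False is used in case the saved state dict still carries FARM-style key prefixes:

import torch
from transformers import BertConfig, BertModel, BertTokenizerFast

repo_dir = "path/to/flattened-checkpoint"  # hypothetical local path

# The config keeps its FARM-style filename, so point BertConfig at it explicitly.
config = BertConfig.from_json_file(f"{repo_dir}/language_model_config.json")
model = BertModel(config)
state_dict = torch.load(f"{repo_dir}/pytorch_model.bin", map_location="cpu")
model.load_state_dict(state_dict, strict=False)  # tolerate any FARM key prefixes

# The kept tokenizer files are standard BERT/LaBSE vocab files (cased).
tokenizer = BertTokenizerFast(vocab_file=f"{repo_dir}/vocab.txt", do_lower_case=False)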
lm1/language_model_config.json → language_model_config.json
RENAMED
@@ -1,5 +1,4 @@
 {
-  "_name_or_path": "sentence-transformers/LaBSE",
   "architectures": [
     "BertModel"
   ],
lm2/language_model.bin
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d293ec78f56a8c0f471480fe13916b7055836e89f5fa1774b48cf1bc35228e7b
-size 1883803575
lm2/language_model_config.json
DELETED
@@ -1,34 +0,0 @@
-{
-  "_name_or_path": "sentence-transformers/LaBSE",
-  "architectures": [
-    "BertModel"
-  ],
-  "attention_probs_dropout_prob": 0.1,
-  "directionality": "bidi",
-  "gradient_checkpointing": false,
-  "hidden_act": "gelu",
-  "hidden_dropout_prob": 0.1,
-  "hidden_size": 768,
-  "initializer_range": 0.02,
-  "intermediate_size": 3072,
-  "language": "english",
-  "layer_norm_eps": 1e-12,
-  "max_position_embeddings": 512,
-  "model_type": "dpr",
-  "name": "DPRContextEncoder",
-  "num_attention_heads": 12,
-  "num_hidden_layers": 12,
-  "pad_token_id": 0,
-  "pooler_fc_size": 768,
-  "pooler_num_attention_heads": 12,
-  "pooler_num_fc_layers": 3,
-  "pooler_size_per_head": 128,
-  "pooler_type": "first_token_transform",
-  "position_embedding_type": "absolute",
-  "projection_dim": 0,
-  "revision": null,
-  "transformers_version": "4.6.0.dev0",
-  "type_vocab_size": 2,
-  "use_cache": true,
-  "vocab_size": 501153
-}
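Note: the deleted context-encoder config above matches the LaBSE BERT architecture. A sketch of the same hyperparameters expressed as a plain transformers BertConfig, with values copied from the deleted file (the FARM-specific fields such as "name" and "language" have no BertConfig equivalent and are omitted):

from transformers import BertConfig

ctx_config = BertConfig(
    vocab_size=501153,          # LaBSE's multilingual vocabulary
    hidden_size=768,
    num_hidden_layers=12,
    num_attention_heads=12,
    intermediate_size=3072,
    hidden_act="gelu",
    hidden_dropout_prob=0.1,
    attention_probs_dropout_prob=0.1,
    max_position_embeddings=512,
    type_vocab_size=2,
    layer_norm_eps=1e-12,
    pad_token_id=0,
)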
passage/tokenizer_config.json
DELETED
@@ -1 +0,0 @@
-{"do_lower_case": false, "do_basic_tokenize": true, "never_split": null, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": "C:\\Users\\DiarS/.cache\\huggingface\\transformers\\5fb4590a69eca214db9d31f0a4e90637a90fab773b17d382309a27f2a34da5be.dd8bd9bfd3664b530ea4e645105f557769387b3da9f79bdb55ed556bdd80611d", "name_or_path": "../input/mexdpr/LaBSE-BERT/tokenizer", "tokenizer_class": "DPRContextEncoderTokenizer", "vocab_size": 501153}
prediction_head_0.bin
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:339cf0a79d42b8a0e283632633b8c7c079101ef627869b4b27f743462db4ecf9
-size 495
prediction_head_0_config.json
DELETED
@@ -1 +0,0 @@
-{"training": false, "similarity_function": "dot_product", "task_name": "text_similarity", "model_type": "text_similarity", "ph_output_type": "per_sequence", "global_loss_buffer_size": 150000, "label_tensor_name": "label_ids", "label_list": ["hard_negative", "positive"], "metric": "text_similarity_metric", "name": "TextSimilarityHead"}
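Note: the deleted head config declares a TextSimilarityHead with a "dot_product" similarity function over ["hard_negative", "positive"] labels. A minimal sketch of that scoring scheme (not the FARM implementation itself): query and passage embeddings are compared by inner product, and training pushes the positive passage to outscore the negatives.

import torch
import torch.nn.functional as F

def dot_product_scores(query_vectors: torch.Tensor, passage_vectors: torch.Tensor) -> torch.Tensor:
    # [n_queries, dim] x [n_passages, dim] -> [n_queries, n_passages]
    return query_vectors @ passage_vectors.T

# Toy example: 2 queries scored against 4 passages; positives sit at indices 0 and 2.
scores = dot_product_scores(torch.randn(2, 768), torch.randn(4, 768))
loss = F.cross_entropy(scores, torch.tensor([0, 2]))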
processor_config.json
DELETED
@@ -1 +0,0 @@
-{"baskets": [], "data_dir": "../input/mexdpr/fifths", "dev_filename": "nq-dev.json", "dev_split": 0.0, "embed_title": true, "max_samples": null, "max_seq_len": null, "max_seq_len_passage": 256, "max_seq_len_query": 64, "num_hard_negatives": 1, "num_positives": 1, "proxies": null, "shuffle_negatives": true, "shuffle_positives": false, "tasks": {"text_similarity": {"label_list": ["hard_negative", "positive"], "metric": "text_similarity_metric", "label_tensor_name": "label_ids", "label_name": "label", "label_column_name": null, "text_column_name": null, "task_type": "text_similarity"}}, "test_filename": "nq-test.json", "tokenizer": null, "train_filename": "nq-train.json", "query_tokenizer": "DPRQuestionEncoderTokenizer", "passage_tokenizer": "DPRContextEncoderTokenizer", "processor": "TextSimilarityProcessor"}
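Note: the deleted processor config records the sequence-length budget used at training time (max_seq_len_query=64, max_seq_len_passage=256, embed_title=true). A rough sketch of what those settings mean in plain tokenizer calls; the tokenizer path and example texts are placeholders, and pairing the title with the passage text is an assumption about how embed_title is applied:

from transformers import BertTokenizerFast

tokenizer = BertTokenizerFast.from_pretrained("path/to/tokenizer")  # hypothetical

query_inputs = tokenizer(
    "who wrote the iliad?",
    truncation=True, max_length=64, padding="max_length", return_tensors="pt",
)
passage_inputs = tokenizer(
    "Iliad", "The Iliad is an ancient Greek epic poem attributed to Homer.",
    truncation=True, max_length=256, padding="max_length", return_tensors="pt",
)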
lm1/language_model.bin → pytorch_model.bin
RENAMED
File without changes
query/special_tokens_map.json
DELETED
@@ -1 +0,0 @@
-{"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
query/vocab.txt
DELETED
The diff for this file is too large to render. See raw diff
passage/special_tokens_map.json → special_tokens_map.json
RENAMED
File without changes
query/tokenizer_config.json → tokenizer_config.json
RENAMED
File without changes
passage/vocab.txt → vocab.txt
RENAMED
File without changes