Update FlexRAG retriever
Browse files
- .gitattributes +1 -0
- README.md +1 -0
- config.yaml +44 -0
- corpus.jsonl +3 -0
- corpus.mmindex.json +0 -0
- data.csc.index.npy +3 -0
- indices.csc.index.npy +3 -0
- indptr.csc.index.npy +3 -0
- params.index.json +12 -0
- vocab.index.json +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
corpus.jsonl filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
Test
|
config.yaml
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
log_interval: 1000
|
2 |
+
top_k: 10
|
3 |
+
batch_size: 32
|
4 |
+
query_preprocess_pipeline:
|
5 |
+
processor_type: []
|
6 |
+
length_filter_config:
|
7 |
+
max_tokens: null
|
8 |
+
min_tokens: null
|
9 |
+
max_chars: null
|
10 |
+
min_chars: null
|
11 |
+
max_bytes: null
|
12 |
+
min_bytes: null
|
13 |
+
tokenizer_config:
|
14 |
+
tokenizer_type: moses
|
15 |
+
hf_tokenizer_path: null
|
16 |
+
tiktok_tokenizer_name: null
|
17 |
+
lang: null
|
18 |
+
token_normalize_config:
|
19 |
+
lang: en
|
20 |
+
penn: true
|
21 |
+
norm_quote_commas: true
|
22 |
+
norm_numbers: true
|
23 |
+
pre_replace_unicode_punct: false
|
24 |
+
post_remove_control_chars: false
|
25 |
+
perl_parity: false
|
26 |
+
truncate_config:
|
27 |
+
max_chars: null
|
28 |
+
max_bytes: null
|
29 |
+
max_tokens: null
|
30 |
+
tokenizer_config:
|
31 |
+
tokenizer_type: moses
|
32 |
+
hf_tokenizer_path: null
|
33 |
+
tiktok_tokenizer_name: null
|
34 |
+
lang: null
|
35 |
+
database_path: null
|
36 |
+
method: lucene
|
37 |
+
idf_method: null
|
38 |
+
backend: auto
|
39 |
+
k1: 1.5
|
40 |
+
b: 0.75
|
41 |
+
delta: 0.5
|
42 |
+
lang: english
|
43 |
+
indexed_fields:
|
44 |
+
- text
|
corpus.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c3277862a9a39eaf35b3fbec97f027ed94042a2122221497b35a261f517e2e31
|
3 |
+
size 42200397
|
corpus.mmindex.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data.csc.index.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:35ae3e8830a8f6e99c5fd09c75a5cb3294d0f085522894ad332a19d71263aa62
|
3 |
+
size 12248400
|
indices.csc.index.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8cda39bd0a10daf053c2172ad71b1a48713286ffdd5831ac65b9fafbc561f5a2
|
3 |
+
size 12248400
|
indptr.csc.index.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:88e600606134eb8b26f637cdeb95fb55693db9aba9cba2d57b31c43970fa42bb
|
3 |
+
size 617688
|
params.index.json
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"k1": 1.5,
|
3 |
+
"b": 0.75,
|
4 |
+
"delta": 0.5,
|
5 |
+
"method": "lucene",
|
6 |
+
"idf_method": "lucene",
|
7 |
+
"dtype": "float32",
|
8 |
+
"int_dtype": "int32",
|
9 |
+
"num_docs": 62225,
|
10 |
+
"version": "0.2.6",
|
11 |
+
"backend": "numpy"
|
12 |
+
}
|
vocab.index.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|