zhuocheng committed on
Commit
4cbdc2d
·
verified ·
1 Parent(s): 0749503

Update FlexRAG retriever

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ corpus.jsonl filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1 @@
 
 
1
+ Test
config.yaml ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ log_interval: 1000
2
+ top_k: 10
3
+ batch_size: 32
4
+ query_preprocess_pipeline:
5
+ processor_type: []
6
+ length_filter_config:
7
+ max_tokens: null
8
+ min_tokens: null
9
+ max_chars: null
10
+ min_chars: null
11
+ max_bytes: null
12
+ min_bytes: null
13
+ tokenizer_config:
14
+ tokenizer_type: moses
15
+ hf_tokenizer_path: null
16
+ tiktok_tokenizer_name: null
17
+ lang: null
18
+ token_normalize_config:
19
+ lang: en
20
+ penn: true
21
+ norm_quote_commas: true
22
+ norm_numbers: true
23
+ pre_replace_unicode_punct: false
24
+ post_remove_control_chars: false
25
+ perl_parity: false
26
+ truncate_config:
27
+ max_chars: null
28
+ max_bytes: null
29
+ max_tokens: null
30
+ tokenizer_config:
31
+ tokenizer_type: moses
32
+ hf_tokenizer_path: null
33
+ tiktok_tokenizer_name: null
34
+ lang: null
35
+ database_path: null
36
+ method: lucene
37
+ idf_method: null
38
+ backend: auto
39
+ k1: 1.5
40
+ b: 0.75
41
+ delta: 0.5
42
+ lang: english
43
+ indexed_fields:
44
+ - text
corpus.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3277862a9a39eaf35b3fbec97f027ed94042a2122221497b35a261f517e2e31
3
+ size 42200397
corpus.mmindex.json ADDED
The diff for this file is too large to render. See raw diff
 
data.csc.index.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35ae3e8830a8f6e99c5fd09c75a5cb3294d0f085522894ad332a19d71263aa62
3
+ size 12248400
indices.csc.index.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cda39bd0a10daf053c2172ad71b1a48713286ffdd5831ac65b9fafbc561f5a2
3
+ size 12248400
indptr.csc.index.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88e600606134eb8b26f637cdeb95fb55693db9aba9cba2d57b31c43970fa42bb
3
+ size 617688
params.index.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "k1": 1.5,
3
+ "b": 0.75,
4
+ "delta": 0.5,
5
+ "method": "lucene",
6
+ "idf_method": "lucene",
7
+ "dtype": "float32",
8
+ "int_dtype": "int32",
9
+ "num_docs": 62225,
10
+ "version": "0.2.6",
11
+ "backend": "numpy"
12
+ }
vocab.index.json ADDED
The diff for this file is too large to render. See raw diff