tien314 committed on
Commit
805c2fa
·
verified ·
1 Parent(s): 1b1e7d1

Update BM25S model

Browse files
.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  corpus.jsonl filter=lfs diff=lfs merge=lfs -text
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  corpus.jsonl filter=lfs diff=lfs merge=lfs -text
37
+ corpus.mmindex.json filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -123,9 +123,9 @@ This dataset was created using the following data:
123
 
124
  | Statistic | Value |
125
  | --- | --- |
126
- | Number of documents | 1107975 |
127
- | Number of tokens | 12554642 |
128
- | Average tokens per document | 11.33 |
129
 
130
  ## Parameters
131
 
 
123
 
124
  | Statistic | Value |
125
  | --- | --- |
126
+ | Number of documents | 1200871 |
127
+ | Number of tokens | 13474476 |
128
+ | Average tokens per document | 11.22 |
129
 
130
  ## Parameters
131
 
corpus.jsonl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba46de91e0433ec65d809dff8b635a99e8b0955302f7170f6ce56326d4eef7cb
3
- size 117724984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7728d4d7e75abe5bc882a3d1bd832691943cf7e995516e5492b6301b585842fa
3
+ size 126727301
corpus.mmindex.json CHANGED
The diff for this file is too large to render. See raw diff
 
data.csc.index.npy CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:08330452ed03f8f4d856a06397aae2a88c54d263b0ca48efc37f351b8051193b
3
- size 50218696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22a2b694b7c0de69afa5309f3496cc98caefc26e523a351ed89fa97d573bd3ee
3
+ size 53898032
indices.csc.index.npy CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6784f4a7ee5657bbb65dfedf40ec32b71e775091095d6bdae4bc4de1e396b4de
3
- size 50218696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a8609353f489da6ed1bf59c86029e88f4ffd4bb7712170dc1da975eb4fc12a8
3
+ size 53898032
indptr.csc.index.npy CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e79c346f2247f99257be8e532ce10c9ce94b7fa5b5e0021ae2ffae5a0c823047
3
- size 2181376
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1af0b96ea41d02c815364e786dea127af71df51bc7d473288327016d53b3373
3
+ size 2215576
params.index.json CHANGED
@@ -6,7 +6,7 @@
6
  "idf_method": "lucene",
7
  "dtype": "float32",
8
  "int_dtype": "int32",
9
- "num_docs": 1107975,
10
  "version": "0.2.7post1",
11
  "backend": "numpy"
12
  }
 
6
  "idf_method": "lucene",
7
  "dtype": "float32",
8
  "int_dtype": "int32",
9
+ "num_docs": 1200871,
10
  "version": "0.2.7post1",
11
  "backend": "numpy"
12
  }
vocab.index.json CHANGED
The diff for this file is too large to render. See raw diff