tien314 committed on
Commit
9fedd52
·
verified ·
1 Parent(s): 9c658fb

Update BM25S model

Browse files
.gitattributes CHANGED
@@ -34,3 +34,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  corpus.jsonl filter=lfs diff=lfs merge=lfs -text
 
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  corpus.jsonl filter=lfs diff=lfs merge=lfs -text
37
+ corpus.mmindex.json filter=lfs diff=lfs merge=lfs -text
38
+ vocab.index.json filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -123,9 +123,9 @@ This dataset was created using the following data:
123
 
124
  | Statistic | Value |
125
  | --- | --- |
126
- | Number of documents | 765912 |
127
- | Number of tokens | 8821141 |
128
- | Average tokens per document | 11.52 |
129
 
130
  ## Parameters
131
 
 
123
 
124
  | Statistic | Value |
125
  | --- | --- |
126
+ | Number of documents | 3442247 |
127
+ | Number of tokens | 44700484 |
128
+ | Average tokens per document | 12.99 |
129
 
130
  ## Parameters
131
 
corpus.jsonl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:acf86865729a799468c1b3cbb545fb146abd770cef9efdbd68eb129d272636ad
3
- size 84916447
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41d7104c9c5c348417170f1ad3269e40bfcd98b68d556fe008cb656f805520b8
3
+ size 414153548
corpus.mmindex.json CHANGED
The diff for this file is too large to render. See raw diff
 
data.csc.index.npy CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eca6091de0d7dc17c84d4abf718fceb168b2b34803cbeccee325a994ac0e58e8
3
- size 35284692
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e0add5fb6b2e0862e1a3c5566c42dcf5dc4310960885cf36b141641933d68cd
3
+ size 178802064
indices.csc.index.npy CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e206e1093b0d3b6e7ad479cc834d13c776f1d97ee95af05e96ef0f98945e8ac
3
- size 35284692
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62438280a3375491410ede51ee59c476a949d1d5a51e203d1d0f6f6d974f569d
3
+ size 178802064
indptr.csc.index.npy CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca27161440ff1d1bed8f3a59e9fa1b246e0563ea7235f5cd20ff1c02ad91220e
3
- size 2060516
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2908964d0221793377dc051744dbcac038554133f574a932890959e82771695
3
+ size 8841532
params.index.json CHANGED
@@ -6,7 +6,7 @@
6
  "idf_method": "lucene",
7
  "dtype": "float32",
8
  "int_dtype": "int32",
9
- "num_docs": 765912,
10
  "version": "0.2.7post1",
11
  "backend": "numpy"
12
  }
 
6
  "idf_method": "lucene",
7
  "dtype": "float32",
8
  "int_dtype": "int32",
9
+ "num_docs": 3442247,
10
  "version": "0.2.7post1",
11
  "backend": "numpy"
12
  }
vocab.index.json CHANGED
The diff for this file is too large to render. See raw diff