pawasthy's picture
Update README.md
0bc9461 verified
|
raw
history blame
50.1 kB
---
language:
- en
license: apache-2.0
library_name: transformers
tags:
- language
- granite
- embeddings
model-index:
- name: ibm-granite/granite-embedding-30m-english
results:
- dataset:
type: mteb/arguana
name: MTEB ArguAna
config: default
split: test
task:
type: Retrieval
metrics:
- type: map_at_1
value: 0.31792
- type: map_at_10
value: 0.47599
- type: map_at_100
value: 0.48425
- type: map_at_1000
value: 0.48427
- type: map_at_3
value: 0.42757
- type: map_at_5
value: 0.45634
- type: mrr_at_1
value: 0.32788
- type: mrr_at_10
value: 0.47974
- type: mrr_at_100
value: 0.48801
- type: mrr_at_1000
value: 0.48802
- type: mrr_at_3
value: 0.43065
- type: mrr_at_5
value: 0.45999
- type: ndcg_at_1
value: 0.31792
- type: ndcg_at_10
value: 0.56356
- type: ndcg_at_100
value: 0.59789
- type: ndcg_at_1000
value: 0.59857
- type: ndcg_at_3
value: 0.46453
- type: ndcg_at_5
value: 0.51623
- type: precision_at_1
value: 0.31792
- type: precision_at_10
value: 0.08428
- type: precision_at_100
value: 0.00991
- type: precision_at_1000
value: 0.001
- type: precision_at_3
value: 0.19061
- type: precision_at_5
value: 0.1394
- type: recall_at_1
value: 0.31792
- type: recall_at_10
value: 0.84282
- type: recall_at_100
value: 0.99075
- type: recall_at_1000
value: 0.99644
- type: recall_at_3
value: 0.57183
- type: recall_at_5
value: 0.69701
- dataset:
type: mteb/climate-fever
name: MTEB ClimateFEVER
config: default
split: test
task:
type: Retrieval
metrics:
- type: map_at_1
value: 0.13189
- type: map_at_10
value: 0.21789
- type: map_at_100
value: 0.2358
- type: map_at_1000
value: 0.23772
- type: map_at_3
value: 0.18513
- type: map_at_5
value: 0.20212
- type: mrr_at_1
value: 0.29837
- type: mrr_at_10
value: 0.41376
- type: mrr_at_100
value: 0.42282
- type: mrr_at_1000
value: 0.42319
- type: mrr_at_3
value: 0.38284
- type: mrr_at_5
value: 0.40301
- type: ndcg_at_1
value: 0.29837
- type: ndcg_at_10
value: 0.30263
- type: ndcg_at_100
value: 0.37228
- type: ndcg_at_1000
value: 0.40677
- type: ndcg_at_3
value: 0.25392
- type: ndcg_at_5
value: 0.27153
- type: precision_at_1
value: 0.29837
- type: precision_at_10
value: 0.09179
- type: precision_at_100
value: 0.01659
- type: precision_at_1000
value: 0.0023
- type: precision_at_3
value: 0.18545
- type: precision_at_5
value: 0.14241
- type: recall_at_1
value: 0.13189
- type: recall_at_10
value: 0.35355
- type: recall_at_100
value: 0.59255
- type: recall_at_1000
value: 0.78637
- type: recall_at_3
value: 0.23255
- type: recall_at_5
value: 0.28446
- dataset:
type: mteb/cqadupstack-android
name: MTEB CQADupstackAndroidRetrieval
config: default
split: test
task:
type: Retrieval
metrics:
- type: map_at_1
value: 0.35797
- type: map_at_10
value: 0.47793
- type: map_at_100
value: 0.49422
- type: map_at_1000
value: 0.49546
- type: map_at_3
value: 0.44137
- type: map_at_5
value: 0.46063
- type: mrr_at_1
value: 0.44206
- type: mrr_at_10
value: 0.53808
- type: mrr_at_100
value: 0.5454
- type: mrr_at_1000
value: 0.54578
- type: mrr_at_3
value: 0.51431
- type: mrr_at_5
value: 0.5284
- type: ndcg_at_1
value: 0.44206
- type: ndcg_at_10
value: 0.54106
- type: ndcg_at_100
value: 0.59335
- type: ndcg_at_1000
value: 0.61015
- type: ndcg_at_3
value: 0.49365
- type: ndcg_at_5
value: 0.51429
- type: precision_at_1
value: 0.44206
- type: precision_at_10
value: 0.10443
- type: precision_at_100
value: 0.01631
- type: precision_at_1000
value: 0.00214
- type: precision_at_3
value: 0.23653
- type: precision_at_5
value: 0.1691
- type: recall_at_1
value: 0.35797
- type: recall_at_10
value: 0.65182
- type: recall_at_100
value: 0.86654
- type: recall_at_1000
value: 0.97131
- type: recall_at_3
value: 0.51224
- type: recall_at_5
value: 0.57219
- dataset:
type: mteb/cqadupstack-english
name: MTEB CQADupstackEnglishRetrieval
config: default
split: test
task:
type: Retrieval
metrics:
- type: map_at_1
value: 0.32748
- type: map_at_10
value: 0.44138
- type: map_at_100
value: 0.45565
- type: map_at_1000
value: 0.45698
- type: map_at_3
value: 0.40916
- type: map_at_5
value: 0.42621
- type: mrr_at_1
value: 0.41274
- type: mrr_at_10
value: 0.5046
- type: mrr_at_100
value: 0.5107
- type: mrr_at_1000
value: 0.51109
- type: mrr_at_3
value: 0.48238
- type: mrr_at_5
value: 0.49563
- type: ndcg_at_1
value: 0.41274
- type: ndcg_at_10
value: 0.50251
- type: ndcg_at_100
value: 0.54725
- type: ndcg_at_1000
value: 0.56635
- type: ndcg_at_3
value: 0.46023
- type: ndcg_at_5
value: 0.47883
- type: precision_at_1
value: 0.41274
- type: precision_at_10
value: 0.09828
- type: precision_at_100
value: 0.01573
- type: precision_at_1000
value: 0.00202
- type: precision_at_3
value: 0.22718
- type: precision_at_5
value: 0.16064
- type: recall_at_1
value: 0.32748
- type: recall_at_10
value: 0.60322
- type: recall_at_100
value: 0.79669
- type: recall_at_1000
value: 0.9173
- type: recall_at_3
value: 0.47523
- type: recall_at_5
value: 0.52957
- dataset:
type: mteb/cqadupstack-gaming
name: MTEB CQADupstackGamingRetrieval
config: default
split: test
task:
type: Retrieval
metrics:
- type: map_at_1
value: 0.41126
- type: map_at_10
value: 0.53661
- type: map_at_100
value: 0.54588
- type: map_at_1000
value: 0.54638
- type: map_at_3
value: 0.50389
- type: map_at_5
value: 0.52286
- type: mrr_at_1
value: 0.47147
- type: mrr_at_10
value: 0.5685
- type: mrr_at_100
value: 0.57458
- type: mrr_at_1000
value: 0.57487
- type: mrr_at_3
value: 0.54431
- type: mrr_at_5
value: 0.55957
- type: ndcg_at_1
value: 0.47147
- type: ndcg_at_10
value: 0.59318
- type: ndcg_at_100
value: 0.62972
- type: ndcg_at_1000
value: 0.64033
- type: ndcg_at_3
value: 0.53969
- type: ndcg_at_5
value: 0.56743
- type: precision_at_1
value: 0.47147
- type: precision_at_10
value: 0.09549
- type: precision_at_100
value: 0.01224
- type: precision_at_1000
value: 0.00135
- type: precision_at_3
value: 0.24159
- type: precision_at_5
value: 0.16577
- type: recall_at_1
value: 0.41126
- type: recall_at_10
value: 0.72691
- type: recall_at_100
value: 0.88692
- type: recall_at_1000
value: 0.96232
- type: recall_at_3
value: 0.58374
- type: recall_at_5
value: 0.65226
- dataset:
type: mteb/cqadupstack-gis
name: MTEB CQADupstackGisRetrieval
config: default
split: test
task:
type: Retrieval
metrics:
- type: map_at_1
value: 0.28464
- type: map_at_10
value: 0.3828
- type: map_at_100
value: 0.39277
- type: map_at_1000
value: 0.39355
- type: map_at_3
value: 0.35704
- type: map_at_5
value: 0.37116
- type: mrr_at_1
value: 0.30734
- type: mrr_at_10
value: 0.40422
- type: mrr_at_100
value: 0.41297
- type: mrr_at_1000
value: 0.41355
- type: mrr_at_3
value: 0.38136
- type: mrr_at_5
value: 0.39362
- type: ndcg_at_1
value: 0.30734
- type: ndcg_at_10
value: 0.43564
- type: ndcg_at_100
value: 0.48419
- type: ndcg_at_1000
value: 0.50404
- type: ndcg_at_3
value: 0.38672
- type: ndcg_at_5
value: 0.40954
- type: precision_at_1
value: 0.30734
- type: precision_at_10
value: 0.06633
- type: precision_at_100
value: 0.00956
- type: precision_at_1000
value: 0.00116
- type: precision_at_3
value: 0.16497
- type: precision_at_5
value: 0.11254
- type: recall_at_1
value: 0.28464
- type: recall_at_10
value: 0.57621
- type: recall_at_100
value: 0.7966
- type: recall_at_1000
value: 0.94633
- type: recall_at_3
value: 0.44588
- type: recall_at_5
value: 0.50031
- dataset:
type: mteb/cqadupstack-mathematica
name: MTEB CQADupstackMathematicaRetrieval
config: default
split: test
task:
type: Retrieval
metrics:
- type: map_at_1
value: 0.18119
- type: map_at_10
value: 0.27055
- type: map_at_100
value: 0.28461
- type: map_at_1000
value: 0.28577
- type: map_at_3
value: 0.24341
- type: map_at_5
value: 0.25861
- type: mrr_at_1
value: 0.22886
- type: mrr_at_10
value: 0.32234
- type: mrr_at_100
value: 0.3328
- type: mrr_at_1000
value: 0.3334
- type: mrr_at_3
value: 0.29664
- type: mrr_at_5
value: 0.31107
- type: ndcg_at_1
value: 0.22886
- type: ndcg_at_10
value: 0.32749
- type: ndcg_at_100
value: 0.39095
- type: ndcg_at_1000
value: 0.41656
- type: ndcg_at_3
value: 0.27864
- type: ndcg_at_5
value: 0.30177
- type: precision_at_1
value: 0.22886
- type: precision_at_10
value: 0.06169
- type: precision_at_100
value: 0.0107
- type: precision_at_1000
value: 0.00143
- type: precision_at_3
value: 0.13682
- type: precision_at_5
value: 0.0995
- type: recall_at_1
value: 0.18119
- type: recall_at_10
value: 0.44983
- type: recall_at_100
value: 0.72396
- type: recall_at_1000
value: 0.90223
- type: recall_at_3
value: 0.31633
- type: recall_at_5
value: 0.37532
- dataset:
type: mteb/cqadupstack-physics
name: MTEB CQADupstackPhysicsRetrieval
config: default
split: test
task:
type: Retrieval
metrics:
- type: map_at_1
value: 0.30517
- type: map_at_10
value: 0.42031
- type: map_at_100
value: 0.43415
- type: map_at_1000
value: 0.43525
- type: map_at_3
value: 0.38443
- type: map_at_5
value: 0.40685
- type: mrr_at_1
value: 0.38114
- type: mrr_at_10
value: 0.47783
- type: mrr_at_100
value: 0.48647
- type: mrr_at_1000
value: 0.48688
- type: mrr_at_3
value: 0.45172
- type: mrr_at_5
value: 0.46817
- type: ndcg_at_1
value: 0.38114
- type: ndcg_at_10
value: 0.4834
- type: ndcg_at_100
value: 0.53861
- type: ndcg_at_1000
value: 0.55701
- type: ndcg_at_3
value: 0.42986
- type: ndcg_at_5
value: 0.45893
- type: precision_at_1
value: 0.38114
- type: precision_at_10
value: 0.08893
- type: precision_at_100
value: 0.01375
- type: precision_at_1000
value: 0.00172
- type: precision_at_3
value: 0.20821
- type: precision_at_5
value: 0.15034
- type: recall_at_1
value: 0.30517
- type: recall_at_10
value: 0.61332
- type: recall_at_100
value: 0.84051
- type: recall_at_1000
value: 0.95826
- type: recall_at_3
value: 0.46015
- type: recall_at_5
value: 0.53801
- dataset:
type: mteb/cqadupstack-programmers
name: MTEB CQADupstackProgrammersRetrieval
config: default
split: test
task:
type: Retrieval
metrics:
- type: map_at_1
value: 0.27396
- type: map_at_10
value: 0.38043
- type: map_at_100
value: 0.39341
- type: map_at_1000
value: 0.39454
- type: map_at_3
value: 0.34783
- type: map_at_5
value: 0.3663
- type: mrr_at_1
value: 0.34247
- type: mrr_at_10
value: 0.43681
- type: mrr_at_100
value: 0.4451
- type: mrr_at_1000
value: 0.44569
- type: mrr_at_3
value: 0.41172
- type: mrr_at_5
value: 0.42702
- type: ndcg_at_1
value: 0.34247
- type: ndcg_at_10
value: 0.44065
- type: ndcg_at_100
value: 0.49434
- type: ndcg_at_1000
value: 0.51682
- type: ndcg_at_3
value: 0.38976
- type: ndcg_at_5
value: 0.41332
- type: precision_at_1
value: 0.34247
- type: precision_at_10
value: 0.08059
- type: precision_at_100
value: 0.01258
- type: precision_at_1000
value: 0.00162
- type: precision_at_3
value: 0.1876
- type: precision_at_5
value: 0.13333
- type: recall_at_1
value: 0.27396
- type: recall_at_10
value: 0.56481
- type: recall_at_100
value: 0.79012
- type: recall_at_1000
value: 0.94182
- type: recall_at_3
value: 0.41785
- type: recall_at_5
value: 0.48303
- dataset:
type: mteb/cqadupstack-stats
name: MTEB CQADupstackStatsRetrieval
config: default
split: test
task:
type: Retrieval
metrics:
- type: map_at_1
value: 0.25728
- type: map_at_10
value: 0.33903
- type: map_at_100
value: 0.34853
- type: map_at_1000
value: 0.34944
- type: map_at_3
value: 0.31268
- type: map_at_5
value: 0.32596
- type: mrr_at_1
value: 0.29141
- type: mrr_at_10
value: 0.36739
- type: mrr_at_100
value: 0.37545
- type: mrr_at_1000
value: 0.37608
- type: mrr_at_3
value: 0.34407
- type: mrr_at_5
value: 0.3568
- type: ndcg_at_1
value: 0.29141
- type: ndcg_at_10
value: 0.38596
- type: ndcg_at_100
value: 0.43375
- type: ndcg_at_1000
value: 0.45562
- type: ndcg_at_3
value: 0.33861
- type: ndcg_at_5
value: 0.35887
- type: precision_at_1
value: 0.29141
- type: precision_at_10
value: 0.06334
- type: precision_at_100
value: 0.00952
- type: precision_at_1000
value: 0.00121
- type: precision_at_3
value: 0.14826
- type: precision_at_5
value: 0.10429
- type: recall_at_1
value: 0.25728
- type: recall_at_10
value: 0.50121
- type: recall_at_100
value: 0.72382
- type: recall_at_1000
value: 0.88306
- type: recall_at_3
value: 0.36638
- type: recall_at_5
value: 0.41689
- dataset:
type: mteb/cqadupstack-tex
name: MTEB CQADupstackTexRetrieval
config: default
split: test
task:
type: Retrieval
metrics:
- type: map_at_1
value: 0.19911
- type: map_at_10
value: 0.2856
- type: map_at_100
value: 0.29785
- type: map_at_1000
value: 0.29911
- type: map_at_3
value: 0.25875
- type: map_at_5
value: 0.2741
- type: mrr_at_1
value: 0.24054
- type: mrr_at_10
value: 0.32483
- type: mrr_at_100
value: 0.33464
- type: mrr_at_1000
value: 0.33534
- type: mrr_at_3
value: 0.30162
- type: mrr_at_5
value: 0.31506
- type: ndcg_at_1
value: 0.24054
- type: ndcg_at_10
value: 0.33723
- type: ndcg_at_100
value: 0.39362
- type: ndcg_at_1000
value: 0.42065
- type: ndcg_at_3
value: 0.29116
- type: ndcg_at_5
value: 0.31299
- type: precision_at_1
value: 0.24054
- type: precision_at_10
value: 0.06194
- type: precision_at_100
value: 0.01058
- type: precision_at_1000
value: 0.00148
- type: precision_at_3
value: 0.13914
- type: precision_at_5
value: 0.10076
- type: recall_at_1
value: 0.19911
- type: recall_at_10
value: 0.45183
- type: recall_at_100
value: 0.7025
- type: recall_at_1000
value: 0.89222
- type: recall_at_3
value: 0.32195
- type: recall_at_5
value: 0.37852
- dataset:
type: mteb/cqadupstack-unix
name: MTEB CQADupstackUnixRetrieval
config: default
split: test
task:
type: Retrieval
metrics:
- type: map_at_1
value: 0.29819
- type: map_at_10
value: 0.40073
- type: map_at_100
value: 0.41289
- type: map_at_1000
value: 0.41375
- type: map_at_3
value: 0.36572
- type: map_at_5
value: 0.38386
- type: mrr_at_1
value: 0.35168
- type: mrr_at_10
value: 0.44381
- type: mrr_at_100
value: 0.45191
- type: mrr_at_1000
value: 0.45234
- type: mrr_at_3
value: 0.41402
- type: mrr_at_5
value: 0.43039
- type: ndcg_at_1
value: 0.35168
- type: ndcg_at_10
value: 0.46071
- type: ndcg_at_100
value: 0.51351
- type: ndcg_at_1000
value: 0.5317
- type: ndcg_at_3
value: 0.39972
- type: ndcg_at_5
value: 0.42586
- type: precision_at_1
value: 0.35168
- type: precision_at_10
value: 0.07985
- type: precision_at_100
value: 0.01185
- type: precision_at_1000
value: 0.00144
- type: precision_at_3
value: 0.18221
- type: precision_at_5
value: 0.12892
- type: recall_at_1
value: 0.29819
- type: recall_at_10
value: 0.60075
- type: recall_at_100
value: 0.82771
- type: recall_at_1000
value: 0.95219
- type: recall_at_3
value: 0.43245
- type: recall_at_5
value: 0.49931
- dataset:
type: mteb/cqadupstack-webmasters
name: MTEB CQADupstackWebmastersRetrieval
config: default
split: test
task:
type: Retrieval
metrics:
- type: map_at_1
value: 0.28409
- type: map_at_10
value: 0.37621
- type: map_at_100
value: 0.39233
- type: map_at_1000
value: 0.39471
- type: map_at_3
value: 0.34337
- type: map_at_5
value: 0.35985
- type: mrr_at_1
value: 0.33794
- type: mrr_at_10
value: 0.42349
- type: mrr_at_100
value: 0.43196
- type: mrr_at_1000
value: 0.43237
- type: mrr_at_3
value: 0.39526
- type: mrr_at_5
value: 0.41087
- type: ndcg_at_1
value: 0.33794
- type: ndcg_at_10
value: 0.43832
- type: ndcg_at_100
value: 0.49514
- type: ndcg_at_1000
value: 0.51742
- type: ndcg_at_3
value: 0.38442
- type: ndcg_at_5
value: 0.40737
- type: precision_at_1
value: 0.33794
- type: precision_at_10
value: 0.08597
- type: precision_at_100
value: 0.01652
- type: precision_at_1000
value: 0.00251
- type: precision_at_3
value: 0.17787
- type: precision_at_5
value: 0.13241
- type: recall_at_1
value: 0.28409
- type: recall_at_10
value: 0.55388
- type: recall_at_100
value: 0.81517
- type: recall_at_1000
value: 0.95038
- type: recall_at_3
value: 0.40133
- type: recall_at_5
value: 0.45913
- dataset:
type: mteb/cqadupstack-wordpress
name: MTEB CQADupstackWordpressRetrieval
config: default
split: test
task:
type: Retrieval
metrics:
- type: map_at_1
value: 0.24067
- type: map_at_10
value: 0.32184
- type: map_at_100
value: 0.33357
- type: map_at_1000
value: 0.33458
- type: map_at_3
value: 0.29492
- type: map_at_5
value: 0.3111
- type: mrr_at_1
value: 0.26248
- type: mrr_at_10
value: 0.34149
- type: mrr_at_100
value: 0.35189
- type: mrr_at_1000
value: 0.35251
- type: mrr_at_3
value: 0.31639
- type: mrr_at_5
value: 0.33182
- type: ndcg_at_1
value: 0.26248
- type: ndcg_at_10
value: 0.36889
- type: ndcg_at_100
value: 0.42426
- type: ndcg_at_1000
value: 0.44745
- type: ndcg_at_3
value: 0.31799
- type: ndcg_at_5
value: 0.34563
- type: precision_at_1
value: 0.26248
- type: precision_at_10
value: 0.05712
- type: precision_at_100
value: 0.00915
- type: precision_at_1000
value: 0.00123
- type: precision_at_3
value: 0.13309
- type: precision_at_5
value: 0.09649
- type: recall_at_1
value: 0.24067
- type: recall_at_10
value: 0.49344
- type: recall_at_100
value: 0.7412
- type: recall_at_1000
value: 0.91276
- type: recall_at_3
value: 0.36272
- type: recall_at_5
value: 0.4277
- dataset:
type: mteb/dbpedia
name: MTEB DBPedia
config: default
split: test
task:
type: Retrieval
metrics:
- type: map_at_1
value: 0.08651
- type: map_at_10
value: 0.17628
- type: map_at_100
value: 0.23354
- type: map_at_1000
value: 0.24827
- type: map_at_3
value: 0.1351
- type: map_at_5
value: 0.15468
- type: mrr_at_1
value: 0.645
- type: mrr_at_10
value: 0.71989
- type: mrr_at_100
value: 0.72332
- type: mrr_at_1000
value: 0.72346
- type: mrr_at_3
value: 0.7025
- type: mrr_at_5
value: 0.71275
- type: ndcg_at_1
value: 0.51375
- type: ndcg_at_10
value: 0.3596
- type: ndcg_at_100
value: 0.39878
- type: ndcg_at_1000
value: 0.47931
- type: ndcg_at_3
value: 0.41275
- type: ndcg_at_5
value: 0.38297
- type: precision_at_1
value: 0.645
- type: precision_at_10
value: 0.2745
- type: precision_at_100
value: 0.08405
- type: precision_at_1000
value: 0.01923
- type: precision_at_3
value: 0.44417
- type: precision_at_5
value: 0.366
- type: recall_at_1
value: 0.08651
- type: recall_at_10
value: 0.22416
- type: recall_at_100
value: 0.46381
- type: recall_at_1000
value: 0.71557
- type: recall_at_3
value: 0.14847
- type: recall_at_5
value: 0.1804
- dataset:
type: mteb/fever
name: MTEB FEVER
config: default
split: test
task:
type: Retrieval
metrics:
- type: map_at_1
value: 0.73211
- type: map_at_10
value: 0.81463
- type: map_at_100
value: 0.81622
- type: map_at_1000
value: 0.81634
- type: map_at_3
value: 0.805
- type: map_at_5
value: 0.81134
- type: mrr_at_1
value: 0.79088
- type: mrr_at_10
value: 0.86943
- type: mrr_at_100
value: 0.87017
- type: mrr_at_1000
value: 0.87018
- type: mrr_at_3
value: 0.86154
- type: mrr_at_5
value: 0.867
- type: ndcg_at_1
value: 0.79088
- type: ndcg_at_10
value: 0.85528
- type: ndcg_at_100
value: 0.86134
- type: ndcg_at_1000
value: 0.86367
- type: ndcg_at_3
value: 0.83943
- type: ndcg_at_5
value: 0.84878
- type: precision_at_1
value: 0.79088
- type: precision_at_10
value: 0.10132
- type: precision_at_100
value: 0.01055
- type: precision_at_1000
value: 0.00109
- type: precision_at_3
value: 0.31963
- type: precision_at_5
value: 0.19769
- type: recall_at_1
value: 0.73211
- type: recall_at_10
value: 0.92797
- type: recall_at_100
value: 0.95263
- type: recall_at_1000
value: 0.96738
- type: recall_at_3
value: 0.88328
- type: recall_at_5
value: 0.90821
- dataset:
type: mteb/fiqa
name: MTEB FiQA2018
config: default
split: test
task:
type: Retrieval
metrics:
- type: map_at_1
value: 0.18311
- type: map_at_10
value: 0.29201
- type: map_at_100
value: 0.3093
- type: map_at_1000
value: 0.31116
- type: map_at_3
value: 0.24778
- type: map_at_5
value: 0.27453
- type: mrr_at_1
value: 0.35494
- type: mrr_at_10
value: 0.44489
- type: mrr_at_100
value: 0.4532
- type: mrr_at_1000
value: 0.45369
- type: mrr_at_3
value: 0.41667
- type: mrr_at_5
value: 0.43418
- type: ndcg_at_1
value: 0.35494
- type: ndcg_at_10
value: 0.36868
- type: ndcg_at_100
value: 0.43463
- type: ndcg_at_1000
value: 0.46766
- type: ndcg_at_3
value: 0.32305
- type: ndcg_at_5
value: 0.34332
- type: precision_at_1
value: 0.35494
- type: precision_at_10
value: 0.10324
- type: precision_at_100
value: 0.01707
- type: precision_at_1000
value: 0.00229
- type: precision_at_3
value: 0.21142
- type: precision_at_5
value: 0.16327
- type: recall_at_1
value: 0.18311
- type: recall_at_10
value: 0.43881
- type: recall_at_100
value: 0.68593
- type: recall_at_1000
value: 0.8855
- type: recall_at_3
value: 0.28824
- type: recall_at_5
value: 0.36178
- dataset:
type: mteb/hotpotqa
name: MTEB HotpotQA
config: default
split: test
task:
type: Retrieval
metrics:
- type: map_at_1
value: 0.36766
- type: map_at_10
value: 0.53639
- type: map_at_100
value: 0.54532
- type: map_at_1000
value: 0.54608
- type: map_at_3
value: 0.50427
- type: map_at_5
value: 0.5245
- type: mrr_at_1
value: 0.73531
- type: mrr_at_10
value: 0.80104
- type: mrr_at_100
value: 0.80341
- type: mrr_at_1000
value: 0.80351
- type: mrr_at_3
value: 0.78949
- type: mrr_at_5
value: 0.79729
- type: ndcg_at_1
value: 0.73531
- type: ndcg_at_10
value: 0.62918
- type: ndcg_at_100
value: 0.66056
- type: ndcg_at_1000
value: 0.67554
- type: ndcg_at_3
value: 0.58247
- type: ndcg_at_5
value: 0.60905
- type: precision_at_1
value: 0.73531
- type: precision_at_10
value: 0.1302
- type: precision_at_100
value: 0.01546
- type: precision_at_1000
value: 0.00175
- type: precision_at_3
value: 0.36556
- type: precision_at_5
value: 0.24032
- type: recall_at_1
value: 0.36766
- type: recall_at_10
value: 0.65098
- type: recall_at_100
value: 0.77306
- type: recall_at_1000
value: 0.87252
- type: recall_at_3
value: 0.54835
- type: recall_at_5
value: 0.60081
- dataset:
type: mteb/msmarco
name: MTEB MSMARCO
config: default
split: dev
task:
type: Retrieval
metrics:
- type: map_at_1
value: 0.14654
- type: map_at_10
value: 0.2472
- type: map_at_100
value: 0.25994
- type: map_at_1000
value: 0.26067
- type: map_at_3
value: 0.21234
- type: map_at_5
value: 0.2319
- type: mrr_at_1
value: 0.15086
- type: mrr_at_10
value: 0.25184
- type: mrr_at_100
value: 0.26422
- type: mrr_at_1000
value: 0.26489
- type: mrr_at_3
value: 0.21731
- type: mrr_at_5
value: 0.23674
- type: ndcg_at_1
value: 0.15086
- type: ndcg_at_10
value: 0.30711
- type: ndcg_at_100
value: 0.37221
- type: ndcg_at_1000
value: 0.39133
- type: ndcg_at_3
value: 0.23567
- type: ndcg_at_5
value: 0.27066
- type: precision_at_1
value: 0.15086
- type: precision_at_10
value: 0.05132
- type: precision_at_100
value: 0.00845
- type: precision_at_1000
value: 0.00101
- type: precision_at_3
value: 0.10277
- type: precision_at_5
value: 0.07923
- type: recall_at_1
value: 0.14654
- type: recall_at_10
value: 0.49341
- type: recall_at_100
value: 0.80224
- type: recall_at_1000
value: 0.95037
- type: recall_at_3
value: 0.29862
- type: recall_at_5
value: 0.38274
- dataset:
type: mteb/nfcorpus
name: MTEB NFCorpus
config: default
split: test
task:
type: Retrieval
metrics:
- type: map_at_1
value: 0.05452
- type: map_at_10
value: 0.12758
- type: map_at_100
value: 0.1593
- type: map_at_1000
value: 0.17422
- type: map_at_3
value: 0.0945
- type: map_at_5
value: 0.1092
- type: mrr_at_1
value: 0.43963
- type: mrr_at_10
value: 0.53237
- type: mrr_at_100
value: 0.53777
- type: mrr_at_1000
value: 0.53822
- type: mrr_at_3
value: 0.51445
- type: mrr_at_5
value: 0.52466
- type: ndcg_at_1
value: 0.41486
- type: ndcg_at_10
value: 0.33737
- type: ndcg_at_100
value: 0.30886
- type: ndcg_at_1000
value: 0.40018
- type: ndcg_at_3
value: 0.39324
- type: ndcg_at_5
value: 0.36949
- type: precision_at_1
value: 0.43344
- type: precision_at_10
value: 0.24799
- type: precision_at_100
value: 0.07895
- type: precision_at_1000
value: 0.02091
- type: precision_at_3
value: 0.37152
- type: precision_at_5
value: 0.31703
- type: recall_at_1
value: 0.05452
- type: recall_at_10
value: 0.1712
- type: recall_at_100
value: 0.30719
- type: recall_at_1000
value: 0.62766
- type: recall_at_3
value: 0.10733
- type: recall_at_5
value: 0.13553
- dataset:
type: mteb/nq
name: MTEB NQ
config: default
split: test
task:
type: Retrieval
metrics:
- type: map_at_1
value: 0.29022
- type: map_at_10
value: 0.4373
- type: map_at_100
value: 0.44849
- type: map_at_1000
value: 0.44877
- type: map_at_3
value: 0.39045
- type: map_at_5
value: 0.4186
- type: mrr_at_1
value: 0.32793
- type: mrr_at_10
value: 0.46243
- type: mrr_at_100
value: 0.47083
- type: mrr_at_1000
value: 0.47101
- type: mrr_at_3
value: 0.42261
- type: mrr_at_5
value: 0.44775
- type: ndcg_at_1
value: 0.32793
- type: ndcg_at_10
value: 0.51631
- type: ndcg_at_100
value: 0.56287
- type: ndcg_at_1000
value: 0.56949
- type: ndcg_at_3
value: 0.42782
- type: ndcg_at_5
value: 0.47554
- type: precision_at_1
value: 0.32793
- type: precision_at_10
value: 0.08737
- type: precision_at_100
value: 0.01134
- type: precision_at_1000
value: 0.0012
- type: precision_at_3
value: 0.19583
- type: precision_at_5
value: 0.14484
- type: recall_at_1
value: 0.29022
- type: recall_at_10
value: 0.73325
- type: recall_at_100
value: 0.93455
- type: recall_at_1000
value: 0.98414
- type: recall_at_3
value: 0.50406
- type: recall_at_5
value: 0.6145
- dataset:
type: mteb/quora
name: MTEB QuoraRetrieval
config: default
split: test
task:
type: Retrieval
metrics:
- type: map_at_1
value: 0.68941
- type: map_at_10
value: 0.82641
- type: map_at_100
value: 0.83317
- type: map_at_1000
value: 0.83337
- type: map_at_3
value: 0.79604
- type: map_at_5
value: 0.81525
- type: mrr_at_1
value: 0.7935
- type: mrr_at_10
value: 0.85969
- type: mrr_at_100
value: 0.86094
- type: mrr_at_1000
value: 0.86095
- type: mrr_at_3
value: 0.84852
- type: mrr_at_5
value: 0.85627
- type: ndcg_at_1
value: 0.7936
- type: ndcg_at_10
value: 0.86687
- type: ndcg_at_100
value: 0.88094
- type: ndcg_at_1000
value: 0.88243
- type: ndcg_at_3
value: 0.83538
- type: ndcg_at_5
value: 0.85308
- type: precision_at_1
value: 0.7936
- type: precision_at_10
value: 0.13145
- type: precision_at_100
value: 0.01517
- type: precision_at_1000
value: 0.00156
- type: precision_at_3
value: 0.36353
- type: precision_at_5
value: 0.24044
- type: recall_at_1
value: 0.68941
- type: recall_at_10
value: 0.94407
- type: recall_at_100
value: 0.99226
- type: recall_at_1000
value: 0.99958
- type: recall_at_3
value: 0.85502
- type: recall_at_5
value: 0.90372
- dataset:
type: mteb/scidocs
name: MTEB SCIDOCS
config: default
split: test
task:
type: Retrieval
metrics:
- type: map_at_1
value: 0.04988
- type: map_at_10
value: 0.13553
- type: map_at_100
value: 0.16136
- type: map_at_1000
value: 0.16512
- type: map_at_3
value: 0.09439
- type: map_at_5
value: 0.1146
- type: mrr_at_1
value: 0.246
- type: mrr_at_10
value: 0.36792
- type: mrr_at_100
value: 0.37973
- type: mrr_at_1000
value: 0.38011
- type: mrr_at_3
value: 0.33117
- type: mrr_at_5
value: 0.35172
- type: ndcg_at_1
value: 0.246
- type: ndcg_at_10
value: 0.22542
- type: ndcg_at_100
value: 0.32326
- type: ndcg_at_1000
value: 0.3828
- type: ndcg_at_3
value: 0.20896
- type: ndcg_at_5
value: 0.18497
- type: precision_at_1
value: 0.246
- type: precision_at_10
value: 0.1194
- type: precision_at_100
value: 0.02616
- type: precision_at_1000
value: 0.00404
- type: precision_at_3
value: 0.198
- type: precision_at_5
value: 0.1654
- type: recall_at_1
value: 0.04988
- type: recall_at_10
value: 0.24212
- type: recall_at_100
value: 0.53105
- type: recall_at_1000
value: 0.82022
- type: recall_at_3
value: 0.12047
- type: recall_at_5
value: 0.16777
- dataset:
type: mteb/scifact
name: MTEB SciFact
config: default
split: test
task:
type: Retrieval
metrics:
- type: map_at_1
value: 0.56578
- type: map_at_10
value: 0.66725
- type: map_at_100
value: 0.67379
- type: map_at_1000
value: 0.674
- type: map_at_3
value: 0.63416
- type: map_at_5
value: 0.6577
- type: mrr_at_1
value: 0.59333
- type: mrr_at_10
value: 0.67533
- type: mrr_at_100
value: 0.68062
- type: mrr_at_1000
value: 0.68082
- type: mrr_at_3
value: 0.64944
- type: mrr_at_5
value: 0.66928
- type: ndcg_at_1
value: 0.59333
- type: ndcg_at_10
value: 0.7127
- type: ndcg_at_100
value: 0.73889
- type: ndcg_at_1000
value: 0.7441
- type: ndcg_at_3
value: 0.65793
- type: ndcg_at_5
value: 0.69429
- type: precision_at_1
value: 0.59333
- type: precision_at_10
value: 0.096
- type: precision_at_100
value: 0.01087
- type: precision_at_1000
value: 0.00113
- type: precision_at_3
value: 0.25556
- type: precision_at_5
value: 0.17667
- type: recall_at_1
value: 0.56578
- type: recall_at_10
value: 0.842
- type: recall_at_100
value: 0.95667
- type: recall_at_1000
value: 0.99667
- type: recall_at_3
value: 0.70072
- type: recall_at_5
value: 0.79011
- dataset:
type: mteb/touche2020
name: MTEB Touche2020
config: default
split: test
task:
type: Retrieval
metrics:
- type: map_at_1
value: 0.01976
- type: map_at_10
value: 0.09688
- type: map_at_100
value: 0.15117
- type: map_at_1000
value: 0.16769
- type: map_at_3
value: 0.04589
- type: map_at_5
value: 0.06556
- type: mrr_at_1
value: 0.26531
- type: mrr_at_10
value: 0.43863
- type: mrr_at_100
value: 0.44767
- type: mrr_at_1000
value: 0.44767
- type: mrr_at_3
value: 0.39116
- type: mrr_at_5
value: 0.41156
- type: ndcg_at_1
value: 0.23469
- type: ndcg_at_10
value: 0.24029
- type: ndcg_at_100
value: 0.34425
- type: ndcg_at_1000
value: 0.46907
- type: ndcg_at_3
value: 0.25522
- type: ndcg_at_5
value: 0.24333
- type: precision_at_1
value: 0.26531
- type: precision_at_10
value: 0.22449
- type: precision_at_100
value: 0.07122
- type: precision_at_1000
value: 0.01527
- type: precision_at_3
value: 0.27891
- type: precision_at_5
value: 0.25714
- type: recall_at_1
value: 0.01976
- type: recall_at_10
value: 0.16633
- type: recall_at_100
value: 0.4561
- type: recall_at_1000
value: 0.82481
- type: recall_at_3
value: 0.06101
- type: recall_at_5
value: 0.0968
- dataset:
type: mteb/trec-covid
name: MTEB TRECCOVID
config: default
split: test
task:
type: Retrieval
metrics:
- type: map_at_1
value: 0.00211
- type: map_at_10
value: 0.01526
- type: map_at_100
value: 0.08863
- type: map_at_1000
value: 0.23162
- type: map_at_3
value: 0.00555
- type: map_at_5
value: 0.00873
- type: mrr_at_1
value: 0.76
- type: mrr_at_10
value: 0.8485
- type: mrr_at_100
value: 0.8485
- type: mrr_at_1000
value: 0.8485
- type: mrr_at_3
value: 0.84
- type: mrr_at_5
value: 0.844
- type: ndcg_at_1
value: 0.7
- type: ndcg_at_10
value: 0.63098
- type: ndcg_at_100
value: 0.49847
- type: ndcg_at_1000
value: 0.48395
- type: ndcg_at_3
value: 0.68704
- type: ndcg_at_5
value: 0.67533
- type: precision_at_1
value: 0.76
- type: precision_at_10
value: 0.66
- type: precision_at_100
value: 0.5134
- type: precision_at_1000
value: 0.2168
- type: precision_at_3
value: 0.72667
- type: precision_at_5
value: 0.716
- type: recall_at_1
value: 0.00211
- type: recall_at_10
value: 0.01748
- type: recall_at_100
value: 0.12448
- type: recall_at_1000
value: 0.46795
- type: recall_at_3
value: 0.00593
- type: recall_at_5
value: 0.00962
pipeline_tag: sentence-similarity
---
# Granite-Embedding-30m-English
**Model Summary:**
Granite-Embedding-30m-English is a 30M parameter model from the Granite Embeddings suite that can be used to generate high-quality text embeddings. This model produces embedding vectors of size 384 and is trained using a combination of open source relevance-pair datasets with permissive, enterprise-friendly licenses, and IBM collected and generated datasets. This model is developed using retrieval-oriented pretraining, contrastive finetuning, knowledge distillation, and model merging for improved performance.
- **Developers:** Granite Embedding Team, IBM
- **GitHub Repository:**
- **Website**: [Granite Docs](https://www.ibm.com/granite/docs/)
- **Paper:**
- **Release Date**: December 18th, 2024
- **License:** [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0)
**Supported Languages:**
English.
**Intended use:**
The model is designed to produce fixed-length vector representations for a given text, which can be used for text similarity, retrieval, and search applications.
**Usage with Sentence Transformers:**
The model is compatible with the Sentence Transformers library and is very easy to use:
First, install the Sentence Transformers library:
```shell
pip install sentence_transformers
```
The model can then be used to encode pairs of text and find the similarity between their representations
```python
from sentence_transformers import SentenceTransformer, util

# Hugging Face Hub identifier of the embedding model
model_path = "ibm-granite/granite-embedding-30m-english"

# Load the checkpoint through the Sentence Transformers wrapper
model = SentenceTransformer(model_path)

# Example search queries
input_queries = [
    ' Who made the song My achy breaky heart? ',
    'summit define',
]

# Candidate passages to compare against the queries
input_passages = [
    "Achy Breaky Heart is a country song written by Don Von Tress. Originally titled Don't Tell My Heart and performed by The Marcy Brothers in 1991. ",
    "Definition of summit for English Language Learners. : 1 the highest point of a mountain : the top of a mountain. : 2 the highest level. : 3 a meeting or series of meetings between the leaders of two or more governments.",
]

# Embed the queries and the passages separately
query_embeddings = model.encode(input_queries)
passage_embeddings = model.encode(input_passages)

# Cosine similarity of every query against every passage
similarity_scores = util.cos_sim(query_embeddings, passage_embeddings)
print(similarity_scores)
```
**Usage with Hugging Face Transformers:**
This is a simple example of how to use the Granite-Embedding-30m-English model with the Transformers library and PyTorch.
First, install the required libraries
```shell
pip install transformers torch
```
The model can then be used to encode pairs of text
```python
import torch
from transformers import AutoModel, AutoTokenizer

model_path = "ibm-granite/granite-embedding-30m-english"

# Load the model and tokenizer
model = AutoModel.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Switch to inference mode (disables dropout)
model.eval()

input_queries = [
    ' Who made the song My achy breaky heart? ',
    'summit define',
]

# Tokenize inputs: pad to the longest query in the batch, truncate over-long
# inputs, and return PyTorch tensors
tokenized_queries = tokenizer(input_queries, padding=True, truncation=True, return_tensors='pt')

# Encode queries; no gradients are needed for inference
with torch.no_grad():
    model_output = model(**tokenized_queries)
    # Perform pooling. granite-embedding-30m-english uses CLS pooling:
    # take the vector at position 0 of each sequence from the first model output
    query_embeddings = model_output[0][:, 0]

# Normalize each embedding along the feature dimension
query_embeddings = torch.nn.functional.normalize(query_embeddings, dim=1)
```
**Evaluation:**
Granite-Embedding-30M-English is twice as fast as other models with similar embedding dimensions, while maintaining competitive performance. The performance of the Granite-Embedding-30M-English model on the MTEB Retrieval (i.e., BEIR) and code retrieval (CoIR) benchmarks is reported below. MTEB Retrieval (14) is the average BEIR performance excluding the MS-MARCO task; it is reported because, unlike all other models, Granite-Embedding-30M-English was not trained on MS-MARCO due to the dataset's non-commercial license. The average time required to encode and retrieve per query is also reported.
| Model | Parameters (M)| Embedding Dimension | MTEB Retrieval (15) | MTEB Retrieval (14) | CoIR (10) | Retrieval Time (seconds/query)|
|---------------------------------|-------------:|--------------------:|--------------------:|---------------------:|----------:|------------------------------:|
|granite-embedding-30m-english |30 |384 |49.1 |50.4 |47.0 | 0.16 |
**Model Architecture:**
Granite-Embedding-30m-English is based on an encoder-only, RoBERTa-like transformer architecture, trained internally at IBM Research.
| Model | granite-embedding-30m-english | granite-embedding-125m-english | granite-embedding-107m-multilingual | granite-embedding-278m-multilingual |
| :--------- | :-------:| :--------: | :-----:| :-----:|
| Embedding size | **384** | 768 | 384 | 768 |
| Number of layers | **6** | 12 | 6 | 12 |
| Number of attention heads | **12** | 12 | 12 | 12 |
| Intermediate size | **1536** | 3072 | 1536 | 3072 |
| Activation Function | **GeLU** | GeLU | GeLU | GeLU |
| Vocabulary Size | **50265**| 50265 | 250002 | 250002 |
| Max. Sequence Length | **512** | 512 | 512 | 512 |
| # Parameters | **30M** | 125M | 107M | 278M |
**Training Data:**
Overall, the training data consists of four key sources: (1) unsupervised title-body paired data scraped from the web, (2) publicly available paired data with permissive, enterprise-friendly licenses, (3) IBM-internal paired data targeting specific technical domains, and (4) IBM-generated synthetic data. The data is listed below:
| **Dataset** | **Num. Pairs** |
|----------------------------------------------------|:---------------:|
| SPECTER citation triplets | 684,100 |
| Stack Exchange Duplicate questions (titles) | 304,525 |
| Stack Exchange Duplicate questions (bodies) | 250,519 |
| Stack Exchange Duplicate questions (titles+bodies) | 250,460 |
| Natural Questions (NQ) | 100,231 |
| SQuAD2.0 | 87,599 |
| PAQ (Question, Answer) pairs | 64,371,441 |
| Stack Exchange (Title, Answer) pairs | 4,067,139 |
| Stack Exchange (Title, Body) pairs | 23,978,013 |
| Stack Exchange (Title+Body, Answer) pairs | 187,195 |
| S2ORC Citation pairs (Titles) | 52,603,982 |
| S2ORC (Title, Abstract) | 41,769,185 |
| S2ORC (Citations, abstracts) | 52,603,982 |
| WikiAnswers Duplicate question pairs | 77,427,422 |
| SearchQA | 582,261 |
| HotpotQA | 85,000 |
| Fever | 109,810 |
| Arxiv | 2,358,545 |
| Wikipedia | 20,745,403 |
| PubMed | 20,000,000 |
| Miracl En Pairs | 9,016 |
| DBPedia Title-Body Pairs | 4,635,922 |
| Synthetic: Query-Wikipedia Passage | 1,879,093 |
| Synthetic: Fact Verification | 9,888 |
| IBM Internal Triples | 40,290 |
| IBM Internal Title-Body Pairs | 1,524,586 |
Notably, we do not use the popular MS-MARCO retrieval dataset in our training corpus due to its non-commercial license, while other open-source models train on this dataset due to its high quality.
**Infrastructure:**
We train Granite Embedding Models using IBM's computing cluster, Cognitive Compute Cluster, which is outfitted with NVIDIA A100 80GB GPUs. This cluster provides a scalable and efficient infrastructure for training our models over multiple GPUs.
**Ethical Considerations and Limitations:**
The data used to train the base language model was filtered to remove text containing hate, abuse, and profanity. Granite-Embedding-30m-English is trained only for English texts, and has a context length of 512 tokens (longer texts will be truncated to this size).
<!-- ## Citation
```
@misc{granite-embedding-models,
author = {author 1, author2, ...},
title = {},
journal = {},
volume = {},
year = {2024},
url = {https://arxiv.org/abs/0000.00000},
}
``` -->