Wendy-Fly committed on
Commit
04738a0
·
verified ·
1 Parent(s): 34f76b7

Upload dataset_info.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. dataset_info.json +688 -0
dataset_info.json ADDED
@@ -0,0 +1,688 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "identity": {
3
+ "file_name": "identity.json"
4
+ },
5
+ "alpaca_en_demo": {
6
+ "file_name": "alpaca_en_demo.json"
7
+ },
8
+ "alpaca_zh_demo": {
9
+ "file_name": "alpaca_zh_demo.json"
10
+ },
11
+ "glaive_toolcall_en_demo": {
12
+ "file_name": "glaive_toolcall_en_demo.json",
13
+ "formatting": "sharegpt",
14
+ "columns": {
15
+ "messages": "conversations",
16
+ "tools": "tools"
17
+ }
18
+ },
19
+ "glaive_toolcall_zh_demo": {
20
+ "file_name": "glaive_toolcall_zh_demo.json",
21
+ "formatting": "sharegpt",
22
+ "columns": {
23
+ "messages": "conversations",
24
+ "tools": "tools"
25
+ }
26
+ },
27
+ "mllm_demo": {
28
+ "file_name": "mllm_demo.json",
29
+ "formatting": "sharegpt",
30
+ "columns": {
31
+ "messages": "messages",
32
+ "images": "images"
33
+ },
34
+ "tags": {
35
+ "role_tag": "role",
36
+ "content_tag": "content",
37
+ "user_tag": "user",
38
+ "assistant_tag": "assistant"
39
+ }
40
+ },
41
+ "mllm_audio_demo": {
42
+ "file_name": "mllm_audio_demo.json",
43
+ "formatting": "sharegpt",
44
+ "columns": {
45
+ "messages": "messages",
46
+ "audios": "audios"
47
+ },
48
+ "tags": {
49
+ "role_tag": "role",
50
+ "content_tag": "content",
51
+ "user_tag": "user",
52
+ "assistant_tag": "assistant"
53
+ }
54
+ },
55
+ "mllm_video_demo": {
56
+ "file_name": "mllm_video_demo.json",
57
+ "formatting": "sharegpt",
58
+ "columns": {
59
+ "messages": "messages",
60
+ "videos": "videos"
61
+ },
62
+ "tags": {
63
+ "role_tag": "role",
64
+ "content_tag": "content",
65
+ "user_tag": "user",
66
+ "assistant_tag": "assistant"
67
+ }
68
+ },
69
+ "alpaca_en": {
70
+ "hf_hub_url": "llamafactory/alpaca_en",
71
+ "ms_hub_url": "llamafactory/alpaca_en",
72
+ "om_hub_url": "HaM/alpaca_en"
73
+ },
74
+ "alpaca_zh": {
75
+ "hf_hub_url": "llamafactory/alpaca_zh",
76
+ "ms_hub_url": "llamafactory/alpaca_zh"
77
+ },
78
+ "alpaca_gpt4_en": {
79
+ "hf_hub_url": "llamafactory/alpaca_gpt4_en",
80
+ "ms_hub_url": "llamafactory/alpaca_gpt4_en"
81
+ },
82
+ "alpaca_gpt4_zh": {
83
+ "hf_hub_url": "llamafactory/alpaca_gpt4_zh",
84
+ "ms_hub_url": "llamafactory/alpaca_gpt4_zh",
85
+ "om_hub_url": "State_Cloud/alpaca-gpt4-data-zh"
86
+ },
87
+ "glaive_toolcall_en": {
88
+ "hf_hub_url": "llamafactory/glaive_toolcall_en",
89
+ "formatting": "sharegpt",
90
+ "columns": {
91
+ "messages": "conversations",
92
+ "tools": "tools"
93
+ }
94
+ },
95
+ "glaive_toolcall_zh": {
96
+ "hf_hub_url": "llamafactory/glaive_toolcall_zh",
97
+ "formatting": "sharegpt",
98
+ "columns": {
99
+ "messages": "conversations",
100
+ "tools": "tools"
101
+ }
102
+ },
103
+ "lima": {
104
+ "hf_hub_url": "llamafactory/lima",
105
+ "formatting": "sharegpt"
106
+ },
107
+ "guanaco": {
108
+ "hf_hub_url": "JosephusCheung/GuanacoDataset",
109
+ "ms_hub_url": "AI-ModelScope/GuanacoDataset"
110
+ },
111
+ "belle_2m": {
112
+ "hf_hub_url": "BelleGroup/train_2M_CN",
113
+ "ms_hub_url": "AI-ModelScope/train_2M_CN"
114
+ },
115
+ "belle_1m": {
116
+ "hf_hub_url": "BelleGroup/train_1M_CN",
117
+ "ms_hub_url": "AI-ModelScope/train_1M_CN"
118
+ },
119
+ "belle_0.5m": {
120
+ "hf_hub_url": "BelleGroup/train_0.5M_CN",
121
+ "ms_hub_url": "AI-ModelScope/train_0.5M_CN"
122
+ },
123
+ "belle_dialog": {
124
+ "hf_hub_url": "BelleGroup/generated_chat_0.4M",
125
+ "ms_hub_url": "AI-ModelScope/generated_chat_0.4M"
126
+ },
127
+ "belle_math": {
128
+ "hf_hub_url": "BelleGroup/school_math_0.25M",
129
+ "ms_hub_url": "AI-ModelScope/school_math_0.25M"
130
+ },
131
+ "belle_multiturn": {
132
+ "script_url": "belle_multiturn",
133
+ "formatting": "sharegpt"
134
+ },
135
+ "ultra_chat": {
136
+ "script_url": "ultra_chat",
137
+ "formatting": "sharegpt"
138
+ },
139
+ "open_platypus": {
140
+ "hf_hub_url": "garage-bAInd/Open-Platypus",
141
+ "ms_hub_url": "AI-ModelScope/Open-Platypus"
142
+ },
143
+ "codealpaca": {
144
+ "hf_hub_url": "sahil2801/CodeAlpaca-20k",
145
+ "ms_hub_url": "AI-ModelScope/CodeAlpaca-20k"
146
+ },
147
+ "alpaca_cot": {
148
+ "hf_hub_url": "QingyiSi/Alpaca-CoT",
149
+ "ms_hub_url": "AI-ModelScope/Alpaca-CoT"
150
+ },
151
+ "openorca": {
152
+ "hf_hub_url": "Open-Orca/OpenOrca",
153
+ "ms_hub_url": "AI-ModelScope/OpenOrca",
154
+ "columns": {
155
+ "prompt": "question",
156
+ "response": "response",
157
+ "system": "system_prompt"
158
+ }
159
+ },
160
+ "slimorca": {
161
+ "hf_hub_url": "Open-Orca/SlimOrca",
162
+ "formatting": "sharegpt"
163
+ },
164
+ "mathinstruct": {
165
+ "hf_hub_url": "TIGER-Lab/MathInstruct",
166
+ "ms_hub_url": "AI-ModelScope/MathInstruct",
167
+ "columns": {
168
+ "prompt": "instruction",
169
+ "response": "output"
170
+ }
171
+ },
172
+ "firefly": {
173
+ "hf_hub_url": "YeungNLP/firefly-train-1.1M",
174
+ "columns": {
175
+ "prompt": "input",
176
+ "response": "target"
177
+ }
178
+ },
179
+ "wikiqa": {
180
+ "hf_hub_url": "wiki_qa",
181
+ "columns": {
182
+ "prompt": "question",
183
+ "response": "answer"
184
+ }
185
+ },
186
+ "webqa": {
187
+ "hf_hub_url": "suolyer/webqa",
188
+ "ms_hub_url": "AI-ModelScope/webqa",
189
+ "columns": {
190
+ "prompt": "input",
191
+ "response": "output"
192
+ }
193
+ },
194
+ "webnovel": {
195
+ "hf_hub_url": "zxbsmk/webnovel_cn",
196
+ "ms_hub_url": "AI-ModelScope/webnovel_cn"
197
+ },
198
+ "nectar_sft": {
199
+ "hf_hub_url": "AstraMindAI/SFT-Nectar",
200
+ "ms_hub_url": "AI-ModelScope/SFT-Nectar"
201
+ },
202
+ "deepctrl": {
203
+ "ms_hub_url": "deepctrl/deepctrl-sft-data"
204
+ },
205
+ "adgen_train": {
206
+ "hf_hub_url": "HasturOfficial/adgen",
207
+ "ms_hub_url": "AI-ModelScope/adgen",
208
+ "split": "train",
209
+ "columns": {
210
+ "prompt": "content",
211
+ "response": "summary"
212
+ }
213
+ },
214
+ "adgen_eval": {
215
+ "hf_hub_url": "HasturOfficial/adgen",
216
+ "ms_hub_url": "AI-ModelScope/adgen",
217
+ "split": "validation",
218
+ "columns": {
219
+ "prompt": "content",
220
+ "response": "summary"
221
+ }
222
+ },
223
+ "sharegpt_hyper": {
224
+ "hf_hub_url": "totally-not-an-llm/sharegpt-hyperfiltered-3k",
225
+ "formatting": "sharegpt"
226
+ },
227
+ "sharegpt4": {
228
+ "hf_hub_url": "shibing624/sharegpt_gpt4",
229
+ "ms_hub_url": "AI-ModelScope/sharegpt_gpt4",
230
+ "formatting": "sharegpt"
231
+ },
232
+ "ultrachat_200k": {
233
+ "hf_hub_url": "HuggingFaceH4/ultrachat_200k",
234
+ "ms_hub_url": "AI-ModelScope/ultrachat_200k",
235
+ "formatting": "sharegpt",
236
+ "columns": {
237
+ "messages": "messages"
238
+ },
239
+ "tags": {
240
+ "role_tag": "role",
241
+ "content_tag": "content",
242
+ "user_tag": "user",
243
+ "assistant_tag": "assistant"
244
+ }
245
+ },
246
+ "agent_instruct": {
247
+ "hf_hub_url": "THUDM/AgentInstruct",
248
+ "ms_hub_url": "ZhipuAI/AgentInstruct",
249
+ "formatting": "sharegpt"
250
+ },
251
+ "lmsys_chat": {
252
+ "hf_hub_url": "lmsys/lmsys-chat-1m",
253
+ "ms_hub_url": "AI-ModelScope/lmsys-chat-1m",
254
+ "formatting": "sharegpt",
255
+ "columns": {
256
+ "messages": "conversation"
257
+ },
258
+ "tags": {
259
+ "role_tag": "role",
260
+ "content_tag": "content",
261
+ "user_tag": "human",
262
+ "assistant_tag": "assistant"
263
+ }
264
+ },
265
+ "evol_instruct": {
266
+ "hf_hub_url": "WizardLM/WizardLM_evol_instruct_V2_196k",
267
+ "ms_hub_url": "AI-ModelScope/WizardLM_evol_instruct_V2_196k",
268
+ "formatting": "sharegpt"
269
+ },
270
+ "glaive_toolcall_100k": {
271
+ "hf_hub_url": "hiyouga/glaive-function-calling-v2-sharegpt",
272
+ "formatting": "sharegpt",
273
+ "columns": {
274
+ "messages": "conversations",
275
+ "tools": "tools"
276
+ }
277
+ },
278
+ "cosmopedia": {
279
+ "hf_hub_url": "HuggingFaceTB/cosmopedia",
280
+ "columns": {
281
+ "prompt": "prompt",
282
+ "response": "text"
283
+ }
284
+ },
285
+ "stem_zh": {
286
+ "hf_hub_url": "hfl/stem_zh_instruction"
287
+ },
288
+ "ruozhiba_gpt4": {
289
+ "hf_hub_url": "hfl/ruozhiba_gpt4_turbo"
290
+ },
291
+ "neo_sft": {
292
+ "hf_hub_url": "m-a-p/neo_sft_phase2",
293
+ "formatting": "sharegpt"
294
+ },
295
+ "magpie_pro_300k": {
296
+ "hf_hub_url": "Magpie-Align/Magpie-Pro-300K-Filtered",
297
+ "formatting": "sharegpt"
298
+ },
299
+ "magpie_ultra": {
300
+ "hf_hub_url": "argilla/magpie-ultra-v0.1",
301
+ "columns": {
302
+ "prompt": "instruction",
303
+ "response": "response"
304
+ }
305
+ },
306
+ "web_instruct": {
307
+ "hf_hub_url": "TIGER-Lab/WebInstructSub",
308
+ "columns": {
309
+ "prompt": "question",
310
+ "response": "answer"
311
+ }
312
+ },
313
+ "openo1_sft": {
314
+ "hf_hub_url": "llamafactory/OpenO1-SFT",
315
+ "ms_hub_url": "llamafactory/OpenO1-SFT",
316
+ "columns": {
317
+ "prompt": "prompt",
318
+ "response": "response"
319
+ }
320
+ },
321
+ "open_thoughts": {
322
+ "hf_hub_url": "llamafactory/OpenThoughts-114k",
323
+ "formatting": "sharegpt",
324
+ "columns": {
325
+ "messages": "messages"
326
+ },
327
+ "tags": {
328
+ "role_tag": "role",
329
+ "content_tag": "content",
330
+ "user_tag": "user",
331
+ "assistant_tag": "assistant",
332
+ "system_tag": "system"
333
+ }
334
+ },
335
+ "open_r1_math": {
336
+ "hf_hub_url": "llamafactory/OpenR1-Math-94k",
337
+ "formatting": "sharegpt",
338
+ "columns": {
339
+ "messages": "messages"
340
+ },
341
+ "tags": {
342
+ "role_tag": "role",
343
+ "content_tag": "content",
344
+ "user_tag": "user",
345
+ "assistant_tag": "assistant",
346
+ "system_tag": "system"
347
+ }
348
+ },
349
+ "chinese_r1_distill": {
350
+ "hf_hub_url": "Congliu/Chinese-DeepSeek-R1-Distill-data-110k-SFT",
351
+ "ms_hub_url": "liucong/Chinese-DeepSeek-R1-Distill-data-110k-SFT"
352
+ },
353
+ "llava_1k_en": {
354
+ "hf_hub_url": "BUAADreamer/llava-en-zh-2k",
355
+ "subset": "en",
356
+ "formatting": "sharegpt",
357
+ "columns": {
358
+ "messages": "messages",
359
+ "images": "images"
360
+ },
361
+ "tags": {
362
+ "role_tag": "role",
363
+ "content_tag": "content",
364
+ "user_tag": "user",
365
+ "assistant_tag": "assistant"
366
+ }
367
+ },
368
+ "llava_1k_zh": {
369
+ "hf_hub_url": "BUAADreamer/llava-en-zh-2k",
370
+ "subset": "zh",
371
+ "formatting": "sharegpt",
372
+ "columns": {
373
+ "messages": "messages",
374
+ "images": "images"
375
+ },
376
+ "tags": {
377
+ "role_tag": "role",
378
+ "content_tag": "content",
379
+ "user_tag": "user",
380
+ "assistant_tag": "assistant"
381
+ }
382
+ },
383
+ "llava_150k_en": {
384
+ "hf_hub_url": "BUAADreamer/llava-en-zh-300k",
385
+ "subset": "en",
386
+ "formatting": "sharegpt",
387
+ "columns": {
388
+ "messages": "messages",
389
+ "images": "images"
390
+ },
391
+ "tags": {
392
+ "role_tag": "role",
393
+ "content_tag": "content",
394
+ "user_tag": "user",
395
+ "assistant_tag": "assistant"
396
+ }
397
+ },
398
+ "llava_150k_zh": {
399
+ "hf_hub_url": "BUAADreamer/llava-en-zh-300k",
400
+ "subset": "zh",
401
+ "formatting": "sharegpt",
402
+ "columns": {
403
+ "messages": "messages",
404
+ "images": "images"
405
+ },
406
+ "tags": {
407
+ "role_tag": "role",
408
+ "content_tag": "content",
409
+ "user_tag": "user",
410
+ "assistant_tag": "assistant"
411
+ }
412
+ },
413
+ "pokemon_cap": {
414
+ "hf_hub_url": "llamafactory/pokemon-gpt4o-captions",
415
+ "formatting": "sharegpt",
416
+ "columns": {
417
+ "messages": "conversations",
418
+ "images": "images"
419
+ }
420
+ },
421
+ "mllm_pt_demo": {
422
+ "hf_hub_url": "BUAADreamer/mllm_pt_demo",
423
+ "formatting": "sharegpt",
424
+ "columns": {
425
+ "messages": "messages",
426
+ "images": "images"
427
+ },
428
+ "tags": {
429
+ "role_tag": "role",
430
+ "content_tag": "content",
431
+ "user_tag": "user",
432
+ "assistant_tag": "assistant"
433
+ }
434
+ },
435
+ "oasst_de": {
436
+ "hf_hub_url": "mayflowergmbh/oasst_de"
437
+ },
438
+ "dolly_15k_de": {
439
+ "hf_hub_url": "mayflowergmbh/dolly-15k_de"
440
+ },
441
+ "alpaca-gpt4_de": {
442
+ "hf_hub_url": "mayflowergmbh/alpaca-gpt4_de"
443
+ },
444
+ "openschnabeltier_de": {
445
+ "hf_hub_url": "mayflowergmbh/openschnabeltier_de"
446
+ },
447
+ "evol_instruct_de": {
448
+ "hf_hub_url": "mayflowergmbh/evol-instruct_de"
449
+ },
450
+ "dolphin_de": {
451
+ "hf_hub_url": "mayflowergmbh/dolphin_de"
452
+ },
453
+ "booksum_de": {
454
+ "hf_hub_url": "mayflowergmbh/booksum_de"
455
+ },
456
+ "airoboros_de": {
457
+ "hf_hub_url": "mayflowergmbh/airoboros-3.0_de"
458
+ },
459
+ "ultrachat_de": {
460
+ "hf_hub_url": "mayflowergmbh/ultra-chat_de"
461
+ },
462
+ "dpo_en_demo": {
463
+ "file_name": "dpo_en_demo.json",
464
+ "ranking": true,
465
+ "formatting": "sharegpt",
466
+ "columns": {
467
+ "messages": "conversations",
468
+ "chosen": "chosen",
469
+ "rejected": "rejected"
470
+ }
471
+ },
472
+ "dpo_zh_demo": {
473
+ "file_name": "dpo_zh_demo.json",
474
+ "ranking": true,
475
+ "formatting": "sharegpt",
476
+ "columns": {
477
+ "messages": "conversations",
478
+ "chosen": "chosen",
479
+ "rejected": "rejected"
480
+ }
481
+ },
482
+ "dpo_mix_en": {
483
+ "hf_hub_url": "llamafactory/DPO-En-Zh-20k",
484
+ "subset": "en",
485
+ "ranking": true,
486
+ "formatting": "sharegpt",
487
+ "columns": {
488
+ "messages": "conversations",
489
+ "chosen": "chosen",
490
+ "rejected": "rejected"
491
+ }
492
+ },
493
+ "dpo_mix_zh": {
494
+ "hf_hub_url": "llamafactory/DPO-En-Zh-20k",
495
+ "subset": "zh",
496
+ "ranking": true,
497
+ "formatting": "sharegpt",
498
+ "columns": {
499
+ "messages": "conversations",
500
+ "chosen": "chosen",
501
+ "rejected": "rejected"
502
+ }
503
+ },
504
+ "ultrafeedback": {
505
+ "hf_hub_url": "llamafactory/ultrafeedback_binarized",
506
+ "ms_hub_url": "llamafactory/ultrafeedback_binarized",
507
+ "ranking": true,
508
+ "columns": {
509
+ "prompt": "instruction",
510
+ "chosen": "chosen",
511
+ "rejected": "rejected"
512
+ }
513
+ },
514
+ "rlhf_v": {
515
+ "hf_hub_url": "llamafactory/RLHF-V",
516
+ "ranking": true,
517
+ "formatting": "sharegpt",
518
+ "columns": {
519
+ "messages": "conversations",
520
+ "chosen": "chosen",
521
+ "rejected": "rejected",
522
+ "images": "images"
523
+ }
524
+ },
525
+ "vlfeedback": {
526
+ "hf_hub_url": "Zhihui/VLFeedback",
527
+ "ranking": true,
528
+ "formatting": "sharegpt",
529
+ "columns": {
530
+ "messages": "conversations",
531
+ "chosen": "chosen",
532
+ "rejected": "rejected",
533
+ "images": "images"
534
+ }
535
+ },
536
+ "orca_pairs": {
537
+ "hf_hub_url": "Intel/orca_dpo_pairs",
538
+ "ranking": true,
539
+ "columns": {
540
+ "prompt": "question",
541
+ "chosen": "chosen",
542
+ "rejected": "rejected",
543
+ "system": "system"
544
+ }
545
+ },
546
+ "hh_rlhf_en": {
547
+ "script_url": "hh_rlhf_en",
548
+ "ranking": true,
549
+ "columns": {
550
+ "prompt": "instruction",
551
+ "chosen": "chosen",
552
+ "rejected": "rejected",
553
+ "history": "history"
554
+ }
555
+ },
556
+ "nectar_rm": {
557
+ "hf_hub_url": "AstraMindAI/RLAIF-Nectar",
558
+ "ms_hub_url": "AI-ModelScope/RLAIF-Nectar",
559
+ "ranking": true
560
+ },
561
+ "orca_dpo_de": {
562
+ "hf_hub_url": "mayflowergmbh/intel_orca_dpo_pairs_de",
563
+ "ranking": true
564
+ },
565
+ "kto_en_demo": {
566
+ "file_name": "kto_en_demo.json",
567
+ "formatting": "sharegpt",
568
+ "columns": {
569
+ "messages": "messages",
570
+ "kto_tag": "label"
571
+ },
572
+ "tags": {
573
+ "role_tag": "role",
574
+ "content_tag": "content",
575
+ "user_tag": "user",
576
+ "assistant_tag": "assistant"
577
+ }
578
+ },
579
+ "kto_mix_en": {
580
+ "hf_hub_url": "argilla/kto-mix-15k",
581
+ "formatting": "sharegpt",
582
+ "columns": {
583
+ "messages": "completion",
584
+ "kto_tag": "label"
585
+ },
586
+ "tags": {
587
+ "role_tag": "role",
588
+ "content_tag": "content",
589
+ "user_tag": "user",
590
+ "assistant_tag": "assistant"
591
+ }
592
+ },
593
+ "ultrafeedback_kto": {
594
+ "hf_hub_url": "argilla/ultrafeedback-binarized-preferences-cleaned-kto",
595
+ "ms_hub_url": "AI-ModelScope/ultrafeedback-binarized-preferences-cleaned-kto",
596
+ "columns": {
597
+ "prompt": "prompt",
598
+ "response": "completion",
599
+ "kto_tag": "label"
600
+ }
601
+ },
602
+ "wiki_demo": {
603
+ "file_name": "wiki_demo.txt",
604
+ "columns": {
605
+ "prompt": "text"
606
+ }
607
+ },
608
+ "c4_demo": {
609
+ "file_name": "c4_demo.json",
610
+ "columns": {
611
+ "prompt": "text"
612
+ }
613
+ },
614
+ "refinedweb": {
615
+ "hf_hub_url": "tiiuae/falcon-refinedweb",
616
+ "columns": {
617
+ "prompt": "content"
618
+ }
619
+ },
620
+ "redpajama_v2": {
621
+ "hf_hub_url": "togethercomputer/RedPajama-Data-V2",
622
+ "columns": {
623
+ "prompt": "raw_content"
624
+ },
625
+ "subset": "default"
626
+ },
627
+ "wikipedia_en": {
628
+ "hf_hub_url": "olm/olm-wikipedia-20221220",
629
+ "ms_hub_url": "AI-ModelScope/olm-wikipedia-20221220",
630
+ "columns": {
631
+ "prompt": "text"
632
+ }
633
+ },
634
+ "wikipedia_zh": {
635
+ "hf_hub_url": "pleisto/wikipedia-cn-20230720-filtered",
636
+ "ms_hub_url": "AI-ModelScope/wikipedia-cn-20230720-filtered",
637
+ "columns": {
638
+ "prompt": "completion"
639
+ }
640
+ },
641
+ "pile": {
642
+ "hf_hub_url": "monology/pile-uncopyrighted",
643
+ "ms_hub_url": "AI-ModelScope/pile",
644
+ "columns": {
645
+ "prompt": "text"
646
+ }
647
+ },
648
+ "skypile": {
649
+ "hf_hub_url": "Skywork/SkyPile-150B",
650
+ "ms_hub_url": "AI-ModelScope/SkyPile-150B",
651
+ "columns": {
652
+ "prompt": "text"
653
+ }
654
+ },
655
+ "fineweb": {
656
+ "hf_hub_url": "HuggingFaceFW/fineweb",
657
+ "columns": {
658
+ "prompt": "text"
659
+ }
660
+ },
661
+ "fineweb_edu": {
662
+ "hf_hub_url": "HuggingFaceFW/fineweb-edu",
663
+ "columns": {
664
+ "prompt": "text"
665
+ }
666
+ },
667
+ "the_stack": {
668
+ "hf_hub_url": "bigcode/the-stack",
669
+ "ms_hub_url": "AI-ModelScope/the-stack",
670
+ "columns": {
671
+ "prompt": "content"
672
+ }
673
+ },
674
+ "starcoder_python": {
675
+ "hf_hub_url": "bigcode/starcoderdata",
676
+ "ms_hub_url": "AI-ModelScope/starcoderdata",
677
+ "columns": {
678
+ "prompt": "content"
679
+ },
680
+ "folder": "python"
681
+ },
682
+ "VideoMMMu": {
683
+ "hf_hub_url": "lmms-lab/VideoMMMU",
684
+ "columns": {
685
+ "prompt": "content"
686
+ }
687
+ }
688
+ }