flunardelli committed on
Commit 8ffe518 · verified · 1 Parent(s): 35494dd

Upload 2 files

llm_metaeval_eval_harness_mixtral_mmlu.ipynb ADDED
@@ -0,0 +1,187 @@
+ {
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "U8RTc2PmnX-v"
+ },
+ "source": [
+ "Initial setup"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "kGW7vfRkrqHe"
+ },
+ "outputs": [],
+ "source": [
+ "!pip install -r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "2I850FIsCVNw"
+ },
+ "outputs": [],
+ "source": [
+ "from datetime import datetime\n",
+ "import os\n",
+ "from huggingface_hub import login, upload_folder\n",
+ "from google.colab import userdata\n",
+ "import shutil\n",
+ "\n",
+ "HF_TOKEN = userdata.get('HF_TOKEN')\n",
+ "login(HF_TOKEN, True)\n",
+ "BASE_DATASET='mmlu'\n",
+ "REPO_ID='flunardelli/llm-metaeval'\n",
+ "BASE_FOLDER=f\"/content/{BASE_DATASET}/\"#{datetime.now().strftime('%Y-%m-%dT%H-%M-%S')}\n",
+ "OUTPUT_FOLDER=os.path.join(BASE_FOLDER,'output')\n",
+ "TASK_FOLDER=os.path.join(BASE_FOLDER,'tasks')\n",
+ "#shutil.rmtree(BASE_FOLDER)\n",
+ "os.makedirs(OUTPUT_FOLDER)\n",
+ "os.makedirs(TASK_FOLDER)\n",
+ "os.environ['HF_TOKEN'] = HF_TOKEN\n",
+ "os.environ['OUTPUT_FOLDER'] = OUTPUT_FOLDER\n",
+ "os.environ['TASK_FOLDER'] = TASK_FOLDER\n",
+ "\n",
+ "def hf_upload_folder(folder_path):\n",
+ "    upload_folder(\n",
+ "        folder_path=folder_path,\n",
+ "        path_in_repo=\"evals/\",\n",
+ "        repo_id=REPO_ID,\n",
+ "        token=HF_TOKEN,\n",
+ "        repo_type=\"dataset\"\n",
+ "    )\n",
+ "\n",
+ "def create_task(content, filename):\n",
+ "    filename_path = os.path.join(TASK_FOLDER,filename)\n",
+ "    with open(filename_path, \"w\") as f:\n",
+ "        f.write(content)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Jd2JwKZaPkNS"
+ },
+ "source": [
+ "Create task for MMLU all datasets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "xP0cC_sHih7C"
+ },
+ "outputs": [],
+ "source": [
+ "YAML_mmlu_en_us_string = \"\"\"\n",
+ "task: mmlu_all\n",
+ "dataset_path: cais/mmlu\n",
+ "dataset_name: all\n",
+ "description: \"MMLU dataset\"\n",
+ "test_split: test\n",
+ "fewshot_split: dev\n",
+ "fewshot_config:\n",
+ "  sampler: first_n\n",
+ "num_fewshot: 5\n",
+ "output_type: multiple_choice\n",
+ "doc_to_text: \"{{question.strip()}}\\nA. {{choices[0]}}\\nB. {{choices[1]}}\\nC. {{choices[2]}}\\nD. {{choices[3]}}\\nAnswer:\"\n",
+ "doc_to_choice: [\"A\", \"B\", \"C\", \"D\"]\n",
+ "doc_to_target: answer\n",
+ "metric_list:\n",
+ "  - metric: acc\n",
+ "    aggregation: mean\n",
+ "    higher_is_better: true\n",
+ "\"\"\"\n",
+ "create_task(YAML_mmlu_en_us_string, 'mmlu_en_us.yaml')\n",
+ "os.environ['TASKS'] = 'mmlu_all'\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "1fEX-49hQ-Be"
+ },
+ "source": [
+ "Mistral Models"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "!lm_eval \\\n",
+ "--model hf --model_args pretrained=mistralai/Mixtral-8x7B-Instruct-v0.1,revision=41bd4c9e7e4fb318ca40e721131d4933966c2cc1,trust_remote_code=False,dtype=bfloat16,parallelize=True \\\n",
+ "--tasks $TASKS \\\n",
+ "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --log_samples \\\n",
+ "--batch_size auto &> run.log\n",
+ "#--limit 10 \\"
+ ],
+ "metadata": {
+ "id": "E0IqSzsvWO0P"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "hf_upload_folder(BASE_FOLDER)"
+ ],
+ "metadata": {
+ "id": "HA819yqvWjRV"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "3cHI2qxN2fJ0"
+ },
+ "outputs": [],
+ "source": [
+ "!lm_eval \\\n",
+ "--model hf --model_args pretrained=mistralai/Mixtral-8x22B-v0.1,revision=b03e260818710044a2f088d88fab12bb220884fb,trust_remote_code=False,dtype=bfloat16,parallelize=True \\\n",
+ "--tasks $TASKS \\\n",
+ "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --log_samples \\\n",
+ "--batch_size auto &> run.log\n",
+ "#--limit 10 \\"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "mGGdqBNBzFYL"
+ },
+ "outputs": [],
+ "source": [
+ "hf_upload_folder(BASE_FOLDER)"
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "gpuType": "L4",
+ "machine_shape": "hm",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+ }
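
Note (illustration only, not part of the uploaded notebook): the mmlu_en_us.yaml task above is a standard lm-evaluation-harness multiple-choice config in which doc_to_text renders the prompt, doc_to_choice supplies the answer letters, and doc_to_target selects the gold index. The sketch below shows what that template produces for a single made-up record using the cais/mmlu field names question, choices, and answer.

# Illustration only: how the mmlu_all templates above turn one record into a
# prompt string and a target letter. The record itself is made up.
doc = {
    "question": "What is the capital of France?",
    "choices": ["Berlin", "Madrid", "Paris", "Rome"],
    "answer": 2,  # index into choices
}

prompt = (
    f"{doc['question'].strip()}\n"
    f"A. {doc['choices'][0]}\n"
    f"B. {doc['choices'][1]}\n"
    f"C. {doc['choices'][2]}\n"
    f"D. {doc['choices'][3]}\n"
    "Answer:"
)
target = ["A", "B", "C", "D"][doc["answer"]]  # "C"; the harness prepends 5 dev-split examples as few-shot context
print(prompt)
print(target)
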
llm_metaeval_eval_harness_mixtral_pub.ipynb ADDED
@@ -0,0 +1,223 @@
+ {
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": [],
+ "gpuType": "T4",
+ "machine_shape": "hm"
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ },
+ "accelerator": "GPU"
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Initial setup"
+ ],
+ "metadata": {
+ "id": "U8RTc2PmnX-v"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "!pip install -r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt"
+ ],
+ "metadata": {
+ "id": "kGW7vfRkrqHe"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from datetime import datetime\n",
+ "import os\n",
+ "from huggingface_hub import login, upload_folder\n",
+ "from google.colab import userdata\n",
+ "import shutil\n",
+ "\n",
+ "HF_TOKEN = userdata.get('HF_TOKEN')\n",
+ "login(HF_TOKEN, True)\n",
+ "BASE_DATASET='pub'\n",
+ "REPO_ID='flunardelli/llm-metaeval'\n",
+ "BASE_FOLDER=f\"/content/{BASE_DATASET}/\"#{datetime.now().strftime('%Y-%m-%dT%H-%M-%S')}\n",
+ "OUTPUT_FOLDER=os.path.join(BASE_FOLDER,'output')\n",
+ "TASK_FOLDER=os.path.join(BASE_FOLDER,'tasks')\n",
+ "#shutil.rmtree(BASE_FOLDER)\n",
+ "os.makedirs(OUTPUT_FOLDER)\n",
+ "os.makedirs(TASK_FOLDER)\n",
+ "os.environ['HF_TOKEN'] = HF_TOKEN\n",
+ "os.environ['OUTPUT_FOLDER'] = OUTPUT_FOLDER\n",
+ "os.environ['TASK_FOLDER'] = TASK_FOLDER\n",
+ "\n",
+ "def hf_upload_folder(folder_path):\n",
+ "    upload_folder(\n",
+ "        folder_path=folder_path,\n",
+ "        path_in_repo=\"evals/\",\n",
+ "        repo_id=REPO_ID,\n",
+ "        token=HF_TOKEN,\n",
+ "        repo_type=\"dataset\"\n",
+ "    )\n",
+ "\n",
+ "def create_task(content, filename):\n",
+ "    filename_path = os.path.join(TASK_FOLDER,filename)\n",
+ "    with open(filename_path, \"w\") as f:\n",
+ "        f.write(content)"
+ ],
+ "metadata": {
+ "id": "IHxFvAC4eSnW"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Create task for PUB all datasets"
+ ],
+ "metadata": {
+ "id": "Jd2JwKZaPkNS"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "YAML_template_pub_tasks = [\n",
+ "    (\"task_1\", 2),\n",
+ "    (\"task_2\", 5),\n",
+ "    (\"task_3\", 5),\n",
+ "    (\"task_4\", 3),\n",
+ "    (\"task_5\", 2),\n",
+ "    (\"task_6\", 2),\n",
+ "    (\"task_7\", 2),\n",
+ "    (\"task_8\", 2),\n",
+ "    (\"task_9\", 2),\n",
+ "    (\"task_10\", 3),\n",
+ "    (\"task_11\", 3),\n",
+ "    (\"task_12\", 2),\n",
+ "    (\"task_13\", 2),\n",
+ "    (\"task_14\", 4)\n",
+ "]\n",
+ "\n",
+ "default_doc_to_text = \"{{pretext.strip()}}\\n {{options[0]}}\\n{{options[1]}}\\n{{options[2]}}\\n{{options[3]}}\\n{{options[4]}}\\nAnswer:\"\n",
+ "\n",
+ "\n",
+ "YAML_template_pub_base = \"\"\"\n",
+ "task: __task_name__\n",
+ "dataset_path: flunardelli/PUB\n",
+ "dataset_name: __dataset_name__\n",
+ "description: \"PUB\"\n",
+ "test_split: test\n",
+ "fewshot_split: test\n",
+ "fewshot_config:\n",
+ "  sampler: first_n\n",
+ "num_fewshot: 5\n",
+ "output_type: multiple_choice\n",
+ "doc_to_text: \"{{pretext.strip()}}\\n Options:\\n__options__\\nAnswer:\"\n",
+ "doc_to_choice: \"{{options}}\"\n",
+ "doc_to_target: \"correct answer\"\n",
+ "metric_list:\n",
+ "  - metric: acc\n",
+ "    aggregation: mean\n",
+ "    higher_is_better: true\n",
+ "\"\"\"\n",
+ "tasks = []\n",
+ "for t in YAML_template_pub_tasks:\n",
+ "    dataset_name, num_choices = t\n",
+ "    task_name = f\"pub_{dataset_name}\"\n",
+ "    tasks.append(task_name)\n",
+ "    template_choices = '\\n'.join([\"{{options[__i__]}}\".replace('__i__',str(i)) for i in range(num_choices)])\n",
+ "    template = (YAML_template_pub_base\n",
+ "        .replace('__options__',template_choices)\n",
+ "        .replace('__dataset_name__',dataset_name).replace('__task_name__',task_name)\n",
+ "    )\n",
+ "    create_task(template, f\"pub_{dataset_name}.yaml\")\n",
+ "\n",
+ "os.environ['TASKS'] = ','.join(tasks)"
+ ],
+ "metadata": {
+ "id": "xP0cC_sHih7C"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Mistral Models"
+ ],
+ "metadata": {
+ "id": "1fEX-49hQ-Be"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "!for i in $(echo $TASKS|tr ',' ' '); do lm_eval \\\n",
+ "--model hf --model_args pretrained=mistralai/Mixtral-8x7B-Instruct-v0.1,revision=41bd4c9e7e4fb318ca40e721131d4933966c2cc1,trust_remote_code=False,dtype=bfloat16,parallelize=True \\\n",
+ "--tasks $i \\\n",
+ "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --log_samples \\\n",
+ "--batch_size auto; done &> run.log"
+ ],
+ "metadata": {
+ "id": "v_MTb0t1XHOJ"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "hf_upload_folder(BASE_FOLDER)"
+ ],
+ "metadata": {
+ "id": "vUf27nLYXGzh"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "!for i in $(echo $TASKS|tr ',' ' '); do lm_eval \\\n",
+ "--model hf --model_args pretrained=mistralai/Mixtral-8x22B-v0.1,revision=b03e260818710044a2f088d88fab12bb220884fb,trust_remote_code=False,dtype=bfloat16,parallelize=True \\\n",
+ "--tasks $i \\\n",
+ "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --log_samples \\\n",
+ "--batch_size auto; done &> run.log"
+ ],
+ "metadata": {
+ "id": "LPqTo2z29RKx"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "hf_upload_folder(BASE_FOLDER)"
+ ],
+ "metadata": {
+ "id": "ZQl05b1rf83u"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [],
+ "metadata": {
+ "id": "ZUTPHnV0kMB1"
+ }
+ }
+ ]
+ }
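
Note (illustration only, not part of the uploaded notebook): the task-generation cell writes one pub_task_*.yaml per PUB subset into $TASK_FOLDER and exports the task names as a comma-separated $TASKS variable; the shell cells then split that list with `tr ',' ' '` and run lm_eval once per task. A minimal sketch of that hand-off, with an abbreviated task list:

# Illustration only: the hand-off between the Python cell and the bash loops above.
task_specs = [("task_1", 2), ("task_2", 5), ("task_14", 4)]  # abbreviated list
tasks = [f"pub_{name}" for name, _ in task_specs]
print(",".join(tasks))  # "pub_task_1,pub_task_2,pub_task_14" -> exported as $TASKS
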