arpieb committed on
Commit bb3ff55 · 1 Parent(s): 0a3d3d1

Added testing notebook

Files changed (1)
  1. test.ipynb +188 -0
test.ipynb ADDED
@@ -0,0 +1,188 @@
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "d2d5bc5c-d465-4483-b137-52e168fc6f6e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from peft import PeftModel, PeftConfig\n",
+ "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
+ "\n",
+ "checkpoint = \"bigcode/starcoderbase-7b\"\n",
+ "device = \"cuda\" # for GPU usage or \"cpu\" for CPU usage"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "def31126-da54-4099-b8f7-3236829d7559",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "CPU times: user 157 ms, sys: 14.8 ms, total: 172 ms\n",
+ "Wall time: 293 ms\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "tokenizer = AutoTokenizer.from_pretrained(checkpoint)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "d6fa452a-33a3-4e57-983a-28e1020004cb",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "ef692d63e58c42939869f3f53600be37",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading adapter_config.json: 0%| | 0.00/517 [00:00<?, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "22fa3c7f2fbd411d865a0a805003a84a",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Loading checkpoint shards: 0%| | 0/4 [00:00<?, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "e3db312bf99c401191e0b5ab424b6074",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading (…)er_model.safetensors: 0%| | 0.00/155M [00:00<?, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "CPU times: user 2min 11s, sys: 57.6 s, total: 3min 8s\n",
+ "Wall time: 1min 57s\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "config = PeftConfig.from_pretrained(\"arpieb/peft-lora-starcoderbase-7b-personal-copilot-elixir\")\n",
+ "model = AutoModelForCausalLM.from_pretrained(\"bigcode/starcoderbase-7b\")\n",
+ "model = PeftModel.from_pretrained(model, \"arpieb/peft-lora-starcoderbase-7b-personal-copilot-elixir\")\n",
+ "model = model.merge_and_unload()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "b8315302-801b-4b59-b158-25c86be30192",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "tensor([[ 589, 1459, 81, 7656, 81, 5860, 346, 745, 44]])\n",
+ "CPU times: user 4.03 ms, sys: 0 ns, total: 4.03 ms\n",
+ "Wall time: 1.51 ms\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "inputs = tokenizer.encode(\"def print_hello_world() do:\", return_tensors=\"pt\")\n",
+ "print(inputs)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "53d735d7-5941-4793-8b50-cc8e00de5437",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
+ "Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
+ "/home/rbates/src/starcoder-elixir/DHS-LLM-Workshop/ENV/lib/python3.10/site-packages/transformers/generation/utils.py:1353: UserWarning: Using the model-agnostic default `max_length` (=20) to control the generation length. We recommend setting `max_new_tokens` to control the maximum length of the generation.\n",
+ " warnings.warn(\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "def print_hello_world() do: IO.puts(\"Hello, World!\")\n",
+ "\n",
+ "#\n",
+ "CPU times: user 52.1 s, sys: 4.77 ms, total: 52.1 s\n",
+ "Wall time: 8.69 s\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "outputs = model.generate(inputs)\n",
+ "print(tokenizer.decode(outputs[0]))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1a346bef-a007-4311-b0ac-275dd786713d",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+ }
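
Note on the generation cell: the stderr output above shows that the attention mask, `pad_token_id`, and the generation length were left at their defaults, and the `device = "cuda"` variable from the first cell is never actually used to move the model or inputs to the GPU. A minimal sketch of how those warnings could be addressed, assuming the `tokenizer` and merged `model` loaded above (illustrative only, not part of the committed notebook):

# Illustrative sketch, not in the committed notebook: pass the attention mask
# and an explicit token budget to generate() to silence the default-setting warnings.
enc = tokenizer("def print_hello_world() do:", return_tensors="pt")
outputs = model.generate(
    input_ids=enc["input_ids"],
    attention_mask=enc["attention_mask"],   # avoids the "attention mask was not set" warning
    max_new_tokens=64,                      # explicit budget instead of the default max_length=20
    pad_token_id=tokenizer.eos_token_id,    # quiets the pad_token_id warning
)
print(tokenizer.decode(outputs[0]))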