File size: 5,070 Bytes

bb3ff55

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "d2d5bc5c-d465-4483-b137-52e168fc6f6e",
   "metadata": {},
   "outputs": [],
   "source": [
    "from peft import PeftModel, PeftConfig\n",
    "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
    "\n",
    "checkpoint = \"bigcode/starcoderbase-7b\"\n",
    "device = \"cuda\" # for GPU usage or \"cpu\" for CPU usage"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "def31126-da54-4099-b8f7-3236829d7559",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 157 ms, sys: 14.8 ms, total: 172 ms\n",
      "Wall time: 293 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "tokenizer = AutoTokenizer.from_pretrained(checkpoint)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "d6fa452a-33a3-4e57-983a-28e1020004cb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ef692d63e58c42939869f3f53600be37",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading adapter_config.json:   0%|          | 0.00/517 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "22fa3c7f2fbd411d865a0a805003a84a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e3db312bf99c401191e0b5ab424b6074",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading (…)er_model.safetensors:   0%|          | 0.00/155M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 2min 11s, sys: 57.6 s, total: 3min 8s\n",
      "Wall time: 1min 57s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "config = PeftConfig.from_pretrained(\"arpieb/peft-lora-starcoderbase-7b-personal-copilot-elixir\")\n",
    "model = AutoModelForCausalLM.from_pretrained(\"bigcode/starcoderbase-7b\")\n",
    "model = PeftModel.from_pretrained(model, \"arpieb/peft-lora-starcoderbase-7b-personal-copilot-elixir\")\n",
    "model = model.merge_and_unload()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "b8315302-801b-4b59-b158-25c86be30192",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[ 589, 1459,   81, 7656,   81, 5860,  346,  745,   44]])\n",
      "CPU times: user 4.03 ms, sys: 0 ns, total: 4.03 ms\n",
      "Wall time: 1.51 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "inputs = tokenizer.encode(\"def print_hello_world() do:\", return_tensors=\"pt\")\n",
    "print(inputs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "53d735d7-5941-4793-8b50-cc8e00de5437",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
      "Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
      "/home/rbates/src/starcoder-elixir/DHS-LLM-Workshop/ENV/lib/python3.10/site-packages/transformers/generation/utils.py:1353: UserWarning: Using the model-agnostic default `max_length` (=20) to control the generation length. We recommend setting `max_new_tokens` to control the maximum length of the generation.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "def print_hello_world() do: IO.puts(\"Hello, World!\")\n",
      "\n",
      "#\n",
      "CPU times: user 52.1 s, sys: 4.77 ms, total: 52.1 s\n",
      "Wall time: 8.69 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "outputs = model.generate(inputs)\n",
    "print(tokenizer.decode(outputs[0]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1a346bef-a007-4311-b0ac-275dd786713d",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}