{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\harsh\\anaconda3\\envs\\transformmers\\lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] } ], "source": [ "import torch\n", "from PIL import Image\n", "from transformers import AutoModel, AutoTokenizer" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Loading checkpoint shards: 100%|██████████| 2/2 [00:11<00:00, 5.61s/it]\n" ] } ], "source": [ "model = AutoModel.from_pretrained('MiniCPM', trust_remote_code=True, torch_dtype=torch.bfloat16)\n", "# For Nvidia GPUs support BF16 (like A100, H100, RTX3090)\n", "model = model.to(device='cuda', dtype=torch.bfloat16)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "tokenizer = AutoTokenizer.from_pretrained('MiniCPM-Tokenizer', trust_remote_code=True)\n" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The image depicts a breathtaking view of a rocky coastline. The rocky cliff, with its steep and rugged terrain, dominates the left side of the frame. The water, which is a shade of blue, is calm and stretches out to the right of the image. The coastline appears to be rocky and uneven, with a variety of shapes and sizes of rocks and boulders. The image also captures a glimpse of the sky, which is visible at the top of the frame.\n" ] } ], "source": [ "image = Image.open('demo2.jpg').convert('RGB')\n", "question = 'What is in the image?'\n", "msgs = [{'role': 'user', 'content': question}]\n", "\n", "res, context, _ = model.chat(\n", " image=image,\n", " msgs=msgs,\n", " context=None,\n", " tokenizer=tokenizer,\n", " sampling=True,\n", " temperature=0.7\n", ")\n", "print(res)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "transformmers", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.19" } }, "nbformat": 4, "nbformat_minor": 2 }