{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\harsh\\anaconda3\\envs\\transformmers\\lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] } ], "source": [ "import torch\n", "from PIL import Image\n", "from transformers import AutoModel, AutoTokenizer" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Loading checkpoint shards: 100%|██████████| 2/2 [00:11<00:00, 5.61s/it]\n" ] } ], "source": [ "model = AutoModel.from_pretrained('MiniCPM', trust_remote_code=True, torch_dtype=torch.bfloat16)\n", "# For Nvidia GPUs support BF16 (like A100, H100, RTX3090)\n", "model = model.to(device='cuda', dtype=torch.bfloat16)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "tokenizer = AutoTokenizer.from_pretrained('MiniCPM-Tokenizer', trust_remote_code=True)\n" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The image depicts a breathtaking view of a rocky coastline. The rocky cliff, with its steep and rugged terrain, dominates the left side of the frame. The water, which is a shade of blue, is calm and stretches out to the right of the image. The coastline appears to be rocky and uneven, with a variety of shapes and sizes of rocks and boulders. The image also captures a glimpse of the sky, which is visible at the top of the frame.\n" ] } ], "source": [ "image = Image.open('demo2.jpg').convert('RGB')\n", "question = 'What is in the image?'\n", "msgs = [{'role': 'user', 'content': question}]\n", "\n", "res, context, _ = model.chat(\n", " image=image,\n", " msgs=msgs,\n", " context=None,\n", " tokenizer=tokenizer,\n", " sampling=True,\n", " temperature=0.7\n", ")\n", "print(res)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "transformmers", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.19" } }, "nbformat": 4, "nbformat_minor": 2 }