{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/opt/conda/envs/llava/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n", "Loading checkpoint shards: 100%|██████████| 17/17 [01:22<00:00, 4.86s/it]\n" ] } ], "source": [ "\n", "\n", "import json\n", "import pprint \n", "\n", "\n", "def read_json(file_path): \n", " with open(file_path, 'r', encoding='utf-8') as file:\n", " data = json.load(file)\n", " return data\n", "\n", "def write_json(file_path, data):\n", " with open(file_path, 'w', encoding='utf-8') as file:\n", " json.dump(data, file, ensure_ascii=False, indent=4)\n", " \n", "# data = read_json(\"/inspire/hdd/ws-ba572160-47f8-4ca1-984e-d6bcdeb95dbb/a100-maybe/albus/DataSet/MiniCPM-V/all_blip_train_llava_coco_layout_caption_s1s3.json\") \n", "data = read_json(\"/inspire/hdd/ws-ba572160-47f8-4ca1-984e-d6bcdeb95dbb/a100-maybe/albus/merged_data.json\") \n", " \n", "\n", "from transformers import AutoModelForCausalLM, AutoTokenizer\n", "\n", "model_name = \"Model/QwQ-32B-Preview\"\n", "model = AutoModelForCausalLM.from_pretrained(\n", " model_name,\n", " torch_dtype=\"auto\",\n", " device_map=\"auto\"\n", ")\n", "tokenizer = AutoTokenizer.from_pretrained(model_name)\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "\n", "def chat_QwQ(prompt):\n", " \n", " messages = [\n", " {\"role\": \"system\", \"content\": \"You are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step.\"},\n", " {\"role\": \"user\", \"content\": prompt}\n", " ]\n", " \n", " text = tokenizer.apply_chat_template(\n", " messages,\n", " tokenize=False,\n", " add_generation_prompt=True\n", " )\n", " \n", " model_inputs = tokenizer([text], return_tensors=\"pt\").to(model.device)\n", "\n", " generated_ids = model.generate(\n", " **model_inputs,\n", " max_new_tokens=512\n", " )\n", " \n", " generated_ids = [\n", " output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)\n", " ]\n", "\n", " response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]\n", " # print(response)\n", " return response\n", "\n", "\n", "\n", "def chat_QwQ_batch(batch):\n", " \n", " all_text = []\n", " for prompt in batch:\n", " \n", " messages = [\n", " {\"role\": \"system\", \"content\": \"You are a helpful and harmless assistant. You are Qwen developed by Alibaba. 
You should think step-by-step.\"},\n",
"            {\"role\": \"user\", \"content\": prompt}\n",
"        ]\n",
"\n",
"        text = tokenizer.apply_chat_template(\n",
"            messages,\n",
"            tokenize=False,\n",
"            add_generation_prompt=True\n",
"        )\n",
"\n",
"        all_text.append(text)\n",
"\n",
"    # Decoder-only models need left padding for batched generation; otherwise\n",
"    # the prompts end in pad tokens and the completions are corrupted.\n",
"    tokenizer.padding_side = 'left'\n",
"    model_inputs = tokenizer(all_text, return_tensors=\"pt\", padding=True).to(model.device)\n",
"\n",
"    generated_ids = model.generate(\n",
"        **model_inputs,\n",
"        max_new_tokens=512\n",
"    )\n",
"\n",
"    # Strip the prompt tokens so only the newly generated answers are decoded.\n",
"    generated_ids = [\n",
"        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)\n",
"    ]\n",
"\n",
"    responses = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)\n",
"    return responses\n",
"\n"
] },
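{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"# Minimal sanity check (sketch) before launching the batched loop below: run a\n",
"# single prompt through chat_QwQ and inspect the answer. It assumes the loaded\n",
"# `data` dict is keyed by string indices starting at '1' and that each item has\n",
"# a 'content' field, as inspected further down in this notebook.\n",
"sample_prompt = data['1']['content']\n",
"sample_answer = chat_QwQ(sample_prompt)\n",
"pprint.pprint(sample_prompt)\n",
"pprint.pprint(sample_answer)"
] },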
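{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"# Optional sketch: the CUDA OOM message recorded in the next cell's output\n",
"# suggests max_split_size_mb to reduce allocator fragmentation. The setting is\n",
"# only picked up before the first CUDA allocation, so it belongs at the top of\n",
"# a fresh session; the value 128 is an assumption, not a tuned number.\n",
"import os\n",
"os.environ[\"PYTORCH_CUDA_ALLOC_CONF\"] = \"max_split_size_mb:128\""
] },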
{ "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [
"100%|██████████| 2000/2000 [00:00<00:00, 1097410.78it/s]\n",
"A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.\n"
] }, { "ename": "OutOfMemoryError", "evalue": "CUDA out of memory. Tried to allocate 32.85 GiB. GPU 0 has a total capacty of 79.33 GiB of which 11.21 GiB is free. Process 2176602 has 68.11 GiB memory in use. Of the allocated memory 67.07 GiB is allocated by PyTorch, and 577.47 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF", "output_type": "error", "traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mOutOfMemoryError\u001b[0m                          Traceback (most recent call last)",
"\u001b[0;31mOutOfMemoryError\u001b[0m: CUDA out of memory. Tried to allocate 32.85 GiB. GPU 0 has a total capacty of 79.33 GiB of which 11.21 GiB is free. Process 2176602 has 68.11 GiB memory in use. Of the allocated memory 67.07 GiB is allocated by PyTorch, and 577.47 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"
] } ], "source": [
"# for i in data:\n",
"#     # sent1 = i['conversations'][0]['value']\n",
"#     # sent2 = i['conversations'][1]['value']\n",
"#     # sentence = sent1 + sent2\n",
"\n",
"#     sent1 = i['caption']\n",
"#     sent2 = i['action_target']\n",
"#     goal = i['ori_question'].split('Goal:')[1]\n",
"\n",
"#     prompt = \"The description of image is: \" + sent1 + \" The goal is :\" + goal + \" ###### Infer from the content of the image and the goal, why the following actions were performed in this step.###### \" + \" The action is :\" + sent2\n",
"#     answer = chat_QwQ(prompt)\n",
"\n",
"#     # english_text = answer\n",
"#     # inputs = tokenizer.encode(english_text, return_tensors=\"pt\", truncation=True)\n",
"#     # translated = model.generate(inputs, max_length=40, num_beams=4, early_stopping=True)\n",
"#     # chinese_translation = tokenizer.decode(translated[0], skip_special_tokens=True)\n",
"#     pprint.pprint(prompt)\n",
"#     pprint.pprint(answer)\n",
"#     break\n",
"\n",
"from tqdm import tqdm\n",
"\n",
"batch_size = 2\n",
"\n",
"# Process the prompts in small, fixed-size batches so generation with the 32B\n",
"# model fits in GPU memory, and store each decoded answer back on its item.\n",
"keys = [str(k) for k in range(1, len(data) + 1)]\n",
"for batch_start in tqdm(range(0, len(keys), batch_size)):\n",
"    batch_keys = keys[batch_start:batch_start + batch_size]\n",
"    batch_prompts = [data[k]['content'] for k in batch_keys]\n",
"\n",
"    responses = chat_QwQ_batch(batch_prompts)\n",
"\n",
"    for k, response in zip(batch_keys, responses):\n",
"        data[k]['answer_QwQ'] = response\n",
"        print(response)\n"
] },
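{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"# Persist the annotated items (sketch): `write_json` is defined in the first\n",
"# cell but never used. The output filename here is an assumption; point it at\n",
"# wherever the QwQ answers should be stored.\n",
"output_path = \"merged_data_with_QwQ_answers.json\"  # hypothetical path\n",
"write_json(output_path, data)"
] },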
{ "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'你是一位数学专家,请解答以下问题并提供详细解答步骤:\\n\\n将点 $(0,3)$ 从直角坐标系转换到极坐标系。请以 $(r,\\\\theta)$ 的形式输入答案,其中 $r > 0$ 且 $0 \\\\le \\\\theta < 2 \\\\pi$。'" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Inspect one item to check the prompt format.\n", "data['1']['content']" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "4000" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(data)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py",
"mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.16" } }, "nbformat": 4, "nbformat_minor": 4 }