Wendy-Fly
/

LLaVA-Select

Model card Files Files and versions Community

File size: 8,697 Bytes

cb3fe1e

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "\n",
    "def read_json(file_path): \n",
    "    with open(file_path, 'r', encoding='utf-8') as file:\n",
    "        data = json.load(file)\n",
    "    return data\n",
    "\n",
    "def write_json(file_path, data):\n",
    "    with open(file_path, 'w', encoding='utf-8') as file:\n",
    "        json.dump(data, file, ensure_ascii=False, indent=4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = read_json('/inspire/hdd/ws-ba572160-47f8-4ca1-984e-d6bcdeb95dbb/a100-maybe/albus/DataSet/LLaVA-Select/llava_image_tune_.json')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "for i in data:\n",
    "    i['conversations'][0]['value'] = i['conversations'][0]['value'].replace('<image>\\n','')\n",
    "    # break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'id': 0,\n",
       " 'image': 'llava_image_tune/coco/train2017/000000033471.jpg',\n",
       " 'conversations': [{'from': 'human',\n",
       "   'value': 'What are the colors of the bus in the image?'},\n",
       "  {'from': 'gpt', 'value': 'The bus in the image is white and red.'},\n",
       "  {'from': 'human',\n",
       "   'value': 'What feature can be seen on the back of the bus?'},\n",
       "  {'from': 'gpt', 'value': 'The back of the bus features an advertisement.'},\n",
       "  {'from': 'human',\n",
       "   'value': 'Is the bus driving down the street or pulled off to the side?'},\n",
       "  {'from': 'gpt',\n",
       "   'value': 'The bus is driving down the street, which is crowded with people and other vehicles.'}]}"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "这里可以解释 loss 大小 和loss ifd 对于数据的影响\n",
    "\n",
    "high loss 意味着 整体对话风格 都比较难学，而不是事实因素\n",
    "\n",
    "high ifd 意味着 对话并不依赖图片\n",
    "\n",
    "不同意义上的 高质量"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# write_json('/inspire/hdd/ws-ba572160-47f8-4ca1-984e-d6bcdeb95dbb/a100-maybe/albus/DataSet/LLaVA-Select/llava_image_tune_NImg.json',data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "------"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = read_json('/inspire/hdd/ws-ba572160-47f8-4ca1-984e-d6bcdeb95dbb/a100-maybe/albus/DataSet/LLaVA-Select/llava_image_tune_.json')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import copy \n",
    "\n",
    "template = ''' \n",
    "Does the previous paragraph demarcated within ### and ###\n",
    "contain informative signal for visual instruction tuning a vision-language model?\n",
    "An informative datapoint should be well-formatted, contain some\n",
    "usable knowledge of the world, and strictly NOT have any harmful,\n",
    "racist, sexist, etc. content.\n",
    "OPTIONS:\n",
    "- yes\n",
    "- no\n",
    "'''\n",
    " \n",
    "con_template = [{'from': 'human',\n",
    "   'value': '<image>\\nWhat are the colors of the bus in the image?'},\n",
    "  {'from': 'gpt', 'value': 'The bus in the image is white and red.'}]\n",
    "\n",
    "\n",
    "for i in data:\n",
    "    i['ori_conversations'] = copy.deepcopy(i['conversations'])\n",
    "    \n",
    "    sentence = ''\n",
    "    for j in i['conversations']:\n",
    "        sentence = sentence + j['value'] + ' '\n",
    "        \n",
    "    final_sent = \"############\\n\" + sentence + \"############\\n\" + template\n",
    "    final_sent = final_sent #.replace('<image>','')\n",
    "    final_result = \"response: yes\"\n",
    "    \n",
    "    new_con_template = copy.deepcopy(con_template)\n",
    "    new_con_template[0]['value'] = final_sent\n",
    "    new_con_template[1]['value'] = final_result\n",
    "    i['conversations'] = new_con_template\n",
    "    \n",
    "    # del i['image']\n",
    "    # i['Old_Path'] = i.pop('image')\n",
    "    # break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'id': 0,\n",
       " 'image': 'llava_image_tune/coco/train2017/000000033471.jpg',\n",
       " 'conversations': [{'from': 'human',\n",
       "   'value': '############\\n<image>\\nWhat are the colors of the bus in the image? The bus in the image is white and red. What feature can be seen on the back of the bus? The back of the bus features an advertisement. Is the bus driving down the street or pulled off to the side? The bus is driving down the street, which is crowded with people and other vehicles. ############\\n \\nDoes the previous paragraph demarcated within ### and ###\\ncontain informative signal for visual instruction tuning a vision-language model?\\nAn informative datapoint should be well-formatted, contain some\\nusable knowledge of the world, and strictly NOT have any harmful,\\nracist, sexist, etc. content.\\nOPTIONS:\\n- yes\\n- no\\n'},\n",
       "  {'from': 'gpt', 'value': 'response: yes'}],\n",
       " 'ori_conversations': [{'from': 'human',\n",
       "   'value': '<image>\\nWhat are the colors of the bus in the image?'},\n",
       "  {'from': 'gpt', 'value': 'The bus in the image is white and red.'},\n",
       "  {'from': 'human',\n",
       "   'value': 'What feature can be seen on the back of the bus?'},\n",
       "  {'from': 'gpt', 'value': 'The back of the bus features an advertisement.'},\n",
       "  {'from': 'human',\n",
       "   'value': 'Is the bus driving down the street or pulled off to the side?'},\n",
       "  {'from': 'gpt',\n",
       "   'value': 'The bus is driving down the street, which is crowded with people and other vehicles.'}]}"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# write_json('/inspire/hdd/ws-ba572160-47f8-4ca1-984e-d6bcdeb95dbb/a100-maybe/albus/DataSet/LLaVA-Select/llava_image_tune_logits_Img.json',data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# data1 = read_json('/inspire/hdd/ws-ba572160-47f8-4ca1-984e-d6bcdeb95dbb/a100-maybe/albus/DataSet/LLaVA-Select/llava_image_tune_logits_Img_20P.json')\n",
    "data1 = read_json('/inspire/hdd/ws-ba572160-47f8-4ca1-984e-d6bcdeb95dbb/a100-maybe/albus/DataSet/LLaVA-Select/llava_image_tune_logits_NoImg_20P.json')\n",
    "data2 = read_json('/inspire/hdd/ws-ba572160-47f8-4ca1-984e-d6bcdeb95dbb/a100-maybe/albus/DataSet/LLaVA-Select/llava_image_tune_rand_20P.json')\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "new_data = data1 + data2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# write_json('/inspire/hdd/ws-ba572160-47f8-4ca1-984e-d6bcdeb95dbb/a100-maybe/albus/DataSet/LLaVA-Select/llava_image_tune_rand_logits_Img_40P.json',new_data)\n",
    "write_json('/inspire/hdd/ws-ba572160-47f8-4ca1-984e-d6bcdeb95dbb/a100-maybe/albus/DataSet/LLaVA-Select/llava_image_tune_rand_logits_NoImg_40P.json',new_data)\n",
    "\n",
    "\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "llava",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}