{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# LLaVA / Swift training-JSON path rewriting and format conversion\n",
    "\n",
    "Utilities to (1) rewrite absolute image-path prefixes when moving datasets between machines (A6000 / H100 / A100) and (2) convert LLaVA-style `conversations` records into the Swift `query` / `response` / `images` format, for both single-image and multi-image (`image_history`) data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import os\n",
    "import pprint\n",
    "\n",
    "\n",
    "def read_json(file_path):\n",
    "    \"\"\"Load and return the JSON document stored at ``file_path`` (UTF-8).\"\"\"\n",
    "    with open(file_path, 'r', encoding='utf-8') as file:\n",
    "        return json.load(file)\n",
    "\n",
    "\n",
    "def write_json(file_path, data):\n",
    "    \"\"\"Write ``data`` to ``file_path`` as pretty-printed UTF-8 JSON.\"\"\"\n",
    "    with open(file_path, 'w', encoding='utf-8') as file:\n",
    "        json.dump(data, file, ensure_ascii=False, indent=4)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 更换路径 (swap machine-specific path prefixes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pick exactly one input file; the alternatives are kept for quick switching.\n",
    "# data = read_json('/code/Data/m4_instruct_annotations.json')\n",
    "# data = read_json('/code/Data/general_blip_train_llava_imgh.json')\n",
    "data = read_json('/code/Data/general_blip_train_llava_swift.json')\n",
    "# data = read_json('/code/Data/general_blip_test_llava_swift.json')\n",
    "\n",
    "# data = read_json('/code/LLaVA/data/json/general_blip_train_llava.json')\n",
    "# data = read_json('/code/LLaVA/data/json/all_blip_train_llava_coco.json')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sanity-check the record layout before rewriting paths.\n",
    "pprint.pprint(data[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The source files store image paths under the machine-specific prefix below;\n",
    "# swap it for the target machine's prefix.  len(OLD_PREFIX) == 27, which is\n",
    "# what the previous hard-coded slice `[27:]` silently relied on.\n",
    "OLD_PREFIX = '/code/Auto-GUI/dataset/blip'\n",
    "\n",
    "# ################## A6000 ##################\n",
    "# NEW_PREFIX = '/data/home/zbz5349/WorkSpace/LLaVA/data/blip'\n",
    "\n",
    "################## H100 ##################\n",
    "NEW_PREFIX = '/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/blip'\n",
    "\n",
    "for record in data:\n",
    "    old_path = record['images'][0]\n",
    "    # Fail loudly instead of corrupting paths that use a different prefix.\n",
    "    assert old_path.startswith(OLD_PREFIX), f'unexpected image path: {old_path}'\n",
    "    record['images'][0] = NEW_PREFIX + old_path[len(OLD_PREFIX):]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# write_json('/code/Data/general_blip_train_llava_swift_a6000.json', data)\n",
    "# write_json('/code/Data/general_blip_train_llava_swift_H100.json', data)\n",
    "write_json('/code/Data/general_blip_test_llava_swift_H100.json', data)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "-----"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 重构格式 / Single Image (LLaVA conversations → Swift query/response)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "path = '/code/LLaVA/data/json/general_blip_train_llava.json'\n",
    "# path = '/code/LLaVA/data/json/general_blip_train_llava_coco.json'\n",
    "# path = '/code/LLaVA/data/json/general_blip_train_llava_70ORI_30COCO.json'\n",
    "\n",
    "# path = '/code/LLaVA/data/json/all_blip_train_llava.json'\n",
    "# path = '/code/LLaVA/data/json/all_blip_train_llava_coco.json'\n",
    "\n",
    "data = read_json(path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Convert LLaVA records to Swift format.  Each record is assumed to hold one\n",
    "# human/gpt turn pair in `conversations` and a single relative `image` path.\n",
    "\n",
    "############## H100\n",
    "# IMAGE_ROOT = '/gpu02home/zbz5349/ICLR_2024/LLaVA_Mobile_V1/data/'\n",
    "\n",
    "############## A100\n",
    "IMAGE_ROOT = '/data/zbz5349/ICLR_2024/data/'\n",
    "\n",
    "new_data = []\n",
    "for record in data:\n",
    "    new_data.append({\n",
    "        'query': record['conversations'][0]['value'],\n",
    "        'response': record['conversations'][1]['value'],\n",
    "        'images': [IMAGE_ROOT + record['image']],\n",
    "    })"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# os.path.splitext is safer than the previous `path.split('.')[0]`, which\n",
    "# would truncate at the first dot anywhere in the path, not at the extension.\n",
    "new_path = os.path.splitext(path)[0] + '_swift_A100.json'\n",
    "write_json(new_path, new_data)\n",
    "new_path"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 重构格式 / Multi Image (image history → Swift multi-image)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "path = '/code/Data/general_blip_train_llava_imgh.json'\n",
    "\n",
    "data = read_json(path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def replace_hashes(text):\n",
    "    \"\"\"Remove the last four '###' separators from ``text``.\n",
    "\n",
    "    NOTE(review): the original comment claimed the last *five* markers were\n",
    "    replaced, but ``rsplit('###', 4)`` only touches the last four; joining\n",
    "    with '' removes them rather than replacing them.\n",
    "    \"\"\"\n",
    "    return ''.join(text.rsplit('###', 4))\n",
    "\n",
    "\n",
    "def ensure_five_xxx(text):\n",
    "    \"\"\"Pad ``text`` so a placeholder token occurs at least five times.\n",
    "\n",
    "    FIXME: the placeholder literal is empty — ``text.count('')`` equals\n",
    "    ``len(text) + 1``, so for any non-trivial text the padding branch is\n",
    "    dead, and when it does run it appends spaces, not tokens.  The intended\n",
    "    token (presumably an image tag such as '<image>') appears to have been\n",
    "    lost from this source.  Behaviour is kept as-is pending confirmation.\n",
    "    \"\"\"\n",
    "    count = text.count('')\n",
    "    if count < 5:\n",
    "        missing_xxx = 5 - count\n",
    "        text += ' ' * missing_xxx\n",
    "    return text\n",
    "\n",
    "\n",
    "############## A100 ##############\n",
    "IMAGE_ROOT_MULTI = '/data/zbz5349/ICLR_2024/data/'\n",
    "\n",
    "new_data = []\n",
    "for record in data:\n",
    "    temp = {\n",
    "        'query': record['conversations'][0]['value'],\n",
    "        'response': record['conversations'][1]['value'],\n",
    "        # One absolute path per step of the on-screen history, stored order.\n",
    "        'images': [IMAGE_ROOT_MULTI + w for w in record['image_history']],\n",
    "    }\n",
    "\n",
    "    # Insert a ' ###' separator before every action line of the query, then\n",
    "    # strip the trailing four separators and apply the placeholder padding.\n",
    "    query = ' ###\\n \"action_type\"'.join(temp['query'].split('\"action_type\"'))\n",
    "    query = replace_hashes(query)\n",
    "    query = ensure_five_xxx(query)\n",
    "\n",
    "    temp['query'] = query\n",
    "    new_data.append(temp)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Spot-check a converted record (index 11 has a multi-step image history).\n",
    "pprint.pprint(new_data[11])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "new_path = os.path.splitext(path)[0] + '_swift_multi_A100.json'\n",
    "write_json(new_path, new_data)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "llava",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}