{ "cells": [ { "cell_type": "code", "execution_count": null, "id": "a8d16d95", "metadata": {}, "outputs": [], "source": [ "# Copyright 2024 ByteDance and/or its affiliates.\n", "#\n", "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", "# you may not use this file except in compliance with the License.\n", "# You may obtain a copy of the License at\n", "\n", "# http://www.apache.org/licenses/LICENSE-2.0\n", "\n", "# Unless required by applicable law or agreed to in writing, software\n", "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", "# See the License for the specific language governing permissions and\n", "# limitations under the License." ] }, { "cell_type": "code", "execution_count": 3, "id": "81744ffa", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Try to find the ccd cache data in the code directory for inference.\n" ] } ], "source": [ "import argparse\n", "import csv\n", "from pathlib import Path\n", "from typing import Optional\n", "\n", "import pandas as pd\n", "from joblib import Parallel, delayed\n", "from tqdm import tqdm\n", "\n", "from protenix.data.data_pipeline import DataPipeline\n", "from protenix.utils.file_io import dump_gzip_pickle" ] }, { "cell_type": "code", "execution_count": null, "id": "02412ab0", "metadata": {}, "outputs": [ { "ename": "", "evalue": "", "output_type": "error", "traceback": [ "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n", "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n", "\u001b[1;31mClick here for more info. \n", "\u001b[1;31mView Jupyter log for further details." 
] } ], "source": [
 "# Parse one mmCIF file and cache the resulting bioassembly dict via dump_gzip_pickle.\n",
 "dataset = \"Distillation\"\n",
 "mmcif_path = \"./dataset/7pzb.cif\"\n",
 "\n",
 "# Pass `dataset` through instead of repeating the literal so the two cannot drift.\n",
 "sample_indices_list, bioassembly_dict = DataPipeline.get_data_from_mmcif(\n",
 "    mmcif=mmcif_path, pdb_cluster_file=None, dataset=dataset\n",
 ")\n",
 "# Show only the top-level keys: printing the whole dict floods the cell output.\n",
 "# NOTE(review): assumes a dict is always returned (it is indexed by key below) - confirm.\n",
 "print(list(bioassembly_dict))\n",
 "\n",
 "pdb_id = bioassembly_dict[\"pdb_id\"]\n",
 "# Save to ./dataset, named after the \"pdb_id\" entry of the parsed dict.\n",
 "dump_gzip_pickle(bioassembly_dict, f\"./dataset/{pdb_id}.pkl.gz\")"
 ] }, { "cell_type": "code", "execution_count": null, "id": "1ff18a14", "metadata": {}, "outputs": [], "source": [
 "from configs.configs_base import configs as configs_base\n",
 "from configs.configs_data import data_configs\n",
 "from configs.configs_inference import inference_configs\n",
 "from protenix.config import parse_configs\n",
 "\n",
 "# `pdb_id` is set by the mmCIF parsing cell above. Fail early with an actionable\n",
 "# message when this cell is run on a fresh kernel, before any config parsing.\n",
 "if \"pdb_id\" not in globals():\n",
 "    raise NameError(\"name 'pdb_id' is not defined; run the mmCIF parsing cell first\")\n",
 "\n",
 "# Command-line style overrides handed to parse_configs.\n",
 "arg_str = \"--seeds 101 --dump_dir ./output --input_json_path ./examples/example.json --model.N_cycle 10 --sample_diffusion.N_sample 5 --sample_diffusion.N_step 200 \"\n",
 "\n",
 "# Merge base, data and inference settings; later entries win on key clashes.\n",
 "# The merged dict gets its own name so `configs` only ever holds the parsed result.\n",
 "merged_configs = {**configs_base, \"data\": data_configs, **inference_configs}\n",
 "configs = parse_configs(\n",
 "    configs=merged_configs,\n",
 "    arg_str=arg_str,\n",
 "    fill_required_with_null=True,\n",
 ")\n",
 "data_config = configs.data\n",
 "config_dict = data_config[\"weightedPDB_before2109_wopb_nometalc_0925\"].to_dict()\n",
 "\n",
 "# Assemble `params`. Key order is kept on purpose: entries of base_info can\n",
 "# override \"name\", and the explicit keys listed after it override base_info.\n",
 "params = {\n",
 "    \"name\": pdb_id,\n",
 "    **config_dict[\"base_info\"],\n",
 "    \"cropping_configs\": config_dict[\"cropping_configs\"],\n",
 "    \"error_dir\": \"./dataset\",\n",
 "    \"msa_featurizer\": None,\n",
 "    \"template_featurizer\": None,\n",
 "    \"lig_atom_rename\": False,\n",
 "    \"shuffle_mols\": False,\n",
 "    \"shuffle_sym_ids\": False,\n",
 "}"
 ] }, { "cell_type": "code", "execution_count": 5, "id": "768a767a", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
3Dmol.js failed to load for some reason. Please check your browser console for error messages.