{ "cells": [
 { "cell_type": "code", "execution_count": 13, "id": "32f62046-ef8f-422b-a66d-fab3bd4a85d3", "metadata": { "tags": [] },
  "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
    "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
    "\u001b[0m\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
    "\u001b[0m" ] } ],
  "source": [
    "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
    "# NOTE(review): versions are unpinned; pin them once the working versions are known.\n",
    "%pip install hdbscan -q\n",
    "%pip install pymatgen -q" ] },
 { "cell_type": "code", "execution_count": 1, "id": "bc8c6ef5-5010-46b3-b89b-eab6dc0c00b3", "metadata": { "tags": [] }, "outputs": [],
  "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn import manifold\n",
    "import hdbscan\n",
    "import os\n",
    "import json\n",
    "\n",
    "import sys\n",
    "sys.path.append('..')\n",
    "sys.path.append('../autoencoder')\n",
    "\n",
    "\n",
    "from fastai import *\n",
    "from fastai.vision.all import *\n",
    "\n",
    "from src.band_plotters import *\n",
    "from src.TensorImageNoised import *\n",
    "from src.transforms import Binarize\n",
    "\n",
    "sys.path.append('/notebooks/band-fingerprint/autoencoder/resnet_autoencoder')\n",
    "from model import *" ] },
 { "cell_type": "markdown", "id": "e486923d-2b4e-47fa-83e2-3a00e78cf965", "metadata": { "tags": [] },
  "source": [ "# Select Fingerprint Name and Length Here:" ] },
 { "cell_type": "code", "execution_count": 2, "id": "09af96eb-50f6-41dd-b1fe-c320ca91575f", "metadata": { "tags": [] },
  "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "224_2channel_resnet_L=98\n" ] } ],
  "source": [
    "# Fingerprint selection: FINGERPRINT_LENGTH is embedded in FINGERPRINT_NAME,\n",
    "# and both feed OUTPUT_NAME below.\n",
    "FINGERPRINT_LENGTH = 98\n",
    "# fingerprint length for old all_k_paths fingerprint, no need to change for auto-encoded prints!\n",
    "FINGERPRINT_LENGTH_old = 60\n",
    "\n",
    "#FINGERPRINT_NAME = \"128x128_random_erase_resnet18_VAE_L={0}\".format(FINGERPRINT_LENGTH)\n",
    "FINGERPRINT_NAME = f\"224_2channel_resnet_L={FINGERPRINT_LENGTH}\"\n",
    "\n",
    "# NOTE(review): WIDTH is not referenced in this cell; its consumer is not visible here.\n",
    "WIDTH = 128\n",
    "# NOTE(review): presumably the t-SNE perplexity (sklearn.manifold is imported) - confirm.\n",
    "PERPLEXITY = 30\n",
    "# CSV file name built from the fingerprint name, the perplexity and the length.\n",
    "OUTPUT_NAME = f\"{FINGERPRINT_NAME}_perplexity_{PERPLEXITY}_length_{FINGERPRINT_LENGTH}.csv\"\n",
    "print(FINGERPRINT_NAME)" ] },
 { "cell_type": "markdown", "id": "dfcdda2e-41e4-4556-9799-c1a171759edc", "metadata": { "tags": [] },
  "source": [ "To make a new fingerprint all you need to change is the constants above and/or the calc_fingerprint function below." ] },
 { "cell_type": "markdown", "id": "f1a859a5-ce17-434f-823b-16c2e9d3ea58", "metadata": { "tags": [] },
  "source": [ "# Start with Anupam's list of materials" ] },
 { "cell_type": "code", "execution_count": 3, "id": "d485efe0-a52e-408a-9f08-dd49c228554f", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", " | formula | \n", "gen_formula | \n", "space_group | \n", "segments | \n", "flat_segments | \n", "flatness_score | \n", "discovery | \n", "binary_flatness | \n", "horz_flat_seg | \n", "exfoliation_eg | \n", "... | \n", "C | \n", "D | \n", "E | \n", "F | \n", "radio | \n", "f_orb | \n", "sg_sto_group | \n", "percentage_flat | \n", "relative_id | \n", "crystal_system | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
ID | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
2dm-1 | \n", "IrF2 | \n", "AB2 | \n", "164 | \n", "3 | \n", "0 | \n", "0.095102 | \n", "bottom-up | \n", "0 | \n", "0 | \n", "0.234620 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "non-radioactive | \n", "no-f-in-valence | \n", "NaN | \n", "NaN | \n", "2dm-4963 | \n", "trigonal | \n", "
2dm-2 | \n", "Ba2Sb | \n", "AB2 | \n", "164 | \n", "3 | \n", "1 | \n", "0.387410 | \n", "bottom-up | \n", "0 | \n", "0 | \n", "0.210650 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "non-radioactive | \n", "no-f-in-valence | \n", "NaN | \n", "NaN | \n", "2dm-3279 | \n", "trigonal | \n", "
2dm-3 | \n", "TlS | \n", "AB | \n", "2 | \n", "4 | \n", "4 | \n", "0.846460 | \n", "bottom-up | \n", "1 | \n", "3 | \n", "0.095794 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "non-radioactive | \n", "no-f-in-valence | \n", "276.0 | \n", "24.2 | \n", "2dm-5155 | \n", "triclinic | \n", "
2dm-4 | \n", "MoCl2 | \n", "AB2 | \n", "166 | \n", "5 | \n", "4 | \n", "0.713760 | \n", "bottom-up | \n", "0 | \n", "0 | \n", "-0.055818 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "non-radioactive | \n", "no-f-in-valence | \n", "NaN | \n", "NaN | \n", "2dm-4342 | \n", "trigonal | \n", "
2dm-6 | \n", "RuI2 | \n", "AB2 | \n", "164 | \n", "3 | \n", "1 | \n", "0.264930 | \n", "bottom-up | \n", "0 | \n", "0 | \n", "0.084831 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "non-radioactive | \n", "no-f-in-valence | \n", "NaN | \n", "NaN | \n", "2dm-3574 | \n", "trigonal | \n", "
5 rows × 26 columns
\n", "\n", " | formula | \n", "gen_formula | \n", "space_group | \n", "segments | \n", "flat_segments | \n", "flatness_score | \n", "discovery | \n", "binary_flatness | \n", "horz_flat_seg | \n", "exfoliation_eg | \n", "... | \n", "90 | \n", "91 | \n", "92 | \n", "93 | \n", "94 | \n", "95 | \n", "96 | \n", "97 | \n", "fx | \n", "fy | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
ID | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
2dm-1 | \n", "IrF2 | \n", "AB2 | \n", "164 | \n", "3 | \n", "0 | \n", "0.095102 | \n", "bottom-up | \n", "0 | \n", "0 | \n", "0.234620 | \n", "... | \n", "1.800380 | \n", "1.783413 | \n", "1.751181 | \n", "1.657435 | \n", "1.699241 | \n", "1.757099 | \n", "1.648585 | \n", "1.750903 | \n", "80.652298 | \n", "33.975147 | \n", "
2dm-2 | \n", "Ba2Sb | \n", "AB2 | \n", "164 | \n", "3 | \n", "1 | \n", "0.387410 | \n", "bottom-up | \n", "0 | \n", "0 | \n", "0.210650 | \n", "... | \n", "2.065734 | \n", "1.790841 | \n", "1.797779 | \n", "1.940001 | \n", "1.778363 | \n", "1.646309 | \n", "1.777840 | \n", "1.804859 | \n", "-102.511086 | \n", "76.040359 | \n", "
2dm-3 | \n", "TlS | \n", "AB | \n", "2 | \n", "4 | \n", "4 | \n", "0.846460 | \n", "bottom-up | \n", "1 | \n", "3 | \n", "0.095794 | \n", "... | \n", "2.325100 | \n", "1.733577 | \n", "1.816098 | \n", "1.953408 | \n", "1.904952 | \n", "1.718350 | \n", "1.920829 | \n", "1.940830 | \n", "-17.031164 | \n", "-22.583645 | \n", "
2dm-4 | \n", "MoCl2 | \n", "AB2 | \n", "166 | \n", "5 | \n", "4 | \n", "0.713760 | \n", "bottom-up | \n", "0 | \n", "0 | \n", "-0.055818 | \n", "... | \n", "2.374648 | \n", "2.153072 | \n", "2.535491 | \n", "1.912330 | \n", "2.053569 | \n", "2.283617 | \n", "2.490727 | \n", "2.131947 | \n", "-74.647247 | \n", "38.610275 | \n", "
2dm-6 | \n", "RuI2 | \n", "AB2 | \n", "164 | \n", "3 | \n", "1 | \n", "0.264930 | \n", "bottom-up | \n", "0 | \n", "0 | \n", "0.084831 | \n", "... | \n", "2.629308 | \n", "2.182670 | \n", "2.361369 | \n", "2.758861 | \n", "2.421932 | \n", "2.233414 | \n", "2.194449 | \n", "2.162071 | \n", "74.347855 | \n", "47.444942 | \n", "
5 rows × 126 columns
\n", "