Spaces:

flax-community
/

dalle-mini

Running

Pedro Cuenca committed on Jul 11, 2021

Commit

4b8c3a8

1 Parent(s): eb912a1

* JIT outside the loop.

My tests yesterday were wrong: there is a noticeable performance
improvement doing it this way. Even so, JIT still runs twice; we could cut
times in half (for this test) if we could make it run only once.

Files changed (1) hide show

encoding/vqgan-jax-encoding.ipynb +26 -14

encoding/vqgan-jax-encoding.ipynb CHANGED Viewed

@@ -363,20 +363,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
-   "id": "c8b1c229",
    "metadata": {},
    "outputs": [],
    "source": [
     "def encode(model, batch):\n",
     "    _, indices = model.encode(batch)\n",
     "    return indices"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
-   "id": "f2aafe7a",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -969,15 +970,15 @@
   },
   {
    "cell_type": "markdown",
-   "id": "03643ba1",
    "metadata": {},
    "source": [
-    "It works! Let's wrap it and run the whole process on the 10k images subset."
    ]
   },
   {
    "cell_type": "markdown",
-   "id": "1c65d943",
    "metadata": {},
    "source": [
     "## 10k encoding"
@@ -993,8 +994,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 195,
-   "id": "f69e2073",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1004,10 +1015,11 @@
     "    superbatches = superbatch_generator(dataloader)\n",
     "    \n",
     "    # TODO: save to disk as we go, do not accumulate everything in RAM\n",
-    "#     encoder = pmap(lambda batch: encode(model, batch))\n",
     "    results = None\n",
     "    for superbatch in tqdm(superbatches):\n",
-    "        encoded = pmap(lambda batch: encode(model, batch))(superbatch.numpy())\n",
     "        encoded = encoded.reshape(encoded.shape[0] * encoded.shape[1], -1)\n",
     "        results = np.concatenate((results, encoded), axis=0) if results is not None else encoded\n",
     "    return results"
@@ -1015,15 +1027,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 199,
-   "id": "e9a5565e",
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "16it [03:38, 13.64s/it]\n"
      ]
     }
    ],

   },
   {
    "cell_type": "code",
+   "execution_count": 76,
+   "id": "fd26cdce",
    "metadata": {},
    "outputs": [],
    "source": [
     "def encode(model, batch):\n",
+    "#     print(\"jitting encode function\")\n",
     "    _, indices = model.encode(batch)\n",
     "    return indices"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 18,
+   "id": "c49181e1",
    "metadata": {},
    "outputs": [],
    "source": [
   },
   {
    "cell_type": "markdown",
+   "id": "48896d5f",
    "metadata": {},
    "source": [
+    "It works! Let's wrap it up and run the whole process on the 10k images subset."
    ]
   },
   {
    "cell_type": "markdown",
+   "id": "029d35d9",
    "metadata": {},
    "source": [
     "## 10k encoding"
   },
   {
    "cell_type": "code",
+   "execution_count": 45,
+   "id": "04b1568b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from functools import partial"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 78,
+   "id": "bfa3073b",
    "metadata": {},
    "outputs": [],
    "source": [
     "    superbatches = superbatch_generator(dataloader)\n",
     "    \n",
     "    # TODO: save to disk as we go, do not accumulate everything in RAM\n",
+    "#     p_encoder = pmap(partial(encode, model), in_axes=(0,), donate_argnums=(0))\n",
+    "    p_encoder = pmap(lambda batch: encode(model, batch))\n",
     "    results = None\n",
     "    for superbatch in tqdm(superbatches):\n",
+    "        encoded = p_encoder(superbatch.numpy())\n",
     "        encoded = encoded.reshape(encoded.shape[0] * encoded.shape[1], -1)\n",
     "        results = np.concatenate((results, encoded), axis=0) if results is not None else encoded\n",
     "    return results"
   },
   {
    "cell_type": "code",
+   "execution_count": 79,
+   "id": "d8d4da18",
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
+      "16it [00:41,  2.61s/it]\n"
      ]
     }
    ],