Spaces:
Sleeping
Sleeping
updated notes and poc of agentic. workign but not downloads
Browse files- 03-testembedtune.ipynb +197 -356
- READMEresponses.md +124 -8
- app_working_on_agentic.py +304 -0
- comparison_results.csv +5 -0
- pyproject.toml +3 -3
03-testembedtune.ipynb
CHANGED
@@ -36,7 +36,7 @@
|
|
36 |
},
|
37 |
{
|
38 |
"cell_type": "code",
|
39 |
-
"execution_count":
|
40 |
"metadata": {},
|
41 |
"outputs": [],
|
42 |
"source": [
|
@@ -49,7 +49,7 @@
|
|
49 |
},
|
50 |
{
|
51 |
"cell_type": "code",
|
52 |
-
"execution_count":
|
53 |
"metadata": {},
|
54 |
"outputs": [],
|
55 |
"source": [
|
@@ -58,13 +58,13 @@
|
|
58 |
},
|
59 |
{
|
60 |
"cell_type": "code",
|
61 |
-
"execution_count":
|
62 |
"metadata": {},
|
63 |
"outputs": [
|
64 |
{
|
65 |
"data": {
|
66 |
"application/vnd.jupyter.widget-view+json": {
|
67 |
-
"model_id": "
|
68 |
"version_major": 2,
|
69 |
"version_minor": 0
|
70 |
},
|
@@ -74,13 +74,6 @@
|
|
74 |
},
|
75 |
"metadata": {},
|
76 |
"output_type": "display_data"
|
77 |
-
},
|
78 |
-
{
|
79 |
-
"name": "stderr",
|
80 |
-
"output_type": "stream",
|
81 |
-
"text": [
|
82 |
-
"Token has not been saved to git credential helper.\n"
|
83 |
-
]
|
84 |
}
|
85 |
],
|
86 |
"source": [
|
@@ -90,7 +83,7 @@
|
|
90 |
},
|
91 |
{
|
92 |
"cell_type": "code",
|
93 |
-
"execution_count":
|
94 |
"metadata": {},
|
95 |
"outputs": [
|
96 |
{
|
@@ -108,7 +101,7 @@
|
|
108 |
},
|
109 |
{
|
110 |
"cell_type": "code",
|
111 |
-
"execution_count":
|
112 |
"metadata": {},
|
113 |
"outputs": [
|
114 |
{
|
@@ -127,7 +120,7 @@
|
|
127 |
},
|
128 |
{
|
129 |
"cell_type": "code",
|
130 |
-
"execution_count":
|
131 |
"metadata": {},
|
132 |
"outputs": [],
|
133 |
"source": [
|
@@ -168,9 +161,18 @@
|
|
168 |
},
|
169 |
{
|
170 |
"cell_type": "code",
|
171 |
-
"execution_count":
|
172 |
"metadata": {},
|
173 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
174 |
"source": [
|
175 |
"\n",
|
176 |
"\n",
|
@@ -191,7 +193,7 @@
|
|
191 |
},
|
192 |
{
|
193 |
"cell_type": "code",
|
194 |
-
"execution_count":
|
195 |
"metadata": {},
|
196 |
"outputs": [],
|
197 |
"source": [
|
@@ -225,7 +227,7 @@
|
|
225 |
},
|
226 |
{
|
227 |
"cell_type": "code",
|
228 |
-
"execution_count":
|
229 |
"metadata": {},
|
230 |
"outputs": [],
|
231 |
"source": [
|
@@ -255,7 +257,7 @@
|
|
255 |
},
|
256 |
{
|
257 |
"cell_type": "code",
|
258 |
-
"execution_count":
|
259 |
"metadata": {},
|
260 |
"outputs": [],
|
261 |
"source": [
|
@@ -273,7 +275,7 @@
|
|
273 |
},
|
274 |
{
|
275 |
"cell_type": "code",
|
276 |
-
"execution_count":
|
277 |
"metadata": {},
|
278 |
"outputs": [
|
279 |
{
|
@@ -312,7 +314,7 @@
|
|
312 |
},
|
313 |
{
|
314 |
"cell_type": "code",
|
315 |
-
"execution_count":
|
316 |
"metadata": {},
|
317 |
"outputs": [],
|
318 |
"source": [
|
@@ -326,7 +328,7 @@
|
|
326 |
},
|
327 |
{
|
328 |
"cell_type": "code",
|
329 |
-
"execution_count":
|
330 |
"metadata": {},
|
331 |
"outputs": [],
|
332 |
"source": [
|
@@ -350,7 +352,7 @@
|
|
350 |
},
|
351 |
{
|
352 |
"cell_type": "code",
|
353 |
-
"execution_count":
|
354 |
"metadata": {},
|
355 |
"outputs": [],
|
356 |
"source": [
|
@@ -359,7 +361,7 @@
|
|
359 |
},
|
360 |
{
|
361 |
"cell_type": "code",
|
362 |
-
"execution_count":
|
363 |
"metadata": {},
|
364 |
"outputs": [],
|
365 |
"source": [
|
@@ -396,16 +398,16 @@
|
|
396 |
},
|
397 |
{
|
398 |
"cell_type": "code",
|
399 |
-
"execution_count":
|
400 |
"metadata": {},
|
401 |
"outputs": [
|
402 |
{
|
403 |
"name": "stderr",
|
404 |
"output_type": "stream",
|
405 |
"text": [
|
406 |
-
"Processing documents: 100%|██████████| 9/9 [00:
|
407 |
-
"Processing documents: 100%|██████████| 2/2 [00:
|
408 |
-
"Processing documents: 100%|██████████| 3/3 [00:
|
409 |
]
|
410 |
}
|
411 |
],
|
@@ -417,7 +419,7 @@
|
|
417 |
},
|
418 |
{
|
419 |
"cell_type": "code",
|
420 |
-
"execution_count":
|
421 |
"metadata": {},
|
422 |
"outputs": [],
|
423 |
"source": [
|
@@ -461,7 +463,7 @@
|
|
461 |
},
|
462 |
{
|
463 |
"cell_type": "code",
|
464 |
-
"execution_count":
|
465 |
"metadata": {},
|
466 |
"outputs": [],
|
467 |
"source": [
|
@@ -470,7 +472,7 @@
|
|
470 |
},
|
471 |
{
|
472 |
"cell_type": "code",
|
473 |
-
"execution_count":
|
474 |
"metadata": {},
|
475 |
"outputs": [],
|
476 |
"source": [
|
@@ -482,7 +484,7 @@
|
|
482 |
},
|
483 |
{
|
484 |
"cell_type": "code",
|
485 |
-
"execution_count":
|
486 |
"metadata": {},
|
487 |
"outputs": [],
|
488 |
"source": [
|
@@ -493,7 +495,7 @@
|
|
493 |
},
|
494 |
{
|
495 |
"cell_type": "code",
|
496 |
-
"execution_count":
|
497 |
"metadata": {},
|
498 |
"outputs": [],
|
499 |
"source": [
|
@@ -502,7 +504,7 @@
|
|
502 |
},
|
503 |
{
|
504 |
"cell_type": "code",
|
505 |
-
"execution_count":
|
506 |
"metadata": {},
|
507 |
"outputs": [],
|
508 |
"source": [
|
@@ -520,7 +522,7 @@
|
|
520 |
},
|
521 |
{
|
522 |
"cell_type": "code",
|
523 |
-
"execution_count":
|
524 |
"metadata": {},
|
525 |
"outputs": [],
|
526 |
"source": [
|
@@ -531,7 +533,7 @@
|
|
531 |
},
|
532 |
{
|
533 |
"cell_type": "code",
|
534 |
-
"execution_count":
|
535 |
"metadata": {},
|
536 |
"outputs": [],
|
537 |
"source": [
|
@@ -546,7 +548,7 @@
|
|
546 |
},
|
547 |
{
|
548 |
"cell_type": "code",
|
549 |
-
"execution_count":
|
550 |
"metadata": {},
|
551 |
"outputs": [],
|
552 |
"source": [
|
@@ -561,7 +563,7 @@
|
|
561 |
},
|
562 |
{
|
563 |
"cell_type": "code",
|
564 |
-
"execution_count":
|
565 |
"metadata": {},
|
566 |
"outputs": [],
|
567 |
"source": [
|
@@ -570,19 +572,19 @@
|
|
570 |
},
|
571 |
{
|
572 |
"cell_type": "code",
|
573 |
-
"execution_count":
|
574 |
"metadata": {},
|
575 |
"outputs": [
|
576 |
{
|
577 |
"data": {
|
578 |
"text/html": [
|
579 |
-
"<button onClick=\"this.nextSibling.style.display='block';this.style.display='none';\">Display W&B run</button><iframe src='https://wandb.ai/dummy/dummy/runs/
|
580 |
],
|
581 |
"text/plain": [
|
582 |
-
"<wandb.sdk.wandb_run.Run at
|
583 |
]
|
584 |
},
|
585 |
-
"execution_count":
|
586 |
"metadata": {},
|
587 |
"output_type": "execute_result"
|
588 |
}
|
@@ -596,174 +598,9 @@
|
|
596 |
},
|
597 |
{
|
598 |
"cell_type": "code",
|
599 |
-
"execution_count":
|
600 |
"metadata": {},
|
601 |
-
"outputs": [
|
602 |
-
{
|
603 |
-
"data": {
|
604 |
-
"application/vnd.jupyter.widget-view+json": {
|
605 |
-
"model_id": "400bc1e49a854008a875534a9d3a50d4",
|
606 |
-
"version_major": 2,
|
607 |
-
"version_minor": 0
|
608 |
-
},
|
609 |
-
"text/plain": [
|
610 |
-
"Computing widget examples: 0%| | 0/1 [00:00<?, ?example/s]"
|
611 |
-
]
|
612 |
-
},
|
613 |
-
"metadata": {},
|
614 |
-
"output_type": "display_data"
|
615 |
-
},
|
616 |
-
{
|
617 |
-
"name": "stderr",
|
618 |
-
"output_type": "stream",
|
619 |
-
"text": [
|
620 |
-
"\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter.\n"
|
621 |
-
]
|
622 |
-
},
|
623 |
-
{
|
624 |
-
"data": {
|
625 |
-
"text/html": [
|
626 |
-
"\n",
|
627 |
-
" <div>\n",
|
628 |
-
" \n",
|
629 |
-
" <progress value='10' max='10' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
|
630 |
-
" [10/10 00:02, Epoch 5/5]\n",
|
631 |
-
" </div>\n",
|
632 |
-
" <table border=\"1\" class=\"dataframe\">\n",
|
633 |
-
" <thead>\n",
|
634 |
-
" <tr style=\"text-align: left;\">\n",
|
635 |
-
" <th>Step</th>\n",
|
636 |
-
" <th>Training Loss</th>\n",
|
637 |
-
" <th>Validation Loss</th>\n",
|
638 |
-
" <th>Cosine Accuracy@1</th>\n",
|
639 |
-
" <th>Cosine Accuracy@3</th>\n",
|
640 |
-
" <th>Cosine Accuracy@5</th>\n",
|
641 |
-
" <th>Cosine Accuracy@10</th>\n",
|
642 |
-
" <th>Cosine Precision@1</th>\n",
|
643 |
-
" <th>Cosine Precision@3</th>\n",
|
644 |
-
" <th>Cosine Precision@5</th>\n",
|
645 |
-
" <th>Cosine Precision@10</th>\n",
|
646 |
-
" <th>Cosine Recall@1</th>\n",
|
647 |
-
" <th>Cosine Recall@3</th>\n",
|
648 |
-
" <th>Cosine Recall@5</th>\n",
|
649 |
-
" <th>Cosine Recall@10</th>\n",
|
650 |
-
" <th>Cosine Ndcg@10</th>\n",
|
651 |
-
" <th>Cosine Mrr@10</th>\n",
|
652 |
-
" <th>Cosine Map@100</th>\n",
|
653 |
-
" </tr>\n",
|
654 |
-
" </thead>\n",
|
655 |
-
" <tbody>\n",
|
656 |
-
" <tr>\n",
|
657 |
-
" <td>2</td>\n",
|
658 |
-
" <td>No log</td>\n",
|
659 |
-
" <td>No log</td>\n",
|
660 |
-
" <td>0.750000</td>\n",
|
661 |
-
" <td>1.000000</td>\n",
|
662 |
-
" <td>1.000000</td>\n",
|
663 |
-
" <td>1.000000</td>\n",
|
664 |
-
" <td>0.750000</td>\n",
|
665 |
-
" <td>0.333333</td>\n",
|
666 |
-
" <td>0.200000</td>\n",
|
667 |
-
" <td>0.100000</td>\n",
|
668 |
-
" <td>0.750000</td>\n",
|
669 |
-
" <td>1.000000</td>\n",
|
670 |
-
" <td>1.000000</td>\n",
|
671 |
-
" <td>1.000000</td>\n",
|
672 |
-
" <td>0.907732</td>\n",
|
673 |
-
" <td>0.875000</td>\n",
|
674 |
-
" <td>0.875000</td>\n",
|
675 |
-
" </tr>\n",
|
676 |
-
" <tr>\n",
|
677 |
-
" <td>4</td>\n",
|
678 |
-
" <td>No log</td>\n",
|
679 |
-
" <td>No log</td>\n",
|
680 |
-
" <td>0.750000</td>\n",
|
681 |
-
" <td>1.000000</td>\n",
|
682 |
-
" <td>1.000000</td>\n",
|
683 |
-
" <td>1.000000</td>\n",
|
684 |
-
" <td>0.750000</td>\n",
|
685 |
-
" <td>0.333333</td>\n",
|
686 |
-
" <td>0.200000</td>\n",
|
687 |
-
" <td>0.100000</td>\n",
|
688 |
-
" <td>0.750000</td>\n",
|
689 |
-
" <td>1.000000</td>\n",
|
690 |
-
" <td>1.000000</td>\n",
|
691 |
-
" <td>1.000000</td>\n",
|
692 |
-
" <td>0.907732</td>\n",
|
693 |
-
" <td>0.875000</td>\n",
|
694 |
-
" <td>0.875000</td>\n",
|
695 |
-
" </tr>\n",
|
696 |
-
" <tr>\n",
|
697 |
-
" <td>6</td>\n",
|
698 |
-
" <td>No log</td>\n",
|
699 |
-
" <td>No log</td>\n",
|
700 |
-
" <td>0.750000</td>\n",
|
701 |
-
" <td>1.000000</td>\n",
|
702 |
-
" <td>1.000000</td>\n",
|
703 |
-
" <td>1.000000</td>\n",
|
704 |
-
" <td>0.750000</td>\n",
|
705 |
-
" <td>0.333333</td>\n",
|
706 |
-
" <td>0.200000</td>\n",
|
707 |
-
" <td>0.100000</td>\n",
|
708 |
-
" <td>0.750000</td>\n",
|
709 |
-
" <td>1.000000</td>\n",
|
710 |
-
" <td>1.000000</td>\n",
|
711 |
-
" <td>1.000000</td>\n",
|
712 |
-
" <td>0.907732</td>\n",
|
713 |
-
" <td>0.875000</td>\n",
|
714 |
-
" <td>0.875000</td>\n",
|
715 |
-
" </tr>\n",
|
716 |
-
" <tr>\n",
|
717 |
-
" <td>8</td>\n",
|
718 |
-
" <td>No log</td>\n",
|
719 |
-
" <td>No log</td>\n",
|
720 |
-
" <td>0.750000</td>\n",
|
721 |
-
" <td>1.000000</td>\n",
|
722 |
-
" <td>1.000000</td>\n",
|
723 |
-
" <td>1.000000</td>\n",
|
724 |
-
" <td>0.750000</td>\n",
|
725 |
-
" <td>0.333333</td>\n",
|
726 |
-
" <td>0.200000</td>\n",
|
727 |
-
" <td>0.100000</td>\n",
|
728 |
-
" <td>0.750000</td>\n",
|
729 |
-
" <td>1.000000</td>\n",
|
730 |
-
" <td>1.000000</td>\n",
|
731 |
-
" <td>1.000000</td>\n",
|
732 |
-
" <td>0.907732</td>\n",
|
733 |
-
" <td>0.875000</td>\n",
|
734 |
-
" <td>0.875000</td>\n",
|
735 |
-
" </tr>\n",
|
736 |
-
" <tr>\n",
|
737 |
-
" <td>10</td>\n",
|
738 |
-
" <td>No log</td>\n",
|
739 |
-
" <td>No log</td>\n",
|
740 |
-
" <td>0.750000</td>\n",
|
741 |
-
" <td>1.000000</td>\n",
|
742 |
-
" <td>1.000000</td>\n",
|
743 |
-
" <td>1.000000</td>\n",
|
744 |
-
" <td>0.750000</td>\n",
|
745 |
-
" <td>0.333333</td>\n",
|
746 |
-
" <td>0.200000</td>\n",
|
747 |
-
" <td>0.100000</td>\n",
|
748 |
-
" <td>0.750000</td>\n",
|
749 |
-
" <td>1.000000</td>\n",
|
750 |
-
" <td>1.000000</td>\n",
|
751 |
-
" <td>1.000000</td>\n",
|
752 |
-
" <td>0.907732</td>\n",
|
753 |
-
" <td>0.875000</td>\n",
|
754 |
-
" <td>0.875000</td>\n",
|
755 |
-
" </tr>\n",
|
756 |
-
" </tbody>\n",
|
757 |
-
"</table><p>"
|
758 |
-
],
|
759 |
-
"text/plain": [
|
760 |
-
"<IPython.core.display.HTML object>"
|
761 |
-
]
|
762 |
-
},
|
763 |
-
"metadata": {},
|
764 |
-
"output_type": "display_data"
|
765 |
-
}
|
766 |
-
],
|
767 |
"source": [
|
768 |
"#commented out for now as want to run whole notebook but not retrain\n",
|
769 |
"# warmup_steps = int(len(loader) * EPOCHS * 0.1)\n",
|
@@ -781,7 +618,7 @@
|
|
781 |
},
|
782 |
{
|
783 |
"cell_type": "code",
|
784 |
-
"execution_count":
|
785 |
"metadata": {},
|
786 |
"outputs": [],
|
787 |
"source": [
|
@@ -791,7 +628,7 @@
|
|
791 |
},
|
792 |
{
|
793 |
"cell_type": "code",
|
794 |
-
"execution_count":
|
795 |
"metadata": {},
|
796 |
"outputs": [
|
797 |
{
|
@@ -816,7 +653,7 @@
|
|
816 |
},
|
817 |
{
|
818 |
"cell_type": "code",
|
819 |
-
"execution_count":
|
820 |
"metadata": {},
|
821 |
"outputs": [
|
822 |
{
|
@@ -826,20 +663,6 @@
|
|
826 |
"Some weights of BertModel were not initialized from the model checkpoint at drewgenai/midterm-compare-arctic-embed-m-ft and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']\n",
|
827 |
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
|
828 |
]
|
829 |
-
},
|
830 |
-
{
|
831 |
-
"ename": "IndexError",
|
832 |
-
"evalue": "list index out of range",
|
833 |
-
"output_type": "error",
|
834 |
-
"traceback": [
|
835 |
-
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
836 |
-
"\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)",
|
837 |
-
"Cell \u001b[0;32mIn[93], line 17\u001b[0m\n\u001b[1;32m 9\u001b[0m embedding_model \u001b[38;5;241m=\u001b[39m HuggingFaceEmbeddings(model_name\u001b[38;5;241m=\u001b[39mmodel_id)\n\u001b[1;32m 10\u001b[0m \u001b[38;5;66;03m# model_id = \"Snowflake/snowflake-arctic-embed-m\"\u001b[39;00m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;66;03m# embedding_model = HuggingFaceEmbeddings(model_name=model_id)\u001b[39;00m\n\u001b[1;32m 12\u001b[0m \u001b[38;5;66;03m# model_id = \"Snowflake/snowflake-arctic-embed-m-v2.0\"\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 15\u001b[0m \n\u001b[1;32m 16\u001b[0m \u001b[38;5;66;03m# Load documents into Qdrant\u001b[39;00m\n\u001b[0;32m---> 17\u001b[0m qdrant_vectorstore \u001b[38;5;241m=\u001b[39m \u001b[43mQdrant\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_documents\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 18\u001b[0m \u001b[43m \u001b[49m\u001b[43mdocuments_with_metadata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 19\u001b[0m \u001b[43m \u001b[49m\u001b[43membedding_model\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 20\u001b[0m \u001b[43m \u001b[49m\u001b[43mlocation\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m:memory:\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# In-memory for testing\u001b[39;49;00m\n\u001b[1;32m 21\u001b[0m \u001b[43m \u001b[49m\u001b[43mcollection_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdocument_comparison\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 22\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 24\u001b[0m \u001b[38;5;66;03m# Create a retriever\u001b[39;00m\n\u001b[1;32m 25\u001b[0m qdrant_retriever \u001b[38;5;241m=\u001b[39m qdrant_vectorstore\u001b[38;5;241m.\u001b[39mas_retriever()\n",
|
838 |
-
"File \u001b[0;32m~/Documents/huggingfacetesting/temptest/.venv/lib/python3.13/site-packages/langchain_core/vectorstores/base.py:852\u001b[0m, in \u001b[0;36mVectorStore.from_documents\u001b[0;34m(cls, documents, embedding, **kwargs)\u001b[0m\n\u001b[1;32m 849\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28many\u001b[39m(ids):\n\u001b[1;32m 850\u001b[0m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mids\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m ids\n\u001b[0;32m--> 852\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_texts\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtexts\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43membedding\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmetadatas\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmetadatas\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
|
839 |
-
"File \u001b[0;32m~/Documents/huggingfacetesting/temptest/.venv/lib/python3.13/site-packages/langchain_community/vectorstores/qdrant.py:1337\u001b[0m, in \u001b[0;36mQdrant.from_texts\u001b[0;34m(cls, texts, embedding, metadatas, ids, location, url, port, grpc_port, prefer_grpc, https, api_key, prefix, timeout, host, path, collection_name, distance_func, content_payload_key, metadata_payload_key, vector_name, batch_size, shard_number, replication_factor, write_consistency_factor, on_disk_payload, hnsw_config, optimizers_config, wal_config, quantization_config, init_from, on_disk, force_recreate, **kwargs)\u001b[0m\n\u001b[1;32m 1197\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 1198\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mfrom_texts\u001b[39m(\n\u001b[1;32m 1199\u001b[0m \u001b[38;5;28mcls\u001b[39m: Type[Qdrant],\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1232\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: Any,\n\u001b[1;32m 1233\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Qdrant:\n\u001b[1;32m 1234\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Construct Qdrant wrapper from a list of texts.\u001b[39;00m\n\u001b[1;32m 1235\u001b[0m \n\u001b[1;32m 1236\u001b[0m \u001b[38;5;124;03m Args:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1335\u001b[0m \u001b[38;5;124;03m qdrant = Qdrant.from_texts(texts, embeddings, \"localhost\")\u001b[39;00m\n\u001b[1;32m 1336\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m-> 1337\u001b[0m qdrant \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconstruct_instance\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1338\u001b[0m \u001b[43m \u001b[49m\u001b[43mtexts\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1339\u001b[0m \u001b[43m \u001b[49m\u001b[43membedding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1340\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mlocation\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1341\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1342\u001b[0m \u001b[43m \u001b[49m\u001b[43mport\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1343\u001b[0m \u001b[43m \u001b[49m\u001b[43mgrpc_port\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1344\u001b[0m \u001b[43m \u001b[49m\u001b[43mprefer_grpc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1345\u001b[0m \u001b[43m \u001b[49m\u001b[43mhttps\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1346\u001b[0m \u001b[43m \u001b[49m\u001b[43mapi_key\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1347\u001b[0m \u001b[43m \u001b[49m\u001b[43mprefix\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1348\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1349\u001b[0m \u001b[43m \u001b[49m\u001b[43mhost\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1350\u001b[0m \u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1351\u001b[0m \u001b[43m \u001b[49m\u001b[43mcollection_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1352\u001b[0m \u001b[43m \u001b[49m\u001b[43mdistance_func\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1353\u001b[0m \u001b[43m \u001b[49m\u001b[43mcontent_payload_key\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1354\u001b[0m \u001b[43m \u001b[49m\u001b[43mmetadata_payload_key\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1355\u001b[0m \u001b[43m \u001b[49m\u001b[43mvector_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1356\u001b[0m \u001b[43m \u001b[49m\u001b[43mshard_number\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1357\u001b[0m \u001b[43m \u001b[49m\u001b[43mreplication_factor\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1358\u001b[0m \u001b[43m \u001b[49m\u001b[43mwrite_consistency_factor\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1359\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mon_disk_payload\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1360\u001b[0m \u001b[43m \u001b[49m\u001b[43mhnsw_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1361\u001b[0m \u001b[43m \u001b[49m\u001b[43moptimizers_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1362\u001b[0m \u001b[43m \u001b[49m\u001b[43mwal_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1363\u001b[0m \u001b[43m \u001b[49m\u001b[43mquantization_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1364\u001b[0m \u001b[43m \u001b[49m\u001b[43minit_from\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1365\u001b[0m \u001b[43m \u001b[49m\u001b[43mon_disk\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1366\u001b[0m \u001b[43m \u001b[49m\u001b[43mforce_recreate\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1367\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1368\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1369\u001b[0m qdrant\u001b[38;5;241m.\u001b[39madd_texts(texts, metadatas, ids, batch_size)\n\u001b[1;32m 1370\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m qdrant\n",
|
840 |
-
"File \u001b[0;32m~/Documents/huggingfacetesting/temptest/.venv/lib/python3.13/site-packages/langchain_community/vectorstores/qdrant.py:1640\u001b[0m, in \u001b[0;36mQdrant.construct_instance\u001b[0;34m(cls, texts, embedding, location, url, port, grpc_port, prefer_grpc, https, api_key, prefix, timeout, host, path, collection_name, distance_func, content_payload_key, metadata_payload_key, vector_name, shard_number, replication_factor, write_consistency_factor, on_disk_payload, hnsw_config, optimizers_config, wal_config, quantization_config, init_from, on_disk, force_recreate, **kwargs)\u001b[0m\n\u001b[1;32m 1638\u001b[0m \u001b[38;5;66;03m# Just do a single quick embedding to get vector size\u001b[39;00m\n\u001b[1;32m 1639\u001b[0m partial_embeddings \u001b[38;5;241m=\u001b[39m embedding\u001b[38;5;241m.\u001b[39membed_documents(texts[:\u001b[38;5;241m1\u001b[39m])\n\u001b[0;32m-> 1640\u001b[0m vector_size \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlen\u001b[39m(\u001b[43mpartial_embeddings\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m)\n\u001b[1;32m 1641\u001b[0m collection_name \u001b[38;5;241m=\u001b[39m collection_name \u001b[38;5;129;01mor\u001b[39;00m uuid\u001b[38;5;241m.\u001b[39muuid4()\u001b[38;5;241m.\u001b[39mhex\n\u001b[1;32m 1642\u001b[0m distance_func \u001b[38;5;241m=\u001b[39m distance_func\u001b[38;5;241m.\u001b[39mupper()\n",
|
841 |
-
"\u001b[0;31mIndexError\u001b[0m: list index out of range"
|
842 |
-
]
|
843 |
}
|
844 |
],
|
845 |
"source": [
|
@@ -872,7 +695,7 @@
|
|
872 |
},
|
873 |
{
|
874 |
"cell_type": "code",
|
875 |
-
"execution_count":
|
876 |
"metadata": {},
|
877 |
"outputs": [],
|
878 |
"source": [
|
@@ -924,7 +747,7 @@
|
|
924 |
},
|
925 |
{
|
926 |
"cell_type": "code",
|
927 |
-
"execution_count":
|
928 |
"metadata": {},
|
929 |
"outputs": [],
|
930 |
"source": [
|
@@ -945,7 +768,7 @@
|
|
945 |
},
|
946 |
{
|
947 |
"cell_type": "code",
|
948 |
-
"execution_count":
|
949 |
"metadata": {},
|
950 |
"outputs": [
|
951 |
{
|
@@ -999,7 +822,7 @@
|
|
999 |
},
|
1000 |
{
|
1001 |
"cell_type": "code",
|
1002 |
-
"execution_count":
|
1003 |
"metadata": {},
|
1004 |
"outputs": [],
|
1005 |
"source": [
|
@@ -1008,7 +831,7 @@
|
|
1008 |
},
|
1009 |
{
|
1010 |
"cell_type": "code",
|
1011 |
-
"execution_count":
|
1012 |
"metadata": {},
|
1013 |
"outputs": [],
|
1014 |
"source": [
|
@@ -1031,7 +854,7 @@
|
|
1031 |
},
|
1032 |
{
|
1033 |
"cell_type": "code",
|
1034 |
-
"execution_count":
|
1035 |
"metadata": {},
|
1036 |
"outputs": [],
|
1037 |
"source": [
|
@@ -1042,7 +865,7 @@
|
|
1042 |
},
|
1043 |
{
|
1044 |
"cell_type": "code",
|
1045 |
-
"execution_count":
|
1046 |
"metadata": {},
|
1047 |
"outputs": [],
|
1048 |
"source": [
|
@@ -1065,7 +888,7 @@
|
|
1065 |
},
|
1066 |
{
|
1067 |
"cell_type": "code",
|
1068 |
-
"execution_count":
|
1069 |
"metadata": {},
|
1070 |
"outputs": [],
|
1071 |
"source": [
|
@@ -1084,7 +907,7 @@
|
|
1084 |
},
|
1085 |
{
|
1086 |
"cell_type": "code",
|
1087 |
-
"execution_count":
|
1088 |
"metadata": {},
|
1089 |
"outputs": [
|
1090 |
{
|
@@ -1092,11 +915,15 @@
|
|
1092 |
"output_type": "stream",
|
1093 |
"text": [
|
1094 |
"Some weights of BertModel were not initialized from the model checkpoint at drewgenai/midterm-compare-arctic-embed-m-ft and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']\n",
|
1095 |
-
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
|
|
|
|
|
1096 |
]
|
1097 |
}
|
1098 |
],
|
1099 |
"source": [
|
|
|
|
|
1100 |
"base_model_id = f\"Snowflake/snowflake-arctic-embed-m\" \n",
|
1101 |
"base_embedding_model = HuggingFaceEmbeddings(model_name=base_model_id)\n",
|
1102 |
"\n",
|
@@ -1109,7 +936,7 @@
|
|
1109 |
},
|
1110 |
{
|
1111 |
"cell_type": "code",
|
1112 |
-
"execution_count":
|
1113 |
"metadata": {},
|
1114 |
"outputs": [],
|
1115 |
"source": [
|
@@ -1152,7 +979,7 @@
|
|
1152 |
},
|
1153 |
{
|
1154 |
"cell_type": "code",
|
1155 |
-
"execution_count":
|
1156 |
"metadata": {},
|
1157 |
"outputs": [],
|
1158 |
"source": [
|
@@ -1172,7 +999,7 @@
|
|
1172 |
},
|
1173 |
{
|
1174 |
"cell_type": "code",
|
1175 |
-
"execution_count":
|
1176 |
"metadata": {},
|
1177 |
"outputs": [],
|
1178 |
"source": [
|
@@ -1187,7 +1014,7 @@
|
|
1187 |
},
|
1188 |
{
|
1189 |
"cell_type": "code",
|
1190 |
-
"execution_count":
|
1191 |
"metadata": {},
|
1192 |
"outputs": [],
|
1193 |
"source": [
|
@@ -1200,7 +1027,7 @@
|
|
1200 |
},
|
1201 |
{
|
1202 |
"cell_type": "code",
|
1203 |
-
"execution_count":
|
1204 |
"metadata": {},
|
1205 |
"outputs": [],
|
1206 |
"source": [
|
@@ -1224,7 +1051,7 @@
|
|
1224 |
},
|
1225 |
{
|
1226 |
"cell_type": "code",
|
1227 |
-
"execution_count":
|
1228 |
"metadata": {},
|
1229 |
"outputs": [],
|
1230 |
"source": [
|
@@ -1238,13 +1065,13 @@
|
|
1238 |
},
|
1239 |
{
|
1240 |
"cell_type": "code",
|
1241 |
-
"execution_count":
|
1242 |
"metadata": {},
|
1243 |
"outputs": [
|
1244 |
{
|
1245 |
"data": {
|
1246 |
"application/vnd.jupyter.widget-view+json": {
|
1247 |
-
"model_id": "
|
1248 |
"version_major": 2,
|
1249 |
"version_minor": 0
|
1250 |
},
|
@@ -1258,7 +1085,7 @@
|
|
1258 |
{
|
1259 |
"data": {
|
1260 |
"application/vnd.jupyter.widget-view+json": {
|
1261 |
-
"model_id": "
|
1262 |
"version_major": 2,
|
1263 |
"version_minor": 0
|
1264 |
},
|
@@ -1273,13 +1100,13 @@
|
|
1273 |
"name": "stderr",
|
1274 |
"output_type": "stream",
|
1275 |
"text": [
|
1276 |
-
"Node
|
1277 |
]
|
1278 |
},
|
1279 |
{
|
1280 |
"data": {
|
1281 |
"application/vnd.jupyter.widget-view+json": {
|
1282 |
-
"model_id": "
|
1283 |
"version_major": 2,
|
1284 |
"version_minor": 0
|
1285 |
},
|
@@ -1293,7 +1120,7 @@
|
|
1293 |
{
|
1294 |
"data": {
|
1295 |
"application/vnd.jupyter.widget-view+json": {
|
1296 |
-
"model_id": "
|
1297 |
"version_major": 2,
|
1298 |
"version_minor": 0
|
1299 |
},
|
@@ -1307,7 +1134,7 @@
|
|
1307 |
{
|
1308 |
"data": {
|
1309 |
"application/vnd.jupyter.widget-view+json": {
|
1310 |
-
"model_id": "
|
1311 |
"version_major": 2,
|
1312 |
"version_minor": 0
|
1313 |
},
|
@@ -1321,7 +1148,7 @@
|
|
1321 |
{
|
1322 |
"data": {
|
1323 |
"application/vnd.jupyter.widget-view+json": {
|
1324 |
-
"model_id": "
|
1325 |
"version_major": 2,
|
1326 |
"version_minor": 0
|
1327 |
},
|
@@ -1335,7 +1162,7 @@
|
|
1335 |
{
|
1336 |
"data": {
|
1337 |
"application/vnd.jupyter.widget-view+json": {
|
1338 |
-
"model_id": "
|
1339 |
"version_major": 2,
|
1340 |
"version_minor": 0
|
1341 |
},
|
@@ -1356,7 +1183,7 @@
|
|
1356 |
},
|
1357 |
{
|
1358 |
"cell_type": "code",
|
1359 |
-
"execution_count":
|
1360 |
"metadata": {},
|
1361 |
"outputs": [
|
1362 |
{
|
@@ -1389,72 +1216,72 @@
|
|
1389 |
" <tbody>\n",
|
1390 |
" <tr>\n",
|
1391 |
" <th>0</th>\n",
|
1392 |
-
" <td>
|
1393 |
" <td>[Linked Psychological & Physical Assessment\\nP...</td>\n",
|
1394 |
-
" <td>The
|
1395 |
" <td>single_hop_specifc_query_synthesizer</td>\n",
|
1396 |
" </tr>\n",
|
1397 |
" <tr>\n",
|
1398 |
" <th>1</th>\n",
|
1399 |
-
" <td>
|
1400 |
" <td>[Linked Psychological & Physical Assessment\\nP...</td>\n",
|
1401 |
-
" <td>The
|
1402 |
" <td>single_hop_specifc_query_synthesizer</td>\n",
|
1403 |
" </tr>\n",
|
1404 |
" <tr>\n",
|
1405 |
" <th>2</th>\n",
|
1406 |
-
" <td>
|
1407 |
" <td>[Financial Stress Index (FSI-6)\\nThe FSI-6 eva...</td>\n",
|
1408 |
-
" <td>The
|
1409 |
" <td>single_hop_specifc_query_synthesizer</td>\n",
|
1410 |
" </tr>\n",
|
1411 |
" <tr>\n",
|
1412 |
" <th>3</th>\n",
|
1413 |
-
" <td>
|
1414 |
" <td>[Financial Stress Index (FSI-6)\\nThe FSI-6 eva...</td>\n",
|
1415 |
-
" <td>The
|
1416 |
" <td>single_hop_specifc_query_synthesizer</td>\n",
|
1417 |
" </tr>\n",
|
1418 |
" <tr>\n",
|
1419 |
" <th>4</th>\n",
|
1420 |
-
" <td>What does the MRI-6
|
1421 |
" <td>[The ERI-9 assesses an individual's ability to...</td>\n",
|
1422 |
" <td>The MRI-6 evaluates short-term and long-term m...</td>\n",
|
1423 |
" <td>single_hop_specifc_query_synthesizer</td>\n",
|
1424 |
" </tr>\n",
|
1425 |
" <tr>\n",
|
1426 |
" <th>5</th>\n",
|
1427 |
-
" <td>
|
1428 |
" <td>[The ERI-9 assesses an individual's ability to...</td>\n",
|
1429 |
-
" <td>The SCM-6
|
1430 |
" <td>single_hop_specifc_query_synthesizer</td>\n",
|
1431 |
" </tr>\n",
|
1432 |
" <tr>\n",
|
1433 |
" <th>6</th>\n",
|
1434 |
-
" <td>What
|
1435 |
" <td>[Linked Psychological & Physical Assessment\\nC...</td>\n",
|
1436 |
-
" <td>The
|
1437 |
" <td>single_hop_specifc_query_synthesizer</td>\n",
|
1438 |
" </tr>\n",
|
1439 |
" <tr>\n",
|
1440 |
" <th>7</th>\n",
|
1441 |
-
" <td>
|
1442 |
" <td>[Linked Psychological & Physical Assessment\\nC...</td>\n",
|
1443 |
-
" <td>The
|
1444 |
" <td>single_hop_specifc_query_synthesizer</td>\n",
|
1445 |
" </tr>\n",
|
1446 |
" <tr>\n",
|
1447 |
" <th>8</th>\n",
|
1448 |
-
" <td>What
|
1449 |
" <td>[I feel confident when making important decisi...</td>\n",
|
1450 |
" <td>The CWT-7 evaluates an individual's ability to...</td>\n",
|
1451 |
" <td>single_hop_specifc_query_synthesizer</td>\n",
|
1452 |
" </tr>\n",
|
1453 |
" <tr>\n",
|
1454 |
" <th>9</th>\n",
|
1455 |
-
" <td>
|
1456 |
" <td>[I feel confident when making important decisi...</td>\n",
|
1457 |
-
" <td>The
|
1458 |
" <td>single_hop_specifc_query_synthesizer</td>\n",
|
1459 |
" </tr>\n",
|
1460 |
" </tbody>\n",
|
@@ -1463,16 +1290,16 @@
|
|
1463 |
],
|
1464 |
"text/plain": [
|
1465 |
" user_input \\\n",
|
1466 |
-
"0
|
1467 |
-
"1
|
1468 |
-
"2
|
1469 |
-
"3
|
1470 |
-
"4
|
1471 |
-
"5
|
1472 |
-
"6
|
1473 |
-
"7
|
1474 |
-
"8
|
1475 |
-
"9
|
1476 |
"\n",
|
1477 |
" reference_contexts \\\n",
|
1478 |
"0 [Linked Psychological & Physical Assessment\\nP... \n",
|
@@ -1487,16 +1314,16 @@
|
|
1487 |
"9 [I feel confident when making important decisi... \n",
|
1488 |
"\n",
|
1489 |
" reference \\\n",
|
1490 |
-
"0 The
|
1491 |
-
"1 The
|
1492 |
-
"2 The
|
1493 |
-
"3 The
|
1494 |
"4 The MRI-6 evaluates short-term and long-term m... \n",
|
1495 |
-
"5 The SCM-6
|
1496 |
-
"6 The
|
1497 |
-
"7 The
|
1498 |
"8 The CWT-7 evaluates an individual's ability to... \n",
|
1499 |
-
"9 The
|
1500 |
"\n",
|
1501 |
" synthesizer_name \n",
|
1502 |
"0 single_hop_specifc_query_synthesizer \n",
|
@@ -1511,7 +1338,7 @@
|
|
1511 |
"9 single_hop_specifc_query_synthesizer "
|
1512 |
]
|
1513 |
},
|
1514 |
-
"execution_count":
|
1515 |
"metadata": {},
|
1516 |
"output_type": "execute_result"
|
1517 |
}
|
@@ -1529,7 +1356,7 @@
|
|
1529 |
},
|
1530 |
{
|
1531 |
"cell_type": "code",
|
1532 |
-
"execution_count":
|
1533 |
"metadata": {},
|
1534 |
"outputs": [],
|
1535 |
"source": [
|
@@ -1541,7 +1368,7 @@
|
|
1541 |
},
|
1542 |
{
|
1543 |
"cell_type": "code",
|
1544 |
-
"execution_count":
|
1545 |
"metadata": {},
|
1546 |
"outputs": [],
|
1547 |
"source": [
|
@@ -1552,7 +1379,7 @@
|
|
1552 |
},
|
1553 |
{
|
1554 |
"cell_type": "code",
|
1555 |
-
"execution_count":
|
1556 |
"metadata": {},
|
1557 |
"outputs": [],
|
1558 |
"source": [
|
@@ -1563,13 +1390,13 @@
|
|
1563 |
},
|
1564 |
{
|
1565 |
"cell_type": "code",
|
1566 |
-
"execution_count":
|
1567 |
"metadata": {},
|
1568 |
"outputs": [
|
1569 |
{
|
1570 |
"data": {
|
1571 |
"application/vnd.jupyter.widget-view+json": {
|
1572 |
-
"model_id": "
|
1573 |
"version_major": 2,
|
1574 |
"version_minor": 0
|
1575 |
},
|
@@ -1584,32 +1411,36 @@
|
|
1584 |
"name": "stderr",
|
1585 |
"output_type": "stream",
|
1586 |
"text": [
|
1587 |
-
"Exception raised in Job[
|
1588 |
-
"Exception raised in Job[
|
1589 |
-
"Exception raised in Job[
|
1590 |
-
"Exception raised in Job[
|
1591 |
-
"Exception raised in Job[
|
1592 |
-
"Exception raised in Job[1]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used
|
1593 |
-
"Exception raised in Job[
|
1594 |
-
"Exception raised in Job[
|
1595 |
-
"Exception raised in Job[
|
1596 |
-
"Exception raised in Job[
|
1597 |
-
"Exception raised in Job[
|
1598 |
-
"Exception raised in Job[
|
1599 |
-
"Exception raised in Job[
|
|
|
1600 |
"Exception raised in Job[17]: TimeoutError()\n",
|
1601 |
-
"Exception raised in Job[
|
1602 |
-
"Exception raised in Job[37]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used
|
1603 |
-
"Exception raised in Job[
|
|
|
|
|
|
|
1604 |
]
|
1605 |
},
|
1606 |
{
|
1607 |
"data": {
|
1608 |
"text/plain": [
|
1609 |
-
"{'context_recall': 1.0000, 'faithfulness':
|
1610 |
]
|
1611 |
},
|
1612 |
-
"execution_count":
|
1613 |
"metadata": {},
|
1614 |
"output_type": "execute_result"
|
1615 |
}
|
@@ -1638,7 +1469,7 @@
|
|
1638 |
},
|
1639 |
{
|
1640 |
"cell_type": "code",
|
1641 |
-
"execution_count":
|
1642 |
"metadata": {},
|
1643 |
"outputs": [],
|
1644 |
"source": [
|
@@ -1650,7 +1481,7 @@
|
|
1650 |
},
|
1651 |
{
|
1652 |
"cell_type": "code",
|
1653 |
-
"execution_count":
|
1654 |
"metadata": {},
|
1655 |
"outputs": [],
|
1656 |
"source": [
|
@@ -1659,13 +1490,13 @@
|
|
1659 |
},
|
1660 |
{
|
1661 |
"cell_type": "code",
|
1662 |
-
"execution_count":
|
1663 |
"metadata": {},
|
1664 |
"outputs": [
|
1665 |
{
|
1666 |
"data": {
|
1667 |
"application/vnd.jupyter.widget-view+json": {
|
1668 |
-
"model_id": "
|
1669 |
"version_major": 2,
|
1670 |
"version_minor": 0
|
1671 |
},
|
@@ -1680,34 +1511,36 @@
|
|
1680 |
"name": "stderr",
|
1681 |
"output_type": "stream",
|
1682 |
"text": [
|
1683 |
-
"Exception raised in Job[22]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used
|
1684 |
-
"Exception raised in Job[
|
1685 |
-
"Exception raised in Job[19]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used
|
1686 |
-
"Exception raised in Job[
|
1687 |
-
"Exception raised in Job[
|
1688 |
-
"Exception raised in Job[
|
1689 |
-
"Exception raised in Job[11]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used
|
1690 |
-
"Exception raised in Job[
|
1691 |
-
"Exception raised in Job[
|
1692 |
-
"Exception raised in Job[
|
1693 |
-
"Exception raised in Job[
|
1694 |
-
"Exception raised in Job[
|
1695 |
-
"Exception raised in Job[
|
1696 |
-
"Exception raised in Job[36]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used
|
1697 |
"Exception raised in Job[17]: TimeoutError()\n",
|
1698 |
-
"Exception raised in Job[
|
1699 |
-
"Exception raised in Job[
|
1700 |
-
"Exception raised in Job[
|
1701 |
-
"Exception raised in Job[
|
|
|
|
|
1702 |
]
|
1703 |
},
|
1704 |
{
|
1705 |
"data": {
|
1706 |
"text/plain": [
|
1707 |
-
"{'context_recall': 1.0000, 'faithfulness': 0.
|
1708 |
]
|
1709 |
},
|
1710 |
-
"execution_count":
|
1711 |
"metadata": {},
|
1712 |
"output_type": "execute_result"
|
1713 |
}
|
@@ -1731,7 +1564,7 @@
|
|
1731 |
},
|
1732 |
{
|
1733 |
"cell_type": "code",
|
1734 |
-
"execution_count":
|
1735 |
"metadata": {},
|
1736 |
"outputs": [],
|
1737 |
"source": [
|
@@ -1743,7 +1576,7 @@
|
|
1743 |
},
|
1744 |
{
|
1745 |
"cell_type": "code",
|
1746 |
-
"execution_count":
|
1747 |
"metadata": {},
|
1748 |
"outputs": [],
|
1749 |
"source": [
|
@@ -1752,13 +1585,13 @@
|
|
1752 |
},
|
1753 |
{
|
1754 |
"cell_type": "code",
|
1755 |
-
"execution_count":
|
1756 |
"metadata": {},
|
1757 |
"outputs": [
|
1758 |
{
|
1759 |
"data": {
|
1760 |
"application/vnd.jupyter.widget-view+json": {
|
1761 |
-
"model_id": "
|
1762 |
"version_major": 2,
|
1763 |
"version_minor": 0
|
1764 |
},
|
@@ -1773,29 +1606,37 @@
|
|
1773 |
"name": "stderr",
|
1774 |
"output_type": "stream",
|
1775 |
"text": [
|
1776 |
-
"Exception raised in Job[
|
1777 |
-
"Exception raised in Job[
|
1778 |
-
"Exception raised in Job[1]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used
|
1779 |
-
"Exception raised in Job[24]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used
|
1780 |
-
"Exception raised in Job[13]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used
|
1781 |
-
"Exception raised in Job[22]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used
|
1782 |
-
"Exception raised in Job[
|
1783 |
-
"Exception raised in Job[
|
1784 |
-
"Exception raised in Job[
|
1785 |
-
"Exception raised in Job[
|
1786 |
-
"Exception raised in Job[
|
1787 |
-
"Exception raised in Job[
|
1788 |
-
"Exception raised in Job[
|
1789 |
-
"Exception raised in Job[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1790 |
]
|
1791 |
},
|
1792 |
{
|
1793 |
"data": {
|
1794 |
"text/plain": [
|
1795 |
-
"{'context_recall': 1.0000, 'faithfulness':
|
1796 |
]
|
1797 |
},
|
1798 |
-
"execution_count":
|
1799 |
"metadata": {},
|
1800 |
"output_type": "execute_result"
|
1801 |
}
|
|
|
36 |
},
|
37 |
{
|
38 |
"cell_type": "code",
|
39 |
+
"execution_count": 2,
|
40 |
"metadata": {},
|
41 |
"outputs": [],
|
42 |
"source": [
|
|
|
49 |
},
|
50 |
{
|
51 |
"cell_type": "code",
|
52 |
+
"execution_count": 3,
|
53 |
"metadata": {},
|
54 |
"outputs": [],
|
55 |
"source": [
|
|
|
58 |
},
|
59 |
{
|
60 |
"cell_type": "code",
|
61 |
+
"execution_count": 4,
|
62 |
"metadata": {},
|
63 |
"outputs": [
|
64 |
{
|
65 |
"data": {
|
66 |
"application/vnd.jupyter.widget-view+json": {
|
67 |
+
"model_id": "7171fa2fd73446349406e23d4f6b898f",
|
68 |
"version_major": 2,
|
69 |
"version_minor": 0
|
70 |
},
|
|
|
74 |
},
|
75 |
"metadata": {},
|
76 |
"output_type": "display_data"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
}
|
78 |
],
|
79 |
"source": [
|
|
|
83 |
},
|
84 |
{
|
85 |
"cell_type": "code",
|
86 |
+
"execution_count": 5,
|
87 |
"metadata": {},
|
88 |
"outputs": [
|
89 |
{
|
|
|
101 |
},
|
102 |
{
|
103 |
"cell_type": "code",
|
104 |
+
"execution_count": 6,
|
105 |
"metadata": {},
|
106 |
"outputs": [
|
107 |
{
|
|
|
120 |
},
|
121 |
{
|
122 |
"cell_type": "code",
|
123 |
+
"execution_count": 7,
|
124 |
"metadata": {},
|
125 |
"outputs": [],
|
126 |
"source": [
|
|
|
161 |
},
|
162 |
{
|
163 |
"cell_type": "code",
|
164 |
+
"execution_count": 8,
|
165 |
"metadata": {},
|
166 |
+
"outputs": [
|
167 |
+
{
|
168 |
+
"name": "stderr",
|
169 |
+
"output_type": "stream",
|
170 |
+
"text": [
|
171 |
+
"/tmp/ipykernel_5461/2495904805.py:7: LangChainDeprecationWarning: The class `HuggingFaceEmbeddings` was deprecated in LangChain 0.2.2 and will be removed in 1.0. An updated version of the class exists in the :class:`~langchain-huggingface package and should be used instead. To use it run `pip install -U :class:`~langchain-huggingface` and import as `from :class:`~langchain_huggingface import HuggingFaceEmbeddings``.\n",
|
172 |
+
" embedding_model = HuggingFaceEmbeddings(model_name=model_id)\n"
|
173 |
+
]
|
174 |
+
}
|
175 |
+
],
|
176 |
"source": [
|
177 |
"\n",
|
178 |
"\n",
|
|
|
193 |
},
|
194 |
{
|
195 |
"cell_type": "code",
|
196 |
+
"execution_count": 9,
|
197 |
"metadata": {},
|
198 |
"outputs": [],
|
199 |
"source": [
|
|
|
227 |
},
|
228 |
{
|
229 |
"cell_type": "code",
|
230 |
+
"execution_count": 10,
|
231 |
"metadata": {},
|
232 |
"outputs": [],
|
233 |
"source": [
|
|
|
257 |
},
|
258 |
{
|
259 |
"cell_type": "code",
|
260 |
+
"execution_count": 11,
|
261 |
"metadata": {},
|
262 |
"outputs": [],
|
263 |
"source": [
|
|
|
275 |
},
|
276 |
{
|
277 |
"cell_type": "code",
|
278 |
+
"execution_count": 12,
|
279 |
"metadata": {},
|
280 |
"outputs": [
|
281 |
{
|
|
|
314 |
},
|
315 |
{
|
316 |
"cell_type": "code",
|
317 |
+
"execution_count": 13,
|
318 |
"metadata": {},
|
319 |
"outputs": [],
|
320 |
"source": [
|
|
|
328 |
},
|
329 |
{
|
330 |
"cell_type": "code",
|
331 |
+
"execution_count": 14,
|
332 |
"metadata": {},
|
333 |
"outputs": [],
|
334 |
"source": [
|
|
|
352 |
},
|
353 |
{
|
354 |
"cell_type": "code",
|
355 |
+
"execution_count": 15,
|
356 |
"metadata": {},
|
357 |
"outputs": [],
|
358 |
"source": [
|
|
|
361 |
},
|
362 |
{
|
363 |
"cell_type": "code",
|
364 |
+
"execution_count": 16,
|
365 |
"metadata": {},
|
366 |
"outputs": [],
|
367 |
"source": [
|
|
|
398 |
},
|
399 |
{
|
400 |
"cell_type": "code",
|
401 |
+
"execution_count": 17,
|
402 |
"metadata": {},
|
403 |
"outputs": [
|
404 |
{
|
405 |
"name": "stderr",
|
406 |
"output_type": "stream",
|
407 |
"text": [
|
408 |
+
"Processing documents: 100%|██████████| 9/9 [00:01<00:00, 7.51it/s]\n",
|
409 |
+
"Processing documents: 100%|██████████| 2/2 [00:00<00:00, 2.35it/s]\n",
|
410 |
+
"Processing documents: 100%|██████████| 3/3 [00:01<00:00, 2.61it/s]\n"
|
411 |
]
|
412 |
}
|
413 |
],
|
|
|
419 |
},
|
420 |
{
|
421 |
"cell_type": "code",
|
422 |
+
"execution_count": 18,
|
423 |
"metadata": {},
|
424 |
"outputs": [],
|
425 |
"source": [
|
|
|
463 |
},
|
464 |
{
|
465 |
"cell_type": "code",
|
466 |
+
"execution_count": 19,
|
467 |
"metadata": {},
|
468 |
"outputs": [],
|
469 |
"source": [
|
|
|
472 |
},
|
473 |
{
|
474 |
"cell_type": "code",
|
475 |
+
"execution_count": 20,
|
476 |
"metadata": {},
|
477 |
"outputs": [],
|
478 |
"source": [
|
|
|
484 |
},
|
485 |
{
|
486 |
"cell_type": "code",
|
487 |
+
"execution_count": 21,
|
488 |
"metadata": {},
|
489 |
"outputs": [],
|
490 |
"source": [
|
|
|
495 |
},
|
496 |
{
|
497 |
"cell_type": "code",
|
498 |
+
"execution_count": 22,
|
499 |
"metadata": {},
|
500 |
"outputs": [],
|
501 |
"source": [
|
|
|
504 |
},
|
505 |
{
|
506 |
"cell_type": "code",
|
507 |
+
"execution_count": 23,
|
508 |
"metadata": {},
|
509 |
"outputs": [],
|
510 |
"source": [
|
|
|
522 |
},
|
523 |
{
|
524 |
"cell_type": "code",
|
525 |
+
"execution_count": 24,
|
526 |
"metadata": {},
|
527 |
"outputs": [],
|
528 |
"source": [
|
|
|
533 |
},
|
534 |
{
|
535 |
"cell_type": "code",
|
536 |
+
"execution_count": 25,
|
537 |
"metadata": {},
|
538 |
"outputs": [],
|
539 |
"source": [
|
|
|
548 |
},
|
549 |
{
|
550 |
"cell_type": "code",
|
551 |
+
"execution_count": 26,
|
552 |
"metadata": {},
|
553 |
"outputs": [],
|
554 |
"source": [
|
|
|
563 |
},
|
564 |
{
|
565 |
"cell_type": "code",
|
566 |
+
"execution_count": 27,
|
567 |
"metadata": {},
|
568 |
"outputs": [],
|
569 |
"source": [
|
|
|
572 |
},
|
573 |
{
|
574 |
"cell_type": "code",
|
575 |
+
"execution_count": 28,
|
576 |
"metadata": {},
|
577 |
"outputs": [
|
578 |
{
|
579 |
"data": {
|
580 |
"text/html": [
|
581 |
+
"<button onClick=\"this.nextSibling.style.display='block';this.style.display='none';\">Display W&B run</button><iframe src='https://wandb.ai/dummy/dummy/runs/12mf5zrt?jupyter=true' style='border:none;width:100%;height:420px;display:none;'></iframe>"
|
582 |
],
|
583 |
"text/plain": [
|
584 |
+
"<wandb.sdk.wandb_run.Run at 0x789c88185940>"
|
585 |
]
|
586 |
},
|
587 |
+
"execution_count": 28,
|
588 |
"metadata": {},
|
589 |
"output_type": "execute_result"
|
590 |
}
|
|
|
598 |
},
|
599 |
{
|
600 |
"cell_type": "code",
|
601 |
+
"execution_count": 29,
|
602 |
"metadata": {},
|
603 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
604 |
"source": [
|
605 |
"#commented out for now as want to run whole notebook but not retrain\n",
|
606 |
"# warmup_steps = int(len(loader) * EPOCHS * 0.1)\n",
|
|
|
618 |
},
|
619 |
{
|
620 |
"cell_type": "code",
|
621 |
+
"execution_count": 30,
|
622 |
"metadata": {},
|
623 |
"outputs": [],
|
624 |
"source": [
|
|
|
628 |
},
|
629 |
{
|
630 |
"cell_type": "code",
|
631 |
+
"execution_count": 31,
|
632 |
"metadata": {},
|
633 |
"outputs": [
|
634 |
{
|
|
|
653 |
},
|
654 |
{
|
655 |
"cell_type": "code",
|
656 |
+
"execution_count": 32,
|
657 |
"metadata": {},
|
658 |
"outputs": [
|
659 |
{
|
|
|
663 |
"Some weights of BertModel were not initialized from the model checkpoint at drewgenai/midterm-compare-arctic-embed-m-ft and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']\n",
|
664 |
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
|
665 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
666 |
}
|
667 |
],
|
668 |
"source": [
|
|
|
695 |
},
|
696 |
{
|
697 |
"cell_type": "code",
|
698 |
+
"execution_count": 33,
|
699 |
"metadata": {},
|
700 |
"outputs": [],
|
701 |
"source": [
|
|
|
747 |
},
|
748 |
{
|
749 |
"cell_type": "code",
|
750 |
+
"execution_count": 34,
|
751 |
"metadata": {},
|
752 |
"outputs": [],
|
753 |
"source": [
|
|
|
768 |
},
|
769 |
{
|
770 |
"cell_type": "code",
|
771 |
+
"execution_count": 35,
|
772 |
"metadata": {},
|
773 |
"outputs": [
|
774 |
{
|
|
|
822 |
},
|
823 |
{
|
824 |
"cell_type": "code",
|
825 |
+
"execution_count": 36,
|
826 |
"metadata": {},
|
827 |
"outputs": [],
|
828 |
"source": [
|
|
|
831 |
},
|
832 |
{
|
833 |
"cell_type": "code",
|
834 |
+
"execution_count": 37,
|
835 |
"metadata": {},
|
836 |
"outputs": [],
|
837 |
"source": [
|
|
|
854 |
},
|
855 |
{
|
856 |
"cell_type": "code",
|
857 |
+
"execution_count": 38,
|
858 |
"metadata": {},
|
859 |
"outputs": [],
|
860 |
"source": [
|
|
|
865 |
},
|
866 |
{
|
867 |
"cell_type": "code",
|
868 |
+
"execution_count": 39,
|
869 |
"metadata": {},
|
870 |
"outputs": [],
|
871 |
"source": [
|
|
|
888 |
},
|
889 |
{
|
890 |
"cell_type": "code",
|
891 |
+
"execution_count": 40,
|
892 |
"metadata": {},
|
893 |
"outputs": [],
|
894 |
"source": [
|
|
|
907 |
},
|
908 |
{
|
909 |
"cell_type": "code",
|
910 |
+
"execution_count": 42,
|
911 |
"metadata": {},
|
912 |
"outputs": [
|
913 |
{
|
|
|
915 |
"output_type": "stream",
|
916 |
"text": [
|
917 |
"Some weights of BertModel were not initialized from the model checkpoint at drewgenai/midterm-compare-arctic-embed-m-ft and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']\n",
|
918 |
+
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
|
919 |
+
"/tmp/ipykernel_5461/1883233562.py:10: LangChainDeprecationWarning: The class `OpenAIEmbeddings` was deprecated in LangChain 0.0.9 and will be removed in 1.0. An updated version of the class exists in the :class:`~langchain-openai package and should be used instead. To use it run `pip install -U :class:`~langchain-openai` and import as `from :class:`~langchain_openai import OpenAIEmbeddings``.\n",
|
920 |
+
" openai_embedding_model = OpenAIEmbeddings(model=openai_model_id)\n"
|
921 |
]
|
922 |
}
|
923 |
],
|
924 |
"source": [
|
925 |
+
"from langchain.embeddings import OpenAIEmbeddings\n",
|
926 |
+
"\n",
|
927 |
"base_model_id = f\"Snowflake/snowflake-arctic-embed-m\" \n",
|
928 |
"base_embedding_model = HuggingFaceEmbeddings(model_name=base_model_id)\n",
|
929 |
"\n",
|
|
|
936 |
},
|
937 |
{
|
938 |
"cell_type": "code",
|
939 |
+
"execution_count": 43,
|
940 |
"metadata": {},
|
941 |
"outputs": [],
|
942 |
"source": [
|
|
|
979 |
},
|
980 |
{
|
981 |
"cell_type": "code",
|
982 |
+
"execution_count": 44,
|
983 |
"metadata": {},
|
984 |
"outputs": [],
|
985 |
"source": [
|
|
|
999 |
},
|
1000 |
{
|
1001 |
"cell_type": "code",
|
1002 |
+
"execution_count": 45,
|
1003 |
"metadata": {},
|
1004 |
"outputs": [],
|
1005 |
"source": [
|
|
|
1014 |
},
|
1015 |
{
|
1016 |
"cell_type": "code",
|
1017 |
+
"execution_count": 46,
|
1018 |
"metadata": {},
|
1019 |
"outputs": [],
|
1020 |
"source": [
|
|
|
1027 |
},
|
1028 |
{
|
1029 |
"cell_type": "code",
|
1030 |
+
"execution_count": 47,
|
1031 |
"metadata": {},
|
1032 |
"outputs": [],
|
1033 |
"source": [
|
|
|
1051 |
},
|
1052 |
{
|
1053 |
"cell_type": "code",
|
1054 |
+
"execution_count": 48,
|
1055 |
"metadata": {},
|
1056 |
"outputs": [],
|
1057 |
"source": [
|
|
|
1065 |
},
|
1066 |
{
|
1067 |
"cell_type": "code",
|
1068 |
+
"execution_count": 49,
|
1069 |
"metadata": {},
|
1070 |
"outputs": [
|
1071 |
{
|
1072 |
"data": {
|
1073 |
"application/vnd.jupyter.widget-view+json": {
|
1074 |
+
"model_id": "4fe18d41fdd74b6fae35ef5380352540",
|
1075 |
"version_major": 2,
|
1076 |
"version_minor": 0
|
1077 |
},
|
|
|
1085 |
{
|
1086 |
"data": {
|
1087 |
"application/vnd.jupyter.widget-view+json": {
|
1088 |
+
"model_id": "23daac967fb44b10becffb30bce2dab4",
|
1089 |
"version_major": 2,
|
1090 |
"version_minor": 0
|
1091 |
},
|
|
|
1100 |
"name": "stderr",
|
1101 |
"output_type": "stream",
|
1102 |
"text": [
|
1103 |
+
"Node 12b49e26-ba22-461f-bf51-3356cf7491b7 does not have a summary. Skipping filtering.\n"
|
1104 |
]
|
1105 |
},
|
1106 |
{
|
1107 |
"data": {
|
1108 |
"application/vnd.jupyter.widget-view+json": {
|
1109 |
+
"model_id": "c4e7298ad4f64930a69aba61314833c7",
|
1110 |
"version_major": 2,
|
1111 |
"version_minor": 0
|
1112 |
},
|
|
|
1120 |
{
|
1121 |
"data": {
|
1122 |
"application/vnd.jupyter.widget-view+json": {
|
1123 |
+
"model_id": "fd5d9de9850e40b9b349fb288bb2cc74",
|
1124 |
"version_major": 2,
|
1125 |
"version_minor": 0
|
1126 |
},
|
|
|
1134 |
{
|
1135 |
"data": {
|
1136 |
"application/vnd.jupyter.widget-view+json": {
|
1137 |
+
"model_id": "1b59d0e922fd4de0a5cf190df7d9d82d",
|
1138 |
"version_major": 2,
|
1139 |
"version_minor": 0
|
1140 |
},
|
|
|
1148 |
{
|
1149 |
"data": {
|
1150 |
"application/vnd.jupyter.widget-view+json": {
|
1151 |
+
"model_id": "8ea429520fc846b9a16dc5e95cad0d5c",
|
1152 |
"version_major": 2,
|
1153 |
"version_minor": 0
|
1154 |
},
|
|
|
1162 |
{
|
1163 |
"data": {
|
1164 |
"application/vnd.jupyter.widget-view+json": {
|
1165 |
+
"model_id": "2a363f1dfdbd48d58a0955176d1ca934",
|
1166 |
"version_major": 2,
|
1167 |
"version_minor": 0
|
1168 |
},
|
|
|
1183 |
},
|
1184 |
{
|
1185 |
"cell_type": "code",
|
1186 |
+
"execution_count": 50,
|
1187 |
"metadata": {},
|
1188 |
"outputs": [
|
1189 |
{
|
|
|
1216 |
" <tbody>\n",
|
1217 |
" <tr>\n",
|
1218 |
" <th>0</th>\n",
|
1219 |
+
" <td>What does the Decision-Making Confidence Scale...</td>\n",
|
1220 |
" <td>[Linked Psychological & Physical Assessment\\nP...</td>\n",
|
1221 |
+
" <td>The Decision-Making Confidence Scale (DMCS-6) ...</td>\n",
|
1222 |
" <td>single_hop_specifc_query_synthesizer</td>\n",
|
1223 |
" </tr>\n",
|
1224 |
" <tr>\n",
|
1225 |
" <th>1</th>\n",
|
1226 |
+
" <td>Wht is the Work-Related Stress Scale and how d...</td>\n",
|
1227 |
" <td>[Linked Psychological & Physical Assessment\\nP...</td>\n",
|
1228 |
+
" <td>The Work-Related Stress Scale (WRSS-8) evaluat...</td>\n",
|
1229 |
" <td>single_hop_specifc_query_synthesizer</td>\n",
|
1230 |
" </tr>\n",
|
1231 |
" <tr>\n",
|
1232 |
" <th>2</th>\n",
|
1233 |
+
" <td>what cognitive load management scale do, how i...</td>\n",
|
1234 |
" <td>[Financial Stress Index (FSI-6)\\nThe FSI-6 eva...</td>\n",
|
1235 |
+
" <td>The Cognitive Load Management Scale (CLMS-7) m...</td>\n",
|
1236 |
" <td>single_hop_specifc_query_synthesizer</td>\n",
|
1237 |
" </tr>\n",
|
1238 |
" <tr>\n",
|
1239 |
" <th>3</th>\n",
|
1240 |
+
" <td>What is the purpose of the Emotional Regulatio...</td>\n",
|
1241 |
" <td>[Financial Stress Index (FSI-6)\\nThe FSI-6 eva...</td>\n",
|
1242 |
+
" <td>The context does not provide specific informat...</td>\n",
|
1243 |
" <td>single_hop_specifc_query_synthesizer</td>\n",
|
1244 |
" </tr>\n",
|
1245 |
" <tr>\n",
|
1246 |
" <th>4</th>\n",
|
1247 |
+
" <td>What does the MRI-6 assess?</td>\n",
|
1248 |
" <td>[The ERI-9 assesses an individual's ability to...</td>\n",
|
1249 |
" <td>The MRI-6 evaluates short-term and long-term m...</td>\n",
|
1250 |
" <td>single_hop_specifc_query_synthesizer</td>\n",
|
1251 |
" </tr>\n",
|
1252 |
" <tr>\n",
|
1253 |
" <th>5</th>\n",
|
1254 |
+
" <td>What does the Social Confidence Measure (SCM-6...</td>\n",
|
1255 |
" <td>[The ERI-9 assesses an individual's ability to...</td>\n",
|
1256 |
+
" <td>The Social Confidence Measure (SCM-6) evaluate...</td>\n",
|
1257 |
" <td>single_hop_specifc_query_synthesizer</td>\n",
|
1258 |
" </tr>\n",
|
1259 |
" <tr>\n",
|
1260 |
" <th>6</th>\n",
|
1261 |
+
" <td>What OFI-7 do?</td>\n",
|
1262 |
" <td>[Linked Psychological & Physical Assessment\\nC...</td>\n",
|
1263 |
+
" <td>The OFI-7 assesses work-related exhaustion and...</td>\n",
|
1264 |
" <td>single_hop_specifc_query_synthesizer</td>\n",
|
1265 |
" </tr>\n",
|
1266 |
" <tr>\n",
|
1267 |
" <th>7</th>\n",
|
1268 |
+
" <td>Cud yu pleese explane wut the Chronic Pain Adj...</td>\n",
|
1269 |
" <td>[Linked Psychological & Physical Assessment\\nC...</td>\n",
|
1270 |
+
" <td>The Chronic Pain Adjustment Index (CPAI-10) ev...</td>\n",
|
1271 |
" <td>single_hop_specifc_query_synthesizer</td>\n",
|
1272 |
" </tr>\n",
|
1273 |
" <tr>\n",
|
1274 |
" <th>8</th>\n",
|
1275 |
+
" <td>What CWT-7 do?</td>\n",
|
1276 |
" <td>[I feel confident when making important decisi...</td>\n",
|
1277 |
" <td>The CWT-7 evaluates an individual's ability to...</td>\n",
|
1278 |
" <td>single_hop_specifc_query_synthesizer</td>\n",
|
1279 |
" </tr>\n",
|
1280 |
" <tr>\n",
|
1281 |
" <th>9</th>\n",
|
1282 |
+
" <td>Cud yu pleese explane how the COGNITIVE Worklo...</td>\n",
|
1283 |
" <td>[I feel confident when making important decisi...</td>\n",
|
1284 |
+
" <td>The Cognitive Workload Tolerance (CWT-7) evalu...</td>\n",
|
1285 |
" <td>single_hop_specifc_query_synthesizer</td>\n",
|
1286 |
" </tr>\n",
|
1287 |
" </tbody>\n",
|
|
|
1290 |
],
|
1291 |
"text/plain": [
|
1292 |
" user_input \\\n",
|
1293 |
+
"0 What does the Decision-Making Confidence Scale... \n",
|
1294 |
+
"1 Wht is the Work-Related Stress Scale and how d... \n",
|
1295 |
+
"2 what cognitive load management scale do, how i... \n",
|
1296 |
+
"3 What is the purpose of the Emotional Regulatio... \n",
|
1297 |
+
"4 What does the MRI-6 assess? \n",
|
1298 |
+
"5 What does the Social Confidence Measure (SCM-6... \n",
|
1299 |
+
"6 What OFI-7 do? \n",
|
1300 |
+
"7 Cud yu pleese explane wut the Chronic Pain Adj... \n",
|
1301 |
+
"8 What CWT-7 do? \n",
|
1302 |
+
"9 Cud yu pleese explane how the COGNITIVE Worklo... \n",
|
1303 |
"\n",
|
1304 |
" reference_contexts \\\n",
|
1305 |
"0 [Linked Psychological & Physical Assessment\\nP... \n",
|
|
|
1314 |
"9 [I feel confident when making important decisi... \n",
|
1315 |
"\n",
|
1316 |
" reference \\\n",
|
1317 |
+
"0 The Decision-Making Confidence Scale (DMCS-6) ... \n",
|
1318 |
+
"1 The Work-Related Stress Scale (WRSS-8) evaluat... \n",
|
1319 |
+
"2 The Cognitive Load Management Scale (CLMS-7) m... \n",
|
1320 |
+
"3 The context does not provide specific informat... \n",
|
1321 |
"4 The MRI-6 evaluates short-term and long-term m... \n",
|
1322 |
+
"5 The Social Confidence Measure (SCM-6) evaluate... \n",
|
1323 |
+
"6 The OFI-7 assesses work-related exhaustion and... \n",
|
1324 |
+
"7 The Chronic Pain Adjustment Index (CPAI-10) ev... \n",
|
1325 |
"8 The CWT-7 evaluates an individual's ability to... \n",
|
1326 |
+
"9 The Cognitive Workload Tolerance (CWT-7) evalu... \n",
|
1327 |
"\n",
|
1328 |
" synthesizer_name \n",
|
1329 |
"0 single_hop_specifc_query_synthesizer \n",
|
|
|
1338 |
"9 single_hop_specifc_query_synthesizer "
|
1339 |
]
|
1340 |
},
|
1341 |
+
"execution_count": 50,
|
1342 |
"metadata": {},
|
1343 |
"output_type": "execute_result"
|
1344 |
}
|
|
|
1356 |
},
|
1357 |
{
|
1358 |
"cell_type": "code",
|
1359 |
+
"execution_count": 51,
|
1360 |
"metadata": {},
|
1361 |
"outputs": [],
|
1362 |
"source": [
|
|
|
1368 |
},
|
1369 |
{
|
1370 |
"cell_type": "code",
|
1371 |
+
"execution_count": 52,
|
1372 |
"metadata": {},
|
1373 |
"outputs": [],
|
1374 |
"source": [
|
|
|
1379 |
},
|
1380 |
{
|
1381 |
"cell_type": "code",
|
1382 |
+
"execution_count": 53,
|
1383 |
"metadata": {},
|
1384 |
"outputs": [],
|
1385 |
"source": [
|
|
|
1390 |
},
|
1391 |
{
|
1392 |
"cell_type": "code",
|
1393 |
+
"execution_count": 54,
|
1394 |
"metadata": {},
|
1395 |
"outputs": [
|
1396 |
{
|
1397 |
"data": {
|
1398 |
"application/vnd.jupyter.widget-view+json": {
|
1399 |
+
"model_id": "a098ed85762d4bbcb3983956c8e4d3e6",
|
1400 |
"version_major": 2,
|
1401 |
"version_minor": 0
|
1402 |
},
|
|
|
1411 |
"name": "stderr",
|
1412 |
"output_type": "stream",
|
1413 |
"text": [
|
1414 |
+
"Exception raised in Job[18]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29041, Requested 2597. Please try again in 3.276s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1415 |
+
"Exception raised in Job[16]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29301, Requested 2254. Please try again in 3.11s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1416 |
+
"Exception raised in Job[7]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29177, Requested 2714. Please try again in 3.782s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1417 |
+
"Exception raised in Job[4]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28995, Requested 2265. Please try again in 2.52s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1418 |
+
"Exception raised in Job[19]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29045, Requested 2522. Please try again in 3.134s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1419 |
+
"Exception raised in Job[1]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28880, Requested 2546. Please try again in 2.852s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1420 |
+
"Exception raised in Job[22]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29400, Requested 2254. Please try again in 3.308s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1421 |
+
"Exception raised in Job[13]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29047, Requested 2697. Please try again in 3.488s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1422 |
+
"Exception raised in Job[24]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28801, Requested 2551. Please try again in 2.704s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1423 |
+
"Exception raised in Job[28]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28962, Requested 2254. Please try again in 2.432s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1424 |
+
"Exception raised in Job[25]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28955, Requested 2505. Please try again in 2.92s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1425 |
+
"Exception raised in Job[34]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29121, Requested 2265. Please try again in 2.772s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1426 |
+
"Exception raised in Job[36]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28650, Requested 2556. Please try again in 2.412s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1427 |
+
"Exception raised in Job[31]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28712, Requested 2534. Please try again in 2.492s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1428 |
"Exception raised in Job[17]: TimeoutError()\n",
|
1429 |
+
"Exception raised in Job[30]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29375, Requested 2581. Please try again in 3.911s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1430 |
+
"Exception raised in Job[37]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28555, Requested 2742. Please try again in 2.594s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1431 |
+
"Exception raised in Job[46]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29375, Requested 2250. Please try again in 3.25s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1432 |
+
"Exception raised in Job[40]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29255, Requested 2254. Please try again in 3.018s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1433 |
+
"Exception raised in Job[43]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28627, Requested 2773. Please try again in 2.8s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1434 |
+
"Exception raised in Job[42]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 27737, Requested 2679. Please try again in 832ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n"
|
1435 |
]
|
1436 |
},
|
1437 |
{
|
1438 |
"data": {
|
1439 |
"text/plain": [
|
1440 |
+
"{'context_recall': 1.0000, 'faithfulness': 0.7778, 'factual_correctness': 0.7140, 'answer_relevancy': 0.9405, 'context_entity_recall': 1.0000, 'noise_sensitivity_relevant': 0.1312}"
|
1441 |
]
|
1442 |
},
|
1443 |
+
"execution_count": 54,
|
1444 |
"metadata": {},
|
1445 |
"output_type": "execute_result"
|
1446 |
}
|
|
|
1469 |
},
|
1470 |
{
|
1471 |
"cell_type": "code",
|
1472 |
+
"execution_count": 55,
|
1473 |
"metadata": {},
|
1474 |
"outputs": [],
|
1475 |
"source": [
|
|
|
1481 |
},
|
1482 |
{
|
1483 |
"cell_type": "code",
|
1484 |
+
"execution_count": 56,
|
1485 |
"metadata": {},
|
1486 |
"outputs": [],
|
1487 |
"source": [
|
|
|
1490 |
},
|
1491 |
{
|
1492 |
"cell_type": "code",
|
1493 |
+
"execution_count": 57,
|
1494 |
"metadata": {},
|
1495 |
"outputs": [
|
1496 |
{
|
1497 |
"data": {
|
1498 |
"application/vnd.jupyter.widget-view+json": {
|
1499 |
+
"model_id": "81a8a18f913545438fda4bde69dd52e1",
|
1500 |
"version_major": 2,
|
1501 |
"version_minor": 0
|
1502 |
},
|
|
|
1511 |
"name": "stderr",
|
1512 |
"output_type": "stream",
|
1513 |
"text": [
|
1514 |
+
"Exception raised in Job[22]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28993, Requested 2254. Please try again in 2.494s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1515 |
+
"Exception raised in Job[18]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 27957, Requested 2597. Please try again in 1.108s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1516 |
+
"Exception raised in Job[19]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29387, Requested 2528. Please try again in 3.83s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1517 |
+
"Exception raised in Job[1]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29351, Requested 2546. Please try again in 3.794s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1518 |
+
"Exception raised in Job[7]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29695, Requested 2714. Please try again in 4.818s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1519 |
+
"Exception raised in Job[13]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29801, Requested 2827. Please try again in 5.256s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1520 |
+
"Exception raised in Job[11]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29741, Requested 1449. Please try again in 2.38s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1521 |
+
"Exception raised in Job[16]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29010, Requested 2254. Please try again in 2.528s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1522 |
+
"Exception raised in Job[28]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28990, Requested 2254. Please try again in 2.488s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1523 |
+
"Exception raised in Job[24]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29910, Requested 2551. Please try again in 4.922s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1524 |
+
"Exception raised in Job[25]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29316, Requested 2505. Please try again in 3.642s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1525 |
+
"Exception raised in Job[30]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29718, Requested 2581. Please try again in 4.598s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1526 |
+
"Exception raised in Job[31]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28772, Requested 2534. Please try again in 2.612s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1527 |
+
"Exception raised in Job[36]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29229, Requested 2556. Please try again in 3.57s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1528 |
"Exception raised in Job[17]: TimeoutError()\n",
|
1529 |
+
"Exception raised in Job[40]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29563, Requested 2254. Please try again in 3.634s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1530 |
+
"Exception raised in Job[46]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29198, Requested 2250. Please try again in 2.896s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1531 |
+
"Exception raised in Job[49]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29640, Requested 2517. Please try again in 4.314s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1532 |
+
"Exception raised in Job[42]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29517, Requested 2679. Please try again in 4.392s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1533 |
+
"Exception raised in Job[43]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29382, Requested 2778. Please try again in 4.32s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1534 |
+
"Exception raised in Job[37]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28917, Requested 2742. Please try again in 3.318s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n"
|
1535 |
]
|
1536 |
},
|
1537 |
{
|
1538 |
"data": {
|
1539 |
"text/plain": [
|
1540 |
+
"{'context_recall': 1.0000, 'faithfulness': 0.6000, 'factual_correctness': 0.7210, 'answer_relevancy': 0.9423, 'context_entity_recall': 1.0000, 'noise_sensitivity_relevant': 0.0781}"
|
1541 |
]
|
1542 |
},
|
1543 |
+
"execution_count": 57,
|
1544 |
"metadata": {},
|
1545 |
"output_type": "execute_result"
|
1546 |
}
|
|
|
1564 |
},
|
1565 |
{
|
1566 |
"cell_type": "code",
|
1567 |
+
"execution_count": 58,
|
1568 |
"metadata": {},
|
1569 |
"outputs": [],
|
1570 |
"source": [
|
|
|
1576 |
},
|
1577 |
{
|
1578 |
"cell_type": "code",
|
1579 |
+
"execution_count": 59,
|
1580 |
"metadata": {},
|
1581 |
"outputs": [],
|
1582 |
"source": [
|
|
|
1585 |
},
|
1586 |
{
|
1587 |
"cell_type": "code",
|
1588 |
+
"execution_count": 60,
|
1589 |
"metadata": {},
|
1590 |
"outputs": [
|
1591 |
{
|
1592 |
"data": {
|
1593 |
"application/vnd.jupyter.widget-view+json": {
|
1594 |
+
"model_id": "f97492909bd740eca783ffca7d420bab",
|
1595 |
"version_major": 2,
|
1596 |
"version_minor": 0
|
1597 |
},
|
|
|
1606 |
"name": "stderr",
|
1607 |
"output_type": "stream",
|
1608 |
"text": [
|
1609 |
+
"Exception raised in Job[10]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28079, Requested 2525. Please try again in 1.208s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1610 |
+
"Exception raised in Job[16]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28779, Requested 2525. Please try again in 2.608s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1611 |
+
"Exception raised in Job[1]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29859, Requested 2806. Please try again in 5.33s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1612 |
+
"Exception raised in Job[24]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28465, Requested 2822. Please try again in 2.574s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1613 |
+
"Exception raised in Job[13]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28948, Requested 2976. Please try again in 3.848s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1614 |
+
"Exception raised in Job[22]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29305, Requested 2525. Please try again in 3.66s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1615 |
+
"Exception raised in Job[18]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29595, Requested 2868. Please try again in 4.926s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1616 |
+
"Exception raised in Job[28]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28680, Requested 2525. Please try again in 2.41s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1617 |
+
"Exception raised in Job[25]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28993, Requested 2776. Please try again in 3.538s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1618 |
+
"Exception raised in Job[19]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29483, Requested 2793. Please try again in 4.552s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1619 |
+
"Exception raised in Job[7]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28630, Requested 3159. Please try again in 3.578s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1620 |
+
"Exception raised in Job[31]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29303, Requested 2794. Please try again in 4.194s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1621 |
+
"Exception raised in Job[11]: TimeoutError()\n",
|
1622 |
+
"Exception raised in Job[17]: TimeoutError()\n",
|
1623 |
+
"Exception raised in Job[30]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28351, Requested 2840. Please try again in 2.382s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1624 |
+
"Exception raised in Job[34]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28991, Requested 2525. Please try again in 3.032s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1625 |
+
"Exception raised in Job[42]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29445, Requested 2953. Please try again in 4.796s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1626 |
+
"Exception raised in Job[36]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28683, Requested 2827. Please try again in 3.02s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1627 |
+
"Exception raised in Job[43]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29750, Requested 3159. Please try again in 5.818s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1628 |
+
"Exception raised in Job[40]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29070, Requested 2525. Please try again in 3.19s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1629 |
+
"Exception raised in Job[48]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29213, Requested 2824. Please try again in 4.074s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
|
1630 |
+
"Exception raised in Job[49]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29082, Requested 2788. Please try again in 3.74s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n"
|
1631 |
]
|
1632 |
},
|
1633 |
{
|
1634 |
"data": {
|
1635 |
"text/plain": [
|
1636 |
+
"{'context_recall': 1.0000, 'faithfulness': 0.6000, 'factual_correctness': 0.7180, 'answer_relevancy': 0.9393, 'context_entity_recall': 1.0000, 'noise_sensitivity_relevant': 0.0781}"
|
1637 |
]
|
1638 |
},
|
1639 |
+
"execution_count": 60,
|
1640 |
"metadata": {},
|
1641 |
"output_type": "execute_result"
|
1642 |
}
|
READMEresponses.md
CHANGED
@@ -1,11 +1,96 @@
|
|
1 |
## Task 1: Defining your Problem and Audience
|
2 |
-
|
|
|
|
|
|
|
3 |
|
4 |
## Task 2: Propose a Solution
|
5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
|
8 |
## Task 3: Dealing with the Data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
|
11 |
|
@@ -16,10 +101,21 @@ https://huggingface.co/spaces/drewgenai/midterm_poc
|
|
16 |
|
17 |
## Task 5: Creating a Golden Test Data Set
|
18 |
|
19 |
-
|
20 |
The dataset is based on the submitted documents and the base model performed well across all metrics.
|
21 |
|
22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
|
25 |
## Task 6: Fine-Tuning Open-Source Embeddings
|
@@ -34,15 +130,35 @@ https://huggingface.co/drewgenai/midterm-compare-arctic-embed-m-ft
|
|
34 |
|
35 |
I ran the RAGAS evaluation on the finetuned model and the openai model as well.
|
36 |
|
37 |
-
The finetuned model performed well across all metrics as well but not quite as well as the base Snowflake/snowflake-arctic-embed-m model where it didn't perform as well in context recall, but slightly in noise sensitivity.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
|
39 |
-
The openai model performed well across all metrics but not as well as the base Snowflake/snowflake-arctic-embed-m model, but with slightly worse noise sensitivity.
|
40 |
|
41 |
-
Finetuned model {'context_recall': 1.0000, 'faithfulness': 0.8500, 'factual_correctness': 0.7220, 'answer_relevancy': 0.9481, 'context_entity_recall': 0.7917, 'noise_sensitivity_relevant': 0.1111}
|
42 |
|
43 |
-
|
44 |
|
45 |
-
|
46 |
|
47 |
|
48 |
## Final Submission
|
|
|
1 |
## Task 1: Defining your Problem and Audience
|
2 |
+
### Problem Statement
|
3 |
+
Researchers at Studies Inc. must manually review multiple protocols, a time-consuming and complex process that requires scanning, extracting, and analyzing detailed information to identify patterns between studies.
|
4 |
+
### Why This is a Problem for Our Users
|
5 |
+
Researchers are spending an excessive amount of time manually reviewing study protocols to standardize data across multiple studies. Each protocol contains detailed and sometimes inconsistent information, making it difficult to quickly extract relevant data and compare it across different studies. The manual nature of this process increases the likelihood of human error, slows down research progress, and diverts time away from higher-value analytical work. Additionally, as the number of studies grows, this problem scales, further straining resources and delaying critical insights needed for scientific advancements.
|
6 |
|
7 |
## Task 2: Propose a Solution
|
8 |
|
9 |
+
The solution is to build an automated system that streamlines the process of reviewing and extracting key data from research protocols. By leveraging a combination of LLM-based retrieval-augmented generation (RAG) and structured orchestration, the system will allow researchers to quickly scan, extract, and compare relevant information across multiple protocols, significantly reducing manual effort and improving standardization.
|
10 |
+
|
11 |
+
Instead of using agents in the initial prototype, the solution will employ LangChain tools for structured execution, ensuring modular and predictable workflows. Chainlit will provide an event-driven UI that dynamically triggers relevant processing steps based on user interactions. This setup ensures efficiency, flexibility, and the ability to scale as more protocols and datasets are introduced.
|
12 |
+
|
13 |
+
While the initial implementation will rely on structured tool execution, future iterations or final builds may incorporate agentic reasoning to enhance adaptability. Agents could be employed for intelligent workflow management, dynamically selecting tools based on protocol complexity or user preferences. This could enable more sophisticated multi-step reasoning, where an agent determines the best extraction and comparison approach based on the type of research being conducted, but it is not required at this time with just two local documents.
|
14 |
+
|
15 |
+
We will build with the following stack:
|
16 |
+
|
17 |
+
- **LLM**: gpt-4o-mini
|
18 |
+
We will leverage a closed-source model because the entire application is constructed using open-source data from public protocols.
|
19 |
+
- **Embedding Models**: text-embedding-3-small, snowflake-arctic-embed-m
|
20 |
+
We will construct a quick prototype using our closed-source OpenAI embedding model, and then we will fine-tune an off-the-shelf embedding model from Snowflake. We want to demonstrate to leadership that it’s important to be able to work with open-source models and see that they can be as performant as closed-source models while providing privacy benefits, especially after fine-tuning.
|
21 |
+
- **Orchestration**: Langchain tools and tool based execution
|
22 |
+
The application will orchestrate workflow execution using a structured, tools-based approach. The system will ensure predictable and efficient processing by defining modular tools (such as document comparison and retrieval) and explicitly invoking them in response to user input. Chainlit serves as the event-driven interface, managing tool execution based on user interactions, while LangChain tools handle document retrieval, RAG processing, and structured data export. This approach will provide flexibility and modularity.
|
23 |
+
- **Vector Store**: Qdrant
|
24 |
+
It will be more than fast enough for our current data needs while maintaining low latency. Overall, it’s a solid, reliable choice, and we can take advantage of the fully open-source version at no cost and host it in memory, running within our Hugging Face Space.
|
25 |
+
- **Evaluation**: RAGAS
|
26 |
+
RAGAS has been a leader in the AI evaluation space for years. We’re particularly interested in leveraging their RAG assessment metrics to test the performance of our embedding models.
|
27 |
+
- **User Interface**: Chainlit
|
28 |
+
A lightweight, Python-based UI framework designed specifically for LLM applications. It allows us to quickly prototype and deploy conversational interfaces with minimal front-end effort while maintaining flexibility for customization.
|
29 |
+
- **Inference & Serving**: Hugging Face
|
30 |
+
We will leverage Hugging Face as a platform to serve our application to users because it’s very fast (1-click deployment) and very cheap to host. Additionally, we will use Hugging Face to pull embedding models that we will fine-tune and, if needed, host.
|
31 |
|
32 |
|
33 |
## Task 3: Dealing with the Data
|
34 |
+
### Data Sources and External APIs
|
35 |
+
Our system will leverage multiple data sources and external APIs to extract, process, and analyze research protocols effectively. Below are the key sources and their roles in our application:
|
36 |
+
|
37 |
+
#### Uploaded Protocol Documents (PDFs)
|
38 |
+
- Researchers will upload research protocol documents in **PDF format**.
|
39 |
+
- These documents will be processed using **PyMuPDFLoader**, extracted as text, and chunked for embedding storage.
|
40 |
+
- We use **Qdrant** as the vector database to store document chunks for retrieval.
|
41 |
+
|
42 |
+
**Why?**
|
43 |
+
Protocols contain structured and semi-structured data critical for comparison. By storing them in a vector database, we enable **semantic search and retrieval** for streamlined analysis.
|
44 |
+
|
45 |
+
---
|
46 |
+
|
47 |
+
#### Hugging Face API
|
48 |
+
- Used to access **Snowflake Arctic embedding models** for text embedding and potential fine-tuning.
|
49 |
+
|
50 |
+
**Why?**
|
51 |
+
We aim to compare the performance of closed-source OpenAI embeddings with open-source models for **privacy, scalability, and long-term flexibility**.
|
52 |
+
|
53 |
+
---
|
54 |
+
|
55 |
+
#### OpenAI API (GPT-4o-mini)
|
56 |
+
- Used for generating **structured comparisons** between protocols.
|
57 |
+
|
58 |
+
**Why?**
|
59 |
+
The LLM will process retrieved document chunks and generate **natural-language comparisons** and **structured JSON outputs**.
|
60 |
+
|
61 |
+
---
|
62 |
+
|
63 |
+
#### LangChain Tool Execution
|
64 |
+
- We use **LangChain’s tools-based execution** for structured retrieval and document comparison.
|
65 |
+
|
66 |
+
**Why?**
|
67 |
+
Instead of an agentic approach, **explicit tool execution** ensures predictable and modular processing.
|
68 |
+
Our priority at this time is **reducing API calls** while maintaining efficiency.
|
69 |
+
|
70 |
+
---
|
71 |
+
|
72 |
+
#### Chunking Strategy
|
73 |
+
- We will use a **semantic-based chunking strategy** with LangChain’s **SemanticChunker**, powered by **Snowflake Arctic embeddings**.
|
74 |
+
|
75 |
+
**Why this approach?**
|
76 |
+
Traditional **fixed-size** or **recursive character-splitting** methods often break up conceptually linked sections.
|
77 |
+
Semantic chunking ensures that **chunks maintain their conceptual integrity**, improving retrieval quality.
|
78 |
+
|
79 |
+
This method is especially useful for research protocols, where meaningful sections (e.g., **assessments, methodologies**) must remain intact for comparison.
|
80 |
+
|
81 |
+
As we refine the system, we may adjust chunk sizes based on **real user queries and retrieval performance** to optimize information density and response accuracy.
|
82 |
+
|
83 |
+
---
|
84 |
+
|
85 |
+
### Additional Data Needs
|
86 |
+
At present, our application focuses on **structured comparisons** between protocols. However, in future iterations, we may integrate additional data sources such as:
|
87 |
+
|
88 |
+
- **Metadata from Public Research Repositories** (e.g., PubMed, ArXiv API)
|
89 |
+
→ To enrich protocol comparisons with **relevant external research**.
|
90 |
+
- **Institutional Databases** (if access is provided)
|
91 |
+
→ To validate protocol **consistency across multi-site studies**.
|
92 |
+
|
93 |
+
While the current system is **not agentic**, we may explore **agent-based reasoning** in future versions to dynamically adjust retrieval and processing strategies based on protocol complexity. At this time all the information we need is in the provided local documents.
|
94 |
|
95 |
|
96 |
|
|
|
101 |
|
102 |
## Task 5: Creating a Golden Test Data Set
|
103 |
|
|
|
104 |
The dataset is based on the submitted documents and the base model performed well across all metrics.
|
105 |
|
106 |
+
The base model is the Snowflake/snowflake-arctic-embed-m model.
|
107 |
+
|
108 |
+
### Base model evaluation
|
109 |
+
| Metric | Value |
|
110 |
+
|-------------------------------|--------|
|
111 |
+
| Context Recall | 1.0000 |
|
112 |
+
| Faithfulness | 1.0000 |
|
113 |
+
| Factual Correctness | 0.7540 |
|
114 |
+
| Answer Relevancy | 0.9481 |
|
115 |
+
| Context Entity Recall | 0.8095 |
|
116 |
+
| Noise Sensitivity Relevant | 0.1973 |
|
117 |
+
|
118 |
+
|
119 |
|
120 |
|
121 |
## Task 6: Fine-Tuning Open-Source Embeddings
|
|
|
130 |
|
131 |
I ran the RAGAS evaluation on the finetuned model and the openai model as well.
|
132 |
|
133 |
+
The fine-tuned model also performed well across all metrics, but not quite as well as the base Snowflake/snowflake-arctic-embed-m model: it scored lower on faithfulness and factual correctness, though it scored slightly better on noise sensitivity.
|
134 |
+
|
135 |
+
The OpenAI model performed well across all metrics, matching the base Snowflake/snowflake-arctic-embed-m model on most of them, but with slightly lower answer relevancy and noticeably worse noise sensitivity.
|
136 |
+
|
137 |
+
### Finetuned Model Evaluation
|
138 |
+
| Metric | Value |
|
139 |
+
|-------------------------------|--------|
|
140 |
+
| Context Recall | 1.0000 |
|
141 |
+
| Faithfulness | 0.8500 |
|
142 |
+
| Factual Correctness | 0.7220 |
|
143 |
+
| Answer Relevancy | 0.9481 |
|
144 |
+
| Context Entity Recall | 0.7917 |
|
145 |
+
| Noise Sensitivity Relevant | 0.1111 |
|
146 |
+
|
147 |
+
### OpenAI Model Evaluation
|
148 |
+
| Metric | Value |
|
149 |
+
|-------------------------------|--------|
|
150 |
+
| Context Recall | 1.0000 |
|
151 |
+
| Faithfulness | 1.0000 |
|
152 |
+
| Factual Correctness | 0.7540 |
|
153 |
+
| Answer Relevancy | 0.9463 |
|
154 |
+
| Context Entity Recall | 0.8095 |
|
155 |
+
| Noise Sensitivity Relevant | 0.3095 |
|
156 |
|
|
|
157 |
|
|
|
158 |
|
159 |
+
The base model is the best performing model for the current use case.
|
160 |
|
161 |
+
In the second half of the course I would like to explore having the application look for external standards to compare the protocols to further help with the comparison process. I would also like it to evaluate the file type and use reasoning to determine the best approach to extracting the information and potentially accept more than 2 files.
|
162 |
|
163 |
|
164 |
## Final Submission
|
app_working_on_agentic.py
ADDED
@@ -0,0 +1,304 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
import shutil
import json
import pandas as pd
import chainlit as cl
from dotenv import load_dotenv
from langchain_core.documents import Document
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_experimental.text_splitter import SemanticChunker
from langchain_community.vectorstores import Qdrant
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
# NOTE(review): START/StateGraph and List/TypedDict appear unused in this file —
# likely leftovers from an earlier LangGraph version; confirm before removing.
from langgraph.graph import START, StateGraph
from langchain.tools import tool
from langchain.schema import HumanMessage
from typing_extensions import List, TypedDict
from operator import itemgetter
from langchain.agents import AgentExecutor, create_openai_tools_agent
from langchain_core.prompts import MessagesPlaceholder

# Load environment variables (e.g. OPENAI_API_KEY) from a local .env file.
load_dotenv()

# Directories for uploaded PDFs and the generated comparison CSV.
UPLOAD_PATH = "upload/"
OUTPUT_PATH = "output/"
os.makedirs(UPLOAD_PATH, exist_ok=True)
os.makedirs(OUTPUT_PATH, exist_ok=True)

# Initialize embeddings model — the base Snowflake arctic-embed-m model,
# which was the best performer in the project's RAGAS evaluation.
model_id = "Snowflake/snowflake-arctic-embed-m"
embedding_model = HuggingFaceEmbeddings(model_name=model_id)

# Semantic chunker: splits text at embedding-similarity breakpoints.
semantic_splitter = SemanticChunker(embedding_model)

# LLM used inside both tools' RAG chains (the agent itself uses gpt-4o below).
llm = ChatOpenAI(model="gpt-4o-mini")
|
41 |
+
|
42 |
+
# Define RAG prompt used by document_comparison_tool: instructs the model to
# match whole assessment sections across the two protocols and emit raw JSON
# (a list of {"Derived Description", "Protocol_1", "Protocol_2"} dicts).
# NOTE(review): rule 7 below ("protocol should be the between") looks
# truncated/garbled — confirm the intended wording with the author.
export_prompt = """
CONTEXT:
{context}

QUERY:
{question}

You are a helpful assistant. Use the available context to answer the question.

Between these two files containing protocols, identify and match **entire assessment sections** based on conceptual similarity. Do NOT match individual questions.

### **Output Format:**
Return the response in **valid JSON format** structured as a list of dictionaries, where each dictionary contains:
[
{{
"Derived Description": "A short name for the matched concept",
"Protocol_1": "Protocol 1 - Matching Element",
"Protocol_2": "Protocol 2 - Matching Element"
}},
...
]
### **Example Output:**
[
{{
"Derived Description": "Pain Coping Strategies",
"Protocol_1": "Pain Coping Strategy Scale (PCSS-9)",
"Protocol_2": "Chronic Pain Adjustment Index (CPAI-10)"
}},
{{
"Derived Description": "Work Stress and Fatigue",
"Protocol_1": "Work-Related Stress Scale (WRSS-8)",
"Protocol_2": "Occupational Fatigue Index (OFI-7)"
}},
...
]

### Rules:
1. Only output **valid JSON** with no explanations, summaries, or markdown formatting.
2. Ensure each entry in the JSON list represents a single matched data element from the two protocols.
3. If no matching element is found in a protocol, leave it empty ("").
4. **Do NOT include headers, explanations, or additional formatting**—only return the raw JSON list.
5. It should include all the elements in the two protocols.
6. If it cannot match the element, create the row and include the protocol it did find and put "could not match" in the other protocol column.
7. protocol should be the between
"""

# Template object consumed by the comparison tool's RAG chain.
compare_export_prompt = ChatPromptTemplate.from_template(export_prompt)
|
90 |
+
|
91 |
+
# Prompt for plain question-answering over the uploaded documents
# (used by document_query_tool).
QUERY_PROMPT = """
You are a helpful assistant. Use the available context to answer the question concisely and informatively.

CONTEXT:
{context}

QUERY:
{question}

Provide a natural-language response using the given information. If you do not know the answer, say so.
"""

# Template object consumed by document_query_tool.
query_prompt = ChatPromptTemplate.from_template(QUERY_PROMPT)
|
104 |
+
|
105 |
+
|
106 |
+
@tool
def document_query_tool(question: str):
    """Retrieve relevant document sections and answer a question about the uploads.

    Args:
        question: The user's natural-language question about the uploaded PDFs.

    Returns:
        On success, a dict with:
            "answer"  - the LLM's natural-language answer.
            "context" - the list of retrieved document chunks used as context.
        On failure (no retriever in the session), a plain error string.

    Note: the original signature claimed ``-> str`` but the success path returns
    a dict; the annotation was removed to reflect the actual dual return type,
    which handle_message relies on (it checks ``isinstance(output, dict)``).
    """
    retriever = cl.user_session.get("qdrant_retriever")
    if not retriever:
        return "Error: No documents available for retrieval. Please upload documents first."

    # Retrieve the most relevant chunks for the question from the vector store.
    retrieved_docs = retriever.invoke(question)
    docs_content = "\n\n".join(doc.page_content for doc in retrieved_docs)

    # Generate a grounded answer using the natural-language query prompt.
    messages = query_prompt.format_messages(question=question, context=docs_content)
    response = llm.invoke(messages)

    return {
        "answer": response.content,
        "context": retrieved_docs
    }
|
126 |
+
|
127 |
+
|
128 |
+
@tool
def document_comparison_tool(question: str) -> str:
    """Compare the two uploaded documents and export matched elements as a CSV.

    Runs the comparison prompt through a RAG chain, parses the model's JSON
    output, writes it to ``output/comparison_results.csv``, and stores the file
    path in the user session so the message handler can offer a download link.

    Args:
        question: The user's comparison request, forwarded to the RAG chain.

    Returns:
        A status string: a success notice, or an error message when no
        documents are loaded, no matches were found, or the LLM response
        could not be parsed as JSON.
    """
    # Retrieve the vector database retriever built at upload time.
    retriever = cl.user_session.get("qdrant_retriever")
    if not retriever:
        return "Error: No documents available for retrieval. Please upload two PDF files first."

    # RAG chain: retrieved context + question -> comparison prompt -> JSON text.
    rag_chain = (
        {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
        | compare_export_prompt | llm | StrOutputParser()
    )
    response_text = rag_chain.invoke({"question": question})

    # Robustness fix: models frequently wrap JSON in ```json ... ``` fences
    # despite the prompt's "raw JSON only" rule, which previously made
    # json.loads fail spuriously. Strip a leading/trailing fence if present.
    cleaned = response_text.strip()
    if cleaned.startswith("```"):
        # Drop the opening fence line (e.g. "```json"), then the closing fence.
        cleaned = cleaned.split("\n", 1)[1] if "\n" in cleaned else cleaned
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
        cleaned = cleaned.strip()

    # Parse response and save as CSV.
    try:
        structured_data = json.loads(cleaned)
        if not structured_data:
            return "Error: No matched elements found."

        # Write the matched elements to the output CSV.
        file_path = os.path.join(OUTPUT_PATH, "comparison_results.csv")
        df = pd.DataFrame(structured_data, columns=["Derived Description", "Protocol_1", "Protocol_2"])
        df.to_csv(file_path, index=False)

        # Store the file path so handle_message can attach the CSV for download.
        cl.user_session.set("comparison_file_path", file_path)

        return "Comparison complete! CSV file has been generated."

    except json.JSONDecodeError:
        return "Error: Response is not valid JSON."
|
164 |
+
|
165 |
+
|
166 |
+
# Define tools for the agent: Q&A over the uploads, and structured comparison.
tools = [document_query_tool, document_comparison_tool]

# System prompt telling the agent when to pick each tool.
system_prompt = """You are an intelligent document analysis assistant. You have access to two tools:

1. document_query_tool: Use this when a user wants information or has questions about the content of uploaded documents.
2. document_comparison_tool: Use this when a user wants to compare elements between two uploaded documents or export comparison results.

Analyze the user's request carefully to determine which tool is most appropriate.
"""

# Create the agent prompt using OpenAI function calling; chat_history and
# agent_scratchpad are filled in per-invocation by the AgentExecutor.
agent_prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
    MessagesPlaceholder(variable_name="agent_scratchpad"),
])

# NOTE(review): the agent deliberately(?) uses gpt-4o at temperature 0 while
# the tools' RAG chains use the module-level gpt-4o-mini `llm` — confirm this
# split is intentional.
agent = create_openai_tools_agent(
    llm=ChatOpenAI(model="gpt-4o", temperature=0),
    tools=tools,
    prompt=agent_prompt
)

# Create the agent executor (verbose for debugging; tolerate parse errors).
agent_executor = AgentExecutor.from_agent_and_tools(
    agent=agent,
    tools=tools,
    verbose=True,
    handle_parsing_errors=True,
)
|
199 |
+
|
200 |
+
|
201 |
+
async def process_files(files: list[cl.File]):
    """Copy uploaded PDFs into UPLOAD_PATH, split them into semantic chunks,
    and build an in-memory Qdrant retriever over all chunks.

    Returns the retriever, or None when no chunks could be produced.
    """
    chunked_docs = []
    for uploaded in files:
        destination = os.path.join(UPLOAD_PATH, uploaded.name)
        shutil.copyfile(uploaded.path, destination)

        # Load every page of the PDF, chunk each page semantically, and tag
        # every chunk with the originating file name for later attribution.
        for page in PyMuPDFLoader(destination).load():
            chunked_docs.extend(
                Document(page_content=piece, metadata={"source": uploaded.name})
                for piece in semantic_splitter.split_text(page.page_content)
            )

    if not chunked_docs:
        return None

    vectorstore = Qdrant.from_documents(
        chunked_docs,
        embedding_model,
        location=":memory:",
        collection_name="document_comparison",
    )
    return vectorstore.as_retriever()
|
226 |
+
|
227 |
+
|
228 |
+
@cl.on_chat_start
async def start():
    """Session entry point: reset per-session state, collect exactly two PDFs,
    and build the retriever used by both tools."""
    # Fresh state for the agent and the retriever.
    cl.user_session.set("chat_history", [])
    cl.user_session.set("qdrant_retriever", None)

    upload_request = cl.AskFileMessage(
        content="Please upload **two PDF files** for comparison:",
        accept=["application/pdf"],
        max_files=2
    )
    files = await upload_request.send()

    # Guard: the comparison workflow requires exactly two documents.
    if len(files) != 2:
        await cl.Message("Error: You must upload exactly two PDF files.").send()
        return

    with cl.Step("Processing files"):
        retriever = await process_files(files)
        if not retriever:
            await cl.Message("Error: Unable to process files. Please try again.").send()
            return
        cl.user_session.set("qdrant_retriever", retriever)
        await cl.Message("Files uploaded and processed successfully! You can now enter your query.").send()
|
251 |
+
|
252 |
+
|
253 |
+
@cl.on_message
async def handle_message(message: cl.Message):
    """Route a user message through the agent and render the result.

    Sends the tool's answer as a chat message; when a comparison CSV was
    produced, attaches it as a downloadable file element. Also records the
    exchange into the session chat history for subsequent agent turns.
    """
    # Local import: AIMessage is only needed here and the module-level imports
    # currently bring in only HumanMessage from langchain.schema.
    from langchain.schema import AIMessage

    # Get chat history accumulated so far in this session.
    chat_history = cl.user_session.get("chat_history", [])

    # Run the (synchronous) agent off the event loop.
    with cl.Step("Agent thinking"):
        response = await cl.make_async(agent_executor.invoke)(
            {"input": message.content, "chat_history": chat_history}
        )

    output = response["output"]

    # NOTE(review): AgentExecutor normally returns the model's final text, so
    # `output` is usually a str even when document_query_tool returned a dict;
    # the isinstance branch is kept for the direct-tool-output case.
    if isinstance(output, dict) and "answer" in output:
        # This is from document_query_tool
        await cl.Message(output["answer"]).send()

    elif "Comparison complete!" in str(output):
        # document_comparison_tool ran; attach the generated CSV for download.
        file_path = cl.user_session.get("comparison_file_path")

        if file_path and os.path.exists(file_path):
            # Read the file content
            with open(file_path, "rb") as f:
                file_content = f.read()

            # Create a File element with the content
            file_element = cl.File(
                name="comparison_results.csv",
                content=file_content,
                display="inline"
            )

            # Send the message with the file element
            await cl.Message(
                content="Comparison complete! Download the CSV below:",
                elements=[file_element],
            ).send()
        else:
            await cl.Message(content=str(output)).send()

    else:
        # Generic response
        await cl.Message(content=str(output)).send()

    # Bug fix: the assistant's reply was previously appended as a second
    # HumanMessage, which misattributes turns in the agent's chat history.
    # Record it as an AIMessage instead.
    chat_history.extend([
        HumanMessage(content=message.content),
        AIMessage(content=str(output))
    ])
    cl.user_session.set("chat_history", chat_history)
|
comparison_results.csv
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Derived Description,Protocol_1,Protocol_2
|
2 |
+
Memory Recall,I struggle to remember names and faces. (Scale: 0-3),I retain new information effectively.
|
3 |
+
Mnemonic Techniques,I practice mnemonic techniques to improve recall. (Scale: 0-3),could not match
|
4 |
+
Decision Making Confidence,I feel confident when making important decisions. (Scale: 0-3),I second-guess myself often when making choices. (Scale: 0-3)
|
5 |
+
Instinct Trust,I trust my instincts when faced with uncertainty.,could not match
|
pyproject.toml
CHANGED
@@ -51,7 +51,7 @@ dependencies = [
|
|
51 |
#"ragas==0.2.10"
|
52 |
#"FAISS"
|
53 |
#remove only used for testing
|
54 |
-
"cohere",
|
55 |
-
"langchain_cohere",
|
56 |
-
"arxiv"
|
57 |
]
|
|
|
51 |
#"ragas==0.2.10"
|
52 |
#"FAISS"
|
53 |
#remove only used for testing
|
54 |
+
#"cohere",
|
55 |
+
#"langchain_cohere",
|
56 |
+
#"arxiv"
|
57 |
]
|