drewgenai commited on
Commit
8c640e9
·
1 Parent(s): 1f3a006

updated notes and poc of agentic. working but downloads not yet working

Browse files
03-testembedtune.ipynb CHANGED
@@ -36,7 +36,7 @@
36
  },
37
  {
38
  "cell_type": "code",
39
- "execution_count": 3,
40
  "metadata": {},
41
  "outputs": [],
42
  "source": [
@@ -49,7 +49,7 @@
49
  },
50
  {
51
  "cell_type": "code",
52
- "execution_count": 4,
53
  "metadata": {},
54
  "outputs": [],
55
  "source": [
@@ -58,13 +58,13 @@
58
  },
59
  {
60
  "cell_type": "code",
61
- "execution_count": 5,
62
  "metadata": {},
63
  "outputs": [
64
  {
65
  "data": {
66
  "application/vnd.jupyter.widget-view+json": {
67
- "model_id": "2098545c1f924b7c85f8b7ca809f6f1a",
68
  "version_major": 2,
69
  "version_minor": 0
70
  },
@@ -74,13 +74,6 @@
74
  },
75
  "metadata": {},
76
  "output_type": "display_data"
77
- },
78
- {
79
- "name": "stderr",
80
- "output_type": "stream",
81
- "text": [
82
- "Token has not been saved to git credential helper.\n"
83
- ]
84
  }
85
  ],
86
  "source": [
@@ -90,7 +83,7 @@
90
  },
91
  {
92
  "cell_type": "code",
93
- "execution_count": 6,
94
  "metadata": {},
95
  "outputs": [
96
  {
@@ -108,7 +101,7 @@
108
  },
109
  {
110
  "cell_type": "code",
111
- "execution_count": 7,
112
  "metadata": {},
113
  "outputs": [
114
  {
@@ -127,7 +120,7 @@
127
  },
128
  {
129
  "cell_type": "code",
130
- "execution_count": 8,
131
  "metadata": {},
132
  "outputs": [],
133
  "source": [
@@ -168,9 +161,18 @@
168
  },
169
  {
170
  "cell_type": "code",
171
- "execution_count": 78,
172
  "metadata": {},
173
- "outputs": [],
 
 
 
 
 
 
 
 
 
174
  "source": [
175
  "\n",
176
  "\n",
@@ -191,7 +193,7 @@
191
  },
192
  {
193
  "cell_type": "code",
194
- "execution_count": 10,
195
  "metadata": {},
196
  "outputs": [],
197
  "source": [
@@ -225,7 +227,7 @@
225
  },
226
  {
227
  "cell_type": "code",
228
- "execution_count": 75,
229
  "metadata": {},
230
  "outputs": [],
231
  "source": [
@@ -255,7 +257,7 @@
255
  },
256
  {
257
  "cell_type": "code",
258
- "execution_count": 76,
259
  "metadata": {},
260
  "outputs": [],
261
  "source": [
@@ -273,7 +275,7 @@
273
  },
274
  {
275
  "cell_type": "code",
276
- "execution_count": 77,
277
  "metadata": {},
278
  "outputs": [
279
  {
@@ -312,7 +314,7 @@
312
  },
313
  {
314
  "cell_type": "code",
315
- "execution_count": 44,
316
  "metadata": {},
317
  "outputs": [],
318
  "source": [
@@ -326,7 +328,7 @@
326
  },
327
  {
328
  "cell_type": "code",
329
- "execution_count": 45,
330
  "metadata": {},
331
  "outputs": [],
332
  "source": [
@@ -350,7 +352,7 @@
350
  },
351
  {
352
  "cell_type": "code",
353
- "execution_count": 46,
354
  "metadata": {},
355
  "outputs": [],
356
  "source": [
@@ -359,7 +361,7 @@
359
  },
360
  {
361
  "cell_type": "code",
362
- "execution_count": 47,
363
  "metadata": {},
364
  "outputs": [],
365
  "source": [
@@ -396,16 +398,16 @@
396
  },
397
  {
398
  "cell_type": "code",
399
- "execution_count": 48,
400
  "metadata": {},
401
  "outputs": [
402
  {
403
  "name": "stderr",
404
  "output_type": "stream",
405
  "text": [
406
- "Processing documents: 100%|██████████| 9/9 [00:02<00:00, 4.44it/s]\n",
407
- "Processing documents: 100%|██████████| 2/2 [00:01<00:00, 1.74it/s]\n",
408
- "Processing documents: 100%|██████████| 3/3 [00:02<00:00, 1.50it/s]\n"
409
  ]
410
  }
411
  ],
@@ -417,7 +419,7 @@
417
  },
418
  {
419
  "cell_type": "code",
420
- "execution_count": 49,
421
  "metadata": {},
422
  "outputs": [],
423
  "source": [
@@ -461,7 +463,7 @@
461
  },
462
  {
463
  "cell_type": "code",
464
- "execution_count": 50,
465
  "metadata": {},
466
  "outputs": [],
467
  "source": [
@@ -470,7 +472,7 @@
470
  },
471
  {
472
  "cell_type": "code",
473
- "execution_count": 51,
474
  "metadata": {},
475
  "outputs": [],
476
  "source": [
@@ -482,7 +484,7 @@
482
  },
483
  {
484
  "cell_type": "code",
485
- "execution_count": 52,
486
  "metadata": {},
487
  "outputs": [],
488
  "source": [
@@ -493,7 +495,7 @@
493
  },
494
  {
495
  "cell_type": "code",
496
- "execution_count": 53,
497
  "metadata": {},
498
  "outputs": [],
499
  "source": [
@@ -502,7 +504,7 @@
502
  },
503
  {
504
  "cell_type": "code",
505
- "execution_count": 54,
506
  "metadata": {},
507
  "outputs": [],
508
  "source": [
@@ -520,7 +522,7 @@
520
  },
521
  {
522
  "cell_type": "code",
523
- "execution_count": 55,
524
  "metadata": {},
525
  "outputs": [],
526
  "source": [
@@ -531,7 +533,7 @@
531
  },
532
  {
533
  "cell_type": "code",
534
- "execution_count": 56,
535
  "metadata": {},
536
  "outputs": [],
537
  "source": [
@@ -546,7 +548,7 @@
546
  },
547
  {
548
  "cell_type": "code",
549
- "execution_count": 57,
550
  "metadata": {},
551
  "outputs": [],
552
  "source": [
@@ -561,7 +563,7 @@
561
  },
562
  {
563
  "cell_type": "code",
564
- "execution_count": 58,
565
  "metadata": {},
566
  "outputs": [],
567
  "source": [
@@ -570,19 +572,19 @@
570
  },
571
  {
572
  "cell_type": "code",
573
- "execution_count": 59,
574
  "metadata": {},
575
  "outputs": [
576
  {
577
  "data": {
578
  "text/html": [
579
- "<button onClick=\"this.nextSibling.style.display='block';this.style.display='none';\">Display W&B run</button><iframe src='https://wandb.ai/dummy/dummy/runs/3hjt799n?jupyter=true' style='border:none;width:100%;height:420px;display:none;'></iframe>"
580
  ],
581
  "text/plain": [
582
- "<wandb.sdk.wandb_run.Run at 0x749b55325d10>"
583
  ]
584
  },
585
- "execution_count": 59,
586
  "metadata": {},
587
  "output_type": "execute_result"
588
  }
@@ -596,174 +598,9 @@
596
  },
597
  {
598
  "cell_type": "code",
599
- "execution_count": 69,
600
  "metadata": {},
601
- "outputs": [
602
- {
603
- "data": {
604
- "application/vnd.jupyter.widget-view+json": {
605
- "model_id": "400bc1e49a854008a875534a9d3a50d4",
606
- "version_major": 2,
607
- "version_minor": 0
608
- },
609
- "text/plain": [
610
- "Computing widget examples: 0%| | 0/1 [00:00<?, ?example/s]"
611
- ]
612
- },
613
- "metadata": {},
614
- "output_type": "display_data"
615
- },
616
- {
617
- "name": "stderr",
618
- "output_type": "stream",
619
- "text": [
620
- "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter.\n"
621
- ]
622
- },
623
- {
624
- "data": {
625
- "text/html": [
626
- "\n",
627
- " <div>\n",
628
- " \n",
629
- " <progress value='10' max='10' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
630
- " [10/10 00:02, Epoch 5/5]\n",
631
- " </div>\n",
632
- " <table border=\"1\" class=\"dataframe\">\n",
633
- " <thead>\n",
634
- " <tr style=\"text-align: left;\">\n",
635
- " <th>Step</th>\n",
636
- " <th>Training Loss</th>\n",
637
- " <th>Validation Loss</th>\n",
638
- " <th>Cosine Accuracy@1</th>\n",
639
- " <th>Cosine Accuracy@3</th>\n",
640
- " <th>Cosine Accuracy@5</th>\n",
641
- " <th>Cosine Accuracy@10</th>\n",
642
- " <th>Cosine Precision@1</th>\n",
643
- " <th>Cosine Precision@3</th>\n",
644
- " <th>Cosine Precision@5</th>\n",
645
- " <th>Cosine Precision@10</th>\n",
646
- " <th>Cosine Recall@1</th>\n",
647
- " <th>Cosine Recall@3</th>\n",
648
- " <th>Cosine Recall@5</th>\n",
649
- " <th>Cosine Recall@10</th>\n",
650
- " <th>Cosine Ndcg@10</th>\n",
651
- " <th>Cosine Mrr@10</th>\n",
652
- " <th>Cosine Map@100</th>\n",
653
- " </tr>\n",
654
- " </thead>\n",
655
- " <tbody>\n",
656
- " <tr>\n",
657
- " <td>2</td>\n",
658
- " <td>No log</td>\n",
659
- " <td>No log</td>\n",
660
- " <td>0.750000</td>\n",
661
- " <td>1.000000</td>\n",
662
- " <td>1.000000</td>\n",
663
- " <td>1.000000</td>\n",
664
- " <td>0.750000</td>\n",
665
- " <td>0.333333</td>\n",
666
- " <td>0.200000</td>\n",
667
- " <td>0.100000</td>\n",
668
- " <td>0.750000</td>\n",
669
- " <td>1.000000</td>\n",
670
- " <td>1.000000</td>\n",
671
- " <td>1.000000</td>\n",
672
- " <td>0.907732</td>\n",
673
- " <td>0.875000</td>\n",
674
- " <td>0.875000</td>\n",
675
- " </tr>\n",
676
- " <tr>\n",
677
- " <td>4</td>\n",
678
- " <td>No log</td>\n",
679
- " <td>No log</td>\n",
680
- " <td>0.750000</td>\n",
681
- " <td>1.000000</td>\n",
682
- " <td>1.000000</td>\n",
683
- " <td>1.000000</td>\n",
684
- " <td>0.750000</td>\n",
685
- " <td>0.333333</td>\n",
686
- " <td>0.200000</td>\n",
687
- " <td>0.100000</td>\n",
688
- " <td>0.750000</td>\n",
689
- " <td>1.000000</td>\n",
690
- " <td>1.000000</td>\n",
691
- " <td>1.000000</td>\n",
692
- " <td>0.907732</td>\n",
693
- " <td>0.875000</td>\n",
694
- " <td>0.875000</td>\n",
695
- " </tr>\n",
696
- " <tr>\n",
697
- " <td>6</td>\n",
698
- " <td>No log</td>\n",
699
- " <td>No log</td>\n",
700
- " <td>0.750000</td>\n",
701
- " <td>1.000000</td>\n",
702
- " <td>1.000000</td>\n",
703
- " <td>1.000000</td>\n",
704
- " <td>0.750000</td>\n",
705
- " <td>0.333333</td>\n",
706
- " <td>0.200000</td>\n",
707
- " <td>0.100000</td>\n",
708
- " <td>0.750000</td>\n",
709
- " <td>1.000000</td>\n",
710
- " <td>1.000000</td>\n",
711
- " <td>1.000000</td>\n",
712
- " <td>0.907732</td>\n",
713
- " <td>0.875000</td>\n",
714
- " <td>0.875000</td>\n",
715
- " </tr>\n",
716
- " <tr>\n",
717
- " <td>8</td>\n",
718
- " <td>No log</td>\n",
719
- " <td>No log</td>\n",
720
- " <td>0.750000</td>\n",
721
- " <td>1.000000</td>\n",
722
- " <td>1.000000</td>\n",
723
- " <td>1.000000</td>\n",
724
- " <td>0.750000</td>\n",
725
- " <td>0.333333</td>\n",
726
- " <td>0.200000</td>\n",
727
- " <td>0.100000</td>\n",
728
- " <td>0.750000</td>\n",
729
- " <td>1.000000</td>\n",
730
- " <td>1.000000</td>\n",
731
- " <td>1.000000</td>\n",
732
- " <td>0.907732</td>\n",
733
- " <td>0.875000</td>\n",
734
- " <td>0.875000</td>\n",
735
- " </tr>\n",
736
- " <tr>\n",
737
- " <td>10</td>\n",
738
- " <td>No log</td>\n",
739
- " <td>No log</td>\n",
740
- " <td>0.750000</td>\n",
741
- " <td>1.000000</td>\n",
742
- " <td>1.000000</td>\n",
743
- " <td>1.000000</td>\n",
744
- " <td>0.750000</td>\n",
745
- " <td>0.333333</td>\n",
746
- " <td>0.200000</td>\n",
747
- " <td>0.100000</td>\n",
748
- " <td>0.750000</td>\n",
749
- " <td>1.000000</td>\n",
750
- " <td>1.000000</td>\n",
751
- " <td>1.000000</td>\n",
752
- " <td>0.907732</td>\n",
753
- " <td>0.875000</td>\n",
754
- " <td>0.875000</td>\n",
755
- " </tr>\n",
756
- " </tbody>\n",
757
- "</table><p>"
758
- ],
759
- "text/plain": [
760
- "<IPython.core.display.HTML object>"
761
- ]
762
- },
763
- "metadata": {},
764
- "output_type": "display_data"
765
- }
766
- ],
767
  "source": [
768
  "#commented out for now as want to run whole notebook but not retrain\n",
769
  "# warmup_steps = int(len(loader) * EPOCHS * 0.1)\n",
@@ -781,7 +618,7 @@
781
  },
782
  {
783
  "cell_type": "code",
784
- "execution_count": 61,
785
  "metadata": {},
786
  "outputs": [],
787
  "source": [
@@ -791,7 +628,7 @@
791
  },
792
  {
793
  "cell_type": "code",
794
- "execution_count": 62,
795
  "metadata": {},
796
  "outputs": [
797
  {
@@ -816,7 +653,7 @@
816
  },
817
  {
818
  "cell_type": "code",
819
- "execution_count": 93,
820
  "metadata": {},
821
  "outputs": [
822
  {
@@ -826,20 +663,6 @@
826
  "Some weights of BertModel were not initialized from the model checkpoint at drewgenai/midterm-compare-arctic-embed-m-ft and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']\n",
827
  "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
828
  ]
829
- },
830
- {
831
- "ename": "IndexError",
832
- "evalue": "list index out of range",
833
- "output_type": "error",
834
- "traceback": [
835
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
836
- "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)",
837
- "Cell \u001b[0;32mIn[93], line 17\u001b[0m\n\u001b[1;32m 9\u001b[0m embedding_model \u001b[38;5;241m=\u001b[39m HuggingFaceEmbeddings(model_name\u001b[38;5;241m=\u001b[39mmodel_id)\n\u001b[1;32m 10\u001b[0m \u001b[38;5;66;03m# model_id = \"Snowflake/snowflake-arctic-embed-m\"\u001b[39;00m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;66;03m# embedding_model = HuggingFaceEmbeddings(model_name=model_id)\u001b[39;00m\n\u001b[1;32m 12\u001b[0m \u001b[38;5;66;03m# model_id = \"Snowflake/snowflake-arctic-embed-m-v2.0\"\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 15\u001b[0m \n\u001b[1;32m 16\u001b[0m \u001b[38;5;66;03m# Load documents into Qdrant\u001b[39;00m\n\u001b[0;32m---> 17\u001b[0m qdrant_vectorstore \u001b[38;5;241m=\u001b[39m \u001b[43mQdrant\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_documents\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 18\u001b[0m \u001b[43m \u001b[49m\u001b[43mdocuments_with_metadata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 19\u001b[0m \u001b[43m \u001b[49m\u001b[43membedding_model\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 20\u001b[0m \u001b[43m \u001b[49m\u001b[43mlocation\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m:memory:\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# In-memory for testing\u001b[39;49;00m\n\u001b[1;32m 21\u001b[0m \u001b[43m \u001b[49m\u001b[43mcollection_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdocument_comparison\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 22\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 24\u001b[0m \u001b[38;5;66;03m# Create a retriever\u001b[39;00m\n\u001b[1;32m 25\u001b[0m qdrant_retriever \u001b[38;5;241m=\u001b[39m qdrant_vectorstore\u001b[38;5;241m.\u001b[39mas_retriever()\n",
838
- "File \u001b[0;32m~/Documents/huggingfacetesting/temptest/.venv/lib/python3.13/site-packages/langchain_core/vectorstores/base.py:852\u001b[0m, in \u001b[0;36mVectorStore.from_documents\u001b[0;34m(cls, documents, embedding, **kwargs)\u001b[0m\n\u001b[1;32m 849\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28many\u001b[39m(ids):\n\u001b[1;32m 850\u001b[0m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mids\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m ids\n\u001b[0;32m--> 852\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_texts\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtexts\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43membedding\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmetadatas\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmetadatas\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
839
- "File \u001b[0;32m~/Documents/huggingfacetesting/temptest/.venv/lib/python3.13/site-packages/langchain_community/vectorstores/qdrant.py:1337\u001b[0m, in \u001b[0;36mQdrant.from_texts\u001b[0;34m(cls, texts, embedding, metadatas, ids, location, url, port, grpc_port, prefer_grpc, https, api_key, prefix, timeout, host, path, collection_name, distance_func, content_payload_key, metadata_payload_key, vector_name, batch_size, shard_number, replication_factor, write_consistency_factor, on_disk_payload, hnsw_config, optimizers_config, wal_config, quantization_config, init_from, on_disk, force_recreate, **kwargs)\u001b[0m\n\u001b[1;32m 1197\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 1198\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mfrom_texts\u001b[39m(\n\u001b[1;32m 1199\u001b[0m \u001b[38;5;28mcls\u001b[39m: Type[Qdrant],\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1232\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: Any,\n\u001b[1;32m 1233\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Qdrant:\n\u001b[1;32m 1234\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Construct Qdrant wrapper from a list of texts.\u001b[39;00m\n\u001b[1;32m 1235\u001b[0m \n\u001b[1;32m 1236\u001b[0m \u001b[38;5;124;03m Args:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1335\u001b[0m \u001b[38;5;124;03m qdrant = Qdrant.from_texts(texts, embeddings, \"localhost\")\u001b[39;00m\n\u001b[1;32m 1336\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m-> 1337\u001b[0m qdrant \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconstruct_instance\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1338\u001b[0m \u001b[43m \u001b[49m\u001b[43mtexts\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1339\u001b[0m \u001b[43m \u001b[49m\u001b[43membedding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1340\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mlocation\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1341\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1342\u001b[0m \u001b[43m \u001b[49m\u001b[43mport\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1343\u001b[0m \u001b[43m \u001b[49m\u001b[43mgrpc_port\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1344\u001b[0m \u001b[43m \u001b[49m\u001b[43mprefer_grpc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1345\u001b[0m \u001b[43m \u001b[49m\u001b[43mhttps\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1346\u001b[0m \u001b[43m \u001b[49m\u001b[43mapi_key\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1347\u001b[0m \u001b[43m \u001b[49m\u001b[43mprefix\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1348\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1349\u001b[0m \u001b[43m \u001b[49m\u001b[43mhost\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1350\u001b[0m \u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1351\u001b[0m \u001b[43m \u001b[49m\u001b[43mcollection_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1352\u001b[0m \u001b[43m \u001b[49m\u001b[43mdistance_func\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1353\u001b[0m \u001b[43m \u001b[49m\u001b[43mcontent_payload_key\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1354\u001b[0m \u001b[43m \u001b[49m\u001b[43mmetadata_payload_key\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1355\u001b[0m \u001b[43m \u001b[49m\u001b[43mvector_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1356\u001b[0m \u001b[43m \u001b[49m\u001b[43mshard_number\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1357\u001b[0m \u001b[43m \u001b[49m\u001b[43mreplication_factor\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1358\u001b[0m \u001b[43m \u001b[49m\u001b[43mwrite_consistency_factor\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1359\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mon_disk_payload\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1360\u001b[0m \u001b[43m \u001b[49m\u001b[43mhnsw_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1361\u001b[0m \u001b[43m \u001b[49m\u001b[43moptimizers_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1362\u001b[0m \u001b[43m \u001b[49m\u001b[43mwal_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1363\u001b[0m \u001b[43m \u001b[49m\u001b[43mquantization_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1364\u001b[0m \u001b[43m \u001b[49m\u001b[43minit_from\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1365\u001b[0m \u001b[43m \u001b[49m\u001b[43mon_disk\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1366\u001b[0m \u001b[43m \u001b[49m\u001b[43mforce_recreate\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1367\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1368\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1369\u001b[0m qdrant\u001b[38;5;241m.\u001b[39madd_texts(texts, metadatas, ids, batch_size)\n\u001b[1;32m 1370\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m qdrant\n",
840
- "File \u001b[0;32m~/Documents/huggingfacetesting/temptest/.venv/lib/python3.13/site-packages/langchain_community/vectorstores/qdrant.py:1640\u001b[0m, in \u001b[0;36mQdrant.construct_instance\u001b[0;34m(cls, texts, embedding, location, url, port, grpc_port, prefer_grpc, https, api_key, prefix, timeout, host, path, collection_name, distance_func, content_payload_key, metadata_payload_key, vector_name, shard_number, replication_factor, write_consistency_factor, on_disk_payload, hnsw_config, optimizers_config, wal_config, quantization_config, init_from, on_disk, force_recreate, **kwargs)\u001b[0m\n\u001b[1;32m 1638\u001b[0m \u001b[38;5;66;03m# Just do a single quick embedding to get vector size\u001b[39;00m\n\u001b[1;32m 1639\u001b[0m partial_embeddings \u001b[38;5;241m=\u001b[39m embedding\u001b[38;5;241m.\u001b[39membed_documents(texts[:\u001b[38;5;241m1\u001b[39m])\n\u001b[0;32m-> 1640\u001b[0m vector_size \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlen\u001b[39m(\u001b[43mpartial_embeddings\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m)\n\u001b[1;32m 1641\u001b[0m collection_name \u001b[38;5;241m=\u001b[39m collection_name \u001b[38;5;129;01mor\u001b[39;00m uuid\u001b[38;5;241m.\u001b[39muuid4()\u001b[38;5;241m.\u001b[39mhex\n\u001b[1;32m 1642\u001b[0m distance_func \u001b[38;5;241m=\u001b[39m distance_func\u001b[38;5;241m.\u001b[39mupper()\n",
841
- "\u001b[0;31mIndexError\u001b[0m: list index out of range"
842
- ]
843
  }
844
  ],
845
  "source": [
@@ -872,7 +695,7 @@
872
  },
873
  {
874
  "cell_type": "code",
875
- "execution_count": 64,
876
  "metadata": {},
877
  "outputs": [],
878
  "source": [
@@ -924,7 +747,7 @@
924
  },
925
  {
926
  "cell_type": "code",
927
- "execution_count": 65,
928
  "metadata": {},
929
  "outputs": [],
930
  "source": [
@@ -945,7 +768,7 @@
945
  },
946
  {
947
  "cell_type": "code",
948
- "execution_count": 66,
949
  "metadata": {},
950
  "outputs": [
951
  {
@@ -999,7 +822,7 @@
999
  },
1000
  {
1001
  "cell_type": "code",
1002
- "execution_count": 67,
1003
  "metadata": {},
1004
  "outputs": [],
1005
  "source": [
@@ -1008,7 +831,7 @@
1008
  },
1009
  {
1010
  "cell_type": "code",
1011
- "execution_count": 68,
1012
  "metadata": {},
1013
  "outputs": [],
1014
  "source": [
@@ -1031,7 +854,7 @@
1031
  },
1032
  {
1033
  "cell_type": "code",
1034
- "execution_count": 96,
1035
  "metadata": {},
1036
  "outputs": [],
1037
  "source": [
@@ -1042,7 +865,7 @@
1042
  },
1043
  {
1044
  "cell_type": "code",
1045
- "execution_count": 91,
1046
  "metadata": {},
1047
  "outputs": [],
1048
  "source": [
@@ -1065,7 +888,7 @@
1065
  },
1066
  {
1067
  "cell_type": "code",
1068
- "execution_count": 92,
1069
  "metadata": {},
1070
  "outputs": [],
1071
  "source": [
@@ -1084,7 +907,7 @@
1084
  },
1085
  {
1086
  "cell_type": "code",
1087
- "execution_count": 113,
1088
  "metadata": {},
1089
  "outputs": [
1090
  {
@@ -1092,11 +915,15 @@
1092
  "output_type": "stream",
1093
  "text": [
1094
  "Some weights of BertModel were not initialized from the model checkpoint at drewgenai/midterm-compare-arctic-embed-m-ft and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']\n",
1095
- "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
 
 
1096
  ]
1097
  }
1098
  ],
1099
  "source": [
 
 
1100
  "base_model_id = f\"Snowflake/snowflake-arctic-embed-m\" \n",
1101
  "base_embedding_model = HuggingFaceEmbeddings(model_name=base_model_id)\n",
1102
  "\n",
@@ -1109,7 +936,7 @@
1109
  },
1110
  {
1111
  "cell_type": "code",
1112
- "execution_count": 114,
1113
  "metadata": {},
1114
  "outputs": [],
1115
  "source": [
@@ -1152,7 +979,7 @@
1152
  },
1153
  {
1154
  "cell_type": "code",
1155
- "execution_count": null,
1156
  "metadata": {},
1157
  "outputs": [],
1158
  "source": [
@@ -1172,7 +999,7 @@
1172
  },
1173
  {
1174
  "cell_type": "code",
1175
- "execution_count": 100,
1176
  "metadata": {},
1177
  "outputs": [],
1178
  "source": [
@@ -1187,7 +1014,7 @@
1187
  },
1188
  {
1189
  "cell_type": "code",
1190
- "execution_count": 102,
1191
  "metadata": {},
1192
  "outputs": [],
1193
  "source": [
@@ -1200,7 +1027,7 @@
1200
  },
1201
  {
1202
  "cell_type": "code",
1203
- "execution_count": 115,
1204
  "metadata": {},
1205
  "outputs": [],
1206
  "source": [
@@ -1224,7 +1051,7 @@
1224
  },
1225
  {
1226
  "cell_type": "code",
1227
- "execution_count": 103,
1228
  "metadata": {},
1229
  "outputs": [],
1230
  "source": [
@@ -1238,13 +1065,13 @@
1238
  },
1239
  {
1240
  "cell_type": "code",
1241
- "execution_count": 104,
1242
  "metadata": {},
1243
  "outputs": [
1244
  {
1245
  "data": {
1246
  "application/vnd.jupyter.widget-view+json": {
1247
- "model_id": "7c3166b3cd08451a9b2d35c0b73581af",
1248
  "version_major": 2,
1249
  "version_minor": 0
1250
  },
@@ -1258,7 +1085,7 @@
1258
  {
1259
  "data": {
1260
  "application/vnd.jupyter.widget-view+json": {
1261
- "model_id": "84fc7afd0ff04c0e8990cb88b9978867",
1262
  "version_major": 2,
1263
  "version_minor": 0
1264
  },
@@ -1273,13 +1100,13 @@
1273
  "name": "stderr",
1274
  "output_type": "stream",
1275
  "text": [
1276
- "Node 77fa3fd5-0ec7-4864-8a9f-fb6df33f64ec does not have a summary. Skipping filtering.\n"
1277
  ]
1278
  },
1279
  {
1280
  "data": {
1281
  "application/vnd.jupyter.widget-view+json": {
1282
- "model_id": "8e6bcaf303d641fa8c48f3dd8f077771",
1283
  "version_major": 2,
1284
  "version_minor": 0
1285
  },
@@ -1293,7 +1120,7 @@
1293
  {
1294
  "data": {
1295
  "application/vnd.jupyter.widget-view+json": {
1296
- "model_id": "4146d76a8f93496d909b6f56f2b99644",
1297
  "version_major": 2,
1298
  "version_minor": 0
1299
  },
@@ -1307,7 +1134,7 @@
1307
  {
1308
  "data": {
1309
  "application/vnd.jupyter.widget-view+json": {
1310
- "model_id": "7bf10ce73bf04cdf9c8bb81d5134095f",
1311
  "version_major": 2,
1312
  "version_minor": 0
1313
  },
@@ -1321,7 +1148,7 @@
1321
  {
1322
  "data": {
1323
  "application/vnd.jupyter.widget-view+json": {
1324
- "model_id": "8c5a3b61bcb94ab19b0478a95b1b43ad",
1325
  "version_major": 2,
1326
  "version_minor": 0
1327
  },
@@ -1335,7 +1162,7 @@
1335
  {
1336
  "data": {
1337
  "application/vnd.jupyter.widget-view+json": {
1338
- "model_id": "88fb910b941344ea9b2414c3010fad47",
1339
  "version_major": 2,
1340
  "version_minor": 0
1341
  },
@@ -1356,7 +1183,7 @@
1356
  },
1357
  {
1358
  "cell_type": "code",
1359
- "execution_count": 105,
1360
  "metadata": {},
1361
  "outputs": [
1362
  {
@@ -1389,72 +1216,72 @@
1389
  " <tbody>\n",
1390
  " <tr>\n",
1391
  " <th>0</th>\n",
1392
- " <td>How does the Pain Coping Strategy Scale (PCSS-...</td>\n",
1393
  " <td>[Linked Psychological &amp; Physical Assessment\\nP...</td>\n",
1394
- " <td>The Pain Coping Strategy Scale (PCSS-9) measur...</td>\n",
1395
  " <td>single_hop_specifc_query_synthesizer</td>\n",
1396
  " </tr>\n",
1397
  " <tr>\n",
1398
  " <th>1</th>\n",
1399
- " <td>Cud yu pleese explane wut the Pain Coping Stra...</td>\n",
1400
  " <td>[Linked Psychological &amp; Physical Assessment\\nP...</td>\n",
1401
- " <td>The Pain Coping Strategy Scale (PCSS-9) measur...</td>\n",
1402
  " <td>single_hop_specifc_query_synthesizer</td>\n",
1403
  " </tr>\n",
1404
  " <tr>\n",
1405
  " <th>2</th>\n",
1406
- " <td>Wht is the ERI-9 and how does it relate to emo...</td>\n",
1407
  " <td>[Financial Stress Index (FSI-6)\\nThe FSI-6 eva...</td>\n",
1408
- " <td>The Emotional Regulation Index (ERI-9) is ment...</td>\n",
1409
  " <td>single_hop_specifc_query_synthesizer</td>\n",
1410
  " </tr>\n",
1411
  " <tr>\n",
1412
  " <th>3</th>\n",
1413
- " <td>what cognitive load management scale do</td>\n",
1414
  " <td>[Financial Stress Index (FSI-6)\\nThe FSI-6 eva...</td>\n",
1415
- " <td>The Cognitive Load Management Scale (CLMS-7) m...</td>\n",
1416
  " <td>single_hop_specifc_query_synthesizer</td>\n",
1417
  " </tr>\n",
1418
  " <tr>\n",
1419
  " <th>4</th>\n",
1420
- " <td>What does the MRI-6 assessment evaluate?</td>\n",
1421
  " <td>[The ERI-9 assesses an individual's ability to...</td>\n",
1422
  " <td>The MRI-6 evaluates short-term and long-term m...</td>\n",
1423
  " <td>single_hop_specifc_query_synthesizer</td>\n",
1424
  " </tr>\n",
1425
  " <tr>\n",
1426
  " <th>5</th>\n",
1427
- " <td>what scm-6 do for social confidence and public...</td>\n",
1428
  " <td>[The ERI-9 assesses an individual's ability to...</td>\n",
1429
- " <td>The SCM-6 evaluates levels of confidence in so...</td>\n",
1430
  " <td>single_hop_specifc_query_synthesizer</td>\n",
1431
  " </tr>\n",
1432
  " <tr>\n",
1433
  " <th>6</th>\n",
1434
- " <td>What does the RDMT-6 assess in terms of cognit...</td>\n",
1435
  " <td>[Linked Psychological &amp; Physical Assessment\\nC...</td>\n",
1436
- " <td>The RDMT-6 evaluates logical reasoning and dec...</td>\n",
1437
  " <td>single_hop_specifc_query_synthesizer</td>\n",
1438
  " </tr>\n",
1439
  " <tr>\n",
1440
  " <th>7</th>\n",
1441
- " <td>What does the CPAI-10 assess in individuals wi...</td>\n",
1442
  " <td>[Linked Psychological &amp; Physical Assessment\\nC...</td>\n",
1443
- " <td>The CPAI-10 evaluates the strategies people us...</td>\n",
1444
  " <td>single_hop_specifc_query_synthesizer</td>\n",
1445
  " </tr>\n",
1446
  " <tr>\n",
1447
  " <th>8</th>\n",
1448
- " <td>What does the CWT-7 assessment measure in term...</td>\n",
1449
  " <td>[I feel confident when making important decisi...</td>\n",
1450
  " <td>The CWT-7 evaluates an individual's ability to...</td>\n",
1451
  " <td>single_hop_specifc_query_synthesizer</td>\n",
1452
  " </tr>\n",
1453
  " <tr>\n",
1454
  " <th>9</th>\n",
1455
- " <td>What does the Stamina and Endurance Index (SEI...</td>\n",
1456
  " <td>[I feel confident when making important decisi...</td>\n",
1457
- " <td>The Stamina and Endurance Index (SEI-8) measur...</td>\n",
1458
  " <td>single_hop_specifc_query_synthesizer</td>\n",
1459
  " </tr>\n",
1460
  " </tbody>\n",
@@ -1463,16 +1290,16 @@
1463
  ],
1464
  "text/plain": [
1465
  " user_input \\\n",
1466
- "0 How does the Pain Coping Strategy Scale (PCSS-... \n",
1467
- "1 Cud yu pleese explane wut the Pain Coping Stra... \n",
1468
- "2 Wht is the ERI-9 and how does it relate to emo... \n",
1469
- "3 what cognitive load management scale do \n",
1470
- "4 What does the MRI-6 assessment evaluate? \n",
1471
- "5 what scm-6 do for social confidence and public... \n",
1472
- "6 What does the RDMT-6 assess in terms of cognit... \n",
1473
- "7 What does the CPAI-10 assess in individuals wi... \n",
1474
- "8 What does the CWT-7 assessment measure in term... \n",
1475
- "9 What does the Stamina and Endurance Index (SEI... \n",
1476
  "\n",
1477
  " reference_contexts \\\n",
1478
  "0 [Linked Psychological & Physical Assessment\\nP... \n",
@@ -1487,16 +1314,16 @@
1487
  "9 [I feel confident when making important decisi... \n",
1488
  "\n",
1489
  " reference \\\n",
1490
- "0 The Pain Coping Strategy Scale (PCSS-9) measur... \n",
1491
- "1 The Pain Coping Strategy Scale (PCSS-9) measur... \n",
1492
- "2 The Emotional Regulation Index (ERI-9) is ment... \n",
1493
- "3 The Cognitive Load Management Scale (CLMS-7) m... \n",
1494
  "4 The MRI-6 evaluates short-term and long-term m... \n",
1495
- "5 The SCM-6 evaluates levels of confidence in so... \n",
1496
- "6 The RDMT-6 evaluates logical reasoning and dec... \n",
1497
- "7 The CPAI-10 evaluates the strategies people us... \n",
1498
  "8 The CWT-7 evaluates an individual's ability to... \n",
1499
- "9 The Stamina and Endurance Index (SEI-8) measur... \n",
1500
  "\n",
1501
  " synthesizer_name \n",
1502
  "0 single_hop_specifc_query_synthesizer \n",
@@ -1511,7 +1338,7 @@
1511
  "9 single_hop_specifc_query_synthesizer "
1512
  ]
1513
  },
1514
- "execution_count": 105,
1515
  "metadata": {},
1516
  "output_type": "execute_result"
1517
  }
@@ -1529,7 +1356,7 @@
1529
  },
1530
  {
1531
  "cell_type": "code",
1532
- "execution_count": 106,
1533
  "metadata": {},
1534
  "outputs": [],
1535
  "source": [
@@ -1541,7 +1368,7 @@
1541
  },
1542
  {
1543
  "cell_type": "code",
1544
- "execution_count": 107,
1545
  "metadata": {},
1546
  "outputs": [],
1547
  "source": [
@@ -1552,7 +1379,7 @@
1552
  },
1553
  {
1554
  "cell_type": "code",
1555
- "execution_count": 108,
1556
  "metadata": {},
1557
  "outputs": [],
1558
  "source": [
@@ -1563,13 +1390,13 @@
1563
  },
1564
  {
1565
  "cell_type": "code",
1566
- "execution_count": 109,
1567
  "metadata": {},
1568
  "outputs": [
1569
  {
1570
  "data": {
1571
  "application/vnd.jupyter.widget-view+json": {
1572
- "model_id": "57340d6c46c347e19fecdc4490574a8b",
1573
  "version_major": 2,
1574
  "version_minor": 0
1575
  },
@@ -1584,32 +1411,36 @@
1584
  "name": "stderr",
1585
  "output_type": "stream",
1586
  "text": [
1587
- "Exception raised in Job[13]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28698, Requested 2725. Please try again in 2.846s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1588
- "Exception raised in Job[22]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29211, Requested 2254. Please try again in 2.93s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1589
- "Exception raised in Job[19]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29563, Requested 2685. Please try again in 4.496s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1590
- "Exception raised in Job[24]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29189, Requested 2555. Please try again in 3.488s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1591
- "Exception raised in Job[28]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29993, Requested 2254. Please try again in 4.494s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1592
- "Exception raised in Job[1]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29502, Requested 2743. Please try again in 4.49s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1593
- "Exception raised in Job[30]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28840, Requested 2574. Please try again in 2.828s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1594
- "Exception raised in Job[25]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29928, Requested 2511. Please try again in 4.878s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1595
- "Exception raised in Job[7]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29823, Requested 2809. Please try again in 5.264s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1596
- "Exception raised in Job[31]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29637, Requested 2665. Please try again in 4.604s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1597
- "Exception raised in Job[36]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29185, Requested 2560. Please try again in 3.49s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1598
- "Exception raised in Job[11]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29749, Requested 1558. Please try again in 2.614s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1599
- "Exception raised in Job[5]: TimeoutError()\n",
 
1600
  "Exception raised in Job[17]: TimeoutError()\n",
1601
- "Exception raised in Job[43]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29678, Requested 2514. Please try again in 4.384s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1602
- "Exception raised in Job[37]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28940, Requested 2499. Please try again in 2.878s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1603
- "Exception raised in Job[40]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28657, Requested 2254. Please try again in 1.822s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n"
 
 
 
1604
  ]
1605
  },
1606
  {
1607
  "data": {
1608
  "text/plain": [
1609
- "{'context_recall': 1.0000, 'faithfulness': 1.0000, 'factual_correctness': 0.7540, 'answer_relevancy': 0.9481, 'context_entity_recall': 0.8095, 'noise_sensitivity_relevant': 0.1973}"
1610
  ]
1611
  },
1612
- "execution_count": 109,
1613
  "metadata": {},
1614
  "output_type": "execute_result"
1615
  }
@@ -1638,7 +1469,7 @@
1638
  },
1639
  {
1640
  "cell_type": "code",
1641
- "execution_count": 110,
1642
  "metadata": {},
1643
  "outputs": [],
1644
  "source": [
@@ -1650,7 +1481,7 @@
1650
  },
1651
  {
1652
  "cell_type": "code",
1653
- "execution_count": 111,
1654
  "metadata": {},
1655
  "outputs": [],
1656
  "source": [
@@ -1659,13 +1490,13 @@
1659
  },
1660
  {
1661
  "cell_type": "code",
1662
- "execution_count": 112,
1663
  "metadata": {},
1664
  "outputs": [
1665
  {
1666
  "data": {
1667
  "application/vnd.jupyter.widget-view+json": {
1668
- "model_id": "758cb2b2b6df49e88c88b1fca6c09f3c",
1669
  "version_major": 2,
1670
  "version_minor": 0
1671
  },
@@ -1680,34 +1511,36 @@
1680
  "name": "stderr",
1681
  "output_type": "stream",
1682
  "text": [
1683
- "Exception raised in Job[22]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28950, Requested 2254. Please try again in 2.408s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1684
- "Exception raised in Job[16]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28949, Requested 2254. Please try again in 2.406s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1685
- "Exception raised in Job[19]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28567, Requested 2751. Please try again in 2.636s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1686
- "Exception raised in Job[25]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28831, Requested 2511. Please try again in 2.684s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1687
- "Exception raised in Job[28]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29242, Requested 2254. Please try again in 2.992s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1688
- "Exception raised in Job[24]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29683, Requested 2555. Please try again in 4.476s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1689
- "Exception raised in Job[11]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29672, Requested 1515. Please try again in 2.374s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1690
- "Exception raised in Job[1]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29901, Requested 2743. Please try again in 5.288s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1691
- "Exception raised in Job[30]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29651, Requested 2574. Please try again in 4.45s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1692
- "Exception raised in Job[7]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29659, Requested 2771. Please try again in 4.86s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1693
- "Exception raised in Job[34]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28744, Requested 2265. Please try again in 2.018s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1694
- "Exception raised in Job[31]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29754, Requested 2665. Please try again in 4.838s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1695
- "Exception raised in Job[5]: TimeoutError()\n",
1696
- "Exception raised in Job[36]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29775, Requested 2560. Please try again in 4.67s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1697
  "Exception raised in Job[17]: TimeoutError()\n",
1698
- "Exception raised in Job[23]: TimeoutError()\n",
1699
- "Exception raised in Job[40]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28967, Requested 2254. Please try again in 2.442s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1700
- "Exception raised in Job[46]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28976, Requested 2250. Please try again in 2.452s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1701
- "Exception raised in Job[37]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28735, Requested 2499. Please try again in 2.468s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n"
 
 
1702
  ]
1703
  },
1704
  {
1705
  "data": {
1706
  "text/plain": [
1707
- "{'context_recall': 1.0000, 'faithfulness': 0.8500, 'factual_correctness': 0.7220, 'answer_relevancy': 0.9481, 'context_entity_recall': 0.7917, 'noise_sensitivity_relevant': 0.1111}"
1708
  ]
1709
  },
1710
- "execution_count": 112,
1711
  "metadata": {},
1712
  "output_type": "execute_result"
1713
  }
@@ -1731,7 +1564,7 @@
1731
  },
1732
  {
1733
  "cell_type": "code",
1734
- "execution_count": 116,
1735
  "metadata": {},
1736
  "outputs": [],
1737
  "source": [
@@ -1743,7 +1576,7 @@
1743
  },
1744
  {
1745
  "cell_type": "code",
1746
- "execution_count": 117,
1747
  "metadata": {},
1748
  "outputs": [],
1749
  "source": [
@@ -1752,13 +1585,13 @@
1752
  },
1753
  {
1754
  "cell_type": "code",
1755
- "execution_count": 118,
1756
  "metadata": {},
1757
  "outputs": [
1758
  {
1759
  "data": {
1760
  "application/vnd.jupyter.widget-view+json": {
1761
- "model_id": "a3f59e7e78294492a701763a859d6239",
1762
  "version_major": 2,
1763
  "version_minor": 0
1764
  },
@@ -1773,29 +1606,37 @@
1773
  "name": "stderr",
1774
  "output_type": "stream",
1775
  "text": [
1776
- "Exception raised in Job[30]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28587, Requested 2574. Please try again in 2.322s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1777
- "Exception raised in Job[25]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29460, Requested 2782. Please try again in 4.484s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1778
- "Exception raised in Job[1]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29365, Requested 2991. Please try again in 4.712s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1779
- "Exception raised in Job[24]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29067, Requested 2826. Please try again in 3.786s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1780
- "Exception raised in Job[13]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28945, Requested 2968. Please try again in 3.826s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1781
- "Exception raised in Job[22]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29841, Requested 2525. Please try again in 4.732s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1782
- "Exception raised in Job[19]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29512, Requested 2895. Please try again in 4.814s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1783
- "Exception raised in Job[11]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29581, Requested 1650. Please try again in 2.462s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1784
- "Exception raised in Job[7]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29318, Requested 3175. Please try again in 4.986s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1785
- "Exception raised in Job[28]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28799, Requested 2525. Please try again in 2.648s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1786
- "Exception raised in Job[5]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29787, Requested 1465. Please try again in 2.504s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1787
- "Exception raised in Job[34]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29638, Requested 2265. Please try again in 3.805s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1788
- "Exception raised in Job[31]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29242, Requested 2736. Please try again in 3.956s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1789
- "Exception raised in Job[35]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29647, Requested 1516. Please try again in 2.326s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n"
 
 
 
 
 
 
 
 
1790
  ]
1791
  },
1792
  {
1793
  "data": {
1794
  "text/plain": [
1795
- "{'context_recall': 1.0000, 'faithfulness': 1.0000, 'factual_correctness': 0.7540, 'answer_relevancy': 0.9463, 'context_entity_recall': 0.8095, 'noise_sensitivity_relevant': 0.3095}"
1796
  ]
1797
  },
1798
- "execution_count": 118,
1799
  "metadata": {},
1800
  "output_type": "execute_result"
1801
  }
 
36
  },
37
  {
38
  "cell_type": "code",
39
+ "execution_count": 2,
40
  "metadata": {},
41
  "outputs": [],
42
  "source": [
 
49
  },
50
  {
51
  "cell_type": "code",
52
+ "execution_count": 3,
53
  "metadata": {},
54
  "outputs": [],
55
  "source": [
 
58
  },
59
  {
60
  "cell_type": "code",
61
+ "execution_count": 4,
62
  "metadata": {},
63
  "outputs": [
64
  {
65
  "data": {
66
  "application/vnd.jupyter.widget-view+json": {
67
+ "model_id": "7171fa2fd73446349406e23d4f6b898f",
68
  "version_major": 2,
69
  "version_minor": 0
70
  },
 
74
  },
75
  "metadata": {},
76
  "output_type": "display_data"
 
 
 
 
 
 
 
77
  }
78
  ],
79
  "source": [
 
83
  },
84
  {
85
  "cell_type": "code",
86
+ "execution_count": 5,
87
  "metadata": {},
88
  "outputs": [
89
  {
 
101
  },
102
  {
103
  "cell_type": "code",
104
+ "execution_count": 6,
105
  "metadata": {},
106
  "outputs": [
107
  {
 
120
  },
121
  {
122
  "cell_type": "code",
123
+ "execution_count": 7,
124
  "metadata": {},
125
  "outputs": [],
126
  "source": [
 
161
  },
162
  {
163
  "cell_type": "code",
164
+ "execution_count": 8,
165
  "metadata": {},
166
+ "outputs": [
167
+ {
168
+ "name": "stderr",
169
+ "output_type": "stream",
170
+ "text": [
171
+ "/tmp/ipykernel_5461/2495904805.py:7: LangChainDeprecationWarning: The class `HuggingFaceEmbeddings` was deprecated in LangChain 0.2.2 and will be removed in 1.0. An updated version of the class exists in the :class:`~langchain-huggingface package and should be used instead. To use it run `pip install -U :class:`~langchain-huggingface` and import as `from :class:`~langchain_huggingface import HuggingFaceEmbeddings``.\n",
172
+ " embedding_model = HuggingFaceEmbeddings(model_name=model_id)\n"
173
+ ]
174
+ }
175
+ ],
176
  "source": [
177
  "\n",
178
  "\n",
 
193
  },
194
  {
195
  "cell_type": "code",
196
+ "execution_count": 9,
197
  "metadata": {},
198
  "outputs": [],
199
  "source": [
 
227
  },
228
  {
229
  "cell_type": "code",
230
+ "execution_count": 10,
231
  "metadata": {},
232
  "outputs": [],
233
  "source": [
 
257
  },
258
  {
259
  "cell_type": "code",
260
+ "execution_count": 11,
261
  "metadata": {},
262
  "outputs": [],
263
  "source": [
 
275
  },
276
  {
277
  "cell_type": "code",
278
+ "execution_count": 12,
279
  "metadata": {},
280
  "outputs": [
281
  {
 
314
  },
315
  {
316
  "cell_type": "code",
317
+ "execution_count": 13,
318
  "metadata": {},
319
  "outputs": [],
320
  "source": [
 
328
  },
329
  {
330
  "cell_type": "code",
331
+ "execution_count": 14,
332
  "metadata": {},
333
  "outputs": [],
334
  "source": [
 
352
  },
353
  {
354
  "cell_type": "code",
355
+ "execution_count": 15,
356
  "metadata": {},
357
  "outputs": [],
358
  "source": [
 
361
  },
362
  {
363
  "cell_type": "code",
364
+ "execution_count": 16,
365
  "metadata": {},
366
  "outputs": [],
367
  "source": [
 
398
  },
399
  {
400
  "cell_type": "code",
401
+ "execution_count": 17,
402
  "metadata": {},
403
  "outputs": [
404
  {
405
  "name": "stderr",
406
  "output_type": "stream",
407
  "text": [
408
+ "Processing documents: 100%|██████████| 9/9 [00:01<00:00, 7.51it/s]\n",
409
+ "Processing documents: 100%|██████████| 2/2 [00:00<00:00, 2.35it/s]\n",
410
+ "Processing documents: 100%|██████████| 3/3 [00:01<00:00, 2.61it/s]\n"
411
  ]
412
  }
413
  ],
 
419
  },
420
  {
421
  "cell_type": "code",
422
+ "execution_count": 18,
423
  "metadata": {},
424
  "outputs": [],
425
  "source": [
 
463
  },
464
  {
465
  "cell_type": "code",
466
+ "execution_count": 19,
467
  "metadata": {},
468
  "outputs": [],
469
  "source": [
 
472
  },
473
  {
474
  "cell_type": "code",
475
+ "execution_count": 20,
476
  "metadata": {},
477
  "outputs": [],
478
  "source": [
 
484
  },
485
  {
486
  "cell_type": "code",
487
+ "execution_count": 21,
488
  "metadata": {},
489
  "outputs": [],
490
  "source": [
 
495
  },
496
  {
497
  "cell_type": "code",
498
+ "execution_count": 22,
499
  "metadata": {},
500
  "outputs": [],
501
  "source": [
 
504
  },
505
  {
506
  "cell_type": "code",
507
+ "execution_count": 23,
508
  "metadata": {},
509
  "outputs": [],
510
  "source": [
 
522
  },
523
  {
524
  "cell_type": "code",
525
+ "execution_count": 24,
526
  "metadata": {},
527
  "outputs": [],
528
  "source": [
 
533
  },
534
  {
535
  "cell_type": "code",
536
+ "execution_count": 25,
537
  "metadata": {},
538
  "outputs": [],
539
  "source": [
 
548
  },
549
  {
550
  "cell_type": "code",
551
+ "execution_count": 26,
552
  "metadata": {},
553
  "outputs": [],
554
  "source": [
 
563
  },
564
  {
565
  "cell_type": "code",
566
+ "execution_count": 27,
567
  "metadata": {},
568
  "outputs": [],
569
  "source": [
 
572
  },
573
  {
574
  "cell_type": "code",
575
+ "execution_count": 28,
576
  "metadata": {},
577
  "outputs": [
578
  {
579
  "data": {
580
  "text/html": [
581
+ "<button onClick=\"this.nextSibling.style.display='block';this.style.display='none';\">Display W&B run</button><iframe src='https://wandb.ai/dummy/dummy/runs/12mf5zrt?jupyter=true' style='border:none;width:100%;height:420px;display:none;'></iframe>"
582
  ],
583
  "text/plain": [
584
+ "<wandb.sdk.wandb_run.Run at 0x789c88185940>"
585
  ]
586
  },
587
+ "execution_count": 28,
588
  "metadata": {},
589
  "output_type": "execute_result"
590
  }
 
598
  },
599
  {
600
  "cell_type": "code",
601
+ "execution_count": 29,
602
  "metadata": {},
603
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
604
  "source": [
605
  "#commented out for now as want to run whole notebook but not retrain\n",
606
  "# warmup_steps = int(len(loader) * EPOCHS * 0.1)\n",
 
618
  },
619
  {
620
  "cell_type": "code",
621
+ "execution_count": 30,
622
  "metadata": {},
623
  "outputs": [],
624
  "source": [
 
628
  },
629
  {
630
  "cell_type": "code",
631
+ "execution_count": 31,
632
  "metadata": {},
633
  "outputs": [
634
  {
 
653
  },
654
  {
655
  "cell_type": "code",
656
+ "execution_count": 32,
657
  "metadata": {},
658
  "outputs": [
659
  {
 
663
  "Some weights of BertModel were not initialized from the model checkpoint at drewgenai/midterm-compare-arctic-embed-m-ft and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']\n",
664
  "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
665
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
666
  }
667
  ],
668
  "source": [
 
695
  },
696
  {
697
  "cell_type": "code",
698
+ "execution_count": 33,
699
  "metadata": {},
700
  "outputs": [],
701
  "source": [
 
747
  },
748
  {
749
  "cell_type": "code",
750
+ "execution_count": 34,
751
  "metadata": {},
752
  "outputs": [],
753
  "source": [
 
768
  },
769
  {
770
  "cell_type": "code",
771
+ "execution_count": 35,
772
  "metadata": {},
773
  "outputs": [
774
  {
 
822
  },
823
  {
824
  "cell_type": "code",
825
+ "execution_count": 36,
826
  "metadata": {},
827
  "outputs": [],
828
  "source": [
 
831
  },
832
  {
833
  "cell_type": "code",
834
+ "execution_count": 37,
835
  "metadata": {},
836
  "outputs": [],
837
  "source": [
 
854
  },
855
  {
856
  "cell_type": "code",
857
+ "execution_count": 38,
858
  "metadata": {},
859
  "outputs": [],
860
  "source": [
 
865
  },
866
  {
867
  "cell_type": "code",
868
+ "execution_count": 39,
869
  "metadata": {},
870
  "outputs": [],
871
  "source": [
 
888
  },
889
  {
890
  "cell_type": "code",
891
+ "execution_count": 40,
892
  "metadata": {},
893
  "outputs": [],
894
  "source": [
 
907
  },
908
  {
909
  "cell_type": "code",
910
+ "execution_count": 42,
911
  "metadata": {},
912
  "outputs": [
913
  {
 
915
  "output_type": "stream",
916
  "text": [
917
  "Some weights of BertModel were not initialized from the model checkpoint at drewgenai/midterm-compare-arctic-embed-m-ft and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']\n",
918
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
919
+ "/tmp/ipykernel_5461/1883233562.py:10: LangChainDeprecationWarning: The class `OpenAIEmbeddings` was deprecated in LangChain 0.0.9 and will be removed in 1.0. An updated version of the class exists in the :class:`~langchain-openai package and should be used instead. To use it run `pip install -U :class:`~langchain-openai` and import as `from :class:`~langchain_openai import OpenAIEmbeddings``.\n",
920
+ " openai_embedding_model = OpenAIEmbeddings(model=openai_model_id)\n"
921
  ]
922
  }
923
  ],
924
  "source": [
925
+ "from langchain.embeddings import OpenAIEmbeddings\n",
926
+ "\n",
927
  "base_model_id = f\"Snowflake/snowflake-arctic-embed-m\" \n",
928
  "base_embedding_model = HuggingFaceEmbeddings(model_name=base_model_id)\n",
929
  "\n",
 
936
  },
937
  {
938
  "cell_type": "code",
939
+ "execution_count": 43,
940
  "metadata": {},
941
  "outputs": [],
942
  "source": [
 
979
  },
980
  {
981
  "cell_type": "code",
982
+ "execution_count": 44,
983
  "metadata": {},
984
  "outputs": [],
985
  "source": [
 
999
  },
1000
  {
1001
  "cell_type": "code",
1002
+ "execution_count": 45,
1003
  "metadata": {},
1004
  "outputs": [],
1005
  "source": [
 
1014
  },
1015
  {
1016
  "cell_type": "code",
1017
+ "execution_count": 46,
1018
  "metadata": {},
1019
  "outputs": [],
1020
  "source": [
 
1027
  },
1028
  {
1029
  "cell_type": "code",
1030
+ "execution_count": 47,
1031
  "metadata": {},
1032
  "outputs": [],
1033
  "source": [
 
1051
  },
1052
  {
1053
  "cell_type": "code",
1054
+ "execution_count": 48,
1055
  "metadata": {},
1056
  "outputs": [],
1057
  "source": [
 
1065
  },
1066
  {
1067
  "cell_type": "code",
1068
+ "execution_count": 49,
1069
  "metadata": {},
1070
  "outputs": [
1071
  {
1072
  "data": {
1073
  "application/vnd.jupyter.widget-view+json": {
1074
+ "model_id": "4fe18d41fdd74b6fae35ef5380352540",
1075
  "version_major": 2,
1076
  "version_minor": 0
1077
  },
 
1085
  {
1086
  "data": {
1087
  "application/vnd.jupyter.widget-view+json": {
1088
+ "model_id": "23daac967fb44b10becffb30bce2dab4",
1089
  "version_major": 2,
1090
  "version_minor": 0
1091
  },
 
1100
  "name": "stderr",
1101
  "output_type": "stream",
1102
  "text": [
1103
+ "Node 12b49e26-ba22-461f-bf51-3356cf7491b7 does not have a summary. Skipping filtering.\n"
1104
  ]
1105
  },
1106
  {
1107
  "data": {
1108
  "application/vnd.jupyter.widget-view+json": {
1109
+ "model_id": "c4e7298ad4f64930a69aba61314833c7",
1110
  "version_major": 2,
1111
  "version_minor": 0
1112
  },
 
1120
  {
1121
  "data": {
1122
  "application/vnd.jupyter.widget-view+json": {
1123
+ "model_id": "fd5d9de9850e40b9b349fb288bb2cc74",
1124
  "version_major": 2,
1125
  "version_minor": 0
1126
  },
 
1134
  {
1135
  "data": {
1136
  "application/vnd.jupyter.widget-view+json": {
1137
+ "model_id": "1b59d0e922fd4de0a5cf190df7d9d82d",
1138
  "version_major": 2,
1139
  "version_minor": 0
1140
  },
 
1148
  {
1149
  "data": {
1150
  "application/vnd.jupyter.widget-view+json": {
1151
+ "model_id": "8ea429520fc846b9a16dc5e95cad0d5c",
1152
  "version_major": 2,
1153
  "version_minor": 0
1154
  },
 
1162
  {
1163
  "data": {
1164
  "application/vnd.jupyter.widget-view+json": {
1165
+ "model_id": "2a363f1dfdbd48d58a0955176d1ca934",
1166
  "version_major": 2,
1167
  "version_minor": 0
1168
  },
 
1183
  },
1184
  {
1185
  "cell_type": "code",
1186
+ "execution_count": 50,
1187
  "metadata": {},
1188
  "outputs": [
1189
  {
 
1216
  " <tbody>\n",
1217
  " <tr>\n",
1218
  " <th>0</th>\n",
1219
+ " <td>What does the Decision-Making Confidence Scale...</td>\n",
1220
  " <td>[Linked Psychological &amp; Physical Assessment\\nP...</td>\n",
1221
+ " <td>The Decision-Making Confidence Scale (DMCS-6) ...</td>\n",
1222
  " <td>single_hop_specifc_query_synthesizer</td>\n",
1223
  " </tr>\n",
1224
  " <tr>\n",
1225
  " <th>1</th>\n",
1226
+ " <td>Wht is the Work-Related Stress Scale and how d...</td>\n",
1227
  " <td>[Linked Psychological &amp; Physical Assessment\\nP...</td>\n",
1228
+ " <td>The Work-Related Stress Scale (WRSS-8) evaluat...</td>\n",
1229
  " <td>single_hop_specifc_query_synthesizer</td>\n",
1230
  " </tr>\n",
1231
  " <tr>\n",
1232
  " <th>2</th>\n",
1233
+ " <td>what cognitive load management scale do, how i...</td>\n",
1234
  " <td>[Financial Stress Index (FSI-6)\\nThe FSI-6 eva...</td>\n",
1235
+ " <td>The Cognitive Load Management Scale (CLMS-7) m...</td>\n",
1236
  " <td>single_hop_specifc_query_synthesizer</td>\n",
1237
  " </tr>\n",
1238
  " <tr>\n",
1239
  " <th>3</th>\n",
1240
+ " <td>What is the purpose of the Emotional Regulatio...</td>\n",
1241
  " <td>[Financial Stress Index (FSI-6)\\nThe FSI-6 eva...</td>\n",
1242
+ " <td>The context does not provide specific informat...</td>\n",
1243
  " <td>single_hop_specifc_query_synthesizer</td>\n",
1244
  " </tr>\n",
1245
  " <tr>\n",
1246
  " <th>4</th>\n",
1247
+ " <td>What does the MRI-6 assess?</td>\n",
1248
  " <td>[The ERI-9 assesses an individual's ability to...</td>\n",
1249
  " <td>The MRI-6 evaluates short-term and long-term m...</td>\n",
1250
  " <td>single_hop_specifc_query_synthesizer</td>\n",
1251
  " </tr>\n",
1252
  " <tr>\n",
1253
  " <th>5</th>\n",
1254
+ " <td>What does the Social Confidence Measure (SCM-6...</td>\n",
1255
  " <td>[The ERI-9 assesses an individual's ability to...</td>\n",
1256
+ " <td>The Social Confidence Measure (SCM-6) evaluate...</td>\n",
1257
  " <td>single_hop_specifc_query_synthesizer</td>\n",
1258
  " </tr>\n",
1259
  " <tr>\n",
1260
  " <th>6</th>\n",
1261
+ " <td>What OFI-7 do?</td>\n",
1262
  " <td>[Linked Psychological &amp; Physical Assessment\\nC...</td>\n",
1263
+ " <td>The OFI-7 assesses work-related exhaustion and...</td>\n",
1264
  " <td>single_hop_specifc_query_synthesizer</td>\n",
1265
  " </tr>\n",
1266
  " <tr>\n",
1267
  " <th>7</th>\n",
1268
+ " <td>Cud yu pleese explane wut the Chronic Pain Adj...</td>\n",
1269
  " <td>[Linked Psychological &amp; Physical Assessment\\nC...</td>\n",
1270
+ " <td>The Chronic Pain Adjustment Index (CPAI-10) ev...</td>\n",
1271
  " <td>single_hop_specifc_query_synthesizer</td>\n",
1272
  " </tr>\n",
1273
  " <tr>\n",
1274
  " <th>8</th>\n",
1275
+ " <td>What CWT-7 do?</td>\n",
1276
  " <td>[I feel confident when making important decisi...</td>\n",
1277
  " <td>The CWT-7 evaluates an individual's ability to...</td>\n",
1278
  " <td>single_hop_specifc_query_synthesizer</td>\n",
1279
  " </tr>\n",
1280
  " <tr>\n",
1281
  " <th>9</th>\n",
1282
+ " <td>Cud yu pleese explane how the COGNITIVE Worklo...</td>\n",
1283
  " <td>[I feel confident when making important decisi...</td>\n",
1284
+ " <td>The Cognitive Workload Tolerance (CWT-7) evalu...</td>\n",
1285
  " <td>single_hop_specifc_query_synthesizer</td>\n",
1286
  " </tr>\n",
1287
  " </tbody>\n",
 
1290
  ],
1291
  "text/plain": [
1292
  " user_input \\\n",
1293
+ "0 What does the Decision-Making Confidence Scale... \n",
1294
+ "1 Wht is the Work-Related Stress Scale and how d... \n",
1295
+ "2 what cognitive load management scale do, how i... \n",
1296
+ "3 What is the purpose of the Emotional Regulatio... \n",
1297
+ "4 What does the MRI-6 assess? \n",
1298
+ "5 What does the Social Confidence Measure (SCM-6... \n",
1299
+ "6 What OFI-7 do? \n",
1300
+ "7 Cud yu pleese explane wut the Chronic Pain Adj... \n",
1301
+ "8 What CWT-7 do? \n",
1302
+ "9 Cud yu pleese explane how the COGNITIVE Worklo... \n",
1303
  "\n",
1304
  " reference_contexts \\\n",
1305
  "0 [Linked Psychological & Physical Assessment\\nP... \n",
 
1314
  "9 [I feel confident when making important decisi... \n",
1315
  "\n",
1316
  " reference \\\n",
1317
+ "0 The Decision-Making Confidence Scale (DMCS-6) ... \n",
1318
+ "1 The Work-Related Stress Scale (WRSS-8) evaluat... \n",
1319
+ "2 The Cognitive Load Management Scale (CLMS-7) m... \n",
1320
+ "3 The context does not provide specific informat... \n",
1321
  "4 The MRI-6 evaluates short-term and long-term m... \n",
1322
+ "5 The Social Confidence Measure (SCM-6) evaluate... \n",
1323
+ "6 The OFI-7 assesses work-related exhaustion and... \n",
1324
+ "7 The Chronic Pain Adjustment Index (CPAI-10) ev... \n",
1325
  "8 The CWT-7 evaluates an individual's ability to... \n",
1326
+ "9 The Cognitive Workload Tolerance (CWT-7) evalu... \n",
1327
  "\n",
1328
  " synthesizer_name \n",
1329
  "0 single_hop_specifc_query_synthesizer \n",
 
1338
  "9 single_hop_specifc_query_synthesizer "
1339
  ]
1340
  },
1341
+ "execution_count": 50,
1342
  "metadata": {},
1343
  "output_type": "execute_result"
1344
  }
 
1356
  },
1357
  {
1358
  "cell_type": "code",
1359
+ "execution_count": 51,
1360
  "metadata": {},
1361
  "outputs": [],
1362
  "source": [
 
1368
  },
1369
  {
1370
  "cell_type": "code",
1371
+ "execution_count": 52,
1372
  "metadata": {},
1373
  "outputs": [],
1374
  "source": [
 
1379
  },
1380
  {
1381
  "cell_type": "code",
1382
+ "execution_count": 53,
1383
  "metadata": {},
1384
  "outputs": [],
1385
  "source": [
 
1390
  },
1391
  {
1392
  "cell_type": "code",
1393
+ "execution_count": 54,
1394
  "metadata": {},
1395
  "outputs": [
1396
  {
1397
  "data": {
1398
  "application/vnd.jupyter.widget-view+json": {
1399
+ "model_id": "a098ed85762d4bbcb3983956c8e4d3e6",
1400
  "version_major": 2,
1401
  "version_minor": 0
1402
  },
 
1411
  "name": "stderr",
1412
  "output_type": "stream",
1413
  "text": [
1414
+ "Exception raised in Job[18]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29041, Requested 2597. Please try again in 3.276s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1415
+ "Exception raised in Job[16]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29301, Requested 2254. Please try again in 3.11s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1416
+ "Exception raised in Job[7]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29177, Requested 2714. Please try again in 3.782s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1417
+ "Exception raised in Job[4]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28995, Requested 2265. Please try again in 2.52s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1418
+ "Exception raised in Job[19]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29045, Requested 2522. Please try again in 3.134s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1419
+ "Exception raised in Job[1]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28880, Requested 2546. Please try again in 2.852s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1420
+ "Exception raised in Job[22]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29400, Requested 2254. Please try again in 3.308s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1421
+ "Exception raised in Job[13]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29047, Requested 2697. Please try again in 3.488s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1422
+ "Exception raised in Job[24]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28801, Requested 2551. Please try again in 2.704s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1423
+ "Exception raised in Job[28]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28962, Requested 2254. Please try again in 2.432s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1424
+ "Exception raised in Job[25]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28955, Requested 2505. Please try again in 2.92s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1425
+ "Exception raised in Job[34]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29121, Requested 2265. Please try again in 2.772s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1426
+ "Exception raised in Job[36]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28650, Requested 2556. Please try again in 2.412s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1427
+ "Exception raised in Job[31]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28712, Requested 2534. Please try again in 2.492s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1428
  "Exception raised in Job[17]: TimeoutError()\n",
1429
+ "Exception raised in Job[30]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29375, Requested 2581. Please try again in 3.911s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1430
+ "Exception raised in Job[37]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28555, Requested 2742. Please try again in 2.594s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1431
+ "Exception raised in Job[46]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29375, Requested 2250. Please try again in 3.25s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1432
+ "Exception raised in Job[40]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29255, Requested 2254. Please try again in 3.018s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1433
+ "Exception raised in Job[43]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28627, Requested 2773. Please try again in 2.8s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1434
+ "Exception raised in Job[42]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 27737, Requested 2679. Please try again in 832ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n"
1435
  ]
1436
  },
1437
  {
1438
  "data": {
1439
  "text/plain": [
1440
+ "{'context_recall': 1.0000, 'faithfulness': 0.7778, 'factual_correctness': 0.7140, 'answer_relevancy': 0.9405, 'context_entity_recall': 1.0000, 'noise_sensitivity_relevant': 0.1312}"
1441
  ]
1442
  },
1443
+ "execution_count": 54,
1444
  "metadata": {},
1445
  "output_type": "execute_result"
1446
  }
 
1469
  },
1470
  {
1471
  "cell_type": "code",
1472
+ "execution_count": 55,
1473
  "metadata": {},
1474
  "outputs": [],
1475
  "source": [
 
1481
  },
1482
  {
1483
  "cell_type": "code",
1484
+ "execution_count": 56,
1485
  "metadata": {},
1486
  "outputs": [],
1487
  "source": [
 
1490
  },
1491
  {
1492
  "cell_type": "code",
1493
+ "execution_count": 57,
1494
  "metadata": {},
1495
  "outputs": [
1496
  {
1497
  "data": {
1498
  "application/vnd.jupyter.widget-view+json": {
1499
+ "model_id": "81a8a18f913545438fda4bde69dd52e1",
1500
  "version_major": 2,
1501
  "version_minor": 0
1502
  },
 
1511
  "name": "stderr",
1512
  "output_type": "stream",
1513
  "text": [
1514
+ "Exception raised in Job[22]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28993, Requested 2254. Please try again in 2.494s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1515
+ "Exception raised in Job[18]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 27957, Requested 2597. Please try again in 1.108s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1516
+ "Exception raised in Job[19]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29387, Requested 2528. Please try again in 3.83s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1517
+ "Exception raised in Job[1]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29351, Requested 2546. Please try again in 3.794s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1518
+ "Exception raised in Job[7]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29695, Requested 2714. Please try again in 4.818s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1519
+ "Exception raised in Job[13]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29801, Requested 2827. Please try again in 5.256s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1520
+ "Exception raised in Job[11]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29741, Requested 1449. Please try again in 2.38s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1521
+ "Exception raised in Job[16]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29010, Requested 2254. Please try again in 2.528s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1522
+ "Exception raised in Job[28]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28990, Requested 2254. Please try again in 2.488s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1523
+ "Exception raised in Job[24]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29910, Requested 2551. Please try again in 4.922s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1524
+ "Exception raised in Job[25]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29316, Requested 2505. Please try again in 3.642s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1525
+ "Exception raised in Job[30]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29718, Requested 2581. Please try again in 4.598s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1526
+ "Exception raised in Job[31]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28772, Requested 2534. Please try again in 2.612s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1527
+ "Exception raised in Job[36]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29229, Requested 2556. Please try again in 3.57s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1528
  "Exception raised in Job[17]: TimeoutError()\n",
1529
+ "Exception raised in Job[40]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29563, Requested 2254. Please try again in 3.634s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1530
+ "Exception raised in Job[46]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29198, Requested 2250. Please try again in 2.896s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1531
+ "Exception raised in Job[49]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29640, Requested 2517. Please try again in 4.314s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1532
+ "Exception raised in Job[42]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29517, Requested 2679. Please try again in 4.392s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1533
+ "Exception raised in Job[43]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29382, Requested 2778. Please try again in 4.32s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1534
+ "Exception raised in Job[37]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28917, Requested 2742. Please try again in 3.318s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n"
1535
  ]
1536
  },
1537
  {
1538
  "data": {
1539
  "text/plain": [
1540
+ "{'context_recall': 1.0000, 'faithfulness': 0.6000, 'factual_correctness': 0.7210, 'answer_relevancy': 0.9423, 'context_entity_recall': 1.0000, 'noise_sensitivity_relevant': 0.0781}"
1541
  ]
1542
  },
1543
+ "execution_count": 57,
1544
  "metadata": {},
1545
  "output_type": "execute_result"
1546
  }
 
1564
  },
1565
  {
1566
  "cell_type": "code",
1567
+ "execution_count": 58,
1568
  "metadata": {},
1569
  "outputs": [],
1570
  "source": [
 
1576
  },
1577
  {
1578
  "cell_type": "code",
1579
+ "execution_count": 59,
1580
  "metadata": {},
1581
  "outputs": [],
1582
  "source": [
 
1585
  },
1586
  {
1587
  "cell_type": "code",
1588
+ "execution_count": 60,
1589
  "metadata": {},
1590
  "outputs": [
1591
  {
1592
  "data": {
1593
  "application/vnd.jupyter.widget-view+json": {
1594
+ "model_id": "f97492909bd740eca783ffca7d420bab",
1595
  "version_major": 2,
1596
  "version_minor": 0
1597
  },
 
1606
  "name": "stderr",
1607
  "output_type": "stream",
1608
  "text": [
1609
+ "Exception raised in Job[10]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28079, Requested 2525. Please try again in 1.208s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1610
+ "Exception raised in Job[16]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28779, Requested 2525. Please try again in 2.608s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1611
+ "Exception raised in Job[1]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29859, Requested 2806. Please try again in 5.33s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1612
+ "Exception raised in Job[24]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28465, Requested 2822. Please try again in 2.574s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1613
+ "Exception raised in Job[13]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28948, Requested 2976. Please try again in 3.848s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1614
+ "Exception raised in Job[22]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29305, Requested 2525. Please try again in 3.66s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1615
+ "Exception raised in Job[18]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29595, Requested 2868. Please try again in 4.926s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1616
+ "Exception raised in Job[28]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28680, Requested 2525. Please try again in 2.41s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1617
+ "Exception raised in Job[25]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28993, Requested 2776. Please try again in 3.538s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1618
+ "Exception raised in Job[19]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29483, Requested 2793. Please try again in 4.552s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1619
+ "Exception raised in Job[7]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28630, Requested 3159. Please try again in 3.578s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1620
+ "Exception raised in Job[31]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29303, Requested 2794. Please try again in 4.194s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1621
+ "Exception raised in Job[11]: TimeoutError()\n",
1622
+ "Exception raised in Job[17]: TimeoutError()\n",
1623
+ "Exception raised in Job[30]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28351, Requested 2840. Please try again in 2.382s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1624
+ "Exception raised in Job[34]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28991, Requested 2525. Please try again in 3.032s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1625
+ "Exception raised in Job[42]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29445, Requested 2953. Please try again in 4.796s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1626
+ "Exception raised in Job[36]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 28683, Requested 2827. Please try again in 3.02s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1627
+ "Exception raised in Job[43]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29750, Requested 3159. Please try again in 5.818s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1628
+ "Exception raised in Job[40]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29070, Requested 2525. Please try again in 3.19s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1629
+ "Exception raised in Job[48]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29213, Requested 2824. Please try again in 4.074s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n",
1630
+ "Exception raised in Job[49]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-TU5fm55zJrncrgPcg3lg23B6 on tokens per min (TPM): Limit 30000, Used 29082, Requested 2788. Please try again in 3.74s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})\n"
1631
  ]
1632
  },
1633
  {
1634
  "data": {
1635
  "text/plain": [
1636
+ "{'context_recall': 1.0000, 'faithfulness': 0.6000, 'factual_correctness': 0.7180, 'answer_relevancy': 0.9393, 'context_entity_recall': 1.0000, 'noise_sensitivity_relevant': 0.0781}"
1637
  ]
1638
  },
1639
+ "execution_count": 60,
1640
  "metadata": {},
1641
  "output_type": "execute_result"
1642
  }
READMEresponses.md CHANGED
@@ -1,11 +1,96 @@
1
  ## Task 1: Defining your Problem and Audience
2
-
 
 
 
3
 
4
  ## Task 2: Propose a Solution
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
 
8
  ## Task 3: Dealing with the Data
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
 
11
 
@@ -16,10 +101,21 @@ https://huggingface.co/spaces/drewgenai/midterm_poc
16
 
17
  ## Task 5: Creating a Golden Test Data Set
18
 
19
-
20
  The dataset is based on the submitted documents and the base model performed well across all metrics.
21
 
22
- Base model evaluation {'context_recall': 1.0000, 'faithfulness': 1.0000, 'factual_correctness': 0.7540, 'answer_relevancy': 0.9481, 'context_entity_recall': 0.8095, 'noise_sensitivity_relevant': 0.1973}
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
 
25
  ## Task 6: Fine-Tuning Open-Source Embeddings
@@ -34,15 +130,35 @@ https://huggingface.co/drewgenai/midterm-compare-arctic-embed-m-ft
34
 
35
  I ran the RAGAS evaluation on the finetuned model and the openai model as well.
36
 
37
- The finetuned model performed well across all metrics as well but not quite as well as the base Snowflake/snowflake-arctic-embed-m model where it didn't perform as well in context recall, but slightly in noise sensitivity.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
- The openai model performed well across all metrics but not as well as the base Snowflake/snowflake-arctic-embed-m model, but with slightly worse noise sensitivity.
40
 
41
- Finetuned model {'context_recall': 1.0000, 'faithfulness': 0.8500, 'factual_correctness': 0.7220, 'answer_relevancy': 0.9481, 'context_entity_recall': 0.7917, 'noise_sensitivity_relevant': 0.1111}
42
 
43
- Openai model {'context_recall': 1.0000, 'faithfulness': 1.0000, 'factual_correctness': 0.7540, 'answer_relevancy': 0.9463, 'context_entity_recall': 0.8095, 'noise_sensitivity_relevant': 0.3095}
44
 
45
- With the results as they are using the Snowflake/snowflake-arctic-embed-m model makes sense for this use case.
46
 
47
 
48
  ## Final Submission
 
1
  ## Task 1: Defining your Problem and Audience
2
+ ### Problem Statement
3
+ Researchers at Studies Inc. must manually review multiple protocols, a time-consuming and complex process that requires scanning, extracting, and analyzing detailed information to identify patterns between studies.
4
+ ### Why This is a Problem for Our Users
5
+ Researchers are spending an excessive amount of time manually reviewing study protocols to standardize data across multiple studies. Each protocol contains detailed and sometimes inconsistent information, making it difficult to quickly extract relevant data and compare it across different studies. The manual nature of this process increases the likelihood of human error, slows down research progress, and diverts time away from higher-value analytical work. Additionally, as the number of studies grows, this problem scales, further straining resources and delaying critical insights needed for scientific advancements.
6
 
7
  ## Task 2: Propose a Solution
8
 
9
+ The solution is to build an automated system that streamlines the process of reviewing and extracting key data from research protocols. By leveraging a combination of LLM-based retrieval-augmented generation (RAG) and structured orchestration, the system will allow researchers to quickly scan, extract, and compare relevant information across multiple protocols, significantly reducing manual effort and improving standardization.
10
+
11
+ Instead of using agents in the initial prototype, the solution will employ LangChain tools for structured execution, ensuring modular and predictable workflows. Chainlit will provide an event-driven UI that dynamically triggers relevant processing steps based on user interactions. This setup ensures efficiency, flexibility, and the ability to scale as more protocols and datasets are introduced.
12
+
13
+ While the initial implementation will rely on structured tool execution, future iterations or final builds may incorporate agentic reasoning to enhance adaptability. Agents could be employed for intelligent workflow management, dynamically selecting tools based on protocol complexity or user preferences. This could enable more sophisticated multi-step reasoning, where an agent determines the best extraction and comparison approach based on the type of research being conducted, but it is not required at this time with just two local documents.
14
+
15
+ We will build with the following stack:
16
+
17
+ - **LLM**: gpt-4o-mini
18
+ We will leverage a closed-source model because the entire application is constructed using open-source data from public protocols
19
+ - **Embedding Models**: text-embedding-3-small, snowflake-arctic-embed-m
20
+ We will construct a quick prototype using our closed-source OpenAI embedding model, and then we will fine-tune an off-the-shelf embedding model from Snowflake. We want to demonstrate to leadership that it’s important to be able to work with open-source models and see that they can be as performant as closed-source while providing privacy benefits, especially after fine-tuning.
21
+ - **Orchestration**: Langchain tools and tool based execution
22
+ The application will orchestrate workflow execution using a structured, tools-based approach. The system will ensure predictable and efficient processing by defining modular tools (such as document comparison and retrieval) and explicitly invoking them in response to user input. Chainlit serves as the event-driven interface, managing tool execution based on user interactions, while LangChain tools handle document retrieval, RAG processing, and structured data export. This approach will provide flexibility and modularity.
23
+ - **Vector Store**: Qdrant
24
+ It will be more than fast enough for our current data needs while maintaining low latency. Overall, it’s a solid, reliable choice, and we can take advantage of the fully open-source version at no cost and host it in memory running within our hugging face space.
25
+ - **Evaluation**: RAGAS
26
+ RAGAS has been a leader in the AI evaluation space for years. We’re particularly interested in leveraging their RAG assessment metrics to test the performance of our embedding models.
27
+ - **User Interface**: Chainlit
28
+ A lightweight, Python-based UI framework designed specifically for LLM applications. It allows us to quickly prototype and deploy conversational interfaces with minimal front-end effort while maintaining flexibility for customization.
29
+ - **Inference & Serving**: Hugging Face
30
+ We will leverage Hugging Face as a platform to serve up our application to users because it’s very fast (1-click deployment status) and very cheap to host. Additionally, we will use Hugging Face to pull embedding models that we will test finetuning and host in the event it is needed.
31
 
32
 
33
  ## Task 3: Dealing with the Data
34
+ ### Data Sources and External APIs
35
+ Our system will leverage multiple data sources and external APIs to extract, process, and analyze research protocols effectively. Below are the key sources and their roles in our application:
36
+
37
+ #### Uploaded Protocol Documents (PDFs)
38
+ - Researchers will upload research protocol documents in **PDF format**.
39
+ - These documents will be processed using **PyMuPDFLoader**, extracted as text, and chunked for embedding storage.
40
+ - We use **Qdrant** as the vector database to store document chunks for retrieval.
41
+
42
+ **Why?**
43
+ Protocols contain structured and semi-structured data critical for comparison. By storing them in a vector database, we enable **semantic search and retrieval** for streamlined analysis.
44
+
45
+ ---
46
+
47
+ #### Hugging Face API
48
+ - Used to access **Snowflake Arctic embedding models** for text embedding and potential fine-tuning.
49
+
50
+ **Why?**
51
+ We aim to compare the performance of closed-source OpenAI embeddings with open-source models for **privacy, scalability, and long-term flexibility**.
52
+
53
+ ---
54
+
55
+ #### OpenAI API (GPT-4o-mini)
56
+ - Used for generating **structured comparisons** between protocols.
57
+
58
+ **Why?**
59
+ The LLM will process retrieved document chunks and generate **natural-language comparisons** and **structured JSON outputs**.
60
+
61
+ ---
62
+
63
+ #### LangChain Tool Execution
64
+ - We use **LangChain’s tools-based execution** for structured retrieval and document comparison.
65
+
66
+ **Why?**
67
+ Instead of an agentic approach, **explicit tool execution** ensures predictable and modular processing.
68
+ Our priority at this time is **reducing API calls** while maintaining efficiency.
69
+
70
+ ---
71
+
72
+ #### Chunking Strategy
73
+ - We will use a **semantic-based chunking strategy** with LangChain’s **SemanticChunker**, powered by **Snowflake Arctic embeddings**.
74
+
75
+ **Why this approach?**
76
+ Traditional **fixed-size** or **recursive character-splitting** methods often break up conceptually linked sections.
77
+ Semantic chunking ensures that **chunks maintain their conceptual integrity**, improving retrieval quality.
78
+
79
+ This method is especially useful for research protocols, where meaningful sections (e.g., **assessments, methodologies**) must remain intact for comparison.
80
+
81
+ As we refine the system, we may adjust chunk sizes based on **real user queries and retrieval performance** to optimize information density and response accuracy.
82
+
83
+ ---
84
+
85
+ ### Additional Data Needs
86
+ At present, our application focuses on **structured comparisons** between protocols. However, in future iterations, we may integrate additional data sources such as:
87
+
88
+ - **Metadata from Public Research Repositories** (e.g., PubMed, ArXiv API)
89
+ → To enrich protocol comparisons with **relevant external research**.
90
+ - **Institutional Databases** (if access is provided)
91
+ → To validate protocol **consistency across multi-site studies**.
92
+
93
+ While the current system is **not agentic**, we may explore **agent-based reasoning** in future versions to dynamically adjust retrieval and processing strategies based on protocol complexity. At this time all the information we need is in the provided local documents.
94
 
95
 
96
 
 
101
 
102
  ## Task 5: Creating a Golden Test Data Set
103
 
 
104
  The dataset is based on the submitted documents and the base model performed well across all metrics.
105
 
106
+ The base model is the Snowflake/snowflake-arctic-embed-m model.
107
+
108
+ ### Base model evaluation
109
+ | Metric | Value |
110
+ |-------------------------------|--------|
111
+ | Context Recall | 1.0000 |
112
+ | Faithfulness | 1.0000 |
113
+ | Factual Correctness | 0.7540 |
114
+ | Answer Relevancy | 0.9481 |
115
+ | Context Entity Recall | 0.8095 |
116
+ | Noise Sensitivity Relevant | 0.1973 |
117
+
118
+
119
 
120
 
121
  ## Task 6: Fine-Tuning Open-Source Embeddings
 
130
 
131
  I ran the RAGAS evaluation on the finetuned model and the openai model as well.
132
 
133
+ The finetuned model also performed well across all metrics, but not quite as well as the base Snowflake/snowflake-arctic-embed-m model: it scored lower on faithfulness, factual correctness, and context entity recall, though it performed slightly better on noise sensitivity.
134
+
135
+ The OpenAI model performed well across all metrics, matching the base Snowflake/snowflake-arctic-embed-m model on most of them, but with slightly lower answer relevancy and noticeably worse noise sensitivity.
136
+
137
+ ### Finetuned Model Evaluation
138
+ | Metric | Value |
139
+ |-------------------------------|--------|
140
+ | Context Recall | 1.0000 |
141
+ | Faithfulness | 0.8500 |
142
+ | Factual Correctness | 0.7220 |
143
+ | Answer Relevancy | 0.9481 |
144
+ | Context Entity Recall | 0.7917 |
145
+ | Noise Sensitivity Relevant | 0.1111 |
146
+
147
+ ### OpenAI Model Evaluation
148
+ | Metric | Value |
149
+ |-------------------------------|--------|
150
+ | Context Recall | 1.0000 |
151
+ | Faithfulness | 1.0000 |
152
+ | Factual Correctness | 0.7540 |
153
+ | Answer Relevancy | 0.9463 |
154
+ | Context Entity Recall | 0.8095 |
155
+ | Noise Sensitivity Relevant | 0.3095 |
156
 
 
157
 
 
158
 
159
+ The base model is the best performing model for the current use case.
160
 
161
+ In the second half of the course I would like to explore having the application look for external standards to compare the protocols to further help with the comparison process. I would also like it to evaluate the file type and use reasoning to determine the best approach to extracting the information and potentially accept more than 2 files.
162
 
163
 
164
  ## Final Submission
app_working_on_agentic.py ADDED
@@ -0,0 +1,304 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ import json
4
+ import pandas as pd
5
+ import chainlit as cl
6
+ from dotenv import load_dotenv
7
+ from langchain_core.documents import Document
8
+ from langchain_community.document_loaders import PyMuPDFLoader
9
+ from langchain_experimental.text_splitter import SemanticChunker
10
+ from langchain_community.vectorstores import Qdrant
11
+ from langchain_community.embeddings import HuggingFaceEmbeddings
12
+ from langchain_core.output_parsers import StrOutputParser
13
+ from langchain_openai import ChatOpenAI
14
+ from langchain_core.prompts import ChatPromptTemplate
15
+ from langgraph.graph import START, StateGraph
16
+ from langchain.tools import tool
17
+ from langchain.schema import HumanMessage
18
+ from typing_extensions import List, TypedDict
19
+ from operator import itemgetter
20
+ from langchain.agents import AgentExecutor, create_openai_tools_agent
21
+ from langchain_core.prompts import MessagesPlaceholder
22
+
23
# Load environment variables (expects OPENAI_API_KEY etc. in a local .env file)
load_dotenv()

# Define paths
# UPLOAD_PATH: where user-uploaded PDFs are copied for processing.
# OUTPUT_PATH: where the generated comparison CSV is written.
UPLOAD_PATH = "upload/"
OUTPUT_PATH = "output/"
os.makedirs(UPLOAD_PATH, exist_ok=True)
os.makedirs(OUTPUT_PATH, exist_ok=True)

# Initialize embeddings model
# Open-source Snowflake Arctic embeddings — used both by the semantic chunker
# below and as the embedding function for the Qdrant vector store.
model_id = "Snowflake/snowflake-arctic-embed-m"
embedding_model = HuggingFaceEmbeddings(model_name=model_id)

# Define semantic chunker
# Splits text on semantic boundaries rather than fixed character counts so
# conceptually related protocol sections stay together in one chunk.
semantic_splitter = SemanticChunker(embedding_model)

# Initialize LLM
# gpt-4o-mini serves the per-tool generations; the routing agent created
# further down uses gpt-4o separately.
llm = ChatOpenAI(model="gpt-4o-mini")
42
+ # Define RAG prompt
43
+ export_prompt = """
44
+ CONTEXT:
45
+ {context}
46
+
47
+ QUERY:
48
+ {question}
49
+
50
+ You are a helpful assistant. Use the available context to answer the question.
51
+
52
+ Between these two files containing protocols, identify and match **entire assessment sections** based on conceptual similarity. Do NOT match individual questions.
53
+
54
+ ### **Output Format:**
55
+ Return the response in **valid JSON format** structured as a list of dictionaries, where each dictionary contains:
56
+ [
57
+ {{
58
+ "Derived Description": "A short name for the matched concept",
59
+ "Protocol_1": "Protocol 1 - Matching Element",
60
+ "Protocol_2": "Protocol 2 - Matching Element"
61
+ }},
62
+ ...
63
+ ]
64
+ ### **Example Output:**
65
+ [
66
+ {{
67
+ "Derived Description": "Pain Coping Strategies",
68
+ "Protocol_1": "Pain Coping Strategy Scale (PCSS-9)",
69
+ "Protocol_2": "Chronic Pain Adjustment Index (CPAI-10)"
70
+ }},
71
+ {{
72
+ "Derived Description": "Work Stress and Fatigue",
73
+ "Protocol_1": "Work-Related Stress Scale (WRSS-8)",
74
+ "Protocol_2": "Occupational Fatigue Index (OFI-7)"
75
+ }},
76
+ ...
77
+ ]
78
+
79
+ ### Rules:
80
+ 1. Only output **valid JSON** with no explanations, summaries, or markdown formatting.
81
+ 2. Ensure each entry in the JSON list represents a single matched data element from the two protocols.
82
+ 3. If no matching element is found in a protocol, leave it empty ("").
83
+ 4. **Do NOT include headers, explanations, or additional formatting**—only return the raw JSON list.
84
+ 5. It should include all the elements in the two protocols.
85
+ 6. If it cannot match the element, create the row and include the protocol it did find and put "could not match" in the other protocol column.
86
+ 7. protocol should be the between
87
+ """
88
+
89
+ compare_export_prompt = ChatPromptTemplate.from_template(export_prompt)
90
+
91
+ QUERY_PROMPT = """
92
+ You are a helpful assistant. Use the available context to answer the question concisely and informatively.
93
+
94
+ CONTEXT:
95
+ {context}
96
+
97
+ QUERY:
98
+ {question}
99
+
100
+ Provide a natural-language response using the given information. If you do not know the answer, say so.
101
+ """
102
+
103
+ query_prompt = ChatPromptTemplate.from_template(QUERY_PROMPT)
104
+
105
+
106
@tool
def document_query_tool(question: str) -> dict | str:
    """Retrieve relevant document sections and answer a question about them.

    Args:
        question: Natural-language question about the uploaded protocols.

    Returns:
        On success, a dict with:
            "answer":  the LLM's natural-language response.
            "context": the list of retrieved Document chunks used as evidence.
        On failure (no documents ingested yet), a plain error string.
        (The original annotation said ``-> str`` but the success path has
        always returned a dict; the annotation now reflects actual behavior.)
    """
    retriever = cl.user_session.get("qdrant_retriever")
    if not retriever:
        return "Error: No documents available for retrieval. Please upload documents first."

    # Retrieve context from the vector database and flatten it into one string
    # for the prompt.
    retrieved_docs = retriever.invoke(question)
    docs_content = "\n\n".join(doc.page_content for doc in retrieved_docs)

    # Generate the response using the natural query prompt.
    messages = query_prompt.format_messages(question=question, context=docs_content)
    response = llm.invoke(messages)

    return {
        "answer": response.content,
        "context": retrieved_docs,
    }
126
+
127
+
128
@tool
def document_comparison_tool(question: str) -> str:
    """Compare the two uploaded documents and export matched elements to CSV.

    Runs a RAG chain with the JSON-comparison prompt, parses the model's JSON
    output, and writes it to OUTPUT_PATH/comparison_results.csv.

    Args:
        question: The user's comparison request, forwarded to the RAG chain.

    Returns:
        A status string. On success the CSV path is stored in the user session
        under "comparison_file_path" so the UI layer can attach a download.
    """
    # Retrieve the vector database retriever built at upload time.
    retriever = cl.user_session.get("qdrant_retriever")
    if not retriever:
        return "Error: No documents available for retrieval. Please upload two PDF files first."

    # Process the query using RAG.
    rag_chain = (
        {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
        | compare_export_prompt | llm | StrOutputParser()
    )
    response_text = rag_chain.invoke({"question": question})

    # Bug fix: LLMs frequently wrap JSON in markdown code fences (```json ... ```)
    # despite the prompt's instructions, which made json.loads fail and the
    # CSV/download step never run. Strip a leading/trailing fence before parsing.
    cleaned = response_text.strip()
    if cleaned.startswith("```"):
        # Drop the opening fence line (e.g. "```json").
        cleaned = cleaned.split("\n", 1)[1] if "\n" in cleaned else ""
        if cleaned.rstrip().endswith("```"):
            cleaned = cleaned.rstrip()[:-3]

    # Parse the response and save it as CSV.
    try:
        structured_data = json.loads(cleaned)
        if not structured_data:
            return "Error: No matched elements found."

        # Define the output file path.
        file_path = os.path.join(OUTPUT_PATH, "comparison_results.csv")

        # Save to CSV with a fixed column order matching the prompt contract.
        df = pd.DataFrame(structured_data, columns=["Derived Description", "Protocol_1", "Protocol_2"])
        df.to_csv(file_path, index=False)

        # Store the file path in the user session for later retrieval.
        cl.user_session.set("comparison_file_path", file_path)

        return "Comparison complete! CSV file has been generated."

    except json.JSONDecodeError:
        return "Error: Response is not valid JSON."
164
+
165
+
166
# Define tools for the agent. The agent picks between querying and comparing
# based on the system prompt below.
tools = [document_query_tool, document_comparison_tool]

# Set up the agent with a system prompt describing when to use each tool.
system_prompt = """You are an intelligent document analysis assistant. You have access to two tools:

1. document_query_tool: Use this when a user wants information or has questions about the content of uploaded documents.
2. document_comparison_tool: Use this when a user wants to compare elements between two uploaded documents or export comparison results.

Analyze the user's request carefully to determine which tool is most appropriate.
"""

# Create the agent prompt using OpenAI function calling conventions.
# chat_history and agent_scratchpad are filled in per turn by the executor.
agent_prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
    MessagesPlaceholder(variable_name="agent_scratchpad"),
])

# The routing agent uses gpt-4o at temperature 0 for deterministic tool
# selection, while the tools themselves call the module-level gpt-4o-mini llm.
agent = create_openai_tools_agent(
    llm=ChatOpenAI(model="gpt-4o", temperature=0),
    tools=tools,
    prompt=agent_prompt
)

# Create the agent executor. verbose=True logs tool invocations;
# handle_parsing_errors recovers from malformed tool-call output instead of
# raising.
agent_executor = AgentExecutor.from_agent_and_tools(
    agent=agent,
    tools=tools,
    verbose=True,
    handle_parsing_errors=True,
)
199
+
200
+
201
async def process_files(files: list[cl.File]):
    """Ingest uploaded PDFs into an in-memory Qdrant store.

    Copies each file into UPLOAD_PATH, loads it with PyMuPDF, splits each
    page semantically, and tags every chunk with its source filename.

    Returns:
        A retriever over the indexed chunks, or None when no text could be
        extracted from the uploads.
    """
    chunked_docs = []
    for uploaded in files:
        # Persist the upload so the loader can read it from disk.
        destination = os.path.join(UPLOAD_PATH, uploaded.name)
        shutil.copyfile(uploaded.path, destination)

        for page in PyMuPDFLoader(destination).load():
            chunked_docs.extend(
                Document(page_content=piece, metadata={"source": uploaded.name})
                for piece in semantic_splitter.split_text(page.page_content)
            )

    if not chunked_docs:
        return None

    vectorstore = Qdrant.from_documents(
        chunked_docs,
        embedding_model,
        location=":memory:",
        collection_name="document_comparison",
    )
    return vectorstore.as_retriever()
226
+
227
+
228
@cl.on_chat_start
async def start():
    """Chainlit entry point: collect exactly two PDFs and build the retriever."""
    # Initialize chat history for the agent and reset any stale retriever.
    cl.user_session.set("chat_history", [])
    cl.user_session.set("qdrant_retriever", None)

    files = await cl.AskFileMessage(
        content="Please upload **two PDF files** for comparison:",
        accept=["application/pdf"],
        max_files=2
    ).send()

    # Bug fix: AskFileMessage resolves to None when the prompt times out, so
    # guard before calling len() (the old code raised TypeError). Anything
    # other than exactly two PDFs is rejected.
    if not files or len(files) != 2:
        await cl.Message("Error: You must upload exactly two PDF files.").send()
        return

    with cl.Step("Processing files"):
        retriever = await process_files(files)
        if retriever:
            cl.user_session.set("qdrant_retriever", retriever)
            await cl.Message("Files uploaded and processed successfully! You can now enter your query.").send()
        else:
            await cl.Message("Error: Unable to process files. Please try again.").send()
251
+
252
+
253
@cl.on_message
async def handle_message(message: cl.Message):
    """Route a user message through the agent and render the tool result.

    Dispatches to the agent executor, then formats the output: a dict with an
    "answer" key (query tool), a comparison-success marker (comparison tool,
    with CSV attachment), or a plain string otherwise.
    """
    # Local import: the file-level imports only bring in HumanMessage.
    from langchain.schema import AIMessage

    # Get chat history accumulated over previous turns.
    chat_history = cl.user_session.get("chat_history", [])

    # Run the agent (blocking call wrapped for the async event loop).
    with cl.Step("Agent thinking"):
        response = await cl.make_async(agent_executor.invoke)(
            {"input": message.content, "chat_history": chat_history}
        )

    # Handle the response based on the tool that was called.
    output = response["output"]

    if isinstance(output, dict) and "answer" in output:
        # document_query_tool result: show only the natural-language answer.
        # NOTE(review): AgentExecutor normally returns its final answer as a
        # string, so this branch may never fire — confirm with a live run.
        await cl.Message(output["answer"]).send()

    elif "Comparison complete!" in str(output):
        # document_comparison_tool succeeded: attach the generated CSV.
        file_path = cl.user_session.get("comparison_file_path")

        if file_path and os.path.exists(file_path):
            # Read the file content to hand to Chainlit.
            with open(file_path, "rb") as f:
                file_content = f.read()

            # Create a File element with the content.
            # NOTE(review): some Chainlit versions expect `path=` instead of
            # `content=` on cl.File — verify against the installed version if
            # the download still does not appear.
            file_element = cl.File(
                name="comparison_results.csv",
                content=file_content,
                display="inline"
            )

            await cl.Message(
                content="Comparison complete! Download the CSV below:",
                elements=[file_element],
            ).send()
        else:
            await cl.Message(content=str(output)).send()

    else:
        # Generic response.
        await cl.Message(content=str(output)).send()

    # Bug fix: the assistant's reply was previously appended as a second
    # HumanMessage, mislabeling conversation roles for the agent on later
    # turns. Record it as an AIMessage instead.
    chat_history.extend([
        HumanMessage(content=message.content),
        AIMessage(content=str(output)),
    ])
    cl.user_session.set("chat_history", chat_history)
comparison_results.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ Derived Description,Protocol_1,Protocol_2
2
+ Memory Recall,I struggle to remember names and faces. (Scale: 0-3),I retain new information effectively.
3
+ Mnemonic Techniques,I practice mnemonic techniques to improve recall. (Scale: 0-3),could not match
4
+ Decision Making Confidence,I feel confident when making important decisions. (Scale: 0-3),I second-guess myself often when making choices. (Scale: 0-3)
5
+ Instinct Trust,I trust my instincts when faced with uncertainty.,could not match
pyproject.toml CHANGED
@@ -51,7 +51,7 @@ dependencies = [
51
  #"ragas==0.2.10"
52
  #"FAISS"
53
  #remove only used for testing
54
- "cohere",
55
- "langchain_cohere",
56
- "arxiv"
57
  ]
 
51
  #"ragas==0.2.10"
52
  #"FAISS"
53
  #remove only used for testing
54
+ #"cohere",
55
+ #"langchain_cohere",
56
+ #"arxiv"
57
  ]