GagaLey committed on
Commit
f96a150
1 Parent(s): f677f59
Files changed (10)
  1. README.md +78 -0
  2. eval.py +143 -0
  3. eval_mor.sh +21 -0
  4. get_emb.py +142 -0
  5. mor_env.yml +327 -0
  6. prepare_rerank.py +245 -0
  7. requirements.txt +31 -0
  8. run_reasoning.sh +23 -0
  9. train_planner.sh +7 -0
  10. train_reranker.sh +13 -0
README.md ADDED
@@ -0,0 +1,78 @@
+ # MoR
+
+ # Running the Evaluation and Reranking Script
+
+ ## Installation
+ To set up the environment, install the dependencies with either Conda or pip:
+
+ ### Using Conda
+ ```bash
+ conda env create -f mor_env.yml
+ conda activate your_env_name # Replace with the actual environment name
+ ```
+
+ ### Using pip
+ ```bash
+ pip install -r requirements.txt
+ ```
+
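+ As a quick optional sanity check, you can confirm that the core dependencies import cleanly. Note that `eval.py` also imports `stark_qa`, which is not pinned in `requirements.txt`; install it separately if it is missing (e.g. via `pip install stark-qa`):
+
+ ```bash
+ python -c "import torch; print(torch.__version__, torch.cuda.is_available())"
+ python -c "import transformers, stark_qa; print('dependencies OK')"
+ ```
+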
+ ### Checkpoints and embeddings download
+ Before running inference, please go to https://drive.google.com/drive/folders/1ldOYiyrIaZ3AVAKAmNeP0ZWfD3DLZu9D?usp=drive_link and:
+
+ (1) download "checkpoints" and put it under the directory MoR/Planning/
+
+ (2) download "data" and put it under the directory MoR/Reasoning/
+
+ (3) download "model_checkpoint" and put it under the directory MoR/Reasoning/text_retrievers/
+
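+ If you prefer the command line, the same folder can be fetched with `gdown` (already included in the requirements). A minimal sketch, assuming the folder names on Drive match those above:
+
+ ```bash
+ gdown --folder "https://drive.google.com/drive/folders/1ldOYiyrIaZ3AVAKAmNeP0ZWfD3DLZu9D" -O drive_download
+ mv drive_download/checkpoints MoR/Planning/
+ mv drive_download/data MoR/Reasoning/
+ mv drive_download/model_checkpoint MoR/Reasoning/text_retrievers/
+ ```
+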
+ ## Inference
+ To run the inference script, execute the following command in the terminal:
+
+ ```bash
+ bash eval_mor.sh
+ ```
+
+ This script automatically processes all three datasets using the pre-trained planning graph generator and the pre-trained reranker.
+
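+ To evaluate a single dataset instead, call `eval.py` directly. As in `eval_mor.sh`, `mag` and `amazon` use the `ada` scorer, while `prime` uses `contriever`:
+
+ ```bash
+ python eval.py --dataset_name mag --scorer_name ada --mod test
+ ```
+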
+ ## Training (Train MoR from Scratch)
+ ### Step 1: Train the planning graph generator
+
+ ```bash
+ bash train_planner.sh
+ ```
+
+ ### Step 2: Run mixed traversal to collect candidates (note: there is no training process for reasoning)
+
+ ```bash
+ bash run_reasoning.sh
+ ```
+
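+ Note that `run_reasoning.sh` lists only `prime` in its `datasets` array by default; to collect candidates for all three datasets, edit the array at the top of the script:
+
+ ```bash
+ datasets=("mag" "amazon" "prime")
+ ```
+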
+ ### Step 3: Train the reranker
+
+ ```bash
+ bash train_reranker.sh
+ ```
+
+ ## Generating training data for the Planner
+ We provide code to generate your own training data for fine-tuning the Planner with different LLMs.
+
+ #### If you are using the Azure API
+
+ ```bash
+ python script.py --model "model_name" \
+   --dataset_name "dataset_name" \
+   --azure_api_key "your_azure_key" \
+   --azure_endpoint "your_azure_endpoint" \
+   --azure_api_version "your_azure_version"
+ ```
+
+ #### If you are using the OpenAI API
+
+ ```bash
+ python script.py --model "model_name" \
+   --dataset_name "dataset_name" \
+   --openai_api_key "your_openai_key" \
+   --openai_endpoint "your_openai_endpoint"
+ ```
eval.py ADDED
@@ -0,0 +1,143 @@
+ import os
+ import pickle as pkl
+ from argparse import ArgumentParser
+
+ import numpy as np
+ import pandas as pd
+ import torch
+ from tqdm import tqdm
+
+ from stark_qa import load_qa, load_skb
+
+ from Planning.model import Planner
+ from Reasoning.mor4path import MOR4Path
+ from prepare_rerank import prepare_trajectories
+
+
+ # Command-line arguments
+ parser = ArgumentParser()
+ parser.add_argument("--dataset_name", type=str, default="mag")
+ # text retriever name
+ parser.add_argument("--text_retriever_name", type=str, default="bm25")
+ parser.add_argument("--scorer_name", type=str, default="ada", help="contriever, ada")  # contriever for prime, ada for amazon and mag
+ # data split to evaluate
+ parser.add_argument("--mod", type=str, default="test", help="train, val, test")
+ # device
+ parser.add_argument("--device", type=str, default="cuda", help="Device to run the model (e.g., 'cuda' or 'cpu').")
+
+
+ if __name__ == "__main__":
+     args = parser.parse_args()
+     dataset_name = args.dataset_name
+     scorer_name = args.scorer_name
+     text_retriever_name = args.text_retriever_name
+     skb = load_skb(dataset_name)
+     qa = load_qa(dataset_name, human_generated_eval=False)
+
+     eval_metrics = [
+         "mrr",
+         "map",
+         "rprecision",
+         "recall@5",
+         "recall@10",
+         "recall@20",
+         "recall@50",
+         "recall@100",
+         "hit@1",
+         "hit@3",
+         "hit@5",
+         "hit@10",
+         "hit@20",
+         "hit@50",
+     ]
+
+     mor_path = MOR4Path(dataset_name, text_retriever_name, scorer_name, skb)
+     reasoner = Planner(dataset_name)
+     outputs = []
+     topk = 100
+     split_idx = qa.get_idx_split(test_ratio=1.0)
+     mod = args.mod
+     all_indices = split_idx[mod].tolist()
+     eval_csv = pd.DataFrame(columns=["idx", "query_id", "pred_rank"] + eval_metrics)
+
+     count = 0
+
+     # ***** Planning *****
+     # If the plan cache exists, load it; otherwise generate and cache the plans.
+     plan_cache_path = f"./cache/{dataset_name}/path/{mod}_20250222.pkl"
+     if os.path.exists(plan_cache_path):
+         with open(plan_cache_path, 'rb') as f:
+             plan_output_list = pkl.load(f)
+     else:
+         plan_output_list = []
+         for idx, i in enumerate(tqdm(all_indices)):
+             plan_output = {}
+             query, q_id, ans_ids, _ = qa[i]
+             rg = reasoner(query)  # reasoning graph planned for this query
+
+             plan_output['query'] = query
+             plan_output['q_id'] = q_id
+             plan_output['ans_ids'] = ans_ids
+             plan_output['rg'] = rg
+             plan_output_list.append(plan_output)
+         # save plan_output_list
+         os.makedirs(os.path.dirname(plan_cache_path), exist_ok=True)
+         with open(plan_cache_path, 'wb') as f:
+             pkl.dump(plan_output_list, f)
+
+     # ***** Reasoning *****
+     for idx, i in enumerate(tqdm(all_indices)):
+         query = plan_output_list[idx]['query']
+         q_id = plan_output_list[idx]['q_id']
+         ans_ids = plan_output_list[idx]['ans_ids']
+         rg = plan_output_list[idx]['rg']
+
+         output = mor_path(query, q_id, ans_ids, rg, args)
+
+         ans_ids = torch.LongTensor(ans_ids)
+
+         pred_dict = output['pred_dict']
+         result = mor_path.evaluate(pred_dict, ans_ids, metrics=eval_metrics)
+
+         result["idx"], result["query_id"] = i, q_id
+         result["pred_rank"] = torch.LongTensor(list(pred_dict.keys()))[
+             torch.argsort(torch.tensor(list(pred_dict.values())), descending=True)[
+                 :topk
+             ]
+         ].tolist()
+
+         eval_csv = pd.concat([eval_csv, pd.DataFrame([result])], ignore_index=True)
+
+         output['q_id'] = q_id
+         outputs.append(output)
+
+         count += 1
+
+     # for metric in eval_metrics:
+     #     print(
+     #         f"{metric}: {np.mean(eval_csv[eval_csv['idx'].isin(all_indices)][metric])}"
+     #     )
+
+     print(f"MOR count: {mor_path.mor_count}")
+
+     # prepare trajectories for the reranker and save them
+     bm25 = mor_path.text_retriever
+     test_data = prepare_trajectories(dataset_name, bm25, skb, outputs)
+     save_path = f"{dataset_name}_{mod}.pkl"
+     with open(save_path, 'wb') as f:
+         pkl.dump(test_data, f)
eval_mor.sh ADDED
@@ -0,0 +1,21 @@
+ #!/bin/bash
+
+ datasets=("mag" "amazon" "prime")
+ # Define scorer_name mapping using an associative array
+ declare -A dataset_scorer_map=(
+     [mag]="ada"
+     [amazon]="ada"
+     [prime]="contriever"
+ )
+
+ for dataset in "${datasets[@]}"; do
+     # Get the corresponding scorer_name for the dataset
+     scorer_name="${dataset_scorer_map[$dataset]}"
+     echo "Processing dataset: $dataset with scorer: $scorer_name"
+     python eval.py --dataset_name "$dataset" --scorer_name "$scorer_name" --mod "test"
+
+     cd Reranking
+     python rerank.py --dataset_name "$dataset"
+     cd ..
+ done
get_emb.py ADDED
@@ -0,0 +1,142 @@
+ import os
+ import os.path as osp
+ import random
+ import sys
+ import argparse
+
+ import pandas as pd
+ import torch
+ from tqdm import tqdm
+
+ from stark_qa.tools.api_lib.openai_emb import get_contriever, get_contriever_embeddings
+
+ sys.path.append('.')
+ from stark_qa import load_skb, load_qa
+ from stark_qa.tools.api import get_api_embeddings
+ from stark_qa.tools.local_encoder import get_llm2vec_embeddings, get_gritlm_embeddings
+ from models.model import get_embeddings
+
+
+ def parse_args():
+     parser = argparse.ArgumentParser()
+
+     # Dataset and embedding model selection
+     parser.add_argument('--dataset', default='prime', choices=['amazon', 'prime', 'mag'])
+     parser.add_argument('--emb_model', default='contriever',
+                         choices=[
+                             'contriever',
+                             'text-embedding-ada-002',
+                             'text-embedding-3-small',
+                             'text-embedding-3-large',
+                             'voyage-large-2-instruct',
+                             'GritLM/GritLM-7B',
+                             'McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp',
+                             'all-mpnet-base-v2'  # for sentence transformers
+                         ]
+                         )
+
+     # Mode settings
+     parser.add_argument('--mode', default='query', choices=['doc', 'query'])
+
+     # Path settings
+     parser.add_argument("--data_dir", default="data/", type=str)
+     parser.add_argument("--emb_dir", default="emb/", type=str)
+
+     # Text settings
+     parser.add_argument('--add_rel', action='store_true', default=False, help='add relation to the text')
+     parser.add_argument('--compact', action='store_true', default=False, help='make the text compact when input to the model')
+
+     # Evaluation settings
+     parser.add_argument("--human_generated_eval", action="store_true", help="if mode is `query`, generate query embeddings on the human-generated evaluation split")
+
+     # Batch and node settings
+     parser.add_argument("--batch_size", default=1024, type=int)
+
+     # encode kwargs (collected via the custom metavar "ENCODE")
+     parser.add_argument("--n_max_nodes", default=None, type=int, metavar="ENCODE")
+     parser.add_argument("--device", default=None, type=str, metavar="ENCODE")
+     parser.add_argument("--peft_model_name", default=None, type=str, help="llm2vec peft model", metavar="ENCODE")
+     parser.add_argument("--instruction", type=str, help="gritlm/llm2vec instruction", metavar="ENCODE")
+
+     args = parser.parse_args()
+
+     # Create encode_kwargs from all set arguments whose metavar is "ENCODE"
+     encode_kwargs = {k: v for k, v in vars(args).items() if v is not None and parser._option_string_actions[f'--{k}'].metavar == "ENCODE"}
+
+     return args, encode_kwargs
+
+
+ if __name__ == '__main__':
+     args, encode_kwargs = parse_args()
+     args.human_generated_eval = False  # NOTE: forced off here, overriding the --human_generated_eval flag
+     mode_suffix = '_human_generated_eval' if args.human_generated_eval and args.mode == 'query' else ''
+     mode_suffix += '_no_rel' if not args.add_rel else ''
+     mode_suffix += '_no_compact' if not args.compact else ''
+     emb_dir = osp.join(args.emb_dir, args.dataset, args.emb_model, f'{args.mode}{mode_suffix}')
+     csv_cache = osp.join(args.data_dir, args.dataset, f'{args.mode}{mode_suffix}.csv')
+
+     print(f'Embedding directory: {emb_dir}')
+     os.makedirs(emb_dir, exist_ok=True)
+     os.makedirs(os.path.dirname(csv_cache), exist_ok=True)
+
+     if args.mode == 'doc':
+         skb = load_skb(args.dataset)
+         lst = skb.candidate_ids
+         emb_path = osp.join(emb_dir, 'candidate_emb_dict.pt')
+     if args.mode == 'query':
+         qa_dataset = load_qa(args.dataset, human_generated_eval=args.human_generated_eval)
+         lst = [qa_dataset[i][1] for i in range(len(qa_dataset))]
+         emb_path = osp.join(emb_dir, 'query_emb_dict.pt')
+     random.shuffle(lst)
+
+     # Load existing embeddings if they exist
+     if osp.exists(emb_path):
+         emb_dict = torch.load(emb_path)
+         exist_emb_indices = list(emb_dict.keys())
+         print(f'Loaded existing embeddings from {emb_path}. Size: {len(emb_dict)}')
+     else:
+         emb_dict = {}
+         exist_emb_indices = []
+
+     # Load the existing document cache if it exists (only for doc mode)
+     if args.mode == 'doc' and osp.exists(csv_cache):
+         df = pd.read_csv(csv_cache)
+         cache_dict = dict(zip(df['index'], df['text']))
+
+         # Ensure that the indices in the cache match the expected indices
+         assert set(cache_dict.keys()) == set(lst), 'Indices in cache do not match the candidate indices.'
+
+         indices = list(set(lst) - set(exist_emb_indices))
+         texts = [cache_dict[idx] for idx in tqdm(indices, desc="Filtering docs for new embeddings")]
+     else:
+         indices = lst
+         texts = [qa_dataset.get_query_by_qid(idx) if args.mode == 'query'
+                  else skb.get_doc_info(idx, add_rel=args.add_rel, compact=args.compact) for idx in tqdm(indices, desc="Gathering docs")]
+         if args.mode == 'doc':
+             df = pd.DataFrame({'index': indices, 'text': texts})
+             df.to_csv(csv_cache, index=False)
+
+     print(f'Generating embeddings for {len(texts)} texts...')
+     if args.emb_model == 'contriever':
+         encoder, tokenizer = get_contriever(dataset_name=args.dataset)
+         for i in tqdm(range(0, len(texts), args.batch_size), desc="Generating embeddings"):
+             batch_texts = texts[i:i+args.batch_size]
+             batch_embs = get_contriever_embeddings(batch_texts, encoder=encoder, tokenizer=tokenizer, device='cuda')
+             batch_embs = batch_embs.view(len(batch_texts), -1).cpu()
+
+             batch_indices = indices[i:i+args.batch_size]
+             for idx, emb in zip(batch_indices, batch_embs):
+                 emb_dict[idx] = emb.view(1, -1)
+     else:
+         for i in tqdm(range(0, len(texts), args.batch_size), desc="Generating embeddings"):
+             batch_texts = texts[i:i+args.batch_size]
+             batch_embs = get_embeddings(batch_texts, args.emb_model, **encode_kwargs)
+             batch_embs = batch_embs.view(len(batch_texts), -1).cpu()
+
+             batch_indices = indices[i:i+args.batch_size]
+             for idx, emb in zip(batch_indices, batch_embs):
+                 emb_dict[idx] = emb.view(1, -1)
+
+     torch.save(emb_dict, emb_path)
+     print(f'Saved {len(emb_dict)} embeddings to {emb_path}!')
mor_env.yml ADDED
@@ -0,0 +1,327 @@
+ name: your_env_name
+ channels:
+   - defaults
+ dependencies:
+   - _libgcc_mutex=0.1=main
+   - _openmp_mutex=5.1=1_gnu
+   - bzip2=1.0.8=h5eee18b_6
+   - ca-certificates=2024.11.26=h06a4308_0
+   - debugpy=1.6.7=py311h6a678d5_0
+   - decorator=5.1.1=pyhd3eb1b0_0
+   - ipykernel=6.29.5=py311h06a4308_0
+   - jedi=0.19.2=py311h06a4308_0
+   - jupyter_client=8.6.0=py311h06a4308_0
+   - jupyter_core=5.7.2=py311h06a4308_0
+   - ld_impl_linux-64=2.40=h12ee557_0
+   - libffi=3.4.4=h6a678d5_1
+   - libgcc-ng=11.2.0=h1234567_1
+   - libgomp=11.2.0=h1234567_1
+   - libsodium=1.0.18=h7b6447c_0
+   - libstdcxx-ng=11.2.0=h1234567_1
+   - libuuid=1.41.5=h5eee18b_0
+   - ncurses=6.4=h6a678d5_0
+   - nest-asyncio=1.6.0=py311h06a4308_0
+   - openssl=3.0.15=h5eee18b_0
+   - parso=0.8.4=py311h06a4308_0
+   - pip=24.2=py311h06a4308_0
+   - prompt_toolkit=3.0.43=hd3eb1b0_0
+   - ptyprocess=0.7.0=pyhd3eb1b0_2
+   - pure_eval=0.2.2=pyhd3eb1b0_0
+   - python=3.11.11=he870216_0
+   - python-dateutil=2.9.0post0=py311h06a4308_2
+   - pyzmq=25.1.2=py311h6a678d5_0
+   - readline=8.2=h5eee18b_0
+   - setuptools=75.1.0=py311h06a4308_0
+   - six=1.16.0=pyhd3eb1b0_1
+   - sqlite=3.45.3=h5eee18b_0
+   - stack_data=0.2.0=pyhd3eb1b0_0
+   - tk=8.6.14=h39e8969_0
+   - tornado=6.4.2=py311h5eee18b_0
+   - traitlets=5.14.3=py311h06a4308_0
+   - typing_extensions=4.11.0=py311h06a4308_0
+   - wheel=0.44.0=py311h06a4308_0
+   - xz=5.4.6=h5eee18b_1
+   - zeromq=4.3.5=h6a678d5_0
+   - zlib=1.2.13=h5eee18b_1
+   - pip:
+       - accelerate==1.1.1
+       - aiobotocore==2.15.2
+       - aiohappyeyeballs==2.4.4
+       - aiohttp==3.11.9
+       - aioitertools==0.12.0
+       - aiolimiter==1.2.0
+       - aiosignal==1.3.1
+       - anndata==0.11.1
+       - annotated-types==0.7.0
+       - anthropic==0.40.0
+       - anyascii==0.3.2
+       - anyio==4.6.2.post1
+       - argon2-cffi==23.1.0
+       - argon2-cffi-bindings==21.2.0
+       - array-api-compat==1.9.1
+       - arrow==1.3.0
+       - asttokens==3.0.0
+       - async-lru==2.0.4
+       - attrs==24.2.0
+       - babel==2.16.0
+       - backports-tarfile==1.2.0
+       - beautifulsoup4==4.12.3
+       - biopython==1.84
+       - biothings-client==0.3.1
+       - bitarray==3.0.0
+       - bitsandbytes==0.44.1
+       - bleach==6.2.0
+       - blinker==1.9.0
+       - bm25s==0.2.5
+       - botocore==1.35.36
+       - bs4==0.0.2
+       - cattrs==24.1.2
+       - cellxgene-census==1.15.0
+       - certifi==2024.8.30
+       - cffi==1.17.1
+       - charset-normalizer==3.4.0
+       - chembl-webresource-client==0.10.9
+       - click==8.1.7
+       - colbert==0.40
+       - colbert-ai==0.2.21
+       - comm==0.2.2
+       - contourpy==1.3.1
+       - contractions==0.1.73
+       - cryptography==44.0.0
+       - cut-cross-entropy==24.11.4
+       - cycler==0.12.1
+       - dataclasses==0.6
+       - datasets==3.1.0
+       - defusedxml==0.7.1
+       - dill==0.3.8
+       - distro==1.9.0
+       - docker-pycreds==0.4.0
+       - docstring-parser==0.16
+       - docutils==0.21.2
+       - easydict==1.13
+       - et-xmlfile==2.0.0
+       - evaluate==0.4.2
+       - executing==2.1.0
+       - fastjsonschema==2.21.1
+       - filelock==3.13.1
+       - flask==3.1.0
+       - fonttools==4.55.1
+       - fqdn==1.5.1
+       - frozenlist==1.5.0
+       - fsspec==2024.2.0
+       - fuzzywuzzy==0.18.0
+       - gdown==5.2.0
+       - gget==0.29.0
+       - git-python==1.0.3
+       - gitdb==4.0.11
+       - gitpython==3.1.43
+       - greenlet==3.1.1
+       - h11==0.14.0
+       - h5py==3.12.1
+       - hf-transfer==0.1.8
+       - httpcore==1.0.7
+       - httpx==0.28.0
+       - huggingface-hub==0.26.3
+       - icalendar==6.1.0
+       - idna==3.10
+       - importlib-metadata==8.5.0
+       - ipython==8.30.0
+       - ipywidgets==8.1.5
+       - isoduration==20.11.0
+       - itsdangerous==2.2.0
+       - jaraco-classes==3.4.0
+       - jaraco-context==6.0.1
+       - jaraco-functools==4.1.0
+       - jeepney==0.8.0
+       - jinja2==3.1.3
+       - jiter==0.8.0
+       - jmespath==1.0.1
+       - joblib==1.4.2
+       - json5==0.10.0
+       - jsonpatch==1.33
+       - jsonpointer==3.0.0
+       - jsonschema==4.23.0
+       - jsonschema-specifications==2024.10.1
+       - jupyter==1.1.1
+       - jupyter-console==6.6.3
+       - jupyter-events==0.11.0
+       - jupyter-lsp==2.2.5
+       - jupyter-server==2.15.0
+       - jupyter-server-terminals==0.5.3
+       - jupyterlab==4.3.4
+       - jupyterlab-pygments==0.3.0
+       - jupyterlab-server==2.27.3
+       - jupyterlab-widgets==3.0.13
+       - keyring==25.5.0
+       - kiwisolver==1.4.7
+       - langchain==0.3.9
+       - langchain-core==0.3.21
+       - langchain-text-splitters==0.3.2
+       - langdetect==1.0.9
+       - langsmith==0.1.147
+       - legacy-api-wrap==1.4.1
+       - levenshtein==0.26.1
+       - lightning-utilities==0.11.9
+       - littleutils==0.2.4
+       - llvmlite==0.43.0
+       - lxml==5.3.0
+       - markdown-it-py==3.0.0
+       - markupsafe==2.1.5
+       - matplotlib==3.9.3
+       - matplotlib-inline==0.1.7
+       - matplotlib-venn==1.1.1
+       - mdurl==0.1.2
+       - mistune==3.1.1
+       - moleculeace==3.0.0
+       - more-itertools==10.5.0
+       - mpmath==1.3.0
+       - multidict==6.1.0
+       - multiprocess==0.70.16
+       - mygene==3.2.2
+       - mysql-connector-python==9.1.0
+       - natsort==8.4.0
+       - nbclient==0.10.2
+       - nbconvert==7.16.6
+       - nbformat==5.10.4
+       - networkx==3.2.1
+       - nh3==0.2.19
+       - ninja==1.11.1.2
+       - nltk==3.9.1
+       - notebook==7.3.2
+       - notebook-shim==0.2.4
+       - numba==0.60.0
+       - numpy==1.26.4
+       - nvidia-cublas-cu12==12.4.5.8
+       - nvidia-cuda-cupti-cu12==12.4.127
+       - nvidia-cuda-nvrtc-cu12==12.4.127
+       - nvidia-cuda-runtime-cu12==12.4.127
+       - nvidia-cudnn-cu12==9.1.0.70
+       - nvidia-cufft-cu12==11.2.1.3
+       - nvidia-curand-cu12==10.3.5.147
+       - nvidia-cusolver-cu12==11.6.1.9
+       - nvidia-cusparse-cu12==12.3.1.170
+       - nvidia-nccl-cu12==2.21.5
+       - nvidia-nvjitlink-cu12==12.4.127
+       - nvidia-nvtx-cu12==12.4.127
+       - ogb==1.3.6
+       - openai==1.56.1
+       - openpyxl==3.1.5
+       - orjson==3.10.12
+       - outdated==0.2.2
+       - overrides==7.7.0
+       - packaging==24.2
+       - pandas==2.2.3
+       - pandocfilters==1.5.1
+       - patsy==1.0.1
+       - peft==0.13.2
+       - pexpect==4.9.0
+       - pillow==10.2.0
+       - pkginfo==1.12.0
+       - platformdirs==4.3.6
+       - prometheus-client==0.21.1
+       - prompt-toolkit==3.0.48
+       - propcache==0.2.1
+       - protobuf==3.20.3
+       - psutil==6.1.0
+       - pure-eval==0.2.3
+       - pyahocorasick==2.1.0
+       - pyaml==24.9.0
+       - pyarrow==18.1.0
+       - pyarrow-hotfix==0.6
+       - pycparser==2.22
+       - pydantic==2.10.3
+       - pydantic-core==2.27.1
+       - pygments==2.18.0
+       - pynndescent==0.5.13
+       - pyparsing==3.2.0
+       - pysocks==1.7.1
+       - pytdc==1.1.1
+       - python-dotenv==1.0.1
+       - python-json-logger==3.2.1
+       - python-levenshtein==0.26.1
+       - pytz==2024.2
+       - pyyaml==6.0.2
+       - rapidfuzz==3.10.1
+       - rdkit==2023.9.6
+       - rdkit-pypi==2022.9.5
+       - readme-renderer==44.0
+       - referencing==0.36.2
+       - regex==2024.11.6
+       - requests==2.32.3
+       - requests-cache==1.2.1
+       - requests-toolbelt==1.0.0
+       - rfc3339-validator==0.1.4
+       - rfc3986==2.0.0
+       - rfc3986-validator==0.1.1
+       - rich==13.9.4
+       - rpds-py==0.22.3
+       - s3fs==2024.2.0
+       - safetensors==0.4.5
+       - scanpy==1.10.4
+       - scikit-learn==1.2.2
+       - scikit-optimize==0.10.2
+       - scipy==1.14.1
+       - seaborn==0.13.2
+       - secretstorage==3.3.3
+       - send2trash==1.8.3
+       - sentence-transformers==3.3.1
+       - sentencepiece==0.2.0
+       - sentry-sdk==2.19.0
+       - session-info==1.0.0
+       - setproctitle==1.3.4
+       - shtab==1.7.1
+       - smmap==5.0.1
+       - sniffio==1.3.1
+       - somacore==1.0.11
+       - soupsieve==2.6
+       - sqlalchemy==2.0.36
+       - stack-data==0.6.3
+       - statsmodels==0.14.4
+       - stdlib-list==0.11.0
+       - sympy==1.13.1
+       - tenacity==9.0.0
+       - terminado==0.18.1
+       - textsearch==0.0.24
+       - threadpoolctl==3.5.0
+       - tiledb==0.29.1
+       - tiledbsoma==1.11.4
+       - tinycss2==1.4.0
+       - tokenizers==0.20.3
+       - torch==2.5.1+cu124
+       - torch-geometric==2.6.1
+       - torch-scatter==2.1.2+pt25cu124
+       - torchaudio==2.5.1+cu124
+       - torchmetrics==1.6.0
+       - torchvision==0.20.1+cu124
+       - tqdm==4.67.1
+       - transformers==4.46.3
+       - triton==3.1.0
+       - trl==0.12.1
+       - twine==6.0.1
+       - typeguard==4.4.1
+       - types-python-dateutil==2.9.0.20241206
+       - typing-extensions==4.12.2
+       - tyro==0.9.2
+       - tzdata==2024.2
+       - ujson==5.10.0
+       - umap-learn==0.5.7
+       - unsloth==2025.1.8
+       - unsloth-zoo==2025.1.5
+       - uri-template==1.3.0
+       - url-normalize==1.4.3
+       - urllib3==2.2.3
+       - voyageai==0.3.2
+       - wandb==0.18.7
+       - wcwidth==0.2.13
+       - webcolors==24.11.1
+       - webencodings==0.5.1
+       - websocket-client==1.8.0
+       - werkzeug==3.1.3
+       - widgetsnbextension==4.0.13
+       - wrapt==1.17.0
+       - xformers==0.0.28.post3
+       - xxhash==3.5.0
+       - yapf==0.43.0
+       - yarl==1.18.3
+       - zipp==3.21.0
+ prefix: /home/yongjia/.conda/envs/g_traversal
prepare_rerank.py ADDED
@@ -0,0 +1,245 @@
+ from Reasoning.text_retrievers.contriever import Contriever
+ from Reasoning.text_retrievers.ada import Ada
+ from stark_qa import load_qa, load_skb
+
+ import pickle as pkl
+ from tqdm import tqdm
+ from transformers import BertTokenizer, BertModel
+
+ model_name = "bert-base-uncased"
+
+ tokenizer = BertTokenizer.from_pretrained(model_name)
+ encoder = BertModel.from_pretrained(model_name)
+
+
+ def get_bm25_scores(dataset_name, bm25, outputs):
+     new_outputs = []
+     # use tqdm to visualize the progress
+     for i in tqdm(range(len(outputs))):
+         query, q_id, ans_ids = outputs[i]['query'], outputs[i]['q_id'], outputs[i]['ans_ids']
+         paths = outputs[i]['paths']
+         rg = outputs[i]['rg']
+
+         if dataset_name == 'prime':
+             new_path_dict = paths
+         else:
+             # make a new path dict with the -1 padding removed from each path
+             new_path_dict = {}
+             for key in paths.keys():
+                 new_path = [x for x in paths[key] if x != -1]
+                 new_path_dict[key] = new_path
+
+         # collect all nodes on the paths except the first element
+         candidates_ids = []
+         for key in new_path_dict.keys():
+             candidates_ids.extend(new_path_dict[key][1:])
+         candidates_ids.extend(ans_ids)
+         candidates_ids = list(set(candidates_ids))
+
+         # get the bm25 scores
+         bm_score_dict = bm25.score(query, q_id, candidate_ids=candidates_ids)
+         outputs[i]['bm_score_dict'] = bm_score_dict
+
+         # replace -1 placeholders in the bm_vector_dict with the bm25 scores
+         bm_vector_dict = outputs[i]['bm_vector_dict']
+         for key in bm_vector_dict.keys():
+             if -1 in bm_vector_dict[key]:
+                 path = new_path_dict[key]
+                 assert len(path) == len(bm_vector_dict[key])
+
+                 bm_vector_dict[key] = [bm_score_dict[path[j]] if x == -1 else x for j, x in enumerate(bm_vector_dict[key])]
+
+         outputs[i]['bm_vector_dict'] = bm_vector_dict
+
+         # fix the length of paths in prime: pad with -1 or truncate to max_len
+         if dataset_name == 'prime':
+             max_len = 3
+             new_paths = {}
+             for key in paths:
+                 new_path = paths[key]
+                 if len(paths[key]) < max_len:
+                     new_path = [-1] * (max_len - len(paths[key])) + paths[key]
+                 elif len(paths[key]) > max_len:
+                     new_path = paths[key][-max_len:]
+                 new_paths[key] = new_path
+
+             # assign the new paths
+             outputs[i]['paths'] = new_paths
+
+         new_outputs.append(outputs[i])
+
+     return new_outputs
+
+
+ def prepare_score_vector_dict(raw_data):
+     # make the score_vector_dict: [bm_score, bm_score, bm_score, ada_score/contriever_score]
+     for i in range(len(raw_data)):
+         # get the pred_dict
+         pred_dict = raw_data[i]['pred_dict']
+         # get the bm_vector_dict
+         bm_vector_dict = raw_data[i]['bm_vector_dict']
+         # initialize the score_vector_dict
+         raw_data[i]['score_vector_dict'] = {}
+         # append the value of pred_dict to the end of each bm_vector
+         for key in pred_dict:
+             # get the bm25 score vector for this candidate
+             bm_vector = bm_vector_dict[key]
+             # get the ranking score
+             rk_score = pred_dict[key]
+             # build the score vector
+             score_vector = bm_vector + [rk_score]
+             # check the length of the score_vector: if less than 4, pad with 0 at the beginning
+             if len(score_vector) < 4:
+                 score_vector = [0] * (4 - len(score_vector)) + score_vector
+             elif len(score_vector) > 4:
+                 score_vector = score_vector[-4:]
+             # store it in the score_vector_dict
+             raw_data[i]['score_vector_dict'][key] = score_vector
+
+     return raw_data
+
+
+ def prepare_text_emb_symb_enc(raw_data, skb):
+     # add the text_emb to the raw_data
+     text2emb_list = []
+     text2emb_dict = {}
+
+     # symbolic encoding of a path, keyed by its number of real (non -1) nodes
+     symbolic_encode_dict = {
+         3: [0, 1, 1],
+         2: [2, 0, 1],
+         1: [2, 2, 0],
+     }
+
+     for i in range(len(raw_data)):
+         # get the paths
+         paths = raw_data[i]['paths']
+         preds = raw_data[i]['pred_dict']
+         assert len(paths) == len(preds)
+
+         # initialize the text_emb_dict
+         raw_data[i]['text_emb_dict'] = {}
+
+         # initialize the symb_enc_dict
+         raw_data[i]['symb_enc_dict'] = {}
+
+         for key in paths:
+             # get the path
+             path = paths[key]
+             # make a unique text path (node types) and record it in the dict
+             text_path_li = [skb.get_node_type_by_id(node_id) if node_id != -1 else "padding" for node_id in path]
+             text_path_str = " ".join(text_path_li)
+             if text_path_str not in text2emb_list:
+                 text2emb_list.append(text_path_str)
+                 text2emb_dict[text_path_str] = -1
+
+             # assign the text path to the raw_data
+             raw_data[i]['text_emb_dict'][key] = text_path_str
+
+             # ***** make the symb_enc_dict *****
+             # number of non -1 entries in the path
+             num_non_1 = len([p for p in path if p != -1])
+             # get the symbolic encoding
+             symb_enc = symbolic_encode_dict[num_non_1]
+             # store it in the symb_enc_dict
+             raw_data[i]['symb_enc_dict'][key] = symb_enc
+
+     # ***** get the text2emb_dict embeddings *****
+     for key in text2emb_dict.keys():
+         # tokenize the node-type path with the BERT tokenizer
+         text_enc = tokenizer(key, return_tensors='pt')['input_ids']
+         outputs = encoder(text_enc)
+         last_hidden_states = outputs.last_hidden_state.mean(dim=1)
+         text2emb_dict[key] = last_hidden_states.detach()
+
+     new_data = {'data': raw_data, 'text2emb_dict': text2emb_dict}
+
+     return new_data
+
+
+ def prepare_trajectories(dataset_name, bm25, skb, outputs):
+     # get the bm25 scores
+     new_outputs = get_bm25_scores(dataset_name, bm25, outputs)  # returns a list
+     # prepare the score_vector_dict
+     new_outputs = prepare_score_vector_dict(new_outputs)  # returns a list
+     # prepare the text_emb and symb_enc_dict
+     new_data = prepare_text_emb_symb_enc(new_outputs, skb)  # returns a dict
+
+     return new_data
+
+
+ def get_contriever_scores(dataset_name, mod, skb, path):
+     with open(path, 'rb') as f:
+         data = pkl.load(f)
+
+     raw_data = data['data']
+
+     qa = load_qa(dataset_name, human_generated_eval=False)
+
+     contriever = Contriever(skb, dataset_name, device='cuda')
+
+     split_idx = qa.get_idx_split(test_ratio=1.0)
+
+     all_indices = split_idx[mod].tolist()
+     # use tqdm to visualize the progress
+     for idx, i in enumerate(tqdm(all_indices)):
+         query, q_id, ans_ids, _ = qa[i]
+         assert query == raw_data[idx]['query']
+         pred_ids = list(raw_data[idx]['pred_dict'].keys())
+         candidates_ids = list(set(pred_ids))
+         candidates_ids.extend(ans_ids)
+
+         # get the contriever scores
+         contriever_score_dict = contriever.score(query, q_id, candidate_ids=candidates_ids)
+
+         raw_data[idx]['contriever_score_dict'] = contriever_score_dict
+
+     data['data'] = raw_data
+
+     with open(path, 'wb') as f:
+         pkl.dump(data, f)
+
+
+ def get_ada_scores(dataset_name, mod, skb, path):
+     with open(path, 'rb') as f:
+         data = pkl.load(f)
+
+     raw_data = data['data']
+
+     qa = load_qa(dataset_name, human_generated_eval=False)
+
+     ada = Ada(skb, dataset_name, device='cuda')
+
+     split_idx = qa.get_idx_split(test_ratio=1.0)
+
+     all_indices = split_idx[mod].tolist()
+     # use tqdm to visualize the progress
+     for idx, i in enumerate(tqdm(all_indices)):
+         query, q_id, ans_ids, _ = qa[i]
+         assert query == raw_data[idx]['query']
+         pred_ids = list(raw_data[idx]['pred_dict'].keys())
+         candidates_ids = list(set(pred_ids))
+         candidates_ids.extend(ans_ids)
+
+         # get the ada scores
+         ada_score_dict = ada.score(query, q_id, candidate_ids=candidates_ids)
+
+         raw_data[idx]['ada_score_dict'] = ada_score_dict
+
+     data['data'] = raw_data
+
+     with open(path, 'wb') as f:
+         pkl.dump(data, f)
+
+
+ if __name__ == '__main__':
+     print("Test prepare_rerank")
requirements.txt ADDED
@@ -0,0 +1,31 @@
+ anthropic==0.45.2
+ beautifulsoup4==4.13.3
+ bm25s==0.2.5
+ Colbert==0.40
+ colbert_ai==0.2.21
+ contractions==0.1.73
+ datasets==3.1.0
+ gdown==5.2.0
+ gritlm==1.0.2
+ huggingface_hub==0.26.3
+ langchain==0.3.18
+ langdetect==1.0.9
+ llm2vec==0.2.3
+ nltk==3.9.1
+ numpy==2.2.3
+ ogb==1.3.6
+ openai==1.63.0
+ pandas==2.2.3
+ PyTDC==1.1.1
+ scikit_learn==1.2.2
+ sentence_transformers==3.3.1
+ torch==2.5.1+cu124
+ torch_geometric==2.6.1
+ torch_scatter==2.1.2+pt25cu124
+ torchmetrics==1.6.0
+ tqdm==4.67.1
+ transformers==4.46.3
+ trl==0.12.1
+ unsloth==2025.1.8
+ voyageai==0.3.2
+ wandb==0.19.6
run_reasoning.sh ADDED
@@ -0,0 +1,23 @@
+ #!/bin/bash
+
+ # Define datasets and mods
+ datasets=("prime")
+ mods=("test" "val" "train")
+
+ # Define scorer_name mapping using an associative array
+ declare -A dataset_scorer_map=(
+     [mag]="ada"
+     [amazon]="ada"
+     [prime]="contriever"
+ )
+
+ # Loop through datasets and mods
+ for dataset in "${datasets[@]}"; do
+     # Get the corresponding scorer_name for the dataset
+     scorer_name="${dataset_scorer_map[$dataset]}"
+
+     for mod in "${mods[@]}"; do
+         echo "Processing dataset: $dataset with mod: $mod and scorer: $scorer_name"
+         python eval.py --dataset_name "$dataset" --scorer_name "$scorer_name" --mod "$mod"
+     done
+ done
train_planner.sh ADDED
@@ -0,0 +1,7 @@
+ #!/bin/bash
+
+ # Navigate to the Planning directory
+ cd Planning
+
+ # Run the training script
+ python train_eval.py
train_reranker.sh ADDED
@@ -0,0 +1,13 @@
+ #!/bin/bash
+
+ # Navigate to the Reranking directory
+ cd Reranking
+
+ # Run the training scripts
+ # amazon
+ python train_eval_path_amazon.py
+ # mag
+ python train_eval_path_mag.py
+ # prime
+ python train_eval_path_prime.py