File size: 591 Bytes
76b04ec
304fad8
76b04ec
304fad8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# dataset_loader.py
from datasets import load_dataset

_MEDMCQA_OPTION_LETTERS = ("A", "B", "C", "D")


def _format_medmcqa_entry(entry):
    """Render one MedMCQA record as a question plus a readable text block.

    Args:
        entry: Mapping with the keys ``question``, ``opa``, ``opb``, ``opc``,
            ``opd``, ``cop`` and ``exp``.

    Returns:
        A dict with the raw ``question`` and a ``formatted`` multi-line string
        containing the question, the four options, the correct answer and the
        explanation.
    """
    correct = entry["cop"]
    # NOTE(review): assumes `cop` is the 0-based index of the correct option
    # (0 -> A ... 3 -> D), as exposed by the Hugging Face `medmcqa` dataset --
    # confirm against the dataset card. Any other value (e.g. an unlabeled
    # sentinel) is passed through unchanged rather than mislabeled.
    if (
        isinstance(correct, int)
        and not isinstance(correct, bool)
        and 0 <= correct < len(_MEDMCQA_OPTION_LETTERS)
    ):
        correct = _MEDMCQA_OPTION_LETTERS[correct]

    # A missing explanation would otherwise be rendered as the text "None".
    explanation = entry["exp"]
    if explanation is None:
        explanation = ""

    return {
        "question": entry["question"],
        "formatted": (
            f"Q: {entry['question']}\n"
            f"A. {entry['opa']}  B. {entry['opb']}  C. {entry['opc']}  D. {entry['opd']}\n"
            f"Correct Answer: {correct}\n"
            f"Explanation: {explanation}"
        ),
    }


def load_medmcqa_subset(limit=5000):
    """Load the first ``limit`` MedMCQA training records and format them.

    Args:
        limit: Number of records to take from the start of the ``train``
            split. Pass ``None`` to load the whole split.

    Returns:
        A list of dicts, one per record, each with a ``question`` string and
        a ``formatted`` string (question, options, correct answer,
        explanation).
    """
    # "train[:None]" is not a valid split spec, so fall back to the plain
    # split name when no limit is requested.
    split = "train" if limit is None else f"train[:{limit}]"
    dataset = load_dataset("medmcqa", split=split)
    return [_format_medmcqa_entry(entry) for entry in dataset]