medical-llm-chatbot / dataset_loader.py
SankethHonavar's picture
Initial commit for Hugging Face Space
304fad8
raw
history blame contribute delete
591 Bytes
# dataset_loader.py
from datasets import load_dataset
def load_medmcqa_subset(limit=5000):
    """Load the head of the MedMCQA train split as prompt-ready records.

    Args:
        limit: Number of leading train examples to load. ``None`` loads the
            whole train split.

    Returns:
        A list with one dict per example, each holding:
          * ``"question"``  - the raw question text.
          * ``"formatted"`` - a multi-line string with the question, the four
            lettered options, the correct answer and (when present) the
            explanation.
    """
    # "train[:None]" is not a meaningful split spec, so only slice when a
    # limit was actually given.
    split = "train" if limit is None else f"train[:{limit}]"
    dataset = load_dataset("medmcqa", split=split)

    # The options are rendered as A-D below, so the answer should use the same
    # letters instead of a raw class index. Rely on the dataset's own label
    # mapping (ClassLabel.int2str) when it exposes one; otherwise the raw
    # value is kept exactly as before.
    features = getattr(dataset, "features", None) or {}
    int2str = getattr(features.get("cop"), "int2str", None)

    def answer_label(cop):
        # Negative indices (presumably "unlabelled" - TODO confirm) and
        # non-integer values are passed through untouched.
        if int2str is None or not isinstance(cop, int) or cop < 0:
            return cop
        try:
            return str(int2str(cop)).upper()
        except (ValueError, IndexError, KeyError):
            return cop

    def format_entry(entry):
        lines = [
            f"Q: {entry['question']}",
            f"A. {entry['opa']} B. {entry['opb']} C. {entry['opc']} D. {entry['opd']}",
            f"Correct Answer: {answer_label(entry['cop'])}",
        ]
        # Skip the explanation line when there is none, rather than emitting
        # the literal text "Explanation: None".
        explanation = entry.get("exp")
        if explanation:
            lines.append(f"Explanation: {explanation}")
        return {
            "question": entry["question"],
            "formatted": "\n".join(lines),
        }

    return [format_entry(entry) for entry in dataset]