Spaces:
Runtime error
Runtime error
File size: 635 Bytes
6ab3b20 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 |
import PyPDF2
import json
# Path of the medical book PDF that the script extracts text from.
# NOTE(review): the filename looks misspelled ("medicin") — confirm it matches
# the file that is actually deployed next to this script.
pdf_path = "gale_of_medicin.pdf"
def extract_text(pdf_path) -> str:
    """Return the text of every page of the PDF at ``pdf_path``.

    The text of each page is followed by a newline, in page order.

    Args:
        pdf_path: Path of the PDF file to open (opened in binary mode).

    Returns:
        The concatenated page text as one string; empty when the document
        has no pages.
    """
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        # Extract inside the `with` so the file handle is still open while
        # the reader is being used.
        # `or ""` is defensive: a page that yields no text (None) is treated
        # as empty instead of raising TypeError on the concatenation.
        # join() builds the result in one pass rather than repeated `+=`.
        return "".join(
            (page.extract_text() or "") + "\n" for page in reader.pages
        )
# Extract the book text and save it as a one-record JSON dataset.
text_data = extract_text(pdf_path)
# The whole extracted text is stored as the "response" of a single record
# whose "prompt" is the fixed placeholder string below.
dataset = [{"prompt": "Medical Query", "response": text_data}]
# ensure_ascii=False writes non-ASCII characters verbatim, so the output file
# is opened explicitly as UTF-8.
with open("medical_dataset.json", "w", encoding="utf-8") as f:
    json.dump(dataset, f, ensure_ascii=False, indent=4)
# "\u2705" is the check-mark emoji; it is written as an escape so the source
# stays ASCII and the literal cannot be split by a mis-decoded byte sequence.
print("\u2705 Extracted text saved as `medical_dataset.json`")
|