|
|
|
""" |
|
Module to convert the table of contents of a PDF file to mind map (XMind) format.
|
|
|
@author Lucas Koelman |
|
@date 20/02/2018 |
|
|
|
@see https://github.com/pdfminer/pdfminer.six/blob/master/tools/dumppdf.py |
|
@see https://github.com/xmindltd/xmind-sdk-python/blob/master/example.py |
|
""" |
|
|
|
|
|
import sys, os, re |
|
import xml.etree.ElementTree as etree |
|
try: |
|
import cStringIO as io |
|
except (ImportError, ModuleNotFoundError): |
|
import io |
|
import dumppdf |
|
|
|
import xmind |
|
from xmind.core.topic import TopicElement |
|
|
|
|
|
def toc_to_xmind(outfp, pdf_filename):
    """
    Convert table of contents of given PDF file to XMind document.

    The outline is dumped as XML through ``dumppdf.dumpoutline``. Every XML
    element carrying a ``level`` attribute becomes one topic, attached below
    the closest preceding entry that has a smaller level (or below the root
    topic "Contents" when there is none).

    @param  outfp
            Output location handed to ``xmind.load`` (a file path string
            when called from ``main``).

    @param  pdf_filename : str
            Path of the PDF file whose outline is read. Its last path
            component is used as the sheet title.
    """
    out_str = io.StringIO()
    try:
        dumppdf.dumpoutline(out_str, pdf_filename, [], set())
        toc_xml = out_str.getvalue()
    finally:
        # Release the buffer even when dumping the outline fails.
        out_str.close()
    root_elem = etree.fromstring(toc_xml)

    xwb = xmind.load(outfp)

    s1 = xwb.getPrimarySheet()
    s1.setTitle(os.path.split(pdf_filename)[-1])
    root_topic = s1.getRootTopic()
    root_topic.setTitle("Contents")

    # Titles may arrive wrapped as a single letter followed by a
    # single-quoted string (e.g. u'...' or b'...'); keep only the quoted part.
    wrapped_title = re.compile(r"^[a-zA-Z]'(.*)'$")

    # Path from the root to the most recently added topic, stored as
    # (level, topic) pairs. Keeping the level next to each topic keeps the
    # nesting correct even when the outline skips levels (e.g. 1 -> 3 -> 2).
    ancestors = [(0, root_topic)]
    for node in root_elem.iter():
        if 'level' not in node.attrib:
            continue
        node_level = int(node.attrib['level'])

        topic = TopicElement(ownerWorkbook=xwb)
        topic.setTitle(wrapped_title.sub(r'\1', node.attrib['title']))

        # Unwind to the nearest ancestor that is strictly shallower than
        # this entry. The root is never popped, so entries with an
        # unexpected level (<= 0) still end up below "Contents".
        while len(ancestors) > 1 and ancestors[-1][0] >= node_level:
            ancestors.pop()
        ancestors[-1][1].addSubTopic(topic)
        ancestors.append((node_level, topic))

    xmind.save(xwb)
|
|
|
|
|
def main(argv):
    """
    Run conversion tool from command line.

    @param  argv : list(str)
            Full argument vector: the program name followed by
            ``-o outfile.xmind pdf_file.pdf``.

    @return int or None
            100 after printing the usage message when the options cannot be
            parsed, the PDF file argument is missing or the ``-o`` option is
            missing; None after the conversion has run.
    """
    import getopt

    def usage():
        print('usage: %s -o outfile.xmind pdf_file.pdf' % argv[0])
        return 100

    try:
        (opts, args) = getopt.getopt(argv[1:], 'o:')
    except getopt.GetoptError:
        return usage()

    dopts = dict(opts)
    # Both the positional PDF path and the -o output path are required;
    # report a missing one through the usage message instead of letting
    # the option lookup fail with a KeyError.
    if not args or '-o' not in dopts:
        return usage()

    outfp = dopts['-o']
    pdf_filename = args[0]
    toc_to_xmind(outfp, pdf_filename)
|
|
|
|
|
if __name__ == '__main__':
    # Executed as a script: hand the full argument vector to main() and use
    # whatever it returns as the process exit status.
    exit_status = main(sys.argv)
    sys.exit(exit_status)