File size: 2,628 Bytes
f867c73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#-*- coding: utf-8 -*-
"""
Module to convert table of contents of PDF file to mindmap format.

@author     Lucas Koelman
@date       20/02/2018

@see        https://github.com/pdfminer/pdfminer.six/blob/master/tools/dumppdf.py
@see        https://github.com/xmindltd/xmind-sdk-python/blob/master/example.py
"""

# standard library
import sys, os, re
import xml.etree.ElementTree as etree
try:
    import cStringIO as io # Python 2
except (ImportError, ModuleNotFoundError):
    import io # Python 3
import dumppdf

import xmind
from xmind.core.topic import TopicElement


def toc_to_xmind(outfp, pdf_filename):
    """
    Convert table of contents of given PDF file to XMind document.
    """
    out_str = io.StringIO()
    dumppdf.dumpoutline(out_str, pdf_filename, [], set())

    # Parse XML
    toc_xml = out_str.getvalue()
    out_str.close() # no 'with' statement possible
    root_elem = etree.fromstring(toc_xml)

    # Convert XML to XMind document
    xwb = xmind.load(outfp) # load an existing file or create a new workbook if nothing is found

    # Create XMind workbook
    s1 = xwb.getPrimarySheet()
    s1.setTitle(os.path.split(pdf_filename)[-1])
    root_topic = s1.getRootTopic()
    root_topic.setTitle("Contents")

    # Transform each XML node into a mindmap node
    topic_stack = [root_topic] # length will always equal depth/level during traversal
    prev_level = 0
    for node in root_elem.iter(): # depth-first traversal
        if 'level' not in node.attrib:
            continue # irrelevant node
        node_level = int(node.attrib['level'])
        
        # Create topic for this node
        topic = TopicElement(ownerWorkbook=xwb)
        title = re.sub(r"^[a-zA-Z]'(.*)'$", r'\1', node.attrib['title'])
        topic.setTitle(title)
        
        # Add it to the topic tree
        level_difference = node_level - prev_level
        for _ in range(-level_difference+1): # negative yields empty list
            topic_stack.pop()
        topic_stack[-1].addSubTopic(topic)
        topic_stack.append(topic)
        prev_level = node_level

    xmind.save(xwb)


def main(argv):
    """
    Run conversion tool from command line.
    """
    import getopt
    def usage():
        print ('usage: %s -o outfile.xmind pdf_file.pdf' % argv[0])
        return 100
    try:
        (opts, args) = getopt.getopt(argv[1:], 'o:')
    except getopt.GetoptError:
        return usage()
    if not args:
        return usage()

    dopts = dict(opts)
    outfp = dopts['-o']
    pdf_filename = args[0]
    toc_to_xmind(outfp, pdf_filename)


if __name__ == '__main__':
    sys.exit(main(sys.argv))