File size: 4,884 Bytes
91eaff6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Convert the markdown generated from Jupyter notebooks to preserve
rendered images, etc.
"""

import os
import pathlib
import re
import sys
import time
import traceback
import typing

from icecream import ic  # pylint: disable=E0401
from selenium import webdriver  # pylint: disable=E0401


class Converter:
    """
Post-process the markdown generated from a Jupyter notebook: swap the
boilerplate first code cell for an admonition that links to the
notebook, and swap the `<iframe/>` elements produced by `PyVis` for
static PNG screenshots.
    """
    # matches the fenced `python` cell whose first line is the
    # "# for use ... production:" comment, followed by one line of code
    PAT_HEADER = re.compile(r"^(```python\n\# for use.*production:\n.*\n```\n)", re.MULTILINE)

    # captures the quoted value of a `src="..."` attribute which is
    # preceded by whitespace
    PAT_SOURCE = re.compile(r"\s+src\=\"(\S+)\"")

    # admonition template; the slots are filled with: notebook stem,
    # source URL, notebook stem
    REPLACEMENT_HEADER: str = """
!!! note
    To run this notebook in JupyterLab, load [`examples/{}.ipynb`]({}/examples/{}.ipynb)

    """

    def __init__ (
        self,
        src_url: str,
        ) -> None:
        """
Constructor.

    src_url:
base URL substituted into the link to the notebook in `REPLACEMENT_HEADER`
        """
        self.src_url: str = src_url


    def replace_sys_header (
        self,
        text: str,
        stem: str,
        *,
        debug: bool = False,
        ) -> str:
        """
Replace the initial cell in a tutorial notebook.

    text:
markdown text to be converted

    stem:
name of the notebook file, without its extension

    debug:
debugging flag

    returns:
the text with each match of `PAT_HEADER` replaced by the formatted `REPLACEMENT_HEADER`; the pieces are re-joined with newlines, so a replaced header gains one newline before and one after; text without any match is returned unchanged
        """
        output: typing.List[ str ] = []

        # since `PAT_HEADER` has a capture group, `split()` also yields
        # the matched headers as chunks of their own
        for chunk in self.PAT_HEADER.split(text):
            m_header: typing.Optional[ re.Match ] = self.PAT_HEADER.match(chunk)

            if debug:
                ic(m_header)

            if m_header:
                output.append(
                    self.REPLACEMENT_HEADER.format(stem, self.src_url, stem)
                )
            else:
                output.append(chunk)

        return "\n".join(output)


    def get_pyvis_html (
        self,
        iframe: str,
        *,
        debug: bool = False,
        ) -> typing.Optional[ str ]:
        """
Locate the HTML files generated by `PyVis` if any.
This assumes the HTML files are named `tmp.fig*.*`

    iframe:
text of an `<iframe/>` element, or of the line within it which holds the `src` attribute

    debug:
debugging flag

    returns:
the value of the `src` attribute when it contains `tmp.fig`; otherwise `None`
        """
        m_source: typing.Optional[ re.Match ] = self.PAT_SOURCE.search(iframe)

        if m_source is None:
            return None

        source_html: str = m_source.group(1)

        if debug:
            ic(source_html)

        if "tmp.fig" not in source_html:
            # <iframe/> wasn't generated by PyVis
            return None

        return source_html


    def render_screenshot (
        self,
        source_html: str,
        source_png: str,
        *,
        wait_secs: float = 10.0,
        ) -> None:
        """
Use Selenium to render `source_png` from `source_html`

    source_html:
URL of the HTML page to load in the browser

    source_png:
path of the PNG file in which to save the screenshot

    wait_secs:
seconds to pause between loading the page and taking the screenshot
        """
        browser: webdriver.Chrome = webdriver.Chrome()

        try:
            browser.get(source_html)

            # presumably this pause lets the page finish drawing the
            # graph before the capture -- TODO confirm
            time.sleep(wait_secs)

            browser.get_screenshot_as_file(source_png)
        finally:
            # always shut down the browser, even when loading the page
            # or taking the screenshot fails
            browser.quit()


    def _render_pyvis_image (
        self,
        src_html: str,
        parent: pathlib.Path,
        stem: str,
        *,
        debug: bool = False,
        ) -> str:
        """
Render one `PyVis` HTML file as a PNG file within the `{stem}_files`
subdirectory of `parent`, and return the markdown image link for it.
        """
        src_png: str = src_html.replace(".html", ".png")

        if debug:
            ic(src_png)

        # tolerate a directory left over from a previous run, without
        # hiding any other failure
        os.makedirs(f"{parent}/{stem}_files", exist_ok = True)

        self.render_screenshot(
            f"file://{os.getcwd()}/examples/{src_html}",
            f"{parent}/{stem}_files/{src_png}",
        )

        return f"![png]({stem}_files/{src_png})"


    def replace_pyvis_iframe (
        self,
        text: str,
        parent: pathlib.Path,
        stem: str,
        *,
        debug: bool = False,
        ) -> str:
        """
Substitute static images for the rendered graphs.

An `<iframe/>` element is recognized when a line starts with `<iframe`
and it runs through a line which starts with `></iframe>`.
When its `src` line names a `PyVis` file, the whole element is replaced
by one markdown image link, after rendering the screenshot.
Any other `<iframe/>` element is copied through unchanged.

    text:
markdown text to be converted

    parent:
directory which contains the markdown file; the screenshots are written into its `{stem}_files` subdirectory

    stem:
name of the markdown file, without its extension

    debug:
debugging flag

    returns:
the converted markdown text
        """
        output: typing.List[ str ] = []
        pending: typing.List[ str ] = []  # raw lines of the current <iframe/>
        in_iframe: bool = False
        replaced: bool = False  # was an image emitted for the current <iframe/>?

        for line in text.split("\n"):
            if line.startswith("<iframe"):
                if in_iframe and not replaced:
                    # the previous element never closed and was not a
                    # PyVis graph: keep its lines rather than drop them
                    output.extend(pending)

                in_iframe = True
                replaced = False
                pending = []

            if not in_iframe:
                output.append(line)
                continue

            pending.append(line)

            if line.strip().startswith("src="):
                src_html: typing.Optional[ str ] = self.get_pyvis_html(
                    line,
                    debug = debug,
                )

                if src_html is not None:
                    output.append(
                        self._render_pyvis_image(
                            src_html,
                            parent,
                            stem,
                            debug = debug,
                        )
                    )

                    replaced = True

            if line.startswith("></iframe>"):
                if not replaced:
                    # not generated by PyVis: emit the element as it was
                    output.extend(pending)

                in_iframe = False
                replaced = False
                pending = []

        if in_iframe and not replaced:
            # the text ended inside an element which was not replaced
            output.extend(pending)

        return "\n".join(output)


if __name__ == "__main__":
    # command line entry point: the first argument is the path of a
    # markdown file, which gets converted and then overwritten in place
    try:
        filename: pathlib.Path = pathlib.Path(sys.argv[1])
        _parent: pathlib.Path = filename.parent
        _stem: str = filename.stem

        # NB: `ic()` prints the names of its arguments
        ic(filename, _parent, _stem)

        conv: Converter = Converter(
            "https://github.com/DerwenAI/textgraphs/blob/main",
        )

        html: str = filename.read_text(encoding = "utf-8")

        # first replace the boilerplate header cell, then replace the
        # PyVis iframes in the result
        html = conv.replace_pyvis_iframe(  # pylint: disable=C0103
            conv.replace_sys_header(
                html,
                _stem,
                debug = False,
            ),
            _parent,
            _stem,
            debug = True,
        )

        filename.write_text(html, encoding = "utf-8")

    except Exception as ex:  # pylint: disable=W0718
        # report any failure, without raising it further
        ic(ex)
        traceback.print_exc()