#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Convert the markdown generated from Jupyter notebooks to preserve
rendered images, etc.
"""

import os
import pathlib
import re
import sys
import time
import traceback
import typing

try:
    from icecream import ic  # pylint: disable=E0401
except ImportError:
    def ic (*args: typing.Any) -> typing.Any:  # pylint: disable=C0103
        """
Minimal stand-in used only when `icecream` is not installed:
echo the arguments to stderr and hand them back.
        """
        print("ic|", *args, file = sys.stderr)

        if not args:
            return None

        return args[0] if len(args) == 1 else args

try:
    from selenium import webdriver  # pylint: disable=E0401
except ImportError:
    # the text-only transforms remain usable; `render_screenshot()`
    # reports the missing dependency when it is actually needed
    webdriver = None  # type: ignore  # pylint: disable=C0103


class Converter:
    """
HTML/Markdown conversion
    """
    # the boilerplate first code cell of a tutorial notebook, as
    # rendered into markdown
    PAT_HEADER = re.compile(r"^(```python\n\# for use.*production:\n.*\n```\n)", re.MULTILINE)

    # the `src` attribute of an <iframe/> element
    PAT_SOURCE = re.compile(r"\s+src\=\"(\S+)\"")

    # admonition which replaces the boilerplate cell; the slots are
    # filled with: notebook stem, source URL, notebook stem
    REPLACEMENT_HEADER: str = """
!!! note
    To run this notebook in JupyterLab, load [`examples/{}.ipynb`]({}/examples/{}.ipynb)

"""

    def __init__ (
        self,
        src_url: str,
        ) -> None:
        """
Constructor.

    src_url:
base URL of the source repository, used to build notebook links
        """
        self.src_url: str = src_url


    def replace_sys_header (
        self,
        text: str,
        stem: str,
        *,
        debug: bool = False,
        ) -> str:
        """
Replace the initial cell in a tutorial notebook.

    text:
markdown text generated from the notebook

    stem:
file name of the notebook, without its extension

    debug:
trace each header match attempt

    returns:
the text with each matched header cell replaced by a note linking to
the notebook; text without a header cell is returned unchanged
        """
        output: typing.List[ str ] = []

        # since the pattern has a capture group, `split()` also yields
        # the matched header cells as chunks of their own
        for chunk in self.PAT_HEADER.split(text):
            m_header: typing.Optional[ re.Match ] = self.PAT_HEADER.match(chunk)

            if debug:
                ic(m_header)

            if m_header:
                output.append(self.REPLACEMENT_HEADER.format(stem, self.src_url, stem))
            else:
                output.append(chunk)

        return "\n".join(output)


    def get_pyvis_html (
        self,
        iframe: str,
        *,
        debug: bool = False,
        ) -> typing.Optional[ str ]:
        """
Locate the HTML files generated by `PyVis` if any.
This assumes the HTML files are named `tmp.fig*.*`

    iframe:
text from an <iframe/> element which may hold a `src` attribute

    debug:
trace the extracted `src` value

    returns:
the `src` value when it names a `PyVis` file, otherwise `None`
        """
        m_source: typing.Optional[ re.Match ] = self.PAT_SOURCE.search(iframe)

        if m_source is None:
            return None

        source_html: str = m_source.group(1)

        if debug:
            ic(source_html)

        if "tmp.fig" not in source_html:
            # <iframe/> wasn't generated by PyVis
            return None

        return source_html


    def render_screenshot (
        self,
        source_html: str,
        source_png: str,
        *,
        delay: float = 10.0,
        ) -> None:
        """
Use Selenium to render `source_png` from `source_html`

    source_html:
URL of the HTML page to load in the browser

    source_png:
path of the PNG file to write

    delay:
seconds to wait between loading the page and capturing it

    raises:
`RuntimeError` when `selenium` is not installed
        """
        if webdriver is None:
            raise RuntimeError("selenium is required to render screenshots")

        browser = webdriver.Chrome()

        try:
            browser.get(source_html)

            # presumably gives the page time to finish drawing the
            # graph before the capture
            time.sleep(delay)

            if not browser.get_screenshot_as_file(source_png):
                print(f"warning: could not write screenshot {source_png}", file = sys.stderr)
        finally:
            # always release the browser process, even when rendering fails
            browser.quit()


    def _render_pyvis_image (
        self,
        src_html: str,
        parent: pathlib.Path,
        stem: str,
        *,
        debug: bool = False,
        ) -> str:
        """
Render one `PyVis` HTML file as a PNG under `{parent}/{stem}_files`
and return the markdown image reference for it.
        """
        src_png: str = src_html.replace(".html", ".png")

        if debug:
            ic(src_png)

        image_dir: pathlib.Path = pathlib.Path(parent) / f"{stem}_files"
        image_dir.mkdir(parents = True, exist_ok = True)

        self.render_screenshot(
            f"file://{os.getcwd()}/examples/{src_html}",
            f"{parent}/{stem}_files/{src_png}",
        )

        # link relative to the markdown file, which sits in `parent`
        return f"![png]({stem}_files/{src_png})"


    def replace_pyvis_iframe (
        self,
        text: str,
        parent: pathlib.Path,
        stem: str,
        *,
        debug: bool = False,
        ) -> str:
        """
Substitute static images for the rendered graphs.

    text:
markdown text generated from the notebook

    parent:
directory of the markdown file; screenshots are written into its
`{stem}_files` subdirectory

    stem:
file name of the notebook, without its extension

    debug:
trace the intermediate file names

    returns:
the text with each `PyVis` <iframe/> replaced by a markdown image
reference; any other <iframe/> is passed through unchanged
        """
        output: typing.List[ str ] = []
        pending: typing.List[ str ] = []  # raw lines of the <iframe/> being scanned
        image: typing.Optional[ str ] = None  # replacement for that <iframe/>, if any
        in_iframe: bool = False

        for line in text.split("\n"):
            if not in_iframe and line.startswith("<iframe"):
                in_iframe = True
                pending = []
                image = None

            if not in_iframe:
                output.append(line)
                continue

            pending.append(line)

            if image is None and line.strip().startswith("src="):
                src_html: typing.Optional[ str ] = self.get_pyvis_html(line, debug = debug)

                if src_html is not None:
                    image = self._render_pyvis_image(src_html, parent, stem, debug = debug)

            if "</iframe>" in line:
                # end of the element: emit the image, or else the
                # untouched lines when PyVis did not generate it
                if image is not None:
                    output.append(image)
                else:
                    output.extend(pending)

                in_iframe = False

        if in_iframe:
            # unterminated <iframe/>: do not lose the trailing text
            if image is not None:
                output.append(image)
            else:
                output.extend(pending)

        return "\n".join(output)


if __name__ == "__main__":
    try:
        conv: Converter = Converter(
            "https://github.com/DerwenAI/textgraphs/blob/main",
        )

        # path of the markdown file to convert in place
        filename: pathlib.Path = pathlib.Path(sys.argv[1])
        _parent: pathlib.Path = filename.parent
        _stem: str = filename.stem

        ic(filename, _parent, _stem)

        with open(filename, "r", encoding = "utf-8") as fp:
            html: str = fp.read()

        html = conv.replace_sys_header(  # pylint: disable=C0103
            html,
            _stem,
            debug = False,
        )

        html = conv.replace_pyvis_iframe(  # pylint: disable=C0103
            html,
            _parent,
            _stem,
            debug = True,
        )

        with open(filename, "w", encoding = "utf-8") as fp:
            fp.write(html)

    except Exception as ex:  # pylint: disable=W0718
        # report the failure without re-raising
        ic(ex)
        traceback.print_exc()