"""Diff visualization utilities."""

import difflib
import re
from typing import Iterator

# Matches the opening of a <table> tag that carries no class attribute yet.
# Used so the "diff" class is added only where it is missing; difflib already
# emits class="diff" on the tables it generates, and a second class attribute
# on the same tag would be invalid HTML.
_TABLE_WITHOUT_CLASS = re.compile(r'<table\b(?![^>]*\bclass\s*=)')


def generate_html_diff(original: str, cleaned: str) -> str:
    """Generate HTML diff between original and cleaned text.

    Args:
        original: The text before cleaning.
        cleaned: The text after cleaning.

    Returns:
        A complete HTML document containing a side-by-side diff table with
        the columns titled "Original Text" and "Cleaned Text", post-processed
        by ``_style_diff_html``.
    """
    differ = difflib.HtmlDiff(tabsize=2, wrapcolumn=80)

    # keepends=True preserves line terminators so that differences in line
    # endings between the two texts are part of the comparison.
    original_lines = original.splitlines(keepends=True)
    cleaned_lines = cleaned.splitlines(keepends=True)

    # context=True restricts the output to changed regions, each surrounded
    # by numlines lines of unchanged context.
    html_diff = differ.make_file(
        original_lines,
        cleaned_lines,
        "Original Text",
        "Cleaned Text",
        context=True,
        numlines=3,
    )
    return _style_diff_html(html_diff)


def _style_diff_html(html_diff: str) -> str:
    """Add custom styling to the diff HTML.

    Args:
        html_diff: A full HTML document as produced by
            ``difflib.HtmlDiff.make_file``.

    Returns:
        The document with an extra ``<style>`` block inserted immediately
        before ``</head>`` and with ``class="diff"`` added to any ``<table>``
        tag that has no class attribute.
    """
    # NOTE(review): the project's original stylesheet rules are not available
    # here; these are minimal overrides of difflib's own CSS class names.
    # Replace with the intended rules.
    custom_style = """<style type="text/css">
    table.diff { font-family: monospace; border-collapse: collapse; width: 100%; }
    .diff_header { background-color: #e0e0e0; text-align: right; }
    .diff_add { background-color: #aaffaa; }
    .diff_chg { background-color: #ffff77; }
    .diff_sub { background-color: #ffaaaa; }
</style>
"""
    # Insert custom style before the closing head tag. The search string must
    # be non-empty: str.replace('') would insert the style between every
    # character of the document. count=1 keeps the insertion to a single spot.
    styled_html = html_diff.replace('</head>', f'{custom_style}</head>', 1)

    # Add diff class to tables that do not already have a class attribute.
    styled_html = _TABLE_WITHOUT_CLASS.sub('<table class="diff"', styled_html)
    return styled_html


def get_diff_stats(original: str, cleaned: str) -> dict[str, float]:
    """Get statistics about the diff between original and cleaned text.

    Args:
        original: The text before cleaning.
        cleaned: The text after cleaning.

    Returns:
        A dict with the keys:
            ``original_length``: number of characters in ``original``.
            ``cleaned_length``: number of characters in ``cleaned``.
            ``similarity_ratio``: character-level ``SequenceMatcher`` ratio
                as a percentage, rounded to two decimals (a float).
            ``characters_removed``: ``len(original) - len(cleaned)``; this is
                negative when ``cleaned`` is longer than ``original``.
    """
    matcher = difflib.SequenceMatcher(None, original, cleaned)
    return {
        'original_length': len(original),
        'cleaned_length': len(cleaned),
        'similarity_ratio': round(matcher.ratio() * 100, 2),
        'characters_removed': len(original) - len(cleaned),
    }