import re from typing import List, Dict import os def parse_generation_history(file_path: str) -> Dict[int, List[str]]: """Improved parser that handles math symbols and spaces correctly""" history = {} token_pattern = re.compile(r"\*([^&]*)&?") with open(file_path, 'r', encoding='utf-8') as f: for line in f: line = line.strip() if not line: continue try: step_part, content_part = line.split(',', 1) step = int(step_part.strip()) except ValueError: continue tokens = [] for match in token_pattern.finditer(content_part): raw_token = match.group(1).strip() if raw_token == "": tokens.append(" ") elif raw_token == "*": tokens.append("*") else: tokens.append(raw_token) while len(tokens) < 64: tokens.append(" ") if len(tokens) > 64: print(f"Truncating extra tokens: Step {step} ({len(tokens)} tokens)") tokens = tokens[:64] elif len(tokens) < 64: print(f"Padding missing tokens: Step {step} ({len(tokens)} tokens)") tokens += [" "] * (64 - len(tokens)) tokens = tokens[:62] history[step] = tokens return history def track_token_positions(history: Dict[int, List[str]]) -> List[int]: """Track the first generation step for each token""" num_positions = 64 steps_to_unmask = [-1] * num_positions for step in sorted(history.keys()): tokens = history[step] for idx in range(num_positions): if idx >= len(tokens): continue token = tokens[idx] if steps_to_unmask[idx] == -1 and token != '<|mdm_mask|>': steps_to_unmask[idx] = step return steps_to_unmask def generate_background_color(step: int, max_step: int) -> str: """Generate gradient color for text (darker version)""" color_stops = [ (176, 202, 224), (143, 179, 207), (110, 156, 191), (80, 130, 240), (40, 90, 200), (20, 70, 180), (0, 50, 160), ] color_index = min(int(step ** 0.7 / max_step ** 0.7 * 6), 6) ratio = (step % 2) / 2 start = color_stops[color_index] end = color_stops[min(color_index + 1, 6)] r = int(start[0] + (end[0] - start[0]) * ratio) g = int(start[1] + (end[1] - start[1]) * ratio) b = int(start[2] + (end[2] - start[2]) * ratio) return f"#{r:02x}{g:02x}{b:02x}" def generate_step_visualization(current_step: int, current_tokens: List[str], token_steps: List[int], max_step: int) -> str: """Final visualization version (completely borderless)""" html = [] for idx, token in enumerate(current_tokens): style = [ "padding: 6px 8px", "margin: 2px", "border-radius: 6px", "display: inline-block", "font-weight: 600", "font-size: 16px", "font-family: 'Segoe UI', sans-serif", "transition: all 0.2s ease", "width: 120px", "min-width: 120px", "text-align: center", "white-space: nowrap", "overflow: hidden", "text-overflow: ellipsis", "box-sizing: border-box", "vertical-align: middle", "border: 0 !important", "outline: 0 !important", "box-shadow: none !important", "position: relative", "z-index: 1" ] if token == '<|mdm_mask|>': style.extend([ "color: transparent", "background: #f8fafc", "text-shadow: none" ]) display_text = "" else: text_color = generate_background_color(token_steps[idx], max_step) style.append(f"color: {text_color}") display_text = token if token != " " else "␣" html.append(f'''