import re
import base64
import json
from typing import Dict, List, Optional, Tuple, Union
from pathlib import Path
from utils import apply_search_replace_changes, validate_video_html
from media_generation import generate_image_with_qwen, generate_image_to_image, generate_video_from_image, generate_video_from_text, generate_music_from_text
from config import SEARCH_START, DIVIDER, REPLACE_END
class CodeProcessor:
    """Handles processing and transformation of various code formats"""

    @staticmethod
    def is_streamlit_code(code: str) -> bool:
        """Return True if Python source *code* looks like a Streamlit app.

        Matches an explicit streamlit import, or use of the conventional
        ``st.`` alias when the word "streamlit" also appears in the source.
        """
        if not code:
            return False
        lowered = code.lower()
        return (
            "import streamlit" in lowered
            or "from streamlit" in lowered
            or ("st." in code and "streamlit" in lowered)
        )

    @staticmethod
    def is_gradio_code(code: str) -> bool:
        """Return True if Python source *code* looks like a Gradio app."""
        if not code:
            return False
        lowered = code.lower()
        return (
            "import gradio" in lowered
            or "from gradio" in lowered
            or "gr.Interface(" in code
            or "gr.Blocks(" in code
        )

    @staticmethod
    def extract_html_document(text: str) -> str:
        """Extract HTML document from text, ignoring planning notes.

        NOTE(review): the tail of this method was destroyed by extraction
        garbling in the original file. Reconstructed here as "return the
        text from the first <!doctype html>/<html marker onward, or the
        input unchanged when no marker exists" — confirm against VCS history.
        """
        if not text:
            return text
        lower = text.lower()
        idx = lower.find("<!doctype html")
        if idx == -1:
            idx = lower.find("<html")
        return text[idx:] if idx != -1 else text


class TransformersJSProcessor:
    """Handles transformers.js specific code processing.

    NOTE(review): this class header was lost in the same garbled span as
    ``extract_html_document`` above; the class name is inferred from the
    ``TransformersJSProcessor._create_debug_overlay`` references later in
    the file — confirm against VCS history.
    """

    @staticmethod
    def parse_transformers_js_output(text: str) -> Dict[str, str]:
        """Parse transformers.js output and extract the three files.

        Returns a dict with keys ``index.html``, ``index.js`` and
        ``style.css``; missing files are empty strings.
        """
        files = {
            'index.html': '',
            'index.js': '',
            'style.css': ''
        }
        if not text:
            return files
        # Multiple patterns for different code block variations; \Z lets an
        # unterminated trailing fence still match.
        html_patterns = [
            r'```html\s*\n([\s\S]*?)(?:```|\Z)',
            r'```htm\s*\n([\s\S]*?)(?:```|\Z)',
            r'```\s*(?:index\.html|html)\s*\n([\s\S]*?)(?:```|\Z)'
        ]
        js_patterns = [
            r'```javascript\s*\n([\s\S]*?)(?:```|\Z)',
            r'```js\s*\n([\s\S]*?)(?:```|\Z)',
            r'```\s*(?:index\.js|javascript|js)\s*\n([\s\S]*?)(?:```|\Z)'
        ]
        css_patterns = [
            r'```css\s*\n([\s\S]*?)(?:```|\Z)',
            r'```\s*(?:style\.css|css)\s*\n([\s\S]*?)(?:```|\Z)'
        ]
        # First pattern that matches wins for each file type.
        for pattern in html_patterns:
            html_match = re.search(pattern, text, re.IGNORECASE)
            if html_match:
                files['index.html'] = html_match.group(1).strip()
                break
        for pattern in js_patterns:
            js_match = re.search(pattern, text, re.IGNORECASE)
            if js_match:
                files['index.js'] = js_match.group(1).strip()
                break
        for pattern in css_patterns:
            css_match = re.search(pattern, text, re.IGNORECASE)
            if css_match:
                files['style.css'] = css_match.group(1).strip()
                break
        # Fallback: support === filename === format
        if not (files['index.html'] and files['index.js'] and files['style.css']):
            fallback_files = MultipageProcessor.parse_multipage_html_output(text)
            for key in files.keys():
                if key in fallback_files:
                    files[key] = fallback_files[key]
        return files
@staticmethod
def format_transformers_js_output(files: Dict[str, str]) -> str:
"""Format the three files into a single display string"""
output = []
output.append("=== index.html ===")
output.append(files.get('index.html', ''))
output.append("\n=== index.js ===")
output.append(files.get('index.js', ''))
output.append("\n=== style.css ===")
output.append(files.get('style.css', ''))
return '\n'.join(output)
@staticmethod
def build_transformers_inline_html(files: Dict[str, str]) -> str:
    """Merge transformers.js files into a single HTML document"""
    # NOTE(review): several regex and string literals in this method were
    # emptied or cut mid-token by an HTML-tag-stripping extraction pass
    # (the finditer/sub patterns below, the minimal HTML shell, and the
    # style/script tag f-strings). The code is not runnable as-is; restore
    # the lost literals from VCS history. Indentation/nesting below is a
    # best-effort reconstruction — confirm against the original.
    # Missing entries default to empty strings rather than raising.
    html = files.get('index.html') or ''
    js = files.get('index.js') or ''
    css = files.get('style.css') or ''
    # Normalize JS imports to stable CDN
    cdn_url = "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.7.2"
    def _normalize_imports(_code: str) -> str:
        # Rewrite bare-specifier and versioned-CDN imports to the pinned URL.
        if not _code:
            return _code or ""
        _code = re.sub(r"from\s+['\"]@huggingface/transformers['\"]", f"from '{cdn_url}'", _code)
        _code = re.sub(r"from\s+['\"]@xenova/transformers['\"]", f"from '{cdn_url}'", _code)
        _code = re.sub(r"from\s+['\"]https://cdn.jsdelivr.net/npm/@huggingface/transformers@[^'\"]+['\"]", f"from '{cdn_url}'", _code)
        _code = re.sub(r"from\s+['\"]https://cdn.jsdelivr.net/npm/@xenova/transformers@[^'\"]+['\"]", f"from '{cdn_url}'", _code)
        return _code
    # Extract and merge inline module scripts
    # NOTE(review): both patterns below were emptied by the garbling —
    # presumably they matched <script type="module"> blocks; TODO restore.
    inline_modules = []
    try:
        for _m in re.finditer(r"", html, flags=re.IGNORECASE):
            inline_modules.append(_m.group(1))
        if inline_modules:
            html = re.sub(r"\s*", "", html, flags=re.IGNORECASE)
    except Exception:
        pass
    # Combine JS code
    combined_js_parts = []
    if inline_modules:
        combined_js_parts.append("\n\n".join(inline_modules))
    if js:
        combined_js_parts.append(js)
    js = "\n\n".join([p for p in combined_js_parts if (p and p.strip())])
    js = _normalize_imports(js)
    # Add prelude for better compatibility
    if js.strip():
        # Disables browser cache, forces single-threaded non-proxied WASM,
        # and lazily exposes the library on globalThis.transformers.
        prelude = (
            f"import {{ env }} from '{cdn_url}';\n"
            "try { env.useBrowserCache = false; } catch (e) {}\n"
            "try { if (env && env.backends && env.backends.onnx && env.backends.onnx.wasm) { env.backends.onnx.wasm.numThreads = 1; env.backends.onnx.wasm.proxy = false; } } catch (e) {}\n"
            f"(async () => {{ try {{ if (typeof globalThis.transformers === 'undefined') {{ const m = await import('{cdn_url}'); globalThis.transformers = m; }} }} catch (e) {{}} }})();\n"
        )
        js = prelude + js
    # Create minimal shell if needed
    # NOTE(review): this conditional and the fallback HTML shell were cut
    # mid-literal by the garbling and are not valid Python as written.
    doc = html.strip()
    if not doc or ('\n"
    "\n
    \n\n\nTransformers.js App\n\n"
    "\n\n\n"
    )
    # Remove local file references
    # NOTE(review): both patterns lost their leading "<link"/"<script" text.
    doc = re.sub(r"]+href=\"[^\"]*style\.css\"[^>]*>\s*", "", doc, flags=re.IGNORECASE)
    doc = re.sub(r"\s*", "", doc, flags=re.IGNORECASE)
    # Inline CSS
    if css:
        # NOTE(review): style_tag literal and </head>/<html> markers were
        # stripped; presumably inserts <style> before </head>, after <html>,
        # or prepends when neither exists — TODO confirm.
        style_tag = f""
        if '' in doc.lower():
            match = re.search(r"", doc, flags=re.IGNORECASE)
            if match:
                idx = match.start()
                doc = doc[:idx] + style_tag + doc[idx:]
        else:
            match = re.search(r"]*>", doc, flags=re.IGNORECASE)
            if match:
                idx = match.end()
                doc = doc[:idx] + "\n" + style_tag + doc[idx:]
            else:
                doc = style_tag + doc
    # Inline JS with debugging and cleanup
    if js:
        # NOTE(review): script_tag literal and the </body> marker were
        # stripped; inserts overlay + script + cleanup before </body>,
        # or appends when no </body> exists — TODO confirm.
        script_tag = f""
        debug_overlay = TransformersJSProcessor._create_debug_overlay()
        cleanup_tag = TransformersJSProcessor._create_cleanup_script()
        match = re.search(r"", doc, flags=re.IGNORECASE)
        if match:
            idx = match.start()
            doc = doc[:idx] + debug_overlay + script_tag + cleanup_tag + doc[idx:]
        else:
            doc = doc + debug_overlay + script_tag + cleanup_tag
    return doc
@staticmethod
def _create_debug_overlay() -> str:
    """Create debug overlay for transformers.js apps"""
    # NOTE(review): the overlay's HTML/JS markup was stripped to empty
    # string literals by extraction garbling — as written this returns
    # just "\n\n". Restore the original snippet from VCS history.
    return (
        "\n"
        "\n"
        ""
    )
@staticmethod
def _create_cleanup_script() -> str:
    """Create cleanup script for transformers.js apps"""
    # NOTE(review): the script markup was stripped to an empty string
    # literal by extraction garbling — restore from VCS history.
    return (
        ""
    )
class SvelteProcessor:
    """Handles Svelte specific code processing"""

    @staticmethod
    def parse_svelte_output(text: str) -> Dict[str, str]:
        """Parse Svelte output to extract individual files"""
        files = {
            'src/App.svelte': '',
            'src/app.css': ''
        }
        if not text:
            return files
        # Map each target path to the fenced-code-block pattern that yields it.
        extractors = {
            'src/App.svelte': r'```svelte\s*\n([\s\S]+?)\n```',
            'src/app.css': r'```css\s*\n([\s\S]+?)\n```',
        }
        for path, pattern in extractors.items():
            found = re.search(pattern, text, re.IGNORECASE)
            if found:
                files[path] = found.group(1).strip()
        # Fallback: support === filename === format
        if not (files['src/App.svelte'] and files['src/app.css']):
            fallback = MultipageProcessor.parse_multipage_html_output(text)
            for path in files:
                if path in fallback:
                    files[path] = fallback[path]
        return files

    @staticmethod
    def format_svelte_output(files: Dict[str, str]) -> str:
        """Format Svelte files into a single display string"""
        pieces = [
            "=== src/App.svelte ===",
            files.get('src/App.svelte', ''),
            "\n=== src/app.css ===",
            files.get('src/app.css', ''),
        ]
        return '\n'.join(pieces)
class MultipageProcessor:
"""Handles multi-page HTML projects"""
@staticmethod
def parse_multipage_html_output(text: str) -> Dict[str, str]:
    """Parse multi-page HTML output formatted as === filename === sections"""
    if not text:
        return {}
    from utils import remove_code_block
    cleaned = remove_code_block(text)
    # A section runs from one "=== name ===" header up to the next header
    # (lookahead) or end of input.
    section_re = re.compile(
        r"^===\s*([^=\n]+?)\s*===\s*\n([\s\S]*?)(?=\n===\s*[^=\n]+?\s*===|\Z)",
        re.MULTILINE,
    )
    files: Dict[str, str] = {}
    for section in section_re.finditer(cleaned):
        filename = section.group(1).strip()
        body = section.group(2).strip()
        # Remove accidental trailing fences
        body = re.sub(r"^```\w*\s*\n|\n```\s*$", "", body)
        files[filename] = body
    return files
@staticmethod
def format_multipage_output(files: Dict[str, str]) -> str:
"""Format files back into === filename === sections"""
if not isinstance(files, dict) or not files:
return ""
# Order with index.html first
ordered_paths = []
if 'index.html' in files:
ordered_paths.append('index.html')
for path in sorted(files.keys()):
if path == 'index.html':
continue
ordered_paths.append(path)
parts: List[str] = []
for path in ordered_paths:
parts.append(f"=== {path} ===")
parts.append((files.get(path) or '').rstrip())
return "\n".join(parts)
@staticmethod
def validate_and_autofix_files(files: Dict[str, str]) -> Dict[str, str]:
    """Ensure minimal contract for multi-file sites"""
    # NOTE(review): from the `links` assignment onward this method was
    # mangled by an HTML-tag-stripping extraction pass — anchor-tag and
    # HTML-shell literals are gone, the `patterns` list is cut mid-element,
    # and lines from a different method (`_inline_js` / single-preview
    # inlining) have been fused in. Not valid Python as written; restore
    # from VCS history. Indentation below is a best-effort reconstruction.
    if not isinstance(files, dict) or not files:
        return files or {}
    # Normalize keys: trim whitespace and drop any leading slash.
    normalized: Dict[str, str] = {}
    for k, v in files.items():
        safe_key = k.strip().lstrip('/')
        normalized[safe_key] = v
    html_files = [p for p in normalized.keys() if p.lower().endswith('.html')]
    has_index = 'index.html' in normalized
    # Create index.html if missing but other HTML files exist
    if not has_index and html_files:
        # NOTE(review): presumably builds <a href="..."> links to each page
        # inside a minimal index shell — literals lost, TODO confirm.
        links = '\n'.join([f"{p}" for p in html_files])
        normalized['index.html'] = (
        "\n\n\n\n"
        "\n"
        "Site Index\n\n\nSite
        \n\n\n"
        )
    # Collect asset references
    asset_refs: set[str] = set()
    patterns = [
        re.compile(r"]+href=\"([^\"]+)\""),
        re.compile(r""
    # NOTE(review): the lines below belong to a different (script-inlining)
    # method that was fused in by the garbling.
    return match.group(0)
    doc = re.sub(r"", _inline_js, doc, flags=re.IGNORECASE)
    # Add client-side navigation for other pages
    # NOTE(review): return value discarded here — likely should be
    # `doc = MultipageProcessor._add_client_side_navigation(doc, files)`.
    MultipageProcessor._add_client_side_navigation(doc, files)
    return doc
@staticmethod
def _add_client_side_navigation(doc: str, files: Dict[str, str]) -> str:
"""Add client-side navigation for multi-page preview"""
try:
html_pages = {k: v for k, v in files.items() if k.lower().endswith('.html')}
# Extract body content for each page
_index_body = re.search(r"]*>([\s\S]*?)", doc, flags=re.IGNORECASE)
html_pages['index.html'] = _index_body.group(1) if _index_body else doc
encoded = base64.b64encode(json.dumps(html_pages).encode('utf-8')).decode('ascii')
nav_script = (
""
)
m = re.search(r"