from typing import List, Optional, Dict

from smolagents import Tool


class HFLinkReportTool(Tool):
    """Generate a single-layout HTML report (cards + counters) from a final textual answer.

    The tool extracts links from the provided text, categorizes them (HF models/datasets/spaces/papers,
    blogs, repos, videos, news), and renders a consistent link report. Always returns a full HTML
    document (starts with <!DOCTYPE html>).
    """

    name = "hf_links_to_report"
    description = (
        "Create an HTML report from a final answer text. The tool parses links, groups them into categories "
        "(Hugging Face models/datasets/spaces/papers and external resources like blogs/repos/videos/news), and renders cards. "
        "Inputs: final_answer (string, required), query (string, optional), title (string, optional). Returns an HTML document."
    )
    inputs = {
        "final_answer": {"type": "string", "description": "Final answer text containing inline links"},
        "query": {"type": "string", "description": "Original user intent or topic", "nullable": True},
        "title": {"type": "string", "description": "Dashboard title", "nullable": True},
    }
    output_type = "string"

    def forward(self, final_answer: str, query: Optional[str] = None, title: Optional[str] = None) -> str:
        try:
            import re
            import json as _json

            doc_title = title or "Report"
            query = (query or "").strip()

            # Extract URLs
            urls = re.findall(r"https?://[^\s)\]]+", final_answer or "")

            # Categorize
            cats = {
                "models": [], "datasets": [], "spaces": [], "papers": [],
                "blogs": [], "repos": [], "videos": [], "news": [], "other": [],
            }
            for u in urls:
                low = u.lower()
                if "huggingface.co/" in low:
                    # Prefer explicit kinds first to avoid misclassifying /datasets/* as generic owner/repo
                    if "/datasets/" in low:
                        cats["datasets"].append(u)
                    elif "/spaces/" in low:
                        cats["spaces"].append(u)
                    elif "/papers/" in low:
                        cats["papers"].append(u)
                    elif "/models/" in low:
                        cats["models"].append(u)
                    else:
                        # Treat a bare owner/repo as a model only if it is NOT under known sections,
                        # e.g. huggingface.co/owner/repo -> model repo; huggingface.co/blog/... -> blog
                        m = re.search(r"huggingface\.co/([^/]+)/([^/]+)$", low)
                        if m and m.group(1) not in {"datasets", "spaces", "papers", "blog", "learn", "docs", "organizations", "collections"}:
                            cats["models"].append(u)
                        else:
                            cats["blogs"].append(u)
                elif "github.com" in low:
                    cats["repos"].append(u)
                elif "youtube.com" in low or "youtu.be" in low:
                    cats["videos"].append(u)
                elif any(d in low for d in ["arxiv.org", "medium.com", "towardsdatascience.com", "huggingface.co/blog", "huggingface.co/learn"]):
                    cats["blogs"].append(u)
                elif any(d in low for d in ["theverge.com", "techcrunch.com", "venturebeat.com", "wired.com", "mit.edu"]):
                    cats["news"].append(u)
                else:
                    cats["other"].append(u)

            def chips_section():
                chips = [
                    ("Models", len(cats["models"])), ("Datasets", len(cats["datasets"])),
                    ("Spaces", len(cats["spaces"])), ("Papers", len(cats["papers"])),
                    ("Blogs/Docs", len(cats["blogs"])), ("Repos", len(cats["repos"])),
                    ("Videos", len(cats["videos"])), ("News", len(cats["news"])),
                ]
                # Counter chips; markup and class names are a minimal placeholder template.
                return "\n".join([f'<span class="chip">{label}: {count}</span>' for label, count in chips])

            def card_section(label, links):
                # One card per non-empty category, listing its links; minimal placeholder markup.
                if not links:
                    return ""
                items = "\n".join([f'<li><a href="{u}" target="_blank" rel="noopener">{u}</a></li>' for u in links])
                return f'<section class="card"><h2>{label}</h2><ul>{items}</ul></section>'

            cards = "\n".join([
                card_section("Models", cats["models"]),
                card_section("Datasets", cats["datasets"]),
                card_section("Spaces", cats["spaces"]),
                card_section("Papers", cats["papers"]),
                card_section("Blogs/Docs", cats["blogs"]),
                card_section("Repos", cats["repos"]),
                card_section("Videos", cats["videos"]),
                card_section("News", cats["news"]),
                card_section("Other", cats["other"]),
            ])
            subtitle = f'<p class="query">{query}</p>' if query else ""

            # Single consistent layout: counters on top, category cards below.
            # Always return a full HTML document starting with <!DOCTYPE html>.
            return (
                "<!DOCTYPE html>\n"
                f'<html><head><meta charset="utf-8"><title>{doc_title}</title></head>'
                f"<body><h1>{doc_title}</h1>{subtitle}"
                f'<div class="chips">{chips_section()}</div>'
                f"{cards}</body></html>"
            )
        except Exception as e:
            return f"Error generating report: {str(e)}"
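

# Usage sketch (illustrative only): calls forward() directly with example URLs to show the
# categorization and the returned document; in an agent setup the instance would instead be
# passed to the agent via its tools list.
if __name__ == "__main__":
    sample_answer = (
        "Fine-tune https://huggingface.co/distilbert/distilbert-base-uncased on "
        "https://huggingface.co/datasets/stanfordnlp/imdb with scripts from "
        "https://github.com/huggingface/transformers and background reading at "
        "https://arxiv.org/abs/1910.01108"
    )
    tool = HFLinkReportTool()
    report_html = tool.forward(
        sample_answer,
        query="resources for sentiment analysis fine-tuning",
        title="Sentiment Analysis Link Report",
    )
    print(report_html[:80])  # prints the start of the document, beginning with <!DOCTYPE html>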