# arxiv_fetcher.py
"""Fetch paper metadata from arXiv through the ``arxiv`` client library."""

import logging
from typing import Any, Dict, List

import arxiv

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)

# The arXiv API documents a cap of 2000 results per single request; larger
# result sets have to be paged, so the client's page size must not exceed it.
_ARXIV_MAX_PAGE_SIZE = 2000


def _short_id(entry_id: str) -> str:
    """Return the arXiv identifier embedded in an ``entry_id`` URL.

    Everything after ``/abs/`` is kept, so old-style identifiers that
    contain a slash (e.g. ``hep-th/9901001v1``) keep their archive prefix.
    If the marker is absent, fall back to the last path segment.
    """
    _, marker, identifier = entry_id.partition("/abs/")
    if marker:
        return identifier
    return entry_id.rsplit("/", 1)[-1]


def fetch_arxiv_metadata(query: str, max_results: int = 10) -> List[Dict[str, Any]]:
    """Search arXiv and return one metadata dictionary per matching paper.

    Args:
        query: Search expression passed unchanged to ``arxiv.Search``.
            ``None``, empty and whitespace-only values yield an empty list.
        max_results: Upper bound on the number of papers requested.
            Values below 1 yield an empty list.

    Returns:
        A list of dictionaries with the keys ``title``, ``authors``,
        ``published``, ``updated``, ``pdf_url``, ``entry_id``, ``summary``,
        ``categories``, ``primary_category`` and ``html_url``. The search is
        sorted by submission date. If an error occurs while iterating over
        the results, it is logged and the entries collected up to that point
        are returned (possibly an empty list); no exception is propagated
        from the fetch loop.
    """
    logging.info(f"Fetching arXiv metadata for query: {query}")

    # ``not query`` also covers None, which would otherwise fail on .strip().
    if not query or not query.strip():
        logging.warning("Empty or whitespace-only query provided")
        return []

    # A non-positive limit cannot produce results and must not be forwarded
    # as a page size.
    if max_results < 1:
        logging.warning(f"Non-positive max_results provided: {max_results}")
        return []

    client = arxiv.Client(
        page_size=min(max_results, _ARXIV_MAX_PAGE_SIZE),
        delay_seconds=3,
        num_retries=3,
    )
    search = arxiv.Search(
        query=query,
        max_results=max_results,
        sort_by=arxiv.SortCriterion.SubmittedDate,
    )

    results: List[Dict[str, Any]] = []
    try:
        for result in client.results(search):
            metadata = {
                "title": result.title,
                "authors": [author.name for author in result.authors],
                "published": result.published.isoformat(),
                "updated": result.updated.isoformat(),
                "pdf_url": result.pdf_url,
                "entry_id": result.entry_id,
                "summary": result.summary,
                "categories": result.categories,
                "primary_category": result.primary_category,
                "html_url": f"http://arxiv.org/abs/{_short_id(result.entry_id)}",
            }
            results.append(metadata)
        logging.info(f"Fetched metadata for {len(results)} papers")
    except Exception as e:
        # Best effort: keep whatever was collected before the failure.
        logging.error(f"Error fetching metadata: {str(e)}")

    return results