# arxiv_fetcher.py

import arxiv
from typing import List, Dict, Any
import logging

# Configure the root logger once at import time: INFO and above, with a
# timestamp and severity prefix on every record.
_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(
    level=logging.INFO,
    format=_LOG_FORMAT,
)

# The arXiv API serves at most this many results per request (page).
_ARXIV_MAX_PAGE_SIZE = 2000


def _abs_url(entry_id: str) -> str:
    """Return the abstract-page URL for an arXiv ``entry_id``."""
    # Old-style identifiers contain a slash (e.g. "hep-th/9901001v1"), so keep
    # everything after "/abs/" instead of only the last path segment.
    _, separator, short_id = entry_id.partition("/abs/")
    if not separator:
        # Not an ".../abs/<id>" URL: fall back to the final path segment.
        short_id = entry_id.rsplit("/", 1)[-1]
    return f"http://arxiv.org/abs/{short_id}"


def _result_to_metadata(result: Any) -> Dict[str, Any]:
    """Convert one search result object into a plain metadata dictionary."""
    return {
        "title": result.title,
        "authors": [author.name for author in result.authors],
        "published": result.published.isoformat(),
        "updated": result.updated.isoformat(),
        "pdf_url": result.pdf_url,
        "entry_id": result.entry_id,
        "summary": result.summary,
        "categories": result.categories,
        "primary_category": result.primary_category,
        "html_url": _abs_url(result.entry_id),
    }


def fetch_arxiv_metadata(query: str, max_results: int = 10) -> List[Dict[str, Any]]:
    """Fetch metadata for the most recently submitted arXiv papers matching a query.

    Args:
        query: arXiv search query string. ``None``, empty and whitespace-only
            queries are rejected without contacting the API.
        max_results: Maximum number of papers to return. Must be positive;
            any other value is rejected without contacting the API.

    Returns:
        A list of dictionaries, one per paper, with the keys ``title``,
        ``authors``, ``published``, ``updated``, ``pdf_url``, ``entry_id``,
        ``summary``, ``categories``, ``primary_category`` and ``html_url``.
        The list is empty when the input is rejected. If an error occurs
        while fetching, the error is logged and the results collected so far
        (possibly none) are returned; no exception is propagated.
    """
    logging.info("Fetching arXiv metadata for query: %s", query)
    if not query or not query.strip():
        logging.warning("Empty or whitespace-only query provided")
        return []
    if max_results is None or max_results <= 0:
        logging.warning("Non-positive max_results provided: %r", max_results)
        return []

    # Clamp the page size: requesting more than the API's per-page limit
    # makes every request fail, while the client pages transparently anyway.
    client = arxiv.Client(
        page_size=min(max_results, _ARXIV_MAX_PAGE_SIZE),
        delay_seconds=3,
        num_retries=3,
    )
    search = arxiv.Search(
        query=query,
        max_results=max_results,
        sort_by=arxiv.SortCriterion.SubmittedDate,
    )

    results: List[Dict[str, Any]] = []
    try:
        for result in client.results(search):
            results.append(_result_to_metadata(result))
    except Exception as e:
        # Deliberate best-effort boundary: keep whatever was fetched before
        # the failure, but record the full traceback for diagnosis.
        logging.exception("Error fetching metadata: %s", e)
    else:
        logging.info("Fetched metadata for %d papers", len(results))

    return results