Spaces:
Paused
Paused
File size: 1,670 Bytes
84deff7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
# arxiv_metadata_service.py
from typing import List, Dict, Any
import logging
from huggingface_dataset_manager import HuggingFaceDatasetManager
from arxiv_fetcher import fetch_arxiv_metadata
# Configure the root logger as soon as this module is imported: INFO level and
# above, each record rendered as "<timestamp> - <level name> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
class ArxivMetadataService:
    """Fetch arXiv metadata for a query and hand it to a dataset manager.

    The service owns a single ``HuggingFaceDatasetManager`` created for the
    dataset name given at construction time and exposes it as
    ``dataset_manager``.
    """

    def __init__(self, dataset_name: str):
        """Build the dataset manager for ``dataset_name``.

        Args:
            dataset_name: Name passed straight through to
                ``HuggingFaceDatasetManager``.
        """
        self.dataset_manager = HuggingFaceDatasetManager(dataset_name)

    def fetch_and_persist_metadata(self, query: str, max_results: int = 10) -> bool:
        """Fetch metadata for ``query`` and persist it through the manager.

        Args:
            query: Search string forwarded to ``fetch_arxiv_metadata``.
            max_results: Result limit forwarded to ``fetch_arxiv_metadata``.

        Returns:
            ``False`` (after logging a warning) when the fetch yields nothing
            truthy; otherwise whatever ``persist_to_dataset`` returns for the
            fetched records.
        """
        records = fetch_arxiv_metadata(query, max_results)
        if records:
            return self.dataset_manager.persist_to_dataset(records)

        # Nothing came back, so there is nothing to hand to the manager.
        logging.warning("No metadata fetched to persist.")
        return False
def test_arxiv_metadata_service():
    """Run a manual smoke check of ``ArxivMetadataService``.

    Builds a service for the "dwb2023/arxiv-papers-dataset" dataset, fetches
    and persists up to 5 results for the query "quantum computing", prints
    whether that succeeded, and then prints the row count, features and
    last-modified value reported by the dataset manager.

    The function makes no assertions; it only prints its findings.
    """
    test_query = "quantum computing"
    service = ArxivMetadataService("dwb2023/arxiv-papers-dataset")

    persisted = service.fetch_and_persist_metadata(test_query, 5)
    print(
        f"Successfully fetched and persisted metadata for query: '{test_query}'"
        if persisted
        else f"Failed to fetch and persist metadata for query: '{test_query}'"
    )

    # Report the dataset state regardless of whether persisting succeeded.
    info = service.dataset_manager.get_dataset_info()
    print("\nDataset Info:")
    print(f"Number of rows: {info.get('num_rows', 'N/A')}")
    print(f"Features: {info.get('features', 'N/A')}")
    print(f"Last modified: {info.get('last_modified', 'N/A')}")
# Run the smoke check only when this file is executed as a script, not when
# it is imported by another module.
if __name__ == "__main__":
    test_arxiv_metadata_service()
|