Spaces:
Sleeping
Sleeping
import yaml | |
import requests | |
from datetime import datetime | |
from typing import Dict, List, Any | |
def fetch_conference_files() -> List[Dict[str, Any]]:
    """Fetch the conference YAML files from the ccfddl repository.

    Lists the ``conference/AI`` directory of ``ccfddl/ccf-deadlines`` through
    the GitHub contents API, downloads every ``.yml`` file in it and parses
    each one with ``yaml.safe_load``.

    Returns:
        The first item of every downloaded file whose parsed content is a
        non-empty list. Files with any other shape are skipped.

    Raises:
        requests.RequestException: If the directory listing cannot be
            fetched or answers with an HTTP error status.
        ValueError: If the directory listing is not a JSON array.
    """
    # Without a timeout a stalled connection would hang the job indefinitely.
    timeout_seconds = 30

    # First get the directory listing from GitHub API
    api_url = "https://api.github.com/repos/ccfddl/ccf-deadlines/contents/conference/AI"
    response = requests.get(api_url, timeout=timeout_seconds)
    response.raise_for_status()
    files = response.json()
    if not isinstance(files, list):
        # Error payloads are JSON objects; iterating one would only yield
        # its keys and fail later with an unhelpful TypeError.
        raise ValueError(f"Unexpected directory listing from {api_url}: {files!r}")

    conferences = []
    for file in files:
        if not file['name'].endswith('.yml'):
            continue
        try:
            file_response = requests.get(file['download_url'], timeout=timeout_seconds)
            file_response.raise_for_status()
        except requests.RequestException as e:
            # One broken download should not discard every other conference.
            print(f"Warning: Could not download {file['name']}: {e}")
            continue
        conf_data = yaml.safe_load(file_response.text)
        # The data is a list with a single item
        if isinstance(conf_data, list) and conf_data:
            conferences.append(conf_data[0])
    return conferences
def parse_date_range(date_str: str, year: str) -> tuple[str, str]:
    """Parse a conference date string and return its start and end dates.

    Supported shapes (a trailing ``", <year>"`` is removed first):

    * a single day: ``"May 19, 2025"``
    * a range within one month: ``"June 3-5"`` or ``"Dec 1 - 5"``
    * a range across months: ``"April 29-May 4"``

    Month names may be written in full or abbreviated (``"Sept"`` and
    ``"Sep"`` are both accepted). En and em dashes are treated like hyphens.

    Args:
        date_str: The date text to parse.
        year: The year applied to both the start and the end date.

    Returns:
        A ``(start, end)`` tuple of ``YYYY-MM-DD`` strings.

    Raises:
        ValueError: If the text cannot be parsed.
    """
    abbreviations = {
        'Jan': 'January',
        'Feb': 'February',
        'Mar': 'March',
        'Apr': 'April',
        'Jun': 'June',
        'Jul': 'July',
        'Aug': 'August',
        'Sep': 'September',
        'Sept': 'September',
        'Oct': 'October',
        'Nov': 'November',
        'Dec': 'December',
    }
    # "May" has no separate abbreviation, so it is added explicitly; leaving
    # it out would make "May 4" look like a bare day number.
    month_names = set(abbreviations.values()) | {'May'}

    def expand_months(text: str) -> str:
        # Map whole words only. A substring replace would turn "June" into
        # "Junee" and "Sept" into "Septemberember".
        return ' '.join(abbreviations.get(word, word) for word in text.split())

    # Remove the year if it appears at the end of the string
    date_str = date_str.replace(f", {year}", "")
    try:
        normalized = date_str.replace('\u2013', '-').replace('\u2014', '-')
        if '-' in normalized:
            # More than one separator fails the unpacking and is reported
            # as a ValueError below.
            start, end = normalized.split('-')
        else:
            # Single date format like "May 19"
            start = end = normalized

        # Expanding also collapses surrounding and repeated whitespace.
        start = expand_months(start)
        end = expand_months(end)

        if not any(word in month_names for word in end.split()):
            # End is just a day number, use start's month
            start_words = start.split()
            if start_words:
                end = f"{start_words[0]} {end}"

        start_date = datetime.strptime(f"{start}, {year}", "%B %d, %Y")
        end_date = datetime.strptime(f"{end}, {year}", "%B %d, %Y")
        return start_date.strftime('%Y-%m-%d'), end_date.strftime('%Y-%m-%d')
    except Exception as e:
        raise ValueError(f"Could not parse date: {date_str} ({e})") from e
def transform_conference_data(conferences: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Transform ccfddl format to our format.

    For every conference, the first entry of its ``confs`` list whose
    ``year`` is the current year or later is selected. Conferences without
    such an entry are skipped.

    Args:
        conferences: Conference dicts as parsed from the ccfddl YAML files.

    Returns:
        One dict per selected conference with the keys ``title``, ``year``,
        ``id``, ``full_name``, ``link``, ``deadline``, ``timezone``,
        ``place``, ``date`` and ``tags``. ``abstract_deadline``, ``start``,
        ``end`` and ``rankings`` are added only when they are available.
    """
    transformed = []
    current_year = datetime.now().year
    for conf in conferences:
        # Get the first listed instance that is current or upcoming.
        # "or []" also covers a "confs" key that is present but null.
        recent_conf = next(
            (
                instance
                for instance in conf.get('confs') or []
                if instance['year'] >= current_year
            ),
            None,
        )
        if recent_conf is None:
            continue

        # Only the first timeline entry is used. A missing, null or empty
        # timeline falls back to an empty entry instead of raising.
        timeline = (recent_conf.get('timeline') or [{}])[0]

        # Transform to our format
        transformed_conf = {
            'title': conf.get('title', ''),
            'year': recent_conf['year'],
            'id': recent_conf['id'],
            'full_name': conf.get('description', ''),
            'link': recent_conf.get('link', ''),
            'deadline': timeline.get('deadline', ''),
            'timezone': recent_conf.get('timezone', ''),
            'place': recent_conf.get('place', ''),
            'date': recent_conf.get('date', ''),
            'tags': [],  # We'll need to maintain a mapping for tags
        }

        # Add optional fields
        if 'abstract_deadline' in timeline:
            transformed_conf['abstract_deadline'] = timeline['abstract_deadline']

        # Parse date range for start/end; a parse failure only costs the
        # start/end fields, the conference itself is still kept.
        try:
            if transformed_conf['date']:
                start_date, end_date = parse_date_range(
                    transformed_conf['date'],
                    str(transformed_conf['year'])
                )
                transformed_conf['start'] = start_date
                transformed_conf['end'] = end_date
        except Exception as e:
            print(f"Warning: Could not parse date for {transformed_conf['title']}: {e}")

        # Add rankings as separate field
        rankings = [
            f"{rank_type.upper()}: {rank_value}"
            for rank_type, rank_value in (conf.get('rank') or {}).items()
        ]
        if rankings:
            transformed_conf['rankings'] = ', '.join(rankings)

        transformed.append(transformed_conf)
    return transformed
def main():
    """Merge freshly fetched ccfddl data into ``src/data/conferences.yml``.

    Loads the current conference list, fetches and transforms the ccfddl
    data, and merges the two by conference ``id``. Fetched entries replace
    existing ones with the same id but keep the locally maintained fields
    listed in ``preserved_fields``. Existing conferences that were not
    fetched are kept unchanged. The result is sorted by ``deadline`` and
    written back as one YAML list item per conference, separated by blank
    lines.

    Any exception is printed and then re-raised.
    """
    # Fields taken from the existing entry whenever it defines them.
    # "start"/"end" parsed from the fetched data are therefore only used
    # when the existing entry has none, and existing rankings win over
    # fetched ones.
    preserved_fields = (
        'tags', 'venue', 'hindex', 'submission_deadline',
        'timezone_submission', 'rebuttal_period_start',
        'rebuttal_period_end', 'final_decision_date',
        'review_release_date', 'commitment_deadline',
        'start', 'end', 'note',
        'rankings',
    )
    try:
        # Fetch current conferences.yml
        current_file = 'src/data/conferences.yml'
        with open(current_file, 'r', encoding='utf-8') as f:
            # An empty file loads as None; treat it as "no conferences yet".
            current_conferences = yaml.safe_load(f) or []

        # Fetch and transform new data
        new_conferences = fetch_conference_files()
        if not new_conferences:
            print("Warning: No conferences fetched from ccfddl")
            return
        transformed_conferences = transform_conference_data(new_conferences)
        if not transformed_conferences:
            print("Warning: No conferences transformed")
            return

        # Create a dictionary of current conferences by ID
        current_conf_dict = {conf['id']: conf for conf in current_conferences}

        # Update or add new conferences while preserving existing ones
        for new_conf in transformed_conferences:
            curr_conf = current_conf_dict.get(new_conf['id'])
            if curr_conf is not None:
                for field in preserved_fields:
                    if field in curr_conf:
                        new_conf[field] = curr_conf[field]
            current_conf_dict[new_conf['id']] = new_conf

        # Convert back to list and sort by deadline. str() keeps the sort
        # working if a deadline was loaded as a non-string value (for
        # example a timestamp parsed by YAML).
        all_conferences = list(current_conf_dict.values())
        all_conferences.sort(key=lambda x: str(x.get('deadline', '9999')))

        # Serialize everything before opening the file for writing, so a
        # dump failure cannot leave a truncated conferences.yml behind.
        documents = [
            yaml.dump(
                [conf],
                allow_unicode=True,
                sort_keys=False,
                default_flow_style=False,
                explicit_start=False,
                explicit_end=False,
                width=float("inf"),
                indent=2,
                default_style=None,
            ).rstrip()  # Remove trailing whitespace
            for conf in all_conferences
        ]

        # allow_unicode=True emits raw non-ASCII characters, so the file
        # must be written as UTF-8 regardless of the platform default.
        with open(current_file, 'w', encoding='utf-8') as f:
            # Two newlines between conferences, one final newline.
            f.write('\n\n'.join(documents) + '\n')

        print(f"Successfully updated {len(all_conferences)} conferences")
    except Exception as e:
        print(f"Error: {e}")
        raise
# Run the update only when this file is executed as a script.
if __name__ == "__main__":
    main()