Spaces:
Running
Running
File size: 9,164 Bytes
bd501fe 5c4b399 bd501fe 5c4b399 bd501fe 5c4b399 bd501fe 5c4b399 bd501fe |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 |
import yaml
import requests
from datetime import datetime
from typing import Dict, List, Any
def fetch_conference_files() -> List[Dict[str, Any]]:
    """Fetch all conference YAML files from the ccfddl repository.

    Lists the ``conference/AI`` directory through the GitHub contents API,
    downloads every entry whose name ends in ``.yml`` and parses it with
    ``yaml.safe_load``.

    Returns:
        The first item of every downloaded file whose parsed content is a
        non-empty list (files with any other shape are skipped).

    Raises:
        requests.RequestException: On a network failure, a timeout, or a
            non-success HTTP status (for example a GitHub rate-limit reply).
        ValueError: If the directory listing is not a JSON array.
    """
    api_url = "https://api.github.com/repos/ccfddl/ccf-deadlines/contents/conference/AI"
    # requests does not time out by default; bound every call so a stalled
    # connection cannot hang the whole update run.
    timeout_seconds = 30

    # First get the directory listing from the GitHub API.
    response = requests.get(api_url, timeout=timeout_seconds)
    response.raise_for_status()
    files = response.json()
    if not isinstance(files, list):
        # An error payload is a JSON object; iterating it would yield string
        # keys and fail later with an unhelpful TypeError.
        raise ValueError(
            f"Unexpected directory listing from {api_url}: {type(files).__name__}"
        )

    conferences = []
    for file in files:
        if not file['name'].endswith('.yml'):
            continue
        file_response = requests.get(file['download_url'], timeout=timeout_seconds)
        file_response.raise_for_status()
        conf_data = yaml.safe_load(file_response.text)
        # The data is a list with a single item.
        if isinstance(conf_data, list) and conf_data:
            conferences.append(conf_data[0])
    return conferences
# Lower-case English month names; the tuple index + 1 is the month number.
_MONTH_NAMES = (
    "january", "february", "march", "april", "may", "june",
    "july", "august", "september", "october", "november", "december",
)


def _month_number(token: str) -> int:
    """Return the 1-based month number for a full or abbreviated month name.

    Accepts any prefix of at least three letters ("Jan", "Sept", "September"),
    case-insensitively and with an optional trailing period.

    Raises:
        ValueError: If the token does not identify a month.
    """
    key = token.rstrip(".").lower()
    if len(key) >= 3:
        for number, name in enumerate(_MONTH_NAMES, start=1):
            if name.startswith(key):
                return number
    raise ValueError(f"unknown month: {token!r}")


def parse_date_range(date_str: str, year: str) -> tuple[str, str]:
    """Parse a conference date string into ISO start and end dates.

    Supported shapes (the ", <year>" suffix is optional):
        "May 19, 2025"            -> single day
        "July 13-19, 2025"        -> end day inherits the start month
        "April 29-May 4, 2025"    -> explicit month on both sides
        "Feb 25 - Mar 4, 2025"    -> abbreviations and spaced separator

    Months are resolved per token rather than by substring replacement, so
    full names, abbreviations, "Sept" and "May" are all handled. Both dates
    are placed in ``year``.

    Args:
        date_str: Human-readable date or date range.
        year: Four-digit year as a string.

    Returns:
        A ``(start, end)`` tuple of ``YYYY-MM-DD`` strings.

    Raises:
        ValueError: If the string cannot be interpreted as a date or range.
    """
    # Remove the year if it appears after a comma.
    date_str = date_str.replace(f", {year}", "")

    def tokens_of(text: str) -> list:
        # Commas are separators only; a bare year token carries no extra info.
        return [tok for tok in text.replace(",", " ").split() if tok != year]

    try:
        # Treat en/em dashes like a plain hyphen range separator.
        normalized = date_str.replace("\u2013", "-").replace("\u2014", "-")
        pieces = [piece.strip() for piece in normalized.split("-")]
        if len(pieces) == 1:
            start_text = end_text = pieces[0]
        elif len(pieces) == 2:
            start_text, end_text = pieces
        else:
            raise ValueError("expected at most one range separator")

        start_tokens = tokens_of(start_text)
        if len(start_tokens) != 2:
            raise ValueError("start must look like '<month> <day>'")
        start_month = _month_number(start_tokens[0])
        start_day = int(start_tokens[1])

        end_tokens = tokens_of(end_text)
        if len(end_tokens) == 1:
            # End is just a day number, use the start's month.
            end_month = start_month
            end_day = int(end_tokens[0])
        elif len(end_tokens) == 2:
            end_month = _month_number(end_tokens[0])
            end_day = int(end_tokens[1])
        else:
            raise ValueError("end must look like '<day>' or '<month> <day>'")

        # The datetime constructor validates the day against the month.
        start_date = datetime(int(year), start_month, start_day)
        end_date = datetime(int(year), end_month, end_day)
        return start_date.strftime('%Y-%m-%d'), end_date.strftime('%Y-%m-%d')
    except Exception as e:
        raise ValueError(f"Could not parse date: {date_str} ({e})") from e
def transform_conference_data(conferences: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Transform ccfddl conference records into this project's format.

    For each conference, the first entry of ``confs`` whose ``year`` is the
    current year or later is selected; conferences without such an entry are
    skipped. The deadline fields come from the first ``timeline`` entry.

    Args:
        conferences: Parsed ccfddl records (one dict per conference).

    Returns:
        One dict per selected conference with the keys ``title``, ``year``,
        ``id``, ``full_name``, ``link``, ``deadline``, ``timezone``,
        ``place``, ``date`` and ``tags``, plus ``abstract_deadline``,
        ``start``/``end`` and ``rankings`` when they are available.
    """
    transformed = []
    current_year = datetime.now().year

    for conf in conferences:
        # Get the first instance that is current or upcoming.
        recent_conf = next(
            (
                instance
                for instance in (conf.get('confs') or [])
                if instance['year'] >= current_year
            ),
            None,
        )
        if recent_conf is None:
            continue

        # A missing, null or empty timeline falls back to an empty entry
        # instead of raising on the [0] lookup.
        timeline_entries = recent_conf.get('timeline') or [{}]
        timeline = timeline_entries[0]

        # Key order is kept stable because the output file is dumped unsorted.
        transformed_conf = {
            'title': conf.get('title', ''),
            'year': recent_conf['year'],
            'id': recent_conf['id'],
            'full_name': conf.get('description', ''),
            'link': recent_conf.get('link', ''),
            'deadline': timeline.get('deadline', ''),
            'timezone': recent_conf.get('timezone', ''),
            'place': recent_conf.get('place', ''),
            'date': recent_conf.get('date', ''),
            'tags': [],  # We'll need to maintain a mapping for tags
        }

        # Add optional fields.
        if 'abstract_deadline' in timeline:
            transformed_conf['abstract_deadline'] = timeline['abstract_deadline']

        # Parse the date range for start/end; a failure is only a warning so
        # one odd date string does not drop the conference.
        try:
            if transformed_conf['date']:
                start_date, end_date = parse_date_range(
                    transformed_conf['date'],
                    str(transformed_conf['year']),
                )
                transformed_conf['start'] = start_date
                transformed_conf['end'] = end_date
        except Exception as e:
            print(f"Warning: Could not parse date for {transformed_conf['title']}: {e}")

        # Add rankings as a separate, comma-joined field.
        if 'rank' in conf:
            rankings = [
                f"{rank_type.upper()}: {rank_value}"
                for rank_type, rank_value in conf['rank'].items()
            ]
            if rankings:
                transformed_conf['rankings'] = ', '.join(rankings)

        transformed.append(transformed_conf)

    return transformed
def main():
    """Merge freshly fetched ccfddl data into ``src/data/conferences.yml``.

    Loads the current file, fetches and transforms the upstream records,
    then updates entries that share an ``id`` and adds the rest. Fields
    that exist on the current entry and are listed in ``preserved_fields``
    win over the fetched values. The merged list is sorted by deadline and
    written back with a blank line between conferences.

    Any exception is printed and re-raised so the calling process fails.
    """
    try:
        # Load the current conferences.yml.
        current_file = 'src/data/conferences.yml'
        with open(current_file, 'r', encoding='utf-8') as f:
            # An empty file parses to None; treat it as "no conferences yet".
            current_conferences = yaml.safe_load(f) or []

        # Fetch and transform new data.
        new_conferences = fetch_conference_files()
        if not new_conferences:
            print("Warning: No conferences fetched from ccfddl")
            return

        transformed_conferences = transform_conference_data(new_conferences)
        if not transformed_conferences:
            print("Warning: No conferences transformed")
            return

        # Index the current conferences by ID.
        current_conf_dict = {conf['id']: conf for conf in current_conferences}

        # Fields whose current value is kept when an entry is updated.
        # 'start'/'end' parsed from upstream are used only when the current
        # entry has none; 'rankings' stays last so key order is unchanged.
        preserved_fields = (
            'tags', 'venue', 'hindex', 'submission_deadline',
            'timezone_submission', 'rebuttal_period_start',
            'rebuttal_period_end', 'final_decision_date',
            'review_release_date', 'commitment_deadline',
            'start', 'end', 'note',
            'rankings',
        )

        # Update or add new conferences while preserving existing ones.
        for new_conf in transformed_conferences:
            curr_conf = current_conf_dict.get(new_conf['id'])
            if curr_conf is not None:
                for field in preserved_fields:
                    if field in curr_conf:
                        new_conf[field] = curr_conf[field]
            current_conf_dict[new_conf['id']] = new_conf

        # Convert back to a list and sort by deadline. Deadlines are compared
        # as strings because safe_load may yield datetime objects for
        # unquoted timestamps, and str/datetime cannot be ordered together.
        all_conferences = list(current_conf_dict.values())
        all_conferences.sort(key=lambda x: str(x.get('deadline', '9999')))

        # Write back to file with newlines between conferences.
        with open(current_file, 'w', encoding='utf-8') as f:
            for i, conf in enumerate(all_conferences):
                if i > 0:
                    f.write('\n\n')  # Add two newlines between conferences

                yaml_str = yaml.dump(
                    [conf],
                    allow_unicode=True,
                    sort_keys=False,
                    default_flow_style=False,
                    explicit_start=False,
                    explicit_end=False,
                    width=float("inf"),
                    indent=2,
                    default_style=None,
                )
                f.write(yaml_str.rstrip())  # Remove trailing whitespace

            # Add final newline.
            f.write('\n')

        print(f"Successfully updated {len(all_conferences)} conferences")
    except Exception as e:
        print(f"Error: {e}")
        raise
# Run the update only when executed as a script, not when imported.
if __name__ == "__main__":
    main()