"""Sync conference metadata from the ccfddl/ccf-deadlines repository.

The script downloads the conference YAML files published by ccfddl,
converts each upcoming conference instance to the local schema and merges
the result into the local conferences file, preserving manually maintained
fields of entries that already exist there.
"""

import re
from datetime import datetime
from typing import Any, Dict, List, Optional

import requests
import yaml

# GitHub contents API listing of the ccfddl "AI" conference directory.
CCFDDL_AI_API_URL = (
    "https://api.github.com/repos/ccfddl/ccf-deadlines/contents/conference/AI"
)

# Local file that is read, merged with the fetched data and rewritten.
CONFERENCES_FILE = 'src/data/conferences.yml'

# Upper bound for every HTTP request so a stalled connection cannot hang
# the script forever (requests has no default timeout).
REQUEST_TIMEOUT_SECONDS = 30

_MONTH_NAMES = (
    'January', 'February', 'March', 'April', 'May', 'June', 'July',
    'August', 'September', 'October', 'November', 'December',
)

# A range separator is a hyphen, en dash or em dash with optional spaces.
_RANGE_SEPARATOR_RE = re.compile(r"\s*[-\u2013\u2014]\s*")


def fetch_conference_files(
    api_url: str = CCFDDL_AI_API_URL,
) -> List[Dict[str, Any]]:
    """Fetch all conference YAML files from ccfddl repository.

    Args:
        api_url: GitHub contents-API URL of the directory to list.

    Returns:
        The first item of every ``.yml`` file whose content is a non-empty
        list. Files that cannot be downloaded are skipped with a warning.

    Raises:
        requests.RequestException: If the directory listing request fails.
        ValueError: If the directory listing is not a JSON list.
    """
    # First get the directory listing from GitHub API
    response = requests.get(api_url, timeout=REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()
    files = response.json()
    if not isinstance(files, list):
        raise ValueError(
            f"Unexpected directory listing from {api_url}: {files!r}"
        )

    conferences = []
    for file in files:
        if not file['name'].endswith('.yml'):
            continue

        try:
            file_response = requests.get(
                file['download_url'], timeout=REQUEST_TIMEOUT_SECONDS
            )
            file_response.raise_for_status()
        except requests.RequestException as e:
            # Best effort: one unavailable file must not abort the sync.
            print(f"Warning: Could not fetch {file['name']}: {e}")
            continue

        conf_data = yaml.safe_load(file_response.text)
        # The data is a list with a single item
        if isinstance(conf_data, list) and conf_data:
            conferences.append(conf_data[0])

    return conferences


def _full_month_name(token: str) -> Optional[str]:
    """Return the full month name for a full or abbreviated month token.

    Accepts any prefix of at least three letters, optionally followed by a
    period ("Sep", "Sept", "Sept.", "September"). Returns None otherwise.
    """
    key = token.rstrip('.').lower()
    if len(key) < 3:
        return None
    for name in _MONTH_NAMES:
        if name.lower().startswith(key):
            return name
    return None


def _normalize_date_part(
    part: str, default_month: Optional[str] = None
) -> str:
    """Return ``part`` as "<Full month> <rest>" with single spaces.

    If the first token is not a month name and ``default_month`` is given,
    the default month is prepended (for inputs such as the "19" in
    "July 13-19").
    """
    tokens = part.split()
    if not tokens:
        return ''

    month = _full_month_name(tokens[0])
    if month is not None:
        # Replace the whole token, never a substring, so that full names
        # are not corrupted (e.g. "April" must not become "Aprilil").
        tokens[0] = month
    elif default_month is not None:
        tokens.insert(0, default_month)
    return ' '.join(tokens)


def parse_date_range(date_str: str, year: str) -> tuple[str, str]:
    """Parse various date formats and return start and end dates.

    Supported inputs (with or without a trailing ", <year>"):
    "May 19", "July 13-19", "July 13 - 19", "April 29-May 4"; month names
    may be full or abbreviated.

    Args:
        date_str: Human-readable date or date range.
        year: Year applied to both the start and the end date.

    Returns:
        A ``(start, end)`` tuple of ``YYYY-MM-DD`` strings.

    Raises:
        ValueError: If the date cannot be parsed.
    """
    # Remove the year if it appears in the string
    date_str = date_str.replace(f", {year}", "")

    try:
        parts = _RANGE_SEPARATOR_RE.split(date_str.strip())
        if len(parts) == 1:
            # For single date format like "May 19, 2025"
            start = end = parts[0]
        elif len(parts) == 2:
            start, end = parts
        else:
            raise ValueError("expected at most one range separator")

        start = _normalize_date_part(start)
        # When the end is just a day number, reuse the start's month.
        start_month = start.split()[0] if start else None
        end = _normalize_date_part(end, default_month=start_month)

        # NOTE: %B matches month names of the current locale; the names
        # produced above are English.
        start_date = datetime.strptime(f"{start}, {year}", "%B %d, %Y")
        end_date = datetime.strptime(f"{end}, {year}", "%B %d, %Y")
    except ValueError as e:
        raise ValueError(f"Could not parse date: {date_str} ({e})") from e

    return start_date.strftime('%Y-%m-%d'), end_date.strftime('%Y-%m-%d')


def transform_conference_data(
    conferences: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """Transform ccfddl format to our format.

    For every conference the first instance in ``confs`` whose year is the
    current year or later is converted; conferences without such an
    instance are skipped.

    Args:
        conferences: Conference entries as loaded from the ccfddl files.

    Returns:
        One dictionary in the local schema per converted conference.
    """
    transformed = []
    current_year = datetime.now().year

    for conf in conferences:
        # Get the first current or upcoming conference instance
        recent_conf = next(
            (
                instance
                for instance in conf.get('confs') or []
                if instance['year'] >= current_year
            ),
            None,
        )
        if not recent_conf:
            continue

        # Tolerate a missing, empty or null timeline.
        timeline_entries = recent_conf.get('timeline') or [{}]
        timeline = timeline_entries[0] or {}

        # Transform to our format
        transformed_conf = {
            'title': conf.get('title', ''),
            'year': recent_conf['year'],
            'id': recent_conf['id'],
            'full_name': conf.get('description', ''),
            'link': recent_conf.get('link', ''),
            'deadline': timeline.get('deadline', ''),
            'timezone': recent_conf.get('timezone', ''),
            'place': recent_conf.get('place', ''),
            'date': recent_conf.get('date', ''),
            'tags': [],  # We'll need to maintain a mapping for tags
        }

        # Add optional fields
        if 'abstract_deadline' in timeline:
            transformed_conf['abstract_deadline'] = (
                timeline['abstract_deadline']
            )

        # Parse date range for start/end; failure here is not fatal.
        if transformed_conf['date']:
            try:
                start_date, end_date = parse_date_range(
                    transformed_conf['date'],
                    str(transformed_conf['year']),
                )
            except (ValueError, TypeError, AttributeError) as e:
                print(
                    f"Warning: Could not parse date for "
                    f"{transformed_conf['title']}: {e}"
                )
            else:
                transformed_conf['start'] = start_date
                transformed_conf['end'] = end_date

        # Add rankings as separate field
        rankings = [
            f"{rank_type.upper()}: {rank_value}"
            for rank_type, rank_value in (conf.get('rank') or {}).items()
        ]
        if rankings:
            transformed_conf['rankings'] = ', '.join(rankings)

        transformed.append(transformed_conf)

    return transformed


def main():
    """Merge freshly fetched ccfddl data into the local conferences file.

    Existing entries are matched by ``id``. For a matched entry the fetched
    data replaces the stored one, except for the fields listed in
    ``preserved_fields``, whose stored values win. The merged list is
    sorted by deadline and written back to the same file.

    Raises:
        Exception: Any error is printed and re-raised.
    """
    try:
        # Fetch current conferences.yml
        current_file = CONFERENCES_FILE
        with open(current_file, 'r', encoding='utf-8') as f:
            # An empty file loads as None.
            current_conferences = yaml.safe_load(f) or []

        # Fetch and transform new data
        new_conferences = fetch_conference_files()
        if not new_conferences:
            print("Warning: No conferences fetched from ccfddl")
            return

        transformed_conferences = transform_conference_data(new_conferences)
        if not transformed_conferences:
            print("Warning: No conferences transformed")
            return

        # Create a dictionary of current conferences by ID
        current_conf_dict = {conf['id']: conf for conf in current_conferences}

        # Fields whose locally stored value takes precedence over the
        # fetched one. Parsed 'start'/'end' values are kept automatically
        # when the stored entry has none.
        preserved_fields = (
            'tags', 'venue', 'hindex', 'submission_deadline',
            'timezone_submission', 'rebuttal_period_start',
            'rebuttal_period_end', 'final_decision_date',
            'review_release_date', 'commitment_deadline',
            'start', 'end', 'note', 'rankings',
        )

        # Update or add new conferences while preserving existing ones
        for new_conf in transformed_conferences:
            curr_conf = current_conf_dict.get(new_conf['id'])
            if curr_conf is not None:
                for field in preserved_fields:
                    if field in curr_conf:
                        new_conf[field] = curr_conf[field]
            current_conf_dict[new_conf['id']] = new_conf

        # Convert back to list and sort by deadline. str() keeps the sort
        # working if YAML loaded an unquoted deadline as a datetime.
        all_conferences = list(current_conf_dict.values())
        all_conferences.sort(key=lambda x: str(x.get('deadline', '9999')))

        # Write back to file with newlines between conferences
        with open(current_file, 'w', encoding='utf-8') as f:
            for i, conf in enumerate(all_conferences):
                if i > 0:
                    f.write('\n\n')  # Add two newlines between conferences

                yaml_str = yaml.dump(
                    [conf],
                    allow_unicode=True,
                    sort_keys=False,
                    default_flow_style=False,
                    explicit_start=False,
                    explicit_end=False,
                    width=float("inf"),
                    indent=2,
                    default_style=None,
                )
                f.write(yaml_str.rstrip())  # Remove trailing whitespace

            # Add final newline
            f.write('\n')

        print(f"Successfully updated {len(all_conferences)} conferences")

    except Exception as e:
        print(f"Error: {e}")
        raise


if __name__ == "__main__":
    main()