File size: 9,164 Bytes
bd501fe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5c4b399
bd501fe
 
 
 
 
5c4b399
bd501fe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5c4b399
bd501fe
 
 
 
 
 
 
 
 
 
 
5c4b399
 
 
bd501fe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
import yaml
import requests
from datetime import datetime
from typing import Dict, List, Any


def fetch_conference_files(timeout: float = 30.0) -> List[Dict[str, Any]]:
    """Fetch all conference YAML files from ccfddl repository.

    Lists the ``conference/AI`` directory of ``ccfddl/ccf-deadlines`` through
    the GitHub contents API, downloads every ``.yml`` file it contains and
    keeps the first entry of each file that parses to a non-empty list.

    Args:
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        One parsed conference entry per file that could be loaded. Files that
        fail to download or parse are reported with a warning and skipped.

    Raises:
        requests.RequestException: If the directory listing cannot be fetched.
        ValueError: If the directory listing is not a JSON array.
    """
    # First get the directory listing from GitHub API
    api_url = "https://api.github.com/repos/ccfddl/ccf-deadlines/contents/conference/AI"
    response = requests.get(api_url, timeout=timeout)
    response.raise_for_status()
    files = response.json()
    if not isinstance(files, list):
        # Iterating a non-list payload would fail later with an opaque error.
        raise ValueError(f"Unexpected directory listing from {api_url}: {files!r}")

    conferences = []
    for file in files:
        name = file.get('name', '')
        download_url = file.get('download_url')
        if not name.endswith('.yml') or not download_url:
            continue

        # One broken file should not discard everything fetched so far.
        try:
            file_response = requests.get(download_url, timeout=timeout)
            file_response.raise_for_status()
            conf_data = yaml.safe_load(file_response.text)
        except (requests.RequestException, yaml.YAMLError) as e:
            print(f"Warning: Could not load {name}: {e}")
            continue

        # The data is a list with a single item
        if isinstance(conf_data, list) and conf_data and isinstance(conf_data[0], dict):
            conferences.append(conf_data[0])

    return conferences


def parse_date_range(date_str: str, year: str) -> tuple[str, str]:
    """Parse various date formats and return start and end dates.

    Accepted shapes (the trailing ``, <year>`` is optional):

    * ``"May 19, 2025"``                 - a single day
    * ``"July 13-19, 2025"``             - a range within one month
    * ``"April 29-May 4, 2025"``         - a range across two months
    * ``"February 25 - March 4, 2025"``  - same, with spaces around the dash

    Month names may be written in full or abbreviated to three or more
    letters ("Sep", "Sept", "September"). Matching is case-insensitive and
    does not depend on the process locale.

    Args:
        date_str: Human-readable date or date range.
        year: Year applied to both ends of the range.

    Returns:
        ``(start, end)`` formatted as ``YYYY-MM-DD``. For a single date both
        values are equal.

    Raises:
        ValueError: If the string cannot be interpreted as a date or range.
    """
    month_names = (
        'january', 'february', 'march', 'april', 'may', 'june', 'july',
        'august', 'september', 'october', 'november', 'december',
    )

    def month_number(token):
        """Return 1-12 for a month name or abbreviation, else None."""
        key = token.lower().rstrip('.')
        # Require three letters so a prefix cannot be ambiguous (e.g. "ju").
        if len(key) >= 3:
            for number, name in enumerate(month_names, start=1):
                if name.startswith(key):
                    return number
        return None

    def parse_part(part, fallback_month):
        """Return (month, day) for one side of the range."""
        month, day = fallback_month, None
        for token in part.replace(',', ' ').split():
            if token == year:
                # The year is supplied separately; ignore it in the text.
                continue
            if token.isdigit():
                if day is not None:
                    raise ValueError(f"more than one day in '{part.strip()}'")
                day = int(token)
                continue
            number = month_number(token)
            if number is None:
                raise ValueError(f"unrecognised token '{token}'")
            month = number
        if month is None or day is None:
            raise ValueError(f"missing month or day in '{part.strip()}'")
        return month, day

    try:
        # Treat en/em dashes like a plain hyphen before splitting the range.
        normalised = date_str.replace('\u2013', '-').replace('\u2014', '-')
        parts = normalised.split('-')
        if len(parts) > 2:
            raise ValueError("expected at most one range separator")

        start_month, start_day = parse_part(parts[0], None)
        if len(parts) == 2:
            # An end such as "19" carries no month; reuse the start's month.
            end_month, end_day = parse_part(parts[1], start_month)
        else:
            end_month, end_day = start_month, start_day

        year_number = int(year)
        start_date = datetime(year_number, start_month, start_day)
        end_date = datetime(year_number, end_month, end_day)
    except (AttributeError, TypeError, ValueError) as e:
        raise ValueError(f"Could not parse date: {date_str} ({e})") from e

    return start_date.strftime('%Y-%m-%d'), end_date.strftime('%Y-%m-%d')


def transform_conference_data(conferences: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Transform ccfddl format to our format.

    For every conference the first listed instance whose ``year`` is the
    current year or later is selected; conferences without such an instance
    are dropped. The selected instance is flattened into a single dict with
    the keys ``title``, ``year``, ``id``, ``full_name``, ``link``,
    ``deadline``, ``timezone``, ``place``, ``date`` and ``tags``, plus the
    optional ``abstract_deadline``, ``start``/``end`` and ``rankings``.

    Args:
        conferences: Conference entries as loaded from the ccfddl YAML files.

    Returns:
        The transformed entries, in input order.
    """
    transformed = []
    current_year = datetime.now().year

    for conf in conferences:
        # Get the first listed instance that is current or upcoming.
        # Instances without a usable integer year are ignored instead of
        # aborting the whole run.
        recent_conf = next(
            (
                instance
                for instance in conf.get('confs') or []
                if isinstance(instance, dict)
                and isinstance(instance.get('year'), int)
                and instance['year'] >= current_year
            ),
            None,
        )
        if recent_conf is None:
            continue
        if 'id' not in recent_conf:
            # Without an id the entry cannot be merged with existing data.
            print(f"Warning: Skipping {conf.get('title', '')}: instance has no id")
            continue

        # Only the first timeline entry is used; tolerate a missing, empty
        # or malformed timeline.
        timeline_entries = recent_conf.get('timeline')
        first_entry = (
            timeline_entries[0]
            if isinstance(timeline_entries, list) and timeline_entries
            else None
        )
        timeline = first_entry if isinstance(first_entry, dict) else {}

        # Transform to our format
        transformed_conf = {
            'title': conf.get('title', ''),
            'year': recent_conf['year'],
            'id': recent_conf['id'],
            'full_name': conf.get('description', ''),
            'link': recent_conf.get('link', ''),
            'deadline': timeline.get('deadline', ''),
            'timezone': recent_conf.get('timezone', ''),
            'place': recent_conf.get('place', ''),
            'date': recent_conf.get('date', ''),
            'tags': [],  # We'll need to maintain a mapping for tags
        }

        # Add optional fields
        if 'abstract_deadline' in timeline:
            transformed_conf['abstract_deadline'] = timeline['abstract_deadline']

        # Parse date range for start/end; a failure only costs these two keys.
        if transformed_conf['date']:
            try:
                start_date, end_date = parse_date_range(
                    transformed_conf['date'],
                    str(transformed_conf['year']),
                )
            except (AttributeError, TypeError, ValueError) as e:
                print(f"Warning: Could not parse date for {transformed_conf['title']}: {e}")
            else:
                transformed_conf['start'] = start_date
                transformed_conf['end'] = end_date

        # Add rankings as separate field, e.g. "CCF: A, CORE: A*"
        rank = conf.get('rank')
        if isinstance(rank, dict):
            rankings = ', '.join(
                f"{str(rank_type).upper()}: {rank_value}"
                for rank_type, rank_value in rank.items()
            )
            if rankings:
                transformed_conf['rankings'] = rankings

        transformed.append(transformed_conf)

    return transformed


def _merge_conference(existing: Dict[str, Any], incoming: Dict[str, Any]) -> Dict[str, Any]:
    """Return a copy of *incoming* with the preserved fields of *existing* copied over."""
    # Fields already present in the local file take precedence over fetched
    # values; parsed start/end and rankings are kept only when the local
    # entry has none.
    preserved_fields = (
        'tags', 'venue', 'hindex', 'submission_deadline',
        'timezone_submission', 'rebuttal_period_start',
        'rebuttal_period_end', 'final_decision_date',
        'review_release_date', 'commitment_deadline',
        'start', 'end', 'note', 'rankings',
    )
    merged = dict(incoming)
    for field in preserved_fields:
        if field in existing:
            merged[field] = existing[field]
    return merged


def main():
    """Refresh ``src/data/conferences.yml`` with data fetched from ccfddl.

    Loads the existing file, fetches and transforms the ccfddl entries,
    merges them by ``id`` (existing entries that were not fetched are kept,
    fetched entries replace existing ones apart from the preserved fields),
    sorts the result by deadline and rewrites the file with one blank line
    between conferences.

    Any exception is printed and re-raised. The file is left untouched when
    nothing was fetched or transformed.
    """
    try:
        # Fetch current conferences.yml
        current_file = 'src/data/conferences.yml'
        with open(current_file, 'r', encoding='utf-8') as f:
            # An empty file loads as None; treat it as "no conferences yet".
            current_conferences = yaml.safe_load(f) or []

        # Fetch and transform new data
        new_conferences = fetch_conference_files()
        if not new_conferences:
            print("Warning: No conferences fetched from ccfddl")
            return

        transformed_conferences = transform_conference_data(new_conferences)
        if not transformed_conferences:
            print("Warning: No conferences transformed")
            return

        # Create a dictionary of current conferences by ID
        current_conf_dict = {conf['id']: conf for conf in current_conferences}

        # Update or add new conferences while preserving existing ones
        for new_conf in transformed_conferences:
            conf_id = new_conf['id']
            curr_conf = current_conf_dict.get(conf_id)
            if curr_conf is not None:
                new_conf = _merge_conference(curr_conf, new_conf)
            current_conf_dict[conf_id] = new_conf

        # Sort by deadline; str() keeps the comparison valid even if some
        # deadline was loaded as a non-string YAML value.
        all_conferences = sorted(
            current_conf_dict.values(),
            key=lambda conf: str(conf.get('deadline', '9999')),
        )

        # Serialise everything before opening the file for writing, so a
        # dump failure cannot leave a truncated conferences.yml behind.
        chunks = [
            yaml.dump(
                [conf],
                allow_unicode=True,
                sort_keys=False,
                default_flow_style=False,
                explicit_start=False,
                explicit_end=False,
                width=float("inf"),
                indent=2,
                default_style=None,
            ).rstrip()  # Remove trailing whitespace
            for conf in all_conferences
        ]

        # Explicit UTF-8 because allow_unicode=True emits non-ASCII as-is.
        with open(current_file, 'w', encoding='utf-8') as f:
            # Two newlines between conferences, one final newline
            f.write('\n\n'.join(chunks))
            f.write('\n')

        print(f"Successfully updated {len(all_conferences)} conferences")

    except Exception as e:
        print(f"Error: {e}")
        raise


# Run the update only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()