badaoui (HF Staff) committed
Commit 0f8d3a8 · Parent: f3f4c77

more factorization

Files changed (1):
  1. data.py +140 -262
data.py CHANGED
@@ -6,7 +6,8 @@ import threading
 import traceback
 import json
 import re
-from typing import List, Tuple, Optional
+import random
+from typing import List, Tuple, Optional, Dict

 # NOTE: if caching is an issue, try adding `use_listings_cache=False`
 fs = HfFileSystem()
@@ -56,7 +57,35 @@ KEYS_TO_KEEP = [
     "job_link_nvidia",
 ]

-
+# HELPER FUNCTIONS
+def generate_fake_dates(num_days: int = 7) -> List[str]:
+    """Generate fake dates for the last N days."""
+    today = datetime.now()
+    return [(today - timedelta(days=i)).strftime("%Y-%m-%d") for i in range(num_days)]
+
+def parse_json_field(value) -> dict:
+    """Safely parse a JSON field that might be a string or dict."""
+    if isinstance(value, str):
+        try:
+            return json.loads(value)
+        except:
+            return {}
+    return value if isinstance(value, dict) else {}
+
+def extract_date_from_path(path: str, pattern: str) -> Optional[str]:
+    """Extract date from file path using regex pattern."""
+    match = re.search(pattern, path)
+    return match.group(1) if match else None
+
+def get_test_names(tests: list) -> set:
+    """Extract test names from a list of test dictionaries."""
+    return {test.get('line', '') for test in tests}
+
+def safe_extract(row: pd.Series, key: str) -> int:
+    """Safely extract an integer value from a DataFrame row."""
+    return int(row.get(key, 0)) if pd.notna(row.get(key, 0)) else 0
+
+# DATA LOADING FUNCTIONS
 def log_dataframe_link(link: str) -> str:
     """
     Adds the link to the dataset in the logs, modifies it to get a clickable link and then returns the date of the
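Aside (illustrative, not part of the commit): a quick sketch of how the new helpers are expected to behave, assuming data.py and its dependencies import cleanly in the current environment:

    from data import generate_fake_dates, parse_json_field

    dates = generate_fake_dates(3)
    assert len(dates) == 3                                   # three ISO dates, newest first
    assert parse_json_field('{"single": []}') == {"single": []}
    assert parse_json_field("not json") == {}                # malformed JSON falls back to {}
    assert parse_json_field(None) == {}                      # non-str, non-dict falls back to {}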
@@ -109,69 +138,37 @@ def read_one_dataframe(json_path: str, device_label: str) -> tuple[pd.DataFrame,
 def get_available_dates() -> List[str]:
     """Get list of available dates from both AMD and NVIDIA datasets."""
     try:
-        # Get AMD dates - the path structure is: YYYY-MM-DD/runs/{run_id}/ci_results_run_models_gpu/model_results.json
+        # Get file lists
         amd_src = "hf://datasets/optimum-amd/transformers_daily_ci/**/runs/**/ci_results_run_models_gpu/model_results.json"
-        files_amd = sorted(fs.glob(amd_src, refresh=True), reverse=True)
-        logger.info(f"Found {len(files_amd)} AMD files")
-
-        # Get NVIDIA dates - structure is: YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
         nvidia_src = "hf://datasets/hf-internal-testing/transformers_daily_ci/*/ci_results_run_models_gpu/model_results.json"
+
+        files_amd = sorted(fs.glob(amd_src, refresh=True), reverse=True)
         files_nvidia = sorted(fs.glob(nvidia_src, refresh=True), reverse=True)
-        logger.info(f"Found {len(files_nvidia)} NVIDIA files")
-
-        # Extract dates from file paths
-        amd_dates = set()
-        for file_path in files_amd:
-            # Pattern to match the date in the AMD path: YYYY-MM-DD/runs/{run_id}/ci_results_run_models_gpu/model_results.json
-            pattern = r'transformers_daily_ci/(\d{4}-\d{2}-\d{2})/runs/[^/]+/ci_results_run_models_gpu/model_results\.json'
-            match = re.search(pattern, file_path)
-            if match:
-                amd_dates.add(match.group(1))
-            else:
-                # Log unmatched paths for debugging
-                logger.debug(f"AMD file path didn't match pattern: {file_path}")
-
-        # Log a few example AMD file paths for debugging
-        if files_amd:
-            logger.info(f"Example AMD file paths: {files_amd[:3]}")
-
-        nvidia_dates = set()
-        for file_path in files_nvidia:
-            # Pattern to match the date in the NVIDIA path: YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
-            pattern = r'transformers_daily_ci/(\d{4}-\d{2}-\d{2})/ci_results_run_models_gpu/model_results\.json'
-            match = re.search(pattern, file_path)
-            if match:
-                nvidia_dates.add(match.group(1))
-
-        logger.info(f"AMD dates: {sorted(amd_dates, reverse=True)[:5]}...")  # Show first 5
-        logger.info(f"NVIDIA dates: {sorted(nvidia_dates, reverse=True)[:5]}...")  # Show first 5
-
-        # Return intersection of both datasets (dates where both have data)
+
+        logger.info(f"Found {len(files_amd)} AMD files, {len(files_nvidia)} NVIDIA files")
+
+        # Extract dates using patterns
+        amd_pattern = r'transformers_daily_ci/(\d{4}-\d{2}-\d{2})/runs/[^/]+/ci_results_run_models_gpu/model_results\.json'
+        nvidia_pattern = r'transformers_daily_ci/(\d{4}-\d{2}-\d{2})/ci_results_run_models_gpu/model_results\.json'
+
+        amd_dates = {extract_date_from_path(f, amd_pattern) for f in files_amd}
+        amd_dates.discard(None)  # Remove None values
+
+        nvidia_dates = {extract_date_from_path(f, nvidia_pattern) for f in files_nvidia}
+        nvidia_dates.discard(None)
+
+        logger.info(f"AMD dates: {sorted(amd_dates, reverse=True)[:5]}...")
+        logger.info(f"NVIDIA dates: {sorted(nvidia_dates, reverse=True)[:5]}...")
+
+        # Return intersection of both datasets
         common_dates = sorted(amd_dates.intersection(nvidia_dates), reverse=True)
         logger.info(f"Common dates: {len(common_dates)} dates where both AMD and NVIDIA have data")

-        if common_dates:
-            return common_dates[:30]  # Limit to last 30 days for performance
-        else:
-            # If no real dates available, generate fake dates for the last 7 days
-            logger.warning("No real dates available, generating fake dates for demo purposes")
-            fake_dates = []
-            today = datetime.now()
-            for i in range(7):
-                date = today - timedelta(days=i)
-                fake_dates.append(date.strftime("%Y-%m-%d"))
-            return fake_dates
+        return common_dates[:30] if common_dates else generate_fake_dates()

     except Exception as e:
         logger.error(f"Error getting available dates: {e}")
-        # Generate fake dates when there's an error
-        logger.info("Generating fake dates due to error")
-        fake_dates = []
-        today = datetime.now()
-        for i in range(7):
-            date = today - timedelta(days=i)
-            fake_dates.append(date.strftime("%Y-%m-%d"))
-        return fake_dates
+        return generate_fake_dates()


 def get_data_for_date(target_date: str) -> tuple[pd.DataFrame, str]:
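Aside (illustrative, not part of the commit): the refactored date extraction reduces to one regex search per file path. The path below is invented but follows the AMD layout the pattern targets:

    import re

    amd_pattern = r'transformers_daily_ci/(\d{4}-\d{2}-\d{2})/runs/[^/]+/ci_results_run_models_gpu/model_results\.json'
    path = "datasets/optimum-amd/transformers_daily_ci/2025-01-15/runs/12345/ci_results_run_models_gpu/model_results.json"

    match = re.search(amd_pattern, path)
    print(match.group(1) if match else None)  # 2025-01-15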
@@ -242,37 +239,30 @@ def get_historical_data(start_date: str, end_date: str, sample_data = False) ->
     """Get historical data for a date range."""
     if sample_data:
         return get_fake_historical_data(start_date, end_date)
+
     try:
         start_dt = datetime.strptime(start_date, "%Y-%m-%d")
         end_dt = datetime.strptime(end_date, "%Y-%m-%d")
-
         historical_data = []
-        current_dt = start_dt

+        # Load data for each day in range
+        current_dt = start_dt
         while current_dt <= end_dt:
             date_str = current_dt.strftime("%Y-%m-%d")
             try:
                 df, _ = get_data_for_date(date_str)
-                # Only add non-empty dataframes
                 if not df.empty:
                     df['date'] = date_str
                     historical_data.append(df)
                     logger.info(f"Loaded data for {date_str}")
-                else:
-                    logger.warning(f"No data available for {date_str}")
             except Exception as e:
                 logger.warning(f"Could not load data for {date_str}: {e}")
-
             current_dt += timedelta(days=1)

-        # Combine all dataframes
-        combined_df = pd.concat(historical_data, ignore_index=False)
-        return combined_df
+        return pd.concat(historical_data, ignore_index=False) if historical_data else pd.DataFrame()

     except Exception as e:
         logger.error(f"Error getting historical data: {e}")
-        # Fall back to fake data when there's an error
-        logger.info("Falling back to fake historical data due to error")
         return get_fake_historical_data(start_date, end_date)
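Aside (illustrative, not part of the commit): the guard in the rewritten return matters because pd.concat on an empty list raises instead of returning an empty frame. A minimal sketch:

    import pandas as pd

    frames = []
    try:
        pd.concat(frames)
    except ValueError as e:
        print(e)  # pd.concat raises "No objects to concatenate" on an empty list

    result = pd.concat(frames, ignore_index=False) if frames else pd.DataFrame()
    print(result.empty)  # True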
 
@@ -326,49 +316,36 @@ def get_fake_historical_data(start_date: str, end_date: str) -> pd.DataFrame:
     try:
         start_dt = datetime.strptime(start_date, "%Y-%m-%d")
         end_dt = datetime.strptime(end_date, "%Y-%m-%d")
-
-        # Generate fake data for each date in the range
-        historical_data = []
-        current_dt = start_dt
-
-        # Get base sample data to use as template
         sample_df, _ = get_sample_data()
+        historical_data = []

+        # Generate data for each date
+        current_dt = start_dt
         while current_dt <= end_dt:
-            date_str = current_dt.strftime("%Y-%m-%d")
-
-            # Create a copy of sample data for this date with some random variations
             date_df = sample_df.copy()
-            date_df['date'] = date_str
+            date_df['date'] = current_dt.strftime("%Y-%m-%d")

-            # Add some random variation to make it look more realistic
-            import random
+            # Add random variations to make it realistic
             for idx in date_df.index:
-                # Vary the success/failure counts slightly (±20%)
+                # Vary success/skipped counts (±20%)
                 for col in ['success_amd', 'success_nvidia', 'skipped_amd', 'skipped_nvidia']:
-                    if col in date_df.columns:
-                        original_val = date_df.loc[idx, col]
-                        if pd.notna(original_val) and original_val > 0:
-                            variation = random.uniform(0.8, 1.2)
-                            date_df.loc[idx, col] = max(0, int(original_val * variation))
+                    if col in date_df.columns and pd.notna(date_df.loc[idx, col]):
+                        val = date_df.loc[idx, col]
+                        if val > 0:
+                            date_df.loc[idx, col] = max(0, int(val * random.uniform(0.8, 1.2)))

-                # Vary failure counts more dramatically to show trends
+                # Vary failure counts more dramatically (±50-100%)
                 for col in ['failed_multi_no_amd', 'failed_multi_no_nvidia', 'failed_single_no_amd', 'failed_single_no_nvidia']:
-                    if col in date_df.columns:
-                        original_val = date_df.loc[idx, col]
-                        if pd.notna(original_val):
-                            # Sometimes have more failures, sometimes fewer
-                            variation = random.uniform(0.5, 2.0)
-                            date_df.loc[idx, col] = max(0, int(original_val * variation))
+                    if col in date_df.columns and pd.notna(date_df.loc[idx, col]):
+                        val = date_df.loc[idx, col]
+                        date_df.loc[idx, col] = max(0, int(val * random.uniform(0.5, 2.0)))

             historical_data.append(date_df)
             current_dt += timedelta(days=1)

         if not historical_data:
-            logger.warning("No fake historical data generated")
             return pd.DataFrame()

-        # Combine all dataframes
         combined_df = pd.concat(historical_data, ignore_index=False)
         logger.info(f"Generated fake historical data: {len(combined_df)} records from {start_date} to {end_date}")
         return combined_df
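Aside (illustrative, not part of the commit): a toy sketch of the jitter applied above. Column names match the commit; the values are invented:

    import random
    import pandas as pd

    df = pd.DataFrame({"success_amd": [100], "failed_single_no_amd": [4]})
    for idx in df.index:
        # ±20% on success counts, ±50-100% on failure counts, floored at 0
        df.loc[idx, "success_amd"] = max(0, int(df.loc[idx, "success_amd"] * random.uniform(0.8, 1.2)))
        df.loc[idx, "failed_single_no_amd"] = max(0, int(df.loc[idx, "failed_single_no_amd"] * random.uniform(0.5, 2.0)))
    print(df)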
@@ -377,53 +354,23 @@ def get_fake_historical_data(start_date: str, end_date: str) -> pd.DataFrame:
         logger.error(f"Error generating fake historical data: {e}")
         return pd.DataFrame()

-def safe_extract(row: pd.DataFrame, key: str) -> int:
-    return int(row.get(key, 0)) if pd.notna(row.get(key, 0)) else 0
-
-
 def find_failure_first_seen(historical_df: pd.DataFrame, model_name: str, test_name: str, device: str, gpu_type: str) -> Optional[str]:
-    """
-    Find the first date when a specific test failure appeared in historical data.
-    """
+    """Find the first date when a specific test failure appeared in historical data."""
     if historical_df.empty:
         return None

     try:
-        # Normalize model name to match DataFrame index
-        model_name_lower = model_name.lower()
-
-        # Filter historical data for this model
-        model_data = historical_df[historical_df.index == model_name_lower].copy()
-
+        model_data = historical_df[historical_df.index == model_name.lower()].copy()
         if model_data.empty:
             return None

-        # Sort by date (oldest first)
-        model_data = model_data.sort_values('date')
-
-        # Check each date for this failure
-        for idx, row in model_data.iterrows():
-            failures = row.get(f'failures_{device}', None)
-
-            if failures is None or pd.isna(failures):
-                continue
-
-            # Handle case where failures might be a string (JSON)
-            if isinstance(failures, str):
-                try:
-                    import json
-                    failures = json.loads(failures)
-                except:
-                    continue
-
-            # Check if this test appears in the failures for this gpu_type
+        # Check each date (oldest first) for this failure
+        for _, row in model_data.sort_values('date').iterrows():
+            failures = parse_json_field(row.get(f'failures_{device}'))
             if gpu_type in failures:
                 for test in failures[gpu_type]:
-                    test_line = test.get('line', '')
-                    if test_line == test_name:
-                        # Found the first occurrence
-                        return row.get('date', None)
-
+                    if test.get('line', '') == test_name:
+                        return row.get('date')
+
         return None

     except Exception as e:
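Aside (illustrative, not part of the commit): a failures_{device} payload has roughly this shape, as implied by the code above (test names and values are invented), and the first-seen scan is a membership test over it:

    failures = {
        "single": [{"line": "tests/models/bert/test_modeling_bert.py::BertModelTest::test_forward"}],
        "multi": [],
    }

    test_name = "tests/models/bert/test_modeling_bert.py::BertModelTest::test_forward"
    found = any(test.get("line", "") == test_name for test in failures.get("single", []))
    print(found)  # True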
@@ -431,148 +378,89 @@ def find_failure_first_seen(historical_df: pd.DataFrame, model_name: str, test_n
         return None


+def _find_device_regressions(model_name: str, current_failures: dict, yesterday_failures: dict, device: str) -> list[dict]:
+    """Helper to find regressions for a specific device."""
+    regressions = []
+    for gpu_type in ['single', 'multi']:
+        current_tests = get_test_names(current_failures.get(gpu_type, []))
+        yesterday_tests = get_test_names(yesterday_failures.get(gpu_type, []))
+
+        # Find NEW failures: failing NOW but NOT yesterday
+        new_tests = current_tests - yesterday_tests
+        for test_name in new_tests:
+            if test_name:  # Skip empty names
+                regressions.append({
+                    'model': model_name,
+                    'test': test_name.split('::')[-1],  # Short name
+                    'test_full': test_name,  # Full name
+                    'device': device,
+                    'gpu_type': gpu_type
+                })
+    return regressions
+
 def find_new_regressions(current_df: pd.DataFrame, historical_df: pd.DataFrame) -> list[dict]:
-    """
-    Compare CURRENT failures against PREVIOUS day's failures to find NEW regressions.
-
-    A regression is a test that:
-    - Is failing in the CURRENT/LATEST run (current_df)
-    - Was NOT failing in the PREVIOUS run (yesterday in historical_df)
-    """
+    """Compare current failures against previous day's failures to find new regressions."""
     if current_df.empty or historical_df.empty:
         return []

-    new_regressions = []
-
-    # Get the most recent date from historical data (this is "yesterday")
+    # Get yesterday's data
     available_dates = sorted(historical_df['date'].unique(), reverse=True)
-    if len(available_dates) < 1:
-        # No history to compare against
+    if not available_dates:
         return []

-    yesterday_date = available_dates[0]
-    yesterday_data = historical_df[historical_df['date'] == yesterday_date]
+    yesterday_data = historical_df[historical_df['date'] == available_dates[0]]
+    new_regressions = []

-    # For each model in current data, compare against yesterday
+    # For each model, compare current vs yesterday
     for model_name in current_df.index:
-        model_name_lower = model_name.lower()
-
-        # Get CURRENT failures from current_df
         current_row = current_df.loc[model_name]
+        yesterday_row = yesterday_data[yesterday_data.index == model_name.lower()]

-        # Get YESTERDAY's failures from historical_df
-        yesterday_row = yesterday_data[yesterday_data.index == model_name_lower]
-        yesterday_failures_amd = {}
-        yesterday_failures_nvidia = {}
+        # Parse current failures
+        current_amd = parse_json_field(current_row.get('failures_amd', {}))
+        current_nvidia = parse_json_field(current_row.get('failures_nvidia', {}))

+        # Parse yesterday failures
+        yesterday_amd = {}
+        yesterday_nvidia = {}
         if not yesterday_row.empty:
             yesterday_row = yesterday_row.iloc[0]
-            yesterday_failures_amd = yesterday_row.get('failures_amd', {})
-            yesterday_failures_nvidia = yesterday_row.get('failures_nvidia', {})
-
-            # Handle string/dict conversion
-            if isinstance(yesterday_failures_amd, str):
-                try:
-                    yesterday_failures_amd = json.loads(yesterday_failures_amd)
-                except:
-                    yesterday_failures_amd = {}
-            if isinstance(yesterday_failures_nvidia, str):
-                try:
-                    yesterday_failures_nvidia = json.loads(yesterday_failures_nvidia)
-                except:
-                    yesterday_failures_nvidia = {}
-
-        # Get CURRENT failures
-        current_failures_amd = current_row.get('failures_amd', {})
-        current_failures_nvidia = current_row.get('failures_nvidia', {})
-
-        # Handle string/dict conversion
-        if isinstance(current_failures_amd, str):
-            try:
-                current_failures_amd = json.loads(current_failures_amd)
-            except:
-                current_failures_amd = {}
-        if isinstance(current_failures_nvidia, str):
-            try:
-                current_failures_nvidia = json.loads(current_failures_nvidia)
-            except:
-                current_failures_nvidia = {}
-
-        # Check AMD failures - find tests failing NOW but NOT yesterday
-        for gpu_type in ['single', 'multi']:
-            current_tests = current_failures_amd.get(gpu_type, [])
-            yesterday_tests = yesterday_failures_amd.get(gpu_type, [])
-
-            # Get test names
-            current_test_names = {test.get('line', '') for test in current_tests}
-            yesterday_test_names = {test.get('line', '') for test in yesterday_tests}
-
-            # Find NEW failures: failing NOW but NOT yesterday
-            new_tests = current_test_names - yesterday_test_names
-            for test_name in new_tests:
-                if test_name:  # Skip empty names
-                    new_regressions.append({
-                        'model': model_name,
-                        'test': test_name.split('::')[-1],  # Short name
-                        'test_full': test_name,  # Full name
-                        'device': 'amd',
-                        'gpu_type': gpu_type
-                    })
-
-        # Check NVIDIA failures - find tests failing NOW but NOT yesterday
-        for gpu_type in ['single', 'multi']:
-            current_tests = current_failures_nvidia.get(gpu_type, [])
-            yesterday_tests = yesterday_failures_nvidia.get(gpu_type, [])
-
-            # Get test names
-            current_test_names = {test.get('line', '') for test in current_tests}
-            yesterday_test_names = {test.get('line', '') for test in yesterday_tests}
-
-            # Find NEW failures: failing NOW but NOT yesterday
-            new_tests = current_test_names - yesterday_test_names
-            for test_name in new_tests:
-                if test_name:  # Skip empty names
-                    new_regressions.append({
-                        'model': model_name,
-                        'test': test_name.split('::')[-1],  # Short name
-                        'test_full': test_name,  # Full name
-                        'device': 'nvidia',
-                        'gpu_type': gpu_type
-                    })
+            yesterday_amd = parse_json_field(yesterday_row.get('failures_amd', {}))
+            yesterday_nvidia = parse_json_field(yesterday_row.get('failures_nvidia', {}))
+
+        # Find regressions for both devices
+        new_regressions.extend(_find_device_regressions(model_name, current_amd, yesterday_amd, 'amd'))
+        new_regressions.extend(_find_device_regressions(model_name, current_nvidia, yesterday_nvidia, 'nvidia'))

     return new_regressions


 def extract_model_data(row: pd.Series) -> tuple[dict[str, int], dict[str, int], int, int, int, int]:
     """Extract and process model data from DataFrame row."""
-    # Handle missing values and get counts directly from dataframe
-    success_nvidia = safe_extract(row, "success_nvidia")
-    success_amd = safe_extract(row, "success_amd")
-
-    skipped_nvidia = safe_extract(row, "skipped_nvidia")
-    skipped_amd = safe_extract(row, "skipped_amd")
-
-    failed_multi_amd = safe_extract(row, 'failed_multi_no_amd')
-    failed_multi_nvidia = safe_extract(row, 'failed_multi_no_nvidia')
-    failed_single_amd = safe_extract(row, 'failed_single_no_amd')
-    failed_single_nvidia = safe_extract(row, 'failed_single_no_nvidia')
-    # Calculate total failures
-    total_failed_amd = failed_multi_amd + failed_single_amd
-    total_failed_nvidia = failed_multi_nvidia + failed_single_nvidia
-    # Create stats dictionaries directly from dataframe values
+    # Extract all counts
+    counts = {key: safe_extract(row, key) for key in [
+        'success_amd', 'success_nvidia', 'skipped_amd', 'skipped_nvidia',
+        'failed_multi_no_amd', 'failed_multi_no_nvidia',
+        'failed_single_no_amd', 'failed_single_no_nvidia'
+    ]}
+
+    # Create stats dictionaries
     amd_stats = {
-        'passed': success_amd,
-        'failed': total_failed_amd,
-        'skipped': skipped_amd,
-        'error': 0  # Not available in this dataset
+        'passed': counts['success_amd'],
+        'failed': counts['failed_multi_no_amd'] + counts['failed_single_no_amd'],
+        'skipped': counts['skipped_amd'],
+        'error': 0
     }
     nvidia_stats = {
-        'passed': success_nvidia,
-        'failed': total_failed_nvidia,
-        'skipped': skipped_nvidia,
-        'error': 0  # Not available in this dataset
+        'passed': counts['success_nvidia'],
+        'failed': counts['failed_multi_no_nvidia'] + counts['failed_single_no_nvidia'],
+        'skipped': counts['skipped_nvidia'],
+        'error': 0
     }
-    return amd_stats, nvidia_stats, failed_multi_amd, failed_single_amd, failed_multi_nvidia, failed_single_nvidia
+
+    return (amd_stats, nvidia_stats, counts['failed_multi_no_amd'],
+            counts['failed_single_no_amd'], counts['failed_multi_no_nvidia'],
+            counts['failed_single_no_nvidia'])
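Aside (illustrative, not part of the commit): the regression check factored into _find_device_regressions is just a set difference per device and GPU type. A sketch with invented test names:

    current = {"tests/a.py::test_x", "tests/a.py::test_y"}
    yesterday = {"tests/a.py::test_x"}

    new_tests = current - yesterday                 # failing now but not yesterday
    print(new_tests)                                # {'tests/a.py::test_y'} -> a new regression
    print({t.split("::")[-1] for t in new_tests})   # {'test_y'} -- the short name stored in 'test'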
 
@@ -643,19 +531,11 @@ class CIResults:
         """Load all available historical data at startup."""
         try:
             if not self.available_dates:
-                # Generate fake dates when no real dates are available
-                fake_dates = []
-                today = datetime.now()
-                for i in range(7):
-                    date = today - timedelta(days=i)
-                    fake_dates.append(date.strftime("%Y-%m-%d"))
-                self.available_dates = fake_dates
+                self.available_dates = generate_fake_dates()
                 logger.info(f"No available dates found, generated {len(self.available_dates)} sample dates.")
-
-            logger.info(f"Loading all historical data for {len(self.available_dates)} dates...")
-            start_date = self.available_dates[-1]  # Oldest date
-            end_date = self.available_dates[0]  # Newest date

+            logger.info(f"Loading all historical data for {len(self.available_dates)} dates...")
+            start_date, end_date = self.available_dates[-1], self.available_dates[0]
             self.all_historical_data = get_historical_data(start_date, end_date, self.sample_data)
             logger.info(f"All historical data loaded: {len(self.all_historical_data)} records")
         except Exception as e:
@@ -672,17 +552,15 @@ class CIResults:
             self.historical_df = pd.DataFrame()
             return

-        # Filter the pre-loaded data by date range
+        # Filter by date range
         start_dt = datetime.strptime(start_date, "%Y-%m-%d")
         end_dt = datetime.strptime(end_date, "%Y-%m-%d")

-        # Filter data within the date range
-        filtered_data = []
-        for date_str in self.all_historical_data['date'].unique():
-            date_dt = datetime.strptime(date_str, "%Y-%m-%d")
-            if start_dt <= date_dt <= end_dt:
-                date_data = self.all_historical_data[self.all_historical_data['date'] == date_str]
-                filtered_data.append(date_data)
+        filtered_data = [
+            self.all_historical_data[self.all_historical_data['date'] == date_str]
+            for date_str in self.all_historical_data['date'].unique()
+            if start_dt <= datetime.strptime(date_str, "%Y-%m-%d") <= end_dt
+        ]

         if filtered_data:
             self.historical_df = pd.concat(filtered_data, ignore_index=False)
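Aside (illustrative, not part of the commit): the new range filter in list-comprehension form, on a toy frame with invented dates:

    from datetime import datetime
    import pandas as pd

    all_data = pd.DataFrame({"date": ["2025-01-01", "2025-01-02", "2025-01-05"], "x": [1, 2, 3]})
    start_dt, end_dt = datetime(2025, 1, 1), datetime(2025, 1, 3)

    filtered = [
        all_data[all_data["date"] == d]
        for d in all_data["date"].unique()
        if start_dt <= datetime.strptime(d, "%Y-%m-%d") <= end_dt
    ]
    print(pd.concat(filtered)["date"].tolist())  # ['2025-01-01', '2025-01-02']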
 