github-actions committed on
Commit
3e6893b
·
1 Parent(s): 737c9a0

added check for new data updates

Browse files
Files changed (1) hide show
  1. update_data.py +9 -6
update_data.py CHANGED
@@ -1,5 +1,6 @@
1
  import gspread
2
  import logging
 
3
  import os, sys
4
  import pandas as pd
5
  import requests
@@ -14,7 +15,7 @@ def read_existing_data(file_path):
14
  if os.path.exists(file_path):
15
  logging.info("Existing data file found. Reading data...")
16
  e_df = pd.read_csv(file_path)
17
- logging.info(f"Data read successfully: {e_df.shape[0]} rows and {e_df.shape[1]} columns.")
18
  return e_df
19
  else:
20
  logging.info("No existing data file found.")
@@ -42,9 +43,11 @@ def download_sheet(sheet_id, range_name):
42
  data = worksheet.get_all_records()
43
 
44
  # Convert to DataFrame
45
- n_df = pd.DataFrame(data)
46
- logging.info(f"Data downloaded successfully: {n_df.shape[0]} rows and {n_df.shape[1]} columns.")
47
- return n_df
 
 
48
  except gspread.exceptions.APIError as e:
49
  logging.error(f"API error occurred: {e}")
50
  raise
@@ -68,7 +71,7 @@ def git_commit_push():
68
  try:
69
  subprocess.run(['git', 'config', '--global', 'user.name', 'github-actions'], check=True)
70
  subprocess.run(['git', 'config', '--global', 'user.email', '[email protected]'], check=True)
71
- subprocess.run(['git', 'add', '.'], check=True)
72
  subprocess.run(['git', 'commit', '-m', 'Update dataset'], check=True)
73
  subprocess.run(['git', 'push'], check=True)
74
  logging.info(f"Data updated successfully.")
@@ -89,7 +92,7 @@ if __name__ == "__main__":
89
  new_data = download_sheet(SHEET_ID, RANGE_NAME)
90
 
91
  if new_data.equals(existing_data):
92
- logging.info("No new data to update.")
93
  sys.exit(0)
94
  else:
95
  save_to_csv(new_data, FILE_PATH)
 
1
  import gspread
2
  import logging
3
+ import numpy as np
4
  import os, sys
5
  import pandas as pd
6
  import requests
 
15
  if os.path.exists(file_path):
16
  logging.info("Existing data file found. Reading data...")
17
  e_df = pd.read_csv(file_path)
18
+ logging.info(f"Data read successfully.")
19
  return e_df
20
  else:
21
  logging.info("No existing data file found.")
 
43
  data = worksheet.get_all_records()
44
 
45
  # Convert to DataFrame
46
+ df = pd.DataFrame(data).astype(str)
47
+ df.replace('', np.nan, inplace=True)
48
+ df = df.astype({'Power_time': 'float', 'Outages':'float'})
49
+ logging.info(f"New data downloaded successfully.")
50
+ return df
51
  except gspread.exceptions.APIError as e:
52
  logging.error(f"API error occurred: {e}")
53
  raise
 
71
  try:
72
  subprocess.run(['git', 'config', '--global', 'user.name', 'github-actions'], check=True)
73
  subprocess.run(['git', 'config', '--global', 'user.email', '[email protected]'], check=True)
74
+ subprocess.run(['git', 'add', 'omoku_data.csv'], check=True)
75
  subprocess.run(['git', 'commit', '-m', 'Update dataset'], check=True)
76
  subprocess.run(['git', 'push'], check=True)
77
  logging.info(f"Data updated successfully.")
 
92
  new_data = download_sheet(SHEET_ID, RANGE_NAME)
93
 
94
  if new_data.equals(existing_data):
95
+ logging.info("No new entry for new data. Nothing to update.")
96
  sys.exit(0)
97
  else:
98
  save_to_csv(new_data, FILE_PATH)