omoku-analysis / update_data.py
github-actions
added check for new data updates
3e6893b
raw
history blame
3.67 kB
import gspread
import logging
import numpy as np
import os, sys
import pandas as pd
import requests
import subprocess
from oauth2client.service_account import ServiceAccountCredentials
# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
def read_existing_data(file_path):
"""Reads existing data from a CSV file to compare with newly fetched data."""
if os.path.exists(file_path):
logging.info("Existing data file found. Reading data...")
e_df = pd.read_csv(file_path)
logging.info(f"Data read successfully.")
return e_df
else:
logging.info("No existing data file found.")
return pd.DataFrame()
def download_sheet(sheet_id, range_name):
"""Downloads data from Google Sheets and returns a DataFrame."""
try:
# Define the scope
scope = ['https://www.googleapis.com/auth/spreadsheets']
# Add credentials to the account
creds = ServiceAccountCredentials.from_json_keyfile_name('omoku-analysis-cred_key.json', scope)
# Authorize the clientsheet
client = gspread.authorize(creds)
# Get the instance of the Spreadsheet
sheet = client.open_by_key(sheet_id)
# Get the sheet by name
worksheet = sheet.get_worksheet(0)
# Get all records of the data
data = worksheet.get_all_records()
# Convert to DataFrame
df = pd.DataFrame(data).astype(str)
df.replace('', np.nan, inplace=True)
df = df.astype({'Power_time': 'float', 'Outages':'float'})
logging.info(f"New data downloaded successfully.")
return df
except gspread.exceptions.APIError as e:
logging.error(f"API error occurred: {e}")
raise
except requests.exceptions.RequestException as e:
logging.error(f"Request failed: {e}")
raise
def save_to_csv(df, file_path):
if df.empty:
logging.error("No data to save: DataFrame is empty.")
raise ValueError("Empty DataFrame")
try:
df.to_csv(file_path, index=False)
logging.info(f"Data saved successfully.")
except Exception as e:
logging.error(f"Failed to save data: {e}")
raise
def git_commit_push():
"""Sets git configurations, commits, and pushes updated CSV file to GitHub."""
try:
subprocess.run(['git', 'config', '--global', 'user.name', 'github-actions'], check=True)
subprocess.run(['git', 'config', '--global', 'user.email', '[email protected]'], check=True)
subprocess.run(['git', 'add', 'omoku_data.csv'], check=True)
subprocess.run(['git', 'commit', '-m', 'Update dataset'], check=True)
subprocess.run(['git', 'push'], check=True)
logging.info(f"Data updated successfully.")
except subprocess.CalledProcessError as e:
if "nothing to commit" in str(e.stderr):
logging.info("No changes to commit.")
else:
logging.error(f"Failed to commit or push changes: {e}")
raise
if __name__ == "__main__":
try:
SHEET_ID = '1dVa6SGm1j-z20NUDUlWSJgQffXzvJZ_a33wT_O5EOUk'
RANGE_NAME = 'data'
FILE_PATH = 'omoku_data.csv'
existing_data = read_existing_data(FILE_PATH)
new_data = download_sheet(SHEET_ID, RANGE_NAME)
if new_data.equals(existing_data):
logging.info("No new entry for new data. Nothing to update.")
sys.exit(0)
else:
save_to_csv(new_data, FILE_PATH)
git_commit_push()
except Exception as e:
logging.critical(f"An unexpected error occurred: {e}")
raise