|
|
import codecs
import csv
import http.client
import os
import re
import sys
import urllib.error
import urllib.request
from datetime import date, timedelta
from io import StringIO

import pandas as pd
|
|
|
|
|
|
|
|
def pollution_data():
    """Download pollution measurements and write one CSV per particle per day.

    The window covers the date one year (365 days) before today, from seven
    days earlier up to three days later. For every day in the window and
    every particle ("NO2", "O3"), the measurements endpoint of
    api.luchtmeetnet.nl is queried once per station for the 24 hours ending
    at 09:00Z of that day. Each response body is parsed with
    ``pd.read_csv`` and only the columns whose label contains "value" are
    kept; the frames of all stations are concatenated and written to
    ``{particle}_{YYYY-MM-DD}T09:00:00Z.csv`` in the current working
    directory.

    Returns:
        None. The result is the set of CSV files written to disk.
    """
    particles = ["NO2", "O3"]
    stations = ["NL10636", "NL10639", "NL10643"]

    last_year_date = date.today() - timedelta(days=365)
    start_date = last_year_date - timedelta(days=7)
    end_date = last_year_date + timedelta(days=3)
    date_list = [
        start_date + timedelta(days=x)
        for x in range((end_date - start_date).days + 1)
    ]

    for current_date in date_list:
        today = current_date.isoformat() + "T09:00:00Z"
        yesterday = (current_date - timedelta(1)).isoformat() + "T09:00:00Z"

        for particle in particles:
            all_dataframes = []

            for station in stations:
                conn = http.client.HTTPSConnection("api.luchtmeetnet.nl")
                payload = ""
                headers = {}
                try:
                    conn.request(
                        "GET",
                        f"/open_api/measurements?station_number={station}&formula={particle}&page=1&order_by=timestamp_measured&order_direction=desc&end={today}&start={yesterday}",
                        payload,
                        headers,
                    )
                    data = conn.getresponse().read()
                finally:
                    # Always release the socket; one connection is opened per
                    # station request, so leaking them adds up quickly.
                    conn.close()

                decoded_data = data.decode("utf-8")
                # NOTE(review): the body is fed to read_csv as-is and only the
                # "value"-labelled columns survive; clean_values() later pulls
                # the numbers out of the written file with a regex.
                df = pd.read_csv(StringIO(decoded_data))
                all_dataframes.append(df.filter(like="value"))

            if all_dataframes:
                combined_data = pd.concat(all_dataframes, ignore_index=True)
                combined_data.to_csv(f"{particle}_{today}.csv", index=False)
|
|
|
|
|
|
|
|
def delete_csv(csvs):
    """Remove every path in *csvs* that refers to an existing regular file.

    Paths that do not exist, or that point at something other than a file
    (for example a directory), are left untouched.
    """
    # os.path.isfile() is already False for paths that do not exist.
    removable = [path for path in csvs if os.path.isfile(path)]
    for path in removable:
        os.remove(path)
|
|
|
|
|
|
|
|
def clean_values():
    """Average the numbers in each daily pollution CSV, then delete the files.

    The expected file names are rebuilt from the same date window used when
    downloading: the date 365 days before today, from seven days earlier up
    to three days later, one ``{particle}_{YYYY-MM-DD}T09:00:00Z.csv`` per
    particle per day, looked up in the current working directory.

    For every file that exists, the first number found in each CSV cell is
    collected and the mean of those numbers is appended to the list of the
    matching particle.

    Returns:
        tuple[list[float], list[float]]: ``(NO2, O3)`` daily averages in
        chronological order. Days whose file is missing, or whose file
        contains no number, are skipped, so a list can be shorter than the
        number of days in the window.
    """
    particles = ["NO2", "O3"]
    csvs = []
    NO2 = []
    O3 = []

    last_year_date = date.today() - timedelta(days=365)
    start_date = last_year_date - timedelta(days=7)
    end_date = last_year_date + timedelta(days=3)
    date_list = [
        start_date + timedelta(days=x)
        for x in range((end_date - start_date).days + 1)
    ]

    for current_date in date_list:
        today = current_date.isoformat() + "T09:00:00Z"
        for particle in particles:
            csvs.append(f"{particle}_{today}.csv")

    # The optional sign must apply to both alternatives; without the group a
    # negative integer such as "-5" would be read as 5.
    number_pattern = re.compile(r"[-+]?(?:\d*\.\d+|\d+)")

    for csv_file in csvs:
        if not os.path.exists(csv_file):
            continue

        values = []
        # newline="" lets the csv module handle line endings itself.
        with open(csv_file, "r", newline="") as file:
            for row in csv.reader(file):
                for value in row:
                    match = number_pattern.search(value)
                    if match:
                        values.append(float(match.group(0)))

        if values:
            avg = sum(values) / len(values)
            if "NO2" in csv_file:
                NO2.append(avg)
            else:
                O3.append(avg)

    delete_csv(csvs)
    return NO2, O3
|
|
|
|
|
|
|
|
def add_columns():
    """Load the downloaded past-weather CSV and add empty placeholder columns.

    Reads ``past_weather_data.csv`` from the directory containing this
    module, which is where ``weather_data()`` writes its download, and
    inserts three columns filled with ``None``: "NO2" at position 1, "O3" at
    position 2 and "weekday" at position 10.

    Returns:
        pandas.DataFrame: the weather data with the three extra columns.
    """
    # Resolve the file next to this module rather than relative to the
    # current working directory, mirroring how weather_data() builds its
    # output path.
    current_dir = os.path.dirname(os.path.realpath(__file__))
    file_path = os.path.join(current_dir, "past_weather_data.csv")
    df = pd.read_csv(file_path)

    # The positions assume the CSV holds at least eight columns, so that
    # index 10 is a valid insertion point once NO2 and O3 have been added.
    df.insert(1, "NO2", None)
    df.insert(2, "O3", None)
    df.insert(10, "weekday", None)

    return df
|
|
|
|
|
|
|
|
def scale(data):
    """Reorder, rename, sort and integer-scale the weather columns.

    Steps, in order:
      1. Three positional column moves are applied one after another
         (index 6 -> 3, index 9 -> 5, index 6 -> 9).
      2. The weather columns are renamed to the names used downstream.
      3. "date" is parsed to datetimes, "weekday" is filled with the day
         name, and rows are sorted by date with a fresh index.
      4. wind_speed becomes ``value / 3.6 * 10``; mean_temp,
         minimum_visibility and percipitation are multiplied by 10; those
         columns plus pressure, humidity and global_radiation are cast to
         int (truncating).

    Args:
        data: DataFrame laid out like the result of ``add_columns()``.
            NOTE(review): the moves are positional, so the incoming column
            order matters — confirm against the CSV layout.

    Returns:
        pandas.DataFrame: a new, transformed frame.
    """
    ordered = list(data.columns)
    # (destination, source) pairs; each move acts on the result of the last.
    for destination, source in ((3, 6), (5, 9), (9, 6)):
        moved = ordered.pop(source)
        ordered.insert(destination, moved)

    new_names = {
        "datetime": "date",
        "windspeed": "wind_speed",
        "temp": "mean_temp",
        "solarradiation": "global_radiation",
        "precip": "percipitation",
        "sealevelpressure": "pressure",
        "visibility": "minimum_visibility",
    }
    frame = data[ordered].rename(columns=new_names)

    frame["date"] = pd.to_datetime(frame["date"])
    frame["weekday"] = frame["date"].dt.day_name()
    frame = frame.sort_values(by="date").reset_index(drop=True)

    # NOTE(review): dividing by 3.6 looks like a km/h -> m/s conversion, with
    # the factor 10 storing tenths of a unit — confirm the intended units.
    frame["wind_speed"] = ((frame["wind_speed"] / 3.6) * 10).astype(int)

    for label in ("mean_temp", "minimum_visibility", "percipitation"):
        frame[label] = (frame[label] * 10).astype(int)

    for label in ("pressure", "humidity", "global_radiation"):
        frame[label] = frame[label].astype(int)

    return frame
|
|
|
|
|
|
|
|
def insert_pollution(NO2, O3, data):
    """Write the pollution averages into the "NO2" and "O3" columns of *data*.

    The frame is modified in place and the same object is returned.

    Args:
        NO2: values assigned to the "NO2" column.
        O3: values assigned to the "O3" column.
        data: DataFrame receiving the two columns.

    Returns:
        pandas.DataFrame: *data* itself, with both columns set.
    """
    for column, readings in (("NO2", NO2), ("O3", O3)):
        data[column] = readings
    return data
|
|
|
|
|
|
|
|
def weather_data():
    """Download last year's daily weather for Utrecht as a CSV file.

    Requests the Visual Crossing timeline endpoint for the window from seven
    days before to three days after the date 365 days ago, and writes the
    returned CSV rows to ``past_weather_data.csv`` in the directory that
    contains this module.

    Returns:
        None. The result is the file written to disk.

    Raises:
        SystemExit: with status 1 after printing the error, when the request
            fails with an HTTP error or the server cannot be reached.
    """
    last_year_date = date.today() - timedelta(days=365)
    start_date = (last_year_date - timedelta(days=7)).isoformat()
    end_date = (last_year_date + timedelta(days=3)).isoformat()

    # NOTE(review): the API key is committed in source as part of this URL;
    # consider rotating it and loading it from configuration instead.
    url = f"https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/Utrecht/{start_date}/{end_date}?unitGroup=metric&elements=datetime%2Cwindspeed%2Ctemp%2Csolarradiation%2Cprecip%2Cpressure%2Cvisibility%2Chumidity&include=days&key=7Y6AY56M6RWVNHQ3SAVHNJWFS&maxStations=1&contentType=csv"

    current_dir = os.path.dirname(os.path.realpath(__file__))
    file_path = os.path.join(current_dir, "past_weather_data.csv")

    try:
        # The context manager closes the HTTP response even if writing fails.
        with urllib.request.urlopen(url) as response:
            rows = csv.reader(codecs.iterdecode(response, "utf-8"))
            with open(file_path, "w", newline="", encoding="utf-8") as csvfile:
                csv.writer(csvfile).writerows(rows)
    except urllib.error.HTTPError as e:
        # HTTPError must be handled first: it is a subclass of URLError and
        # is the only one of the two that carries a status code and a body.
        error_info = e.read().decode()
        print("Error code: ", e.code, error_info)
        sys.exit(1)
    except urllib.error.URLError as e:
        # A plain URLError (DNS failure, refused connection, ...) has no
        # read() or code attribute; its cause is exposed as `reason`.
        print("Error: ", e.reason)
        sys.exit(1)
|
|
|
|
|
|
|
|
def get_past_data():
    """Run the whole past-data pipeline and return the combined DataFrame.

    Downloads the weather CSV and the pollution CSVs, averages the pollution
    values, loads and rescales the weather data, and inserts the NO2 and O3
    averages into it. The temporary ``past_weather_data.csv`` written by
    ``weather_data()`` is removed afterwards, whether or not the pipeline
    succeeded.

    Returns:
        pandas.DataFrame: the scaled weather data with "NO2" and "O3" filled.
    """
    # weather_data() writes next to this module, so delete from the same
    # place instead of relying on the current working directory.
    current_dir = os.path.dirname(os.path.realpath(__file__))
    weather_file = os.path.join(current_dir, "past_weather_data.csv")

    try:
        weather_data()
        pollution_data()
        NO2, O3 = clean_values()
        df = add_columns()
        scaled_df = scale(df)
        output_df = insert_pollution(NO2, O3, scaled_df)
        return output_df
    finally:
        # Guarded so that a failed download (no file written) does not mask
        # the original error with a FileNotFoundError.
        if os.path.isfile(weather_file):
            os.remove(weather_file)
|
|
|