elisaklunder committed on
Commit
f4930a4
·
1 Parent(s): 3dd6a8c

data finally working

Browse files
app.py CHANGED
@@ -35,7 +35,6 @@ no2_values = pd.concat([no2_past_values, no2_future_values], ignore_index=True)
35
  dates = dates_past + dates_future
36
  df = pd.DataFrame({"Date": dates, "O3": o3_values, "NO2": no2_values})
37
 
38
-
39
  # App Title
40
  st.title("Utrecht Pollution Dashboard🌱")
41
 
 
35
  dates = dates_past + dates_future
36
  df = pd.DataFrame({"Date": dates, "O3": o3_values, "NO2": no2_values})
37
 
 
38
  # App Title
39
  st.title("Utrecht Pollution Dashboard🌱")
40
 
past_pollution_data.csv ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ date,NO2,O3
2
+ 2023-10-18,10.842702702702699,39.81260000000001
3
+ 2023-10-19,17.97026666666666,31.779024390243908
4
+ 2023-10-20,17.233055555555563,18.7156
5
+ 2023-10-21,15.023599999999993,22.04
6
+ 2023-10-22,8.723378378378372,48.33439999999999
7
+ 2023-10-23,20.634266666666676,15.586000000000002
8
+ 2023-10-24,15.115599999999999,24.628085106382972
9
+ 2023-10-25,22.885675675675678,27.117599999999992
10
+ 2023-10-26,21.531756756756756,13.3216
11
+ 2023-10-27,23.07226666666666,16.15416666666666
12
+ 2023-10-28,24.89121621621622,24.59040816326531
past_weather_data.csv ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ date,temp,humidity,precip,windspeed,sealevelpressure,visibility,solarradiation
2
+ 2023-10-17,8.5,84.8,0.0,22.3,1019.3,34.8,75.2
3
+ 2023-10-18,9.0,77.9,2.3,25.9,1006.0,23.8,71.2
4
+ 2023-10-19,14.5,94.0,11.4,22.3,990.8,21.2,39.8
5
+ 2023-10-20,11.9,97.4,20.4,25.9,981.0,10.4,7.0
6
+ 2023-10-21,13.1,88.0,3.5,22.3,989.4,27.7,39.9
7
+ 2023-10-22,12.1,87.3,3.9,25.9,1003.6,32.3,55.9
8
+ 2023-10-23,9.9,95.7,0.5,18.0,1011.1,5.9,43.8
9
+ 2023-10-24,11.6,92.3,6.5,22.3,1001.3,23.1,32.6
10
+ 2023-10-25,9.3,96.8,15.3,18.0,996.8,15.7,14.5
11
+ 2023-10-26,9.4,97.6,0.1,11.2,995.6,4.8,36.0
12
+ 2023-10-27,10.6,97.9,11.4,14.8,992,9.5,20.5
src/data_api_calls.py CHANGED
@@ -58,85 +58,61 @@ def update_pollution_data():
58
  all_dataframes = []
59
  today = date.today().isoformat() + "T09:00:00Z"
60
  yesterday = (date.today() - timedelta(1)).isoformat() + "T09:00:00Z"
61
- latest_date = (date.today() - timedelta(8)).isoformat() + "T09:00:00Z"
62
- days_today = 0
63
- days_yesterday = 1
64
- while today != latest_date:
65
- days_today += 1
66
- days_yesterday += 1
67
- for particle in particles:
68
- for station in stations:
69
- conn = http.client.HTTPSConnection("api.luchtmeetnet.nl")
70
- payload = ""
71
- headers = {}
72
- conn.request(
73
- "GET",
74
- f"/open_api/measurements?station_number={station}&formula={particle}&page=1&order_by=timestamp_measured&order_direction=desc&end={today}&start={yesterday}",
75
- payload,
76
- headers,
77
- )
78
- res = conn.getresponse()
79
- data = res.read()
80
- decoded_data = data.decode("utf-8")
81
- df = pd.read_csv(StringIO(decoded_data))
82
- df = df.filter(like="value")
83
- all_dataframes.append(df)
84
- combined_data = pd.concat(all_dataframes, ignore_index=True)
85
- values = []
86
- for row in combined_data:
87
- cleaned_value = re.findall(r"[-+]?\d*\.\d+|\d+", row)
88
- if cleaned_value: # If we successfully extract a number
89
- values.append(
90
- float(cleaned_value[0])
91
- ) # Convert the first match to float
92
-
93
- # Compute the average if the values list is not empty
94
- if values:
95
- avg = sum(values) / len(values)
96
- if particle == "NO2":
97
- NO2.append(avg)
98
- else:
99
- O3.append(avg)
100
- today = (date.today() - timedelta(days_today)).isoformat() + "T09:00:00Z"
101
- yesterday = (
102
- date.today() - timedelta(days_yesterday)
103
- ).isoformat() + "T09:00:00Z"
104
-
105
- avg_combined_data = pd.DataFrame(
106
  {
107
- "date": pd.date_range(end=date.today(), periods=len(NO2)),
108
  "NO2": NO2,
109
  "O3": O3,
110
  }
111
  )
112
 
113
- avg_combined_data = reverse_pollution(NO2, O3, avg_combined_data)
114
-
115
- if os.path.exists(POLLUTION_DATA_FILE):
116
- existing_data = pd.read_csv(POLLUTION_DATA_FILE)
117
- last_date = pd.to_datetime(existing_data["date"]).max()
118
- new_data = avg_combined_data[avg_combined_data["date"] > last_date]
119
- updated_data = pd.concat([existing_data, new_data], ignore_index=True)
120
- updated_data.drop_duplicates(subset="date", keep="last", inplace=True)
121
- else:
122
- updated_data = avg_combined_data
123
 
124
  updated_data.to_csv(POLLUTION_DATA_FILE, index=False)
125
 
126
 
127
- def reverse_pollution(NO2, O3, data):
128
- df = data
129
- start_index = 0
130
- while NO2:
131
- df.loc[start_index, "NO2"] = NO2.pop()
132
- start_index += 1
133
- start_index = 0
134
- while O3:
135
- df.loc[start_index, "O3"] = O3.pop()
136
- start_index += 1
137
- return df
138
-
139
-
140
  def get_combined_data():
141
  update_weather_data()
142
  update_pollution_data()
@@ -153,7 +129,7 @@ def get_combined_data():
153
  weather_df = weather_df[columns]
154
  columns.insert(9, columns.pop(6))
155
  weather_df = weather_df[columns]
156
-
157
  combined_df = weather_df
158
 
159
  # Apply scaling and renaming similar to the scale function from previous code
@@ -185,7 +161,7 @@ def get_combined_data():
185
  combined_df["pressure"] = combined_df["pressure"].astype(int)
186
  combined_df["humidity"] = combined_df["humidity"].astype(int)
187
  combined_df["global_radiation"] = combined_df["global_radiation"].astype(int)
188
-
189
  pollution_df = pd.read_csv(POLLUTION_DATA_FILE)
190
  combined_df["NO2"] = pollution_df["NO2"]
191
  combined_df["O3"] = pollution_df["O3"]
 
58
  all_dataframes = []
59
  today = date.today().isoformat() + "T09:00:00Z"
60
  yesterday = (date.today() - timedelta(1)).isoformat() + "T09:00:00Z"
61
+
62
+ if os.path.exists(POLLUTION_DATA_FILE):
63
+ existing_data = pd.read_csv(POLLUTION_DATA_FILE)
64
+ last_date = pd.to_datetime(existing_data["date"]).max()
65
+ if last_date >= pd.Timestamp(date.today()):
66
+ print("Data is already up to date.")
67
+ return
68
+
69
+ # Only pull data for today if not already updated
70
+ for particle in particles:
71
+ for station in stations:
72
+ conn = http.client.HTTPSConnection("api.luchtmeetnet.nl")
73
+ payload = ""
74
+ headers = {}
75
+ conn.request(
76
+ "GET",
77
+ f"/open_api/measurements?station_number={station}&formula={particle}&page=1&order_by=timestamp_measured&order_direction=desc&end={today}&start={yesterday}",
78
+ payload,
79
+ headers,
80
+ )
81
+ res = conn.getresponse()
82
+ data = res.read()
83
+ decoded_data = data.decode("utf-8")
84
+ df = pd.read_csv(StringIO(decoded_data))
85
+ df = df.filter(like="value")
86
+ all_dataframes.append(df)
87
+ combined_data = pd.concat(all_dataframes, ignore_index=True)
88
+ values = []
89
+
90
+ for row in combined_data:
91
+ cleaned_value = re.findall(r"[-+]?\d*\.\d+|\d+", row)
92
+ if cleaned_value:
93
+ values.append(float(cleaned_value[0]))
94
+
95
+ if values:
96
+ avg = sum(values) / len(values)
97
+ if particle == "NO2":
98
+ NO2.append(avg)
99
+ else:
100
+ O3.append(avg)
101
+
102
+ new_data = pd.DataFrame(
 
 
 
103
  {
104
+ "date": [date.today()],
105
  "NO2": NO2,
106
  "O3": O3,
107
  }
108
  )
109
 
110
+ updated_data = pd.concat([existing_data, new_data], ignore_index=True)
111
+ updated_data.drop_duplicates(subset="date", keep="last", inplace=True)
 
 
 
 
 
 
 
 
112
 
113
  updated_data.to_csv(POLLUTION_DATA_FILE, index=False)
114
 
115
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  def get_combined_data():
117
  update_weather_data()
118
  update_pollution_data()
 
129
  weather_df = weather_df[columns]
130
  columns.insert(9, columns.pop(6))
131
  weather_df = weather_df[columns]
132
+
133
  combined_df = weather_df
134
 
135
  # Apply scaling and renaming similar to the scale function from previous code
 
161
  combined_df["pressure"] = combined_df["pressure"].astype(int)
162
  combined_df["humidity"] = combined_df["humidity"].astype(int)
163
  combined_df["global_radiation"] = combined_df["global_radiation"].astype(int)
164
+
165
  pollution_df = pd.read_csv(POLLUTION_DATA_FILE)
166
  combined_df["NO2"] = pollution_df["NO2"]
167
  combined_df["O3"] = pollution_df["O3"]
src/past_data_api_calls copy.py DELETED
@@ -1,199 +0,0 @@
1
- import codecs
2
- import csv
3
- import http.client
4
- import os
5
- import re
6
- import sys
7
- import urllib.request
8
- from datetime import date, timedelta
9
- from io import StringIO
10
-
11
- import pandas as pd
12
-
13
-
14
- def pollution_data():
15
- particles = ["NO2", "O3"]
16
- stations = ["NL10636", "NL10639", "NL10643"]
17
- last_year_date = date.today() - timedelta(days=365)
18
- start_date = last_year_date - timedelta(days=7)
19
- end_date = last_year_date + timedelta(days=3)
20
- date_list = [
21
- start_date + timedelta(days=x) for x in range((end_date - start_date).days + 1)
22
- ]
23
- for current_date in date_list:
24
- today = current_date.isoformat() + "T09:00:00Z"
25
- yesterday = (current_date - timedelta(1)).isoformat() + "T09:00:00Z"
26
- for particle in particles:
27
- all_dataframes = [] # Reset for each particle
28
- for station in stations:
29
- conn = http.client.HTTPSConnection("api.luchtmeetnet.nl")
30
- payload = ""
31
- headers = {}
32
- conn.request(
33
- "GET",
34
- f"/open_api/measurements?station_number={station}&formula={particle}&page=1&order_by=timestamp_measured&order_direction=desc&end={today}&start={yesterday}",
35
- payload,
36
- headers,
37
- )
38
- res = conn.getresponse()
39
- data = res.read()
40
- decoded_data = data.decode("utf-8")
41
- df = pd.read_csv(StringIO(decoded_data))
42
- df = df.filter(like="value")
43
- all_dataframes.append(df)
44
- if all_dataframes:
45
- combined_data = pd.concat(all_dataframes, ignore_index=True)
46
- combined_data.to_csv(f"{particle}_{today}.csv", index=False)
47
-
48
-
49
- def delete_csv(csvs):
50
- for csv_file in csvs:
51
- if os.path.exists(csv_file) and os.path.isfile(csv_file):
52
- os.remove(csv_file)
53
-
54
-
55
- def clean_values():
56
- particles = ["NO2", "O3"]
57
- csvs = []
58
- NO2 = []
59
- O3 = []
60
- last_year_date = date.today() - timedelta(days=365)
61
- start_date = last_year_date - timedelta(days=7)
62
- end_date = last_year_date + timedelta(days=3)
63
- date_list = [
64
- start_date + timedelta(days=x) for x in range((end_date - start_date).days + 1)
65
- ]
66
- for current_date in date_list:
67
- today = current_date.isoformat() + "T09:00:00Z"
68
- for particle in particles:
69
- name = f"{particle}_{today}.csv"
70
- csvs.append(name)
71
- for csv_file in csvs:
72
- if not os.path.exists(csv_file):
73
- continue # Skip if the file doesn't exist
74
- values = [] # Reset values for each CSV file
75
- # Open the CSV file and read the values
76
- with open(csv_file, "r") as file:
77
- reader = csv.reader(file)
78
- for row in reader:
79
- for value in row:
80
- # Use regular expressions to extract numeric part
81
- cleaned_value = re.findall(r"[-+]?\d*\.\d+|\d+", value)
82
- if cleaned_value: # If we successfully extract a number
83
- values.append(
84
- float(cleaned_value[0])
85
- ) # Convert the first match to float
86
-
87
- # Compute the average if the values list is not empty
88
- if values:
89
- avg = sum(values) / len(values)
90
- if "NO2" in csv_file:
91
- NO2.append(avg)
92
- else:
93
- O3.append(avg)
94
- delete_csv(csvs)
95
- return NO2, O3
96
-
97
-
98
- def add_columns():
99
- file_path = "weather_data.csv"
100
- df = pd.read_csv(file_path)
101
-
102
- df.insert(1, "NO2", None)
103
- df.insert(2, "O3", None)
104
- df.insert(10, "weekday", None)
105
-
106
- return df
107
-
108
-
109
- def scale(data):
110
- df = data
111
- columns = list(df.columns)
112
-
113
- columns.insert(3, columns.pop(6))
114
- df = df[columns]
115
-
116
- columns.insert(5, columns.pop(9))
117
- df = df[columns]
118
-
119
- columns.insert(9, columns.pop(6))
120
- df = df[columns]
121
-
122
- df = df.rename(
123
- columns={
124
- "datetime": "date",
125
- "windspeed": "wind_speed",
126
- "temp": "mean_temp",
127
- "solarradiation": "global_radiation",
128
- "precip": "percipitation",
129
- "sealevelpressure": "pressure",
130
- "visibility": "minimum_visibility",
131
- }
132
- )
133
-
134
- df["date"] = pd.to_datetime(df["date"])
135
- df["weekday"] = df["date"].dt.day_name()
136
-
137
- df = df.sort_values(by="date").reset_index(drop=True)
138
-
139
- df["wind_speed"] = (df["wind_speed"] / 3.6) * 10
140
- df["mean_temp"] = df["mean_temp"] * 10
141
- df["minimum_visibility"] = df["minimum_visibility"] * 10
142
- df["percipitation"] = df["percipitation"] * 10
143
- df["pressure"] = df["pressure"]
144
-
145
- df["wind_speed"] = df["wind_speed"].astype(int)
146
- df["mean_temp"] = df["mean_temp"].astype(int)
147
- df["minimum_visibility"] = df["minimum_visibility"].astype(int)
148
- df["percipitation"] = df["percipitation"].astype(int)
149
- df["pressure"] = df["pressure"].astype(int)
150
- df["humidity"] = df["humidity"].astype(int)
151
- df["global_radiation"] = df["global_radiation"].astype(int)
152
-
153
- return df
154
-
155
-
156
- def insert_pollution(NO2, O3, data):
157
- df = data
158
- df["NO2"] = NO2
159
- df["O3"] = O3
160
- return df
161
-
162
-
163
- def weather_data():
164
- last_year_date = date.today() - timedelta(days=365)
165
- start_date = (last_year_date - timedelta(days=7)).isoformat()
166
- end_date = (last_year_date + timedelta(days=3)).isoformat()
167
- try:
168
- ResultBytes = urllib.request.urlopen(
169
- f"https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/Utrecht/{start_date}/{end_date}?unitGroup=metric&elements=datetime%2Cwindspeed%2Ctemp%2Csolarradiation%2Cprecip%2Cpressure%2Cvisibility%2Chumidity&include=days&key=7Y6AY56M6RWVNHQ3SAVHNJWFS&maxStations=1&contentType=csv"
170
- )
171
-
172
- # Parse the results as CSV
173
- CSVText = csv.reader(codecs.iterdecode(ResultBytes, "utf-8"))
174
- # Saving the CSV content to a file
175
- current_dir = os.path.dirname(os.path.realpath(__file__))
176
- file_path = os.path.join(current_dir, "past_weather_data.csv")
177
- with open(file_path, "w", newline="", encoding="utf-8") as csvfile:
178
- csv_writer = csv.writer(csvfile)
179
- csv_writer.writerows(CSVText)
180
-
181
- except urllib.error.HTTPError as e:
182
- ErrorInfo = e.read().decode()
183
- print("Error code: ", e.code, ErrorInfo)
184
- sys.exit()
185
- except urllib.error.URLError as e:
186
- ErrorInfo = e.read().decode()
187
- print("Error code: ", e.code, ErrorInfo)
188
- sys.exit()
189
-
190
-
191
- def get_past_data():
192
- weather_data()
193
- pollution_data()
194
- NO2, O3 = clean_values()
195
- df = add_columns()
196
- scaled_df = scale(df)
197
- output_df = insert_pollution(NO2, O3, scaled_df)
198
- os.remove("past_weather_data.csv")
199
- return output_df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/past_data_api_calls.py CHANGED
@@ -1,6 +1,7 @@
1
  import codecs
2
  import csv
3
  import http.client
 
4
  import re
5
  import sys
6
  import urllib.request
@@ -9,14 +10,21 @@ from io import StringIO
9
 
10
  import pandas as pd
11
 
12
- PAST_WEATHER_DATA_FILE = "weather_data.csv"
13
- PAST_POLLUTION_DATA_FILE = "pollution_data.csv"
14
 
15
 
16
- def get_past_weather_data():
17
  last_year_date = date.today() - timedelta(days=365)
18
- start_date = (last_year_date - timedelta(days=8)).isoformat()
19
- end_date = (last_year_date + timedelta(days=2)).isoformat()
 
 
 
 
 
 
 
20
 
21
  try:
22
  ResultBytes = urllib.request.urlopen(
@@ -28,7 +36,10 @@ def get_past_weather_data():
28
  data.columns = data.iloc[0]
29
  data = data[1:]
30
  data = data.rename(columns={"datetime": "date"})
31
- return data
 
 
 
32
 
33
  except urllib.error.HTTPError as e:
34
  ErrorInfo = e.read().decode()
@@ -40,15 +51,29 @@ def get_past_weather_data():
40
  sys.exit()
41
 
42
 
43
- def get_past_pollution_data():
44
  O3 = []
45
  NO2 = []
46
  particles = ["NO2", "O3"]
47
  stations = ["NL10636", "NL10639", "NL10643"]
48
  all_dataframes = []
 
49
  last_year_date = date.today() - timedelta(days=365)
50
- start_date = last_year_date - timedelta(days=7)
51
- end_date = last_year_date + timedelta(days=3)
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  date_list = [
53
  start_date + timedelta(days=x) for x in range((end_date - start_date).days + 1)
54
  ]
@@ -88,16 +113,31 @@ def get_past_pollution_data():
88
  else:
89
  O3.append(avg)
90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  return NO2, O3
92
 
93
 
94
  def get_past_combined_data():
95
- weather_df = get_past_weather_data()
96
- NO2_df, O3_df = get_past_pollution_data()
97
 
98
- combined_df = weather_df
99
- combined_df["NO2"] = NO2_df
100
- combined_df["O3"] = O3_df
 
 
101
 
102
  # Apply scaling and renaming similar to the scale function from previous code
103
  combined_df = combined_df.rename(
@@ -114,7 +154,7 @@ def get_past_combined_data():
114
 
115
  combined_df["date"] = pd.to_datetime(combined_df["date"])
116
  combined_df["weekday"] = combined_df["date"].dt.day_name()
117
-
118
  combined_df["wind_speed"] = combined_df["wind_speed"].astype(float)
119
  combined_df["mean_temp"] = combined_df["mean_temp"].astype(float)
120
  combined_df["minimum_visibility"] = combined_df["minimum_visibility"].astype(float)
@@ -128,13 +168,23 @@ def get_past_combined_data():
128
  combined_df["minimum_visibility"] = combined_df["minimum_visibility"] * 10
129
  combined_df["percipitation"] = combined_df["percipitation"] * 10
130
  combined_df["pressure"] = combined_df["pressure"] * 10
131
-
132
- combined_df["wind_speed"] = combined_df["wind_speed"].astype(float).round().astype(int)
133
- combined_df["mean_temp"] = combined_df["mean_temp"].astype(float).round().astype(int)
134
- combined_df["minimum_visibility"] = combined_df["minimum_visibility"].astype(float).round().astype(int)
135
- combined_df["percipitation"] = combined_df["percipitation"].astype(float).round().astype(int)
 
 
 
 
 
 
 
 
136
  combined_df["pressure"] = combined_df["pressure"].astype(float).round().astype(int)
137
  combined_df["humidity"] = combined_df["humidity"].astype(float).round().astype(int)
138
- combined_df["global_radiation"] = combined_df["global_radiation"].astype(float).round().astype(int)
139
-
 
 
140
  return combined_df
 
1
  import codecs
2
  import csv
3
  import http.client
4
+ import os
5
  import re
6
  import sys
7
  import urllib.request
 
10
 
11
  import pandas as pd
12
 
13
+ PAST_WEATHER_DATA_FILE = "past_weather_data.csv"
14
+ PAST_POLLUTION_DATA_FILE = "past_pollution_data.csv"
15
 
16
 
17
+ def update_past_weather_data():
18
  last_year_date = date.today() - timedelta(days=365)
19
+
20
+ if os.path.exists(PAST_WEATHER_DATA_FILE):
21
+ df = pd.read_csv(PAST_WEATHER_DATA_FILE)
22
+ start_date = pd.to_datetime(df["date"]).max().date().isoformat()
23
+ end_date = (last_year_date + timedelta(days=2)).isoformat()
24
+ else:
25
+ df = pd.DataFrame()
26
+ start_date = (last_year_date - timedelta(days=8)).isoformat()
27
+ end_date = (last_year_date + timedelta(days=2)).isoformat()
28
 
29
  try:
30
  ResultBytes = urllib.request.urlopen(
 
36
  data.columns = data.iloc[0]
37
  data = data[1:]
38
  data = data.rename(columns={"datetime": "date"})
39
+
40
+ updated_df = pd.concat([df, data], ignore_index=True)
41
+ updated_df.drop_duplicates(subset="date", keep="last", inplace=True)
42
+ updated_df.to_csv(PAST_WEATHER_DATA_FILE, index=False)
43
 
44
  except urllib.error.HTTPError as e:
45
  ErrorInfo = e.read().decode()
 
51
  sys.exit()
52
 
53
 
54
+ def update_past_pollution_data():
55
  O3 = []
56
  NO2 = []
57
  particles = ["NO2", "O3"]
58
  stations = ["NL10636", "NL10639", "NL10643"]
59
  all_dataframes = []
60
+
61
  last_year_date = date.today() - timedelta(days=365)
62
+
63
+ if os.path.exists(PAST_POLLUTION_DATA_FILE):
64
+ existing_data = pd.read_csv(PAST_POLLUTION_DATA_FILE)
65
+ last_date = pd.to_datetime(existing_data["date"]).max()
66
+ if last_date >= pd.to_datetime(last_year_date):
67
+ print("Data is already up to date.")
68
+ return
69
+ else:
70
+ start_date = last_date.date()
71
+ end_date = last_year_date + timedelta(days=3)
72
+ else:
73
+ existing_data = pd.DataFrame()
74
+ start_date = last_year_date - timedelta(days=7)
75
+ end_date = last_year_date + timedelta(days=3)
76
+
77
  date_list = [
78
  start_date + timedelta(days=x) for x in range((end_date - start_date).days + 1)
79
  ]
 
113
  else:
114
  O3.append(avg)
115
 
116
+ new_data = pd.DataFrame(
117
+ {
118
+ "date": date_list,
119
+ "NO2": NO2,
120
+ "O3": O3,
121
+ }
122
+ )
123
+
124
+ updated_data = pd.concat([existing_data, new_data], ignore_index=True)
125
+ updated_data.drop_duplicates(subset="date", keep="last", inplace=True)
126
+
127
+ updated_data.to_csv(PAST_POLLUTION_DATA_FILE, index=False)
128
+
129
  return NO2, O3
130
 
131
 
132
  def get_past_combined_data():
133
+ update_past_weather_data()
134
+ update_past_pollution_data()
135
 
136
+ combined_df = pd.read_csv(PAST_WEATHER_DATA_FILE)
137
+ pollution_data = pd.read_csv(PAST_POLLUTION_DATA_FILE)
138
+
139
+ combined_df["NO2"] = pollution_data["NO2"]
140
+ combined_df["O3"] = pollution_data["O3"]
141
 
142
  # Apply scaling and renaming similar to the scale function from previous code
143
  combined_df = combined_df.rename(
 
154
 
155
  combined_df["date"] = pd.to_datetime(combined_df["date"])
156
  combined_df["weekday"] = combined_df["date"].dt.day_name()
157
+
158
  combined_df["wind_speed"] = combined_df["wind_speed"].astype(float)
159
  combined_df["mean_temp"] = combined_df["mean_temp"].astype(float)
160
  combined_df["minimum_visibility"] = combined_df["minimum_visibility"].astype(float)
 
168
  combined_df["minimum_visibility"] = combined_df["minimum_visibility"] * 10
169
  combined_df["percipitation"] = combined_df["percipitation"] * 10
170
  combined_df["pressure"] = combined_df["pressure"] * 10
171
+
172
+ combined_df["wind_speed"] = (
173
+ combined_df["wind_speed"].astype(float).round().astype(int)
174
+ )
175
+ combined_df["mean_temp"] = (
176
+ combined_df["mean_temp"].astype(float).round().astype(int)
177
+ )
178
+ combined_df["minimum_visibility"] = (
179
+ combined_df["minimum_visibility"].astype(float).round().astype(int)
180
+ )
181
+ combined_df["percipitation"] = (
182
+ combined_df["percipitation"].astype(float).round().astype(int)
183
+ )
184
  combined_df["pressure"] = combined_df["pressure"].astype(float).round().astype(int)
185
  combined_df["humidity"] = combined_df["humidity"].astype(float).round().astype(int)
186
+ combined_df["global_radiation"] = (
187
+ combined_df["global_radiation"].astype(float).round().astype(int)
188
+ )
189
+
190
  return combined_df
src/predict.py CHANGED
@@ -17,7 +17,7 @@ def load_model(particle):
17
  if particle == "O3":
18
  file_name = "O3_svr_model.pkl"
19
  elif particle == "NO2":
20
- file_name == "NO2_nn_model.pkl"
21
 
22
  model_path = hf_hub_download(repo_id=repo_id, filename=file_name)
23
  model = joblib.load(model_path)
@@ -48,7 +48,7 @@ def get_data_and_predictions():
48
  "pollutant": "O3",
49
  "date_predicted": date.today(),
50
  "date": date.today() + timedelta(days=i + 1),
51
- "prediction_value": o3_predictions[i],
52
  }
53
  )
54
  prediction_data.append(
@@ -56,15 +56,20 @@ def get_data_and_predictions():
56
  "pollutant": "NO2",
57
  "date_predicted": date.today(),
58
  "date": date.today() + timedelta(days=i + 1),
59
- "prediction_value": no2_predictions[i],
60
  }
61
  )
62
 
63
  predictions_df = pd.DataFrame(prediction_data)
64
 
65
  if os.path.exists(PREDICTIONS_FILE):
66
- predictions_df.to_csv(PREDICTIONS_FILE, mode="a", header=False, index=False)
 
 
 
 
67
  else:
68
- predictions_df.to_csv(PREDICTIONS_FILE, mode="w", header=True, index=False)
69
 
 
70
  return week_data, o3_predictions, no2_predictions
 
17
  if particle == "O3":
18
  file_name = "O3_svr_model.pkl"
19
  elif particle == "NO2":
20
+ file_name = "NO2_svr_model.pkl"
21
 
22
  model_path = hf_hub_download(repo_id=repo_id, filename=file_name)
23
  model = joblib.load(model_path)
 
48
  "pollutant": "O3",
49
  "date_predicted": date.today(),
50
  "date": date.today() + timedelta(days=i + 1),
51
+ "prediction_value": o3_predictions[0][i],
52
  }
53
  )
54
  prediction_data.append(
 
56
  "pollutant": "NO2",
57
  "date_predicted": date.today(),
58
  "date": date.today() + timedelta(days=i + 1),
59
+ "prediction_value": no2_predictions[0][i],
60
  }
61
  )
62
 
63
  predictions_df = pd.DataFrame(prediction_data)
64
 
65
  if os.path.exists(PREDICTIONS_FILE):
66
+ existing_data = pd.read_csv(PREDICTIONS_FILE)
67
+ combined_data = pd.concat([existing_data, predictions_df])
68
+ combined_data = combined_data.drop_duplicates(
69
+ subset=["pollutant", "date_predicted", "date"], keep="first"
70
+ )
71
  else:
72
+ combined_data = predictions_df
73
 
74
+ combined_data.to_csv(PREDICTIONS_FILE, index=False)
75
  return week_data, o3_predictions, no2_predictions
test.ipynb CHANGED
@@ -15,7 +15,9 @@
15
  }
16
  ],
17
  "source": [
18
- "from src.predict import get_data_and_predictions"
 
 
19
  ]
20
  },
21
  {
@@ -24,22 +26,14 @@
24
  "metadata": {},
25
  "outputs": [
26
  {
27
- "ename": "ValueError",
28
- "evalue": "Length of values (0) does not match length of index (11)",
29
- "output_type": "error",
30
- "traceback": [
31
- "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
32
- "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
33
- "Cell \u001b[1;32mIn[2], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m week_data, predictions_O3, predictions_NO2 \u001b[38;5;241m=\u001b[39m \u001b[43mget_data_and_predictions\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
34
- "File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\src\\predict.py:41\u001b[0m, in \u001b[0;36mget_data_and_predictions\u001b[1;34m()\u001b[0m\n\u001b[0;32m 37\u001b[0m PREDICTIONS_FILE \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpredictions_history.csv\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 39\u001b[0m week_data \u001b[38;5;241m=\u001b[39m get_combined_data()\n\u001b[1;32m---> 41\u001b[0m o3_predictions \u001b[38;5;241m=\u001b[39m \u001b[43mrun_model\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mO3\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mweek_data\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 42\u001b[0m no2_predictions \u001b[38;5;241m=\u001b[39m run_model(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNO2\u001b[39m\u001b[38;5;124m\"\u001b[39m, data\u001b[38;5;241m=\u001b[39mweek_data)\n\u001b[0;32m 44\u001b[0m prediction_data \u001b[38;5;241m=\u001b[39m []\n",
35
- "File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\src\\predict.py:28\u001b[0m, in \u001b[0;36mrun_model\u001b[1;34m(particle, data)\u001b[0m\n\u001b[0;32m 27\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mrun_model\u001b[39m(particle, data):\n\u001b[1;32m---> 28\u001b[0m input_data \u001b[38;5;241m=\u001b[39m \u001b[43mcreate_features\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtarget_particle\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparticle\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 29\u001b[0m model \u001b[38;5;241m=\u001b[39m load_model(particle)\n\u001b[0;32m 30\u001b[0m prediction \u001b[38;5;241m=\u001b[39m model\u001b[38;5;241m.\u001b[39mpredict(input_data)\n",
36
- "File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\src\\features_pipeline.py:60\u001b[0m, in \u001b[0;36mcreate_features\u001b[1;34m(data, target_particle, lag_days, sma_days)\u001b[0m\n\u001b[0;32m 55\u001b[0m data[\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfeature\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m_sma_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00msma_days\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m (\n\u001b[0;32m 56\u001b[0m data[feature]\u001b[38;5;241m.\u001b[39mrolling(window\u001b[38;5;241m=\u001b[39msma_days)\u001b[38;5;241m.\u001b[39mmean()\n\u001b[0;32m 57\u001b[0m )\n\u001b[0;32m 59\u001b[0m \u001b[38;5;66;03m# Create particle data (NO2 and O3) from the same time last year\u001b[39;00m\n\u001b[1;32m---> 60\u001b[0m past_data \u001b[38;5;241m=\u001b[39m \u001b[43mget_past_combined_data\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 62\u001b[0m \u001b[38;5;66;03m# Today last year\u001b[39;00m\n\u001b[0;32m 63\u001b[0m data[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mO3_last_year\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m past_data[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mO3\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39miloc[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m4\u001b[39m]\n",
37
- "File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\src\\past_data_api_calls.py:99\u001b[0m, in \u001b[0;36mget_past_combined_data\u001b[1;34m()\u001b[0m\n\u001b[0;32m 96\u001b[0m NO2_df, O3_df \u001b[38;5;241m=\u001b[39m get_past_pollution_data()\n\u001b[0;32m 98\u001b[0m combined_df \u001b[38;5;241m=\u001b[39m weather_df\n\u001b[1;32m---> 99\u001b[0m \u001b[43mcombined_df\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mNO2\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m \u001b[38;5;241m=\u001b[39m NO2_df\n\u001b[0;32m 100\u001b[0m combined_df[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mO3\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m O3_df\n\u001b[0;32m 102\u001b[0m \u001b[38;5;66;03m# Apply scaling and renaming similar to the scale function from previous code\u001b[39;00m\n",
38
- "File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\.venv\\Lib\\site-packages\\pandas\\core\\frame.py:4311\u001b[0m, in \u001b[0;36mDataFrame.__setitem__\u001b[1;34m(self, key, value)\u001b[0m\n\u001b[0;32m 4308\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_setitem_array([key], value)\n\u001b[0;32m 4309\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 4310\u001b[0m \u001b[38;5;66;03m# set column\u001b[39;00m\n\u001b[1;32m-> 4311\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_set_item\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m)\u001b[49m\n",
39
- "File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\.venv\\Lib\\site-packages\\pandas\\core\\frame.py:4524\u001b[0m, in \u001b[0;36mDataFrame._set_item\u001b[1;34m(self, key, value)\u001b[0m\n\u001b[0;32m 4514\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_set_item\u001b[39m(\u001b[38;5;28mself\u001b[39m, key, value) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 4515\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 4516\u001b[0m \u001b[38;5;124;03m Add series to DataFrame in specified column.\u001b[39;00m\n\u001b[0;32m 4517\u001b[0m \n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 4522\u001b[0m \u001b[38;5;124;03m ensure homogeneity.\u001b[39;00m\n\u001b[0;32m 4523\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m-> 4524\u001b[0m value, refs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sanitize_column\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 4526\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[0;32m 4527\u001b[0m key \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcolumns\n\u001b[0;32m 4528\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m value\u001b[38;5;241m.\u001b[39mndim \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[0;32m 4529\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(value\u001b[38;5;241m.\u001b[39mdtype, ExtensionDtype)\n\u001b[0;32m 4530\u001b[0m ):\n\u001b[0;32m 4531\u001b[0m \u001b[38;5;66;03m# broadcast across multiple columns if necessary\u001b[39;00m\n\u001b[0;32m 4532\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcolumns\u001b[38;5;241m.\u001b[39mis_unique \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcolumns, MultiIndex):\n",
40
- "File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\.venv\\Lib\\site-packages\\pandas\\core\\frame.py:5266\u001b[0m, in \u001b[0;36mDataFrame._sanitize_column\u001b[1;34m(self, value)\u001b[0m\n\u001b[0;32m 5263\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _reindex_for_setitem(value, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mindex)\n\u001b[0;32m 5265\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_list_like(value):\n\u001b[1;32m-> 5266\u001b[0m \u001b[43mcom\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequire_length_match\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mindex\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 5267\u001b[0m arr \u001b[38;5;241m=\u001b[39m sanitize_array(value, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mindex, copy\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, allow_2d\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m 5268\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[0;32m 5269\u001b[0m \u001b[38;5;28misinstance\u001b[39m(value, Index)\n\u001b[0;32m 5270\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m value\u001b[38;5;241m.\u001b[39mdtype \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mobject\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 5273\u001b[0m \u001b[38;5;66;03m# TODO: Remove kludge in sanitize_array for string mode when enforcing\u001b[39;00m\n\u001b[0;32m 5274\u001b[0m \u001b[38;5;66;03m# this deprecation\u001b[39;00m\n",
41
- "File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\.venv\\Lib\\site-packages\\pandas\\core\\common.py:573\u001b[0m, in \u001b[0;36mrequire_length_match\u001b[1;34m(data, index)\u001b[0m\n\u001b[0;32m 569\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 570\u001b[0m \u001b[38;5;124;03mCheck the length of data matches the length of the index.\u001b[39;00m\n\u001b[0;32m 571\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 572\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(data) \u001b[38;5;241m!=\u001b[39m \u001b[38;5;28mlen\u001b[39m(index):\n\u001b[1;32m--> 573\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 574\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mLength of values \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 575\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m(\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlen\u001b[39m(data)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m) \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 576\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdoes not match length of index \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 577\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m(\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlen\u001b[39m(index)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 578\u001b[0m )\n",
42
- "\u001b[1;31mValueError\u001b[0m: Length of values (0) does not match length of index (11)"
43
  ]
44
  }
45
  ],
@@ -47,29 +41,10 @@
47
  "week_data, predictions_O3, predictions_NO2 = get_data_and_predictions()"
48
  ]
49
  },
50
- {
51
- "cell_type": "code",
52
- "execution_count": null,
53
- "metadata": {},
54
- "outputs": [],
55
- "source": [
56
- "week_data"
57
- ]
58
- },
59
  {
60
  "cell_type": "code",
61
  "execution_count": 3,
62
  "metadata": {},
63
- "outputs": [],
64
- "source": [
65
- "data = pd.read_csv(\"dataset.csv\")\n",
66
- "target_particle = \"O3\""
67
- ]
68
- },
69
- {
70
- "cell_type": "code",
71
- "execution_count": 4,
72
- "metadata": {},
73
  "outputs": [
74
  {
75
  "data": {
@@ -108,23 +83,9 @@
108
  " <tbody>\n",
109
  " <tr>\n",
110
  " <th>0</th>\n",
111
- " <td>2024-10-16</td>\n",
112
- " <td>22.602712</td>\n",
113
- " <td>22.881288</td>\n",
114
- " <td>61</td>\n",
115
- " <td>151</td>\n",
116
- " <td>40</td>\n",
117
- " <td>0</td>\n",
118
- " <td>10103</td>\n",
119
- " <td>358</td>\n",
120
- " <td>82</td>\n",
121
- " <td>Wednesday</td>\n",
122
- " </tr>\n",
123
- " <tr>\n",
124
- " <th>1</th>\n",
125
  " <td>2024-10-17</td>\n",
126
- " <td>23.104327</td>\n",
127
- " <td>23.038638</td>\n",
128
  " <td>51</td>\n",
129
  " <td>169</td>\n",
130
  " <td>43</td>\n",
@@ -135,52 +96,52 @@
135
  " <td>Thursday</td>\n",
136
  " </tr>\n",
137
  " <tr>\n",
138
- " <th>2</th>\n",
139
  " <td>2024-10-18</td>\n",
140
- " <td>23.682857</td>\n",
141
- " <td>23.716611</td>\n",
142
  " <td>21</td>\n",
143
- " <td>156</td>\n",
144
  " <td>42</td>\n",
145
  " <td>39</td>\n",
146
  " <td>10140</td>\n",
147
- " <td>64</td>\n",
148
  " <td>97</td>\n",
149
  " <td>Friday</td>\n",
150
  " </tr>\n",
151
  " <tr>\n",
152
- " <th>3</th>\n",
153
  " <td>2024-10-19</td>\n",
154
- " <td>24.532039</td>\n",
155
- " <td>23.604723</td>\n",
156
- " <td>43</td>\n",
157
  " <td>147</td>\n",
158
  " <td>43</td>\n",
159
- " <td>28</td>\n",
160
- " <td>10140</td>\n",
161
- " <td>236</td>\n",
162
- " <td>92</td>\n",
163
  " <td>Saturday</td>\n",
164
  " </tr>\n",
165
  " <tr>\n",
166
- " <th>4</th>\n",
167
  " <td>2024-10-20</td>\n",
168
- " <td>23.019102</td>\n",
169
- " <td>24.173377</td>\n",
170
- " <td>68</td>\n",
171
- " <td>145</td>\n",
172
- " <td>0</td>\n",
173
  " <td>0</td>\n",
 
174
  " <td>10160</td>\n",
175
- " <td>241</td>\n",
176
- " <td>82</td>\n",
177
  " <td>Sunday</td>\n",
178
  " </tr>\n",
179
  " <tr>\n",
180
- " <th>5</th>\n",
181
  " <td>2024-10-21</td>\n",
182
- " <td>21.275629</td>\n",
183
- " <td>25.058736</td>\n",
184
  " <td>58</td>\n",
185
  " <td>144</td>\n",
186
  " <td>27</td>\n",
@@ -191,499 +152,120 @@
191
  " <td>Monday</td>\n",
192
  " </tr>\n",
193
  " <tr>\n",
194
- " <th>6</th>\n",
195
  " <td>2024-10-22</td>\n",
196
- " <td>22.334375</td>\n",
197
- " <td>24.594219</td>\n",
198
- " <td>76</td>\n",
199
- " <td>123</td>\n",
200
  " <td>57</td>\n",
201
- " <td>12</td>\n",
202
- " <td>10265</td>\n",
203
- " <td>100</td>\n",
204
- " <td>87</td>\n",
205
  " <td>Tuesday</td>\n",
206
  " </tr>\n",
207
  " <tr>\n",
208
- " <th>7</th>\n",
209
  " <td>2024-10-23</td>\n",
210
- " <td>24.261733</td>\n",
211
- " <td>23.560000</td>\n",
212
- " <td>31</td>\n",
213
- " <td>115</td>\n",
214
- " <td>7</td>\n",
215
  " <td>0</td>\n",
216
  " <td>10328</td>\n",
217
- " <td>105</td>\n",
218
- " <td>95</td>\n",
219
  " <td>Wednesday</td>\n",
220
  " </tr>\n",
221
- " </tbody>\n",
222
- "</table>\n",
223
- "</div>"
224
- ],
225
- "text/plain": [
226
- " date NO2 O3 wind_speed mean_temp global_radiation \\\n",
227
- "0 2024-10-16 22.602712 22.881288 61 151 40 \n",
228
- "1 2024-10-17 23.104327 23.038638 51 169 43 \n",
229
- "2 2024-10-18 23.682857 23.716611 21 156 42 \n",
230
- "3 2024-10-19 24.532039 23.604723 43 147 43 \n",
231
- "4 2024-10-20 23.019102 24.173377 68 145 0 \n",
232
- "5 2024-10-21 21.275629 25.058736 58 144 27 \n",
233
- "6 2024-10-22 22.334375 24.594219 76 123 57 \n",
234
- "7 2024-10-23 24.261733 23.560000 31 115 7 \n",
235
- "\n",
236
- " percipitation pressure minimum_visibility humidity weekday \n",
237
- "0 0 10103 358 82 Wednesday \n",
238
- "1 6 10100 371 86 Thursday \n",
239
- "2 39 10140 64 97 Friday \n",
240
- "3 28 10140 236 92 Saturday \n",
241
- "4 0 10160 241 82 Sunday \n",
242
- "5 43 10206 220 92 Monday \n",
243
- "6 12 10265 100 87 Tuesday \n",
244
- "7 0 10328 105 95 Wednesday "
245
- ]
246
- },
247
- "execution_count": 4,
248
- "metadata": {},
249
- "output_type": "execute_result"
250
- }
251
- ],
252
- "source": [
253
- "data"
254
- ]
255
- },
256
- {
257
- "cell_type": "code",
258
- "execution_count": 5,
259
- "metadata": {},
260
- "outputs": [
261
- {
262
- "name": "stdout",
263
- "output_type": "stream",
264
- "text": [
265
- "Number of rows with missing values dropped: 7\n"
266
- ]
267
- }
268
- ],
269
- "source": [
270
- "input_data = create_features(\n",
271
- " data=data,\n",
272
- " target_particle=target_particle,\n",
273
- " lag_days=7,\n",
274
- " sma_days=7,\n",
275
- ")"
276
- ]
277
- },
278
- {
279
- "cell_type": "code",
280
- "execution_count": 6,
281
- "metadata": {},
282
- "outputs": [
283
- {
284
- "data": {
285
- "text/html": [
286
- "<div>\n",
287
- "<style scoped>\n",
288
- " .dataframe tbody tr th:only-of-type {\n",
289
- " vertical-align: middle;\n",
290
- " }\n",
291
- "\n",
292
- " .dataframe tbody tr th {\n",
293
- " vertical-align: top;\n",
294
- " }\n",
295
- "\n",
296
- " .dataframe thead th {\n",
297
- " text-align: right;\n",
298
- " }\n",
299
- "</style>\n",
300
- "<table border=\"1\" class=\"dataframe\">\n",
301
- " <thead>\n",
302
- " <tr style=\"text-align: right;\">\n",
303
- " <th></th>\n",
304
- " <th>NO2</th>\n",
305
- " <th>O3</th>\n",
306
- " <th>wind_speed</th>\n",
307
- " <th>mean_temp</th>\n",
308
- " <th>global_radiation</th>\n",
309
- " <th>percipitation</th>\n",
310
- " <th>pressure</th>\n",
311
- " <th>minimum_visibility</th>\n",
312
- " <th>humidity</th>\n",
313
- " <th>weekday_sin</th>\n",
314
- " <th>...</th>\n",
315
- " <th>O3_last_year_4_days_before</th>\n",
316
- " <th>NO2_last_year_4_days_before</th>\n",
317
- " <th>O3_last_year_5_days_before</th>\n",
318
- " <th>NO2_last_year_5_days_before</th>\n",
319
- " <th>O3_last_year_6_days_before</th>\n",
320
- " <th>NO2_last_year_6_days_before</th>\n",
321
- " <th>O3_last_year_7_days_before</th>\n",
322
- " <th>NO2_last_year_7_days_before</th>\n",
323
- " <th>O3_last_year_3_days_after</th>\n",
324
- " <th>NO2_last_year_3_days_after</th>\n",
325
- " </tr>\n",
326
- " </thead>\n",
327
- " <tbody>\n",
328
  " <tr>\n",
329
- " <th>0</th>\n",
330
- " <td>-0.126371</td>\n",
331
- " <td>-0.855455</td>\n",
332
- " <td>-0.206181</td>\n",
333
- " <td>0.082314</td>\n",
334
- " <td>-1.330268</td>\n",
335
- " <td>-0.493936</td>\n",
336
- " <td>1.783274</td>\n",
337
- " <td>2.813837</td>\n",
338
- " <td>1.547919</td>\n",
339
- " <td>1.37753</td>\n",
340
- " <td>...</td>\n",
341
- " <td>-1.036205</td>\n",
342
- " <td>-0.802392</td>\n",
343
- " <td>-0.883032</td>\n",
344
- " <td>-0.968984</td>\n",
345
- " <td>0.333776</td>\n",
346
- " <td>-1.446199</td>\n",
347
- " <td>-1.180992</td>\n",
348
- " <td>-0.54567</td>\n",
349
- " <td>-1.15814</td>\n",
350
- " <td>-0.358079</td>\n",
351
  " </tr>\n",
352
  " </tbody>\n",
353
  "</table>\n",
354
- "<p>1 rows × 87 columns</p>\n",
355
  "</div>"
356
  ],
357
  "text/plain": [
358
- " NO2 O3 wind_speed mean_temp global_radiation percipitation \\\n",
359
- "0 -0.126371 -0.855455 -0.206181 0.082314 -1.330268 -0.493936 \n",
360
- "\n",
361
- " pressure minimum_visibility humidity weekday_sin ... \\\n",
362
- "0 1.783274 2.813837 1.547919 1.37753 ... \n",
363
- "\n",
364
- " O3_last_year_4_days_before NO2_last_year_4_days_before \\\n",
365
- "0 -1.036205 -0.802392 \n",
366
- "\n",
367
- " O3_last_year_5_days_before NO2_last_year_5_days_before \\\n",
368
- "0 -0.883032 -0.968984 \n",
369
  "\n",
370
- " O3_last_year_6_days_before NO2_last_year_6_days_before \\\n",
371
- "0 0.333776 -1.446199 \n",
372
- "\n",
373
- " O3_last_year_7_days_before NO2_last_year_7_days_before \\\n",
374
- "0 -1.180992 -0.54567 \n",
375
- "\n",
376
- " O3_last_year_3_days_after NO2_last_year_3_days_after \n",
377
- "0 -1.15814 -0.358079 \n",
378
- "\n",
379
- "[1 rows x 87 columns]"
380
  ]
381
  },
382
- "execution_count": 6,
383
  "metadata": {},
384
  "output_type": "execute_result"
385
  }
386
  ],
387
  "source": [
388
- "input_data"
389
- ]
390
- },
391
- {
392
- "cell_type": "code",
393
- "execution_count": null,
394
- "metadata": {},
395
- "outputs": [],
396
- "source": [
397
- "#prediction = run_model(particle=\"O3\", data=df)"
398
  ]
399
  },
400
  {
401
  "cell_type": "code",
402
- "execution_count": 9,
403
  "metadata": {},
404
  "outputs": [
405
  {
406
  "data": {
407
- "text/html": [
408
- "<div>\n",
409
- "<style scoped>\n",
410
- " .dataframe tbody tr th:only-of-type {\n",
411
- " vertical-align: middle;\n",
412
- " }\n",
413
- "\n",
414
- " .dataframe tbody tr th {\n",
415
- " vertical-align: top;\n",
416
- " }\n",
417
- "\n",
418
- " .dataframe thead th {\n",
419
- " text-align: right;\n",
420
- " }\n",
421
- "</style>\n",
422
- "<table border=\"1\" class=\"dataframe\">\n",
423
- " <thead>\n",
424
- " <tr style=\"text-align: right;\">\n",
425
- " <th></th>\n",
426
- " <th>date</th>\n",
427
- " <th>NO2</th>\n",
428
- " <th>O3</th>\n",
429
- " <th>wind_speed</th>\n",
430
- " <th>mean_temp</th>\n",
431
- " <th>global_radiation</th>\n",
432
- " <th>percipitation</th>\n",
433
- " <th>pressure</th>\n",
434
- " <th>minimum_visibility</th>\n",
435
- " <th>humidity</th>\n",
436
- " <th>weekday</th>\n",
437
- " </tr>\n",
438
- " </thead>\n",
439
- " <tbody>\n",
440
- " <tr>\n",
441
- " <th>0</th>\n",
442
- " <td>2023-10-16</td>\n",
443
- " <td>17.958784</td>\n",
444
- " <td>32.611400</td>\n",
445
- " <td>31</td>\n",
446
- " <td>90</td>\n",
447
- " <td>68</td>\n",
448
- " <td>9</td>\n",
449
- " <td>1022</td>\n",
450
- " <td>348</td>\n",
451
- " <td>88</td>\n",
452
- " <td>Monday</td>\n",
453
- " </tr>\n",
454
- " <tr>\n",
455
- " <th>1</th>\n",
456
- " <td>2023-10-17</td>\n",
457
- " <td>10.842703</td>\n",
458
- " <td>39.812600</td>\n",
459
- " <td>61</td>\n",
460
- " <td>85</td>\n",
461
- " <td>75</td>\n",
462
- " <td>0</td>\n",
463
- " <td>1019</td>\n",
464
- " <td>348</td>\n",
465
- " <td>84</td>\n",
466
- " <td>Tuesday</td>\n",
467
- " </tr>\n",
468
- " <tr>\n",
469
- " <th>2</th>\n",
470
- " <td>2023-10-18</td>\n",
471
- " <td>17.970267</td>\n",
472
- " <td>31.779024</td>\n",
473
- " <td>71</td>\n",
474
- " <td>90</td>\n",
475
- " <td>71</td>\n",
476
- " <td>23</td>\n",
477
- " <td>1006</td>\n",
478
- " <td>238</td>\n",
479
- " <td>77</td>\n",
480
- " <td>Wednesday</td>\n",
481
- " </tr>\n",
482
- " <tr>\n",
483
- " <th>3</th>\n",
484
- " <td>2023-10-19</td>\n",
485
- " <td>17.233056</td>\n",
486
- " <td>18.715600</td>\n",
487
- " <td>61</td>\n",
488
- " <td>145</td>\n",
489
- " <td>39</td>\n",
490
- " <td>114</td>\n",
491
- " <td>990</td>\n",
492
- " <td>212</td>\n",
493
- " <td>94</td>\n",
494
- " <td>Thursday</td>\n",
495
- " </tr>\n",
496
- " <tr>\n",
497
- " <th>4</th>\n",
498
- " <td>2023-10-20</td>\n",
499
- " <td>15.023600</td>\n",
500
- " <td>22.040000</td>\n",
501
- " <td>71</td>\n",
502
- " <td>119</td>\n",
503
- " <td>7</td>\n",
504
- " <td>204</td>\n",
505
- " <td>981</td>\n",
506
- " <td>104</td>\n",
507
- " <td>97</td>\n",
508
- " <td>Friday</td>\n",
509
- " </tr>\n",
510
- " <tr>\n",
511
- " <th>5</th>\n",
512
- " <td>2023-10-21</td>\n",
513
- " <td>8.723378</td>\n",
514
- " <td>48.334400</td>\n",
515
- " <td>61</td>\n",
516
- " <td>131</td>\n",
517
- " <td>39</td>\n",
518
- " <td>35</td>\n",
519
- " <td>989</td>\n",
520
- " <td>277</td>\n",
521
- " <td>88</td>\n",
522
- " <td>Saturday</td>\n",
523
- " </tr>\n",
524
- " <tr>\n",
525
- " <th>6</th>\n",
526
- " <td>2023-10-22</td>\n",
527
- " <td>20.634267</td>\n",
528
- " <td>15.586000</td>\n",
529
- " <td>71</td>\n",
530
- " <td>121</td>\n",
531
- " <td>55</td>\n",
532
- " <td>39</td>\n",
533
- " <td>1003</td>\n",
534
- " <td>323</td>\n",
535
- " <td>87</td>\n",
536
- " <td>Sunday</td>\n",
537
- " </tr>\n",
538
- " <tr>\n",
539
- " <th>7</th>\n",
540
- " <td>2023-10-23</td>\n",
541
- " <td>15.115600</td>\n",
542
- " <td>24.628085</td>\n",
543
- " <td>50</td>\n",
544
- " <td>99</td>\n",
545
- " <td>43</td>\n",
546
- " <td>5</td>\n",
547
- " <td>1011</td>\n",
548
- " <td>59</td>\n",
549
- " <td>95</td>\n",
550
- " <td>Monday</td>\n",
551
- " </tr>\n",
552
- " <tr>\n",
553
- " <th>8</th>\n",
554
- " <td>2023-10-24</td>\n",
555
- " <td>22.885676</td>\n",
556
- " <td>27.117600</td>\n",
557
- " <td>61</td>\n",
558
- " <td>116</td>\n",
559
- " <td>32</td>\n",
560
- " <td>65</td>\n",
561
- " <td>1001</td>\n",
562
- " <td>231</td>\n",
563
- " <td>92</td>\n",
564
- " <td>Tuesday</td>\n",
565
- " </tr>\n",
566
- " <tr>\n",
567
- " <th>9</th>\n",
568
- " <td>2023-10-25</td>\n",
569
- " <td>21.531757</td>\n",
570
- " <td>13.321600</td>\n",
571
- " <td>50</td>\n",
572
- " <td>93</td>\n",
573
- " <td>14</td>\n",
574
- " <td>153</td>\n",
575
- " <td>996</td>\n",
576
- " <td>157</td>\n",
577
- " <td>96</td>\n",
578
- " <td>Wednesday</td>\n",
579
- " </tr>\n",
580
- " <tr>\n",
581
- " <th>10</th>\n",
582
- " <td>2023-10-26</td>\n",
583
- " <td>23.072267</td>\n",
584
- " <td>16.154167</td>\n",
585
- " <td>31</td>\n",
586
- " <td>94</td>\n",
587
- " <td>36</td>\n",
588
- " <td>1</td>\n",
589
- " <td>995</td>\n",
590
- " <td>48</td>\n",
591
- " <td>97</td>\n",
592
- " <td>Thursday</td>\n",
593
- " </tr>\n",
594
- " </tbody>\n",
595
- "</table>\n",
596
- "</div>"
597
- ],
598
  "text/plain": [
599
- " date NO2 O3 wind_speed mean_temp global_radiation \\\n",
600
- "0 2023-10-16 17.958784 32.611400 31 90 68 \n",
601
- "1 2023-10-17 10.842703 39.812600 61 85 75 \n",
602
- "2 2023-10-18 17.970267 31.779024 71 90 71 \n",
603
- "3 2023-10-19 17.233056 18.715600 61 145 39 \n",
604
- "4 2023-10-20 15.023600 22.040000 71 119 7 \n",
605
- "5 2023-10-21 8.723378 48.334400 61 131 39 \n",
606
- "6 2023-10-22 20.634267 15.586000 71 121 55 \n",
607
- "7 2023-10-23 15.115600 24.628085 50 99 43 \n",
608
- "8 2023-10-24 22.885676 27.117600 61 116 32 \n",
609
- "9 2023-10-25 21.531757 13.321600 50 93 14 \n",
610
- "10 2023-10-26 23.072267 16.154167 31 94 36 \n",
611
- "\n",
612
- " percipitation pressure minimum_visibility humidity weekday \n",
613
- "0 9 1022 348 88 Monday \n",
614
- "1 0 1019 348 84 Tuesday \n",
615
- "2 23 1006 238 77 Wednesday \n",
616
- "3 114 990 212 94 Thursday \n",
617
- "4 204 981 104 97 Friday \n",
618
- "5 35 989 277 88 Saturday \n",
619
- "6 39 1003 323 87 Sunday \n",
620
- "7 5 1011 59 95 Monday \n",
621
- "8 65 1001 231 92 Tuesday \n",
622
- "9 153 996 157 96 Wednesday \n",
623
- "10 1 995 48 97 Thursday "
624
  ]
625
  },
626
- "execution_count": 9,
627
  "metadata": {},
628
  "output_type": "execute_result"
629
  }
630
  ],
631
  "source": [
632
- "get_past_data()"
633
- ]
634
- },
635
- {
636
- "cell_type": "code",
637
- "execution_count": 9,
638
- "metadata": {},
639
- "outputs": [
640
- {
641
- "name": "stderr",
642
- "output_type": "stream",
643
- "text": [
644
- "2024-10-23 19:40:20.321 Thread 'MainThread': missing ScriptRunContext! This warning can be ignored when running in bare mode.\n",
645
- "2024-10-23 19:40:20.322 Thread 'MainThread': missing ScriptRunContext! This warning can be ignored when running in bare mode.\n",
646
- "2024-10-23 19:40:20.323 Thread 'MainThread': missing ScriptRunContext! This warning can be ignored when running in bare mode.\n"
647
- ]
648
- },
649
- {
650
- "name": "stdout",
651
- "output_type": "stream",
652
- "text": [
653
- "Number of rows with missing values dropped: 7\n"
654
- ]
655
- },
656
- {
657
- "name": "stderr",
658
- "output_type": "stream",
659
- "text": [
660
- "2024-10-23 19:40:34.183 Thread 'MainThread': missing ScriptRunContext! This warning can be ignored when running in bare mode.\n",
661
- "2024-10-23 19:40:34.184 Thread 'MainThread': missing ScriptRunContext! This warning can be ignored when running in bare mode.\n"
662
- ]
663
- }
664
- ],
665
- "source": [
666
- "prediction=run_model(particle=target_particle, data=data)"
667
  ]
668
  },
669
  {
670
  "cell_type": "code",
671
- "execution_count": 10,
672
  "metadata": {},
673
  "outputs": [
674
  {
675
  "data": {
676
  "text/plain": [
677
- "array([[19.90814701, 8.8039613 , 26.57711386]])"
678
  ]
679
  },
680
- "execution_count": 10,
681
  "metadata": {},
682
  "output_type": "execute_result"
683
  }
684
  ],
685
  "source": [
686
- "prediction"
687
  ]
688
  }
689
  ],
 
15
  }
16
  ],
17
  "source": [
18
+ "from src.predict import get_data_and_predictions\n",
19
+ "from src.data_api_calls import get_combined_data\n",
20
+ "from src.past_data_api_calls import get_past_combined_data"
21
  ]
22
  },
23
  {
 
26
  "metadata": {},
27
  "outputs": [
28
  {
29
+ "name": "stdout",
30
+ "output_type": "stream",
31
+ "text": [
32
+ "Data is already up to date.\n",
33
+ "Data is already up to date.\n",
34
+ "Number of rows with missing values dropped: 7\n",
35
+ "Data is already up to date.\n",
36
+ "Number of rows with missing values dropped: 7\n"
 
 
 
 
 
 
 
 
37
  ]
38
  }
39
  ],
 
41
  "week_data, predictions_O3, predictions_NO2 = get_data_and_predictions()"
42
  ]
43
  },
 
 
 
 
 
 
 
 
 
44
  {
45
  "cell_type": "code",
46
  "execution_count": 3,
47
  "metadata": {},
 
 
 
 
 
 
 
 
 
 
48
  "outputs": [
49
  {
50
  "data": {
 
83
  " <tbody>\n",
84
  " <tr>\n",
85
  " <th>0</th>\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  " <td>2024-10-17</td>\n",
87
+ " <td>22.804605</td>\n",
88
+ " <td>22.769160</td>\n",
89
  " <td>51</td>\n",
90
  " <td>169</td>\n",
91
  " <td>43</td>\n",
 
96
  " <td>Thursday</td>\n",
97
  " </tr>\n",
98
  " <tr>\n",
99
+ " <th>1</th>\n",
100
  " <td>2024-10-18</td>\n",
101
+ " <td>23.268500</td>\n",
102
+ " <td>23.307332</td>\n",
103
  " <td>21</td>\n",
104
+ " <td>155</td>\n",
105
  " <td>42</td>\n",
106
  " <td>39</td>\n",
107
  " <td>10140</td>\n",
108
+ " <td>45</td>\n",
109
  " <td>97</td>\n",
110
  " <td>Friday</td>\n",
111
  " </tr>\n",
112
  " <tr>\n",
113
+ " <th>2</th>\n",
114
  " <td>2024-10-19</td>\n",
115
+ " <td>23.910064</td>\n",
116
+ " <td>23.171714</td>\n",
117
+ " <td>41</td>\n",
118
  " <td>147</td>\n",
119
  " <td>43</td>\n",
120
+ " <td>16</td>\n",
121
+ " <td>10141</td>\n",
122
+ " <td>228</td>\n",
123
+ " <td>89</td>\n",
124
  " <td>Saturday</td>\n",
125
  " </tr>\n",
126
  " <tr>\n",
127
+ " <th>3</th>\n",
128
  " <td>2024-10-20</td>\n",
129
+ " <td>22.573238</td>\n",
130
+ " <td>23.537845</td>\n",
131
+ " <td>81</td>\n",
132
+ " <td>155</td>\n",
 
133
  " <td>0</td>\n",
134
+ " <td>5</td>\n",
135
  " <td>10160</td>\n",
136
+ " <td>415</td>\n",
137
+ " <td>83</td>\n",
138
  " <td>Sunday</td>\n",
139
  " </tr>\n",
140
  " <tr>\n",
141
+ " <th>4</th>\n",
142
  " <td>2024-10-21</td>\n",
143
+ " <td>21.145700</td>\n",
144
+ " <td>24.020696</td>\n",
145
  " <td>58</td>\n",
146
  " <td>144</td>\n",
147
  " <td>27</td>\n",
 
152
  " <td>Monday</td>\n",
153
  " </tr>\n",
154
  " <tr>\n",
155
+ " <th>5</th>\n",
156
  " <td>2024-10-22</td>\n",
157
+ " <td>21.776580</td>\n",
158
+ " <td>23.335886</td>\n",
159
+ " <td>53</td>\n",
160
+ " <td>114</td>\n",
161
  " <td>57</td>\n",
162
+ " <td>49</td>\n",
163
+ " <td>10269</td>\n",
164
+ " <td>226</td>\n",
165
+ " <td>92</td>\n",
166
  " <td>Tuesday</td>\n",
167
  " </tr>\n",
168
  " <tr>\n",
169
+ " <th>6</th>\n",
170
  " <td>2024-10-23</td>\n",
171
+ " <td>21.974794</td>\n",
172
+ " <td>22.214689</td>\n",
173
+ " <td>36</td>\n",
174
+ " <td>112</td>\n",
175
+ " <td>12</td>\n",
176
  " <td>0</td>\n",
177
  " <td>10328</td>\n",
178
+ " <td>65</td>\n",
179
+ " <td>97</td>\n",
180
  " <td>Wednesday</td>\n",
181
  " </tr>\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  " <tr>\n",
183
+ " <th>7</th>\n",
184
+ " <td>2024-10-24</td>\n",
185
+ " <td>25.512568</td>\n",
186
+ " <td>20.913710</td>\n",
187
+ " <td>56</td>\n",
188
+ " <td>104</td>\n",
189
+ " <td>62</td>\n",
190
+ " <td>0</td>\n",
191
+ " <td>10247</td>\n",
192
+ " <td>130</td>\n",
193
+ " <td>94</td>\n",
194
+ " <td>Thursday</td>\n",
 
 
 
 
 
 
 
 
 
 
195
  " </tr>\n",
196
  " </tbody>\n",
197
  "</table>\n",
 
198
  "</div>"
199
  ],
200
  "text/plain": [
201
+ " date NO2 O3 wind_speed mean_temp global_radiation \\\n",
202
+ "0 2024-10-17 22.804605 22.769160 51 169 43 \n",
203
+ "1 2024-10-18 23.268500 23.307332 21 155 42 \n",
204
+ "2 2024-10-19 23.910064 23.171714 41 147 43 \n",
205
+ "3 2024-10-20 22.573238 23.537845 81 155 0 \n",
206
+ "4 2024-10-21 21.145700 24.020696 58 144 27 \n",
207
+ "5 2024-10-22 21.776580 23.335886 53 114 57 \n",
208
+ "6 2024-10-23 21.974794 22.214689 36 112 12 \n",
209
+ "7 2024-10-24 25.512568 20.913710 56 104 62 \n",
 
 
210
  "\n",
211
+ " percipitation pressure minimum_visibility humidity weekday \n",
212
+ "0 6 10100 371 86 Thursday \n",
213
+ "1 39 10140 45 97 Friday \n",
214
+ "2 16 10141 228 89 Saturday \n",
215
+ "3 5 10160 415 83 Sunday \n",
216
+ "4 43 10206 220 92 Monday \n",
217
+ "5 49 10269 226 92 Tuesday \n",
218
+ "6 0 10328 65 97 Wednesday \n",
219
+ "7 0 10247 130 94 Thursday "
 
220
  ]
221
  },
222
+ "execution_count": 3,
223
  "metadata": {},
224
  "output_type": "execute_result"
225
  }
226
  ],
227
  "source": [
228
+ "week_data"
 
 
 
 
 
 
 
 
 
229
  ]
230
  },
231
  {
232
  "cell_type": "code",
233
+ "execution_count": 4,
234
  "metadata": {},
235
  "outputs": [
236
  {
237
  "data": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
  "text/plain": [
239
+ "array([[10.33808859, 16.00098432, 19.64377496]])"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
  ]
241
  },
242
+ "execution_count": 4,
243
  "metadata": {},
244
  "output_type": "execute_result"
245
  }
246
  ],
247
  "source": [
248
+ "predictions_O3"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
249
  ]
250
  },
251
  {
252
  "cell_type": "code",
253
+ "execution_count": 5,
254
  "metadata": {},
255
  "outputs": [
256
  {
257
  "data": {
258
  "text/plain": [
259
+ "array([[25.68519992, 25.76030745, 31.21057679]])"
260
  ]
261
  },
262
+ "execution_count": 5,
263
  "metadata": {},
264
  "output_type": "execute_result"
265
  }
266
  ],
267
  "source": [
268
+ "predictions_NO2"
269
  ]
270
  }
271
  ],