test with different data
Browse files- dashboard_utils/bubbles.py +43 -30
- data/serializaledata.json +0 -0
- perso/change_data.py +19 -0
- perso/get_usernames.py +14 -0
dashboard_utils/bubbles.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
import datetime
|
|
|
|
| 2 |
from concurrent.futures import as_completed
|
| 3 |
from urllib import parse
|
| 4 |
|
|
@@ -13,19 +14,25 @@ WANDB_REPO = "learning-at-home/Worker_logs"
|
|
| 13 |
|
| 14 |
@simple_time_tracker(_log)
|
| 15 |
def get_new_bubble_data():
|
| 16 |
-
serialized_data_points, latest_timestamp = get_serialized_data_points()
|
|
|
|
| 17 |
serialized_data = get_serialized_data(serialized_data_points, latest_timestamp)
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
return serialized_data, profiles
|
| 21 |
|
| 22 |
|
| 23 |
@simple_time_tracker(_log)
|
| 24 |
-
def get_profiles(
|
| 25 |
profiles = []
|
| 26 |
with FuturesSession() as session:
|
| 27 |
futures = []
|
| 28 |
-
for username in
|
| 29 |
future = session.get(URL_QUICKSEARCH + parse.urlencode({"type": "user", "q": username}))
|
| 30 |
future.username = username
|
| 31 |
futures.append(future)
|
|
@@ -100,30 +107,36 @@ def get_serialized_data_points():
|
|
| 100 |
|
| 101 |
@simple_time_tracker(_log)
|
| 102 |
def get_serialized_data(serialized_data_points, latest_timestamp):
|
| 103 |
-
serialized_data_points_v2 = []
|
| 104 |
-
max_velocity = 1
|
| 105 |
-
for run_name, serialized_data_point in serialized_data_points.items():
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
serialized_data = {"points": [serialized_data_points_v2], "maxVelocity": max_velocity}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
return serialized_data
|
|
|
|
| 1 |
import datetime
|
| 2 |
+
import json
|
| 3 |
from concurrent.futures import as_completed
|
| 4 |
from urllib import parse
|
| 5 |
|
|
|
|
| 14 |
|
| 15 |
@simple_time_tracker(_log)
def get_new_bubble_data():
    """Build the bubble payload and look up the profiles it references.

    Returns:
        A ``(serialized_data, profiles)`` tuple: the value returned by
        ``get_serialized_data`` and the result of ``get_profiles`` called
        with the ``profileId`` of every item in the first ``points`` series.
    """
    # The call to get_serialized_data_points() is disabled in this revision,
    # so placeholder values are forwarded instead.
    data_points = None
    timestamp = None
    payload = get_serialized_data(data_points, timestamp)

    first_series = payload["points"][0]
    profile_ids = [entry["profileId"] for entry in first_series]

    return payload, get_profiles(profile_ids)
|
| 28 |
|
| 29 |
|
| 30 |
@simple_time_tracker(_log)
|
| 31 |
+
def get_profiles(usernames):
|
| 32 |
profiles = []
|
| 33 |
with FuturesSession() as session:
|
| 34 |
futures = []
|
| 35 |
+
for username in usernames:
|
| 36 |
future = session.get(URL_QUICKSEARCH + parse.urlencode({"type": "user", "q": username}))
|
| 37 |
future.username = username
|
| 38 |
futures.append(future)
|
|
|
|
| 107 |
|
| 108 |
@simple_time_tracker(_log)
def get_serialized_data(serialized_data_points, latest_timestamp, data_path=None):
    """Load the pre-serialized bubble data from a JSON file.

    The previous in-memory aggregation over ``serialized_data_points`` is
    disabled in this revision; the first two arguments are accepted for
    interface compatibility but are not read.

    Args:
        serialized_data_points: Unused.
        latest_timestamp: Unused.
        data_path: Optional path of the JSON file to load. When omitted, the
            original hard-coded test file location is used.

    Returns:
        The object decoded from the JSON file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    if data_path is None:
        # Default preserved from the original test setup; this location is
        # machine-specific, so callers elsewhere should pass ``data_path``.
        data_path = (
            "/mnt/storage/Documents/hugging_face/colaborative_hub_training/demo_neurips/training-transformers-together-dashboard/data/"
            "serializaledata_V2.json"
        )

    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(data_path, "r", encoding="utf-8") as f:
        return json.load(f)
|
data/serializaledata.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
perso/change_data.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import random
|
| 3 |
+
|
| 4 |
+
# Directory that holds both the source dump and the down-sampled copy.
DATA_DIR = "/mnt/storage/Documents/hugging_face/colaborative_hub_training/demo_neurips/training-transformers-together-dashboard/data/"

with open(DATA_DIR + "serializaledata.json", "r") as source_file:
    serialized_data = json.load(source_file)

# Keep only the last series of points, retaining an item whenever
# random.random() exceeds 0.8. The loaded dict is updated in place.
last_series = serialized_data["points"][-1]
sampled = []
for item in last_series:
    if random.random() > 0.8:
        sampled.append(item)
serialized_data["points"] = [sampled]
serialized_data_v2 = serialized_data

with open(DATA_DIR + "serializaledata_V2.json", "w") as target_file:
    encoded = json.dumps(serialized_data_v2)
    target_file.write(encoded)
|
perso/get_usernames.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
|
| 3 |
+
# Location of the down-sampled data file to inspect.
DATA_FILE = (
    "/mnt/storage/Documents/hugging_face/colaborative_hub_training/demo_neurips/training-transformers-together-dashboard/data/"
    "serializaledata_V2.json"
)

with open(DATA_FILE, "r") as json_file:
    serialized_data = json.load(json_file)

# Collect the profileId of every item in the first series of points.
usernames = [point["profileId"] for point in serialized_data["points"][0]]

print(usernames)
|