Spaces:
Sleeping
Sleeping
features
Browse files
app.py
CHANGED
@@ -17,17 +17,17 @@ from PIL import Image
|
|
17 |
import numpy as np
|
18 |
import matplotlib
|
19 |
import wandb
|
20 |
-
|
21 |
|
22 |
# Load dataset once at the start to avoid redundant requests
|
23 |
# dataset = load_dataset("Chendi/NYC_TAXI_FARE_CLEANED")
|
24 |
|
25 |
wandb.login(key=os.getenv("WANDB_API_KEY"))
|
26 |
wandb.init(project="billion-row-analysis", name="benchmarking")
|
27 |
-
|
28 |
parquet_path = "jan_2024.parquet"
|
29 |
-
|
30 |
-
|
31 |
os.environ["MODIN_ENGINE"] = "dask"
|
32 |
|
33 |
# Initialize FastAPI app
|
|
|
17 |
import numpy as np
|
18 |
import matplotlib
|
19 |
import wandb
|
20 |
+
from datasets import load_dataset
|
21 |
|
22 |
# Load dataset once at the start to avoid redundant requests
|
23 |
# dataset = load_dataset("Chendi/NYC_TAXI_FARE_CLEANED")
|
24 |
|
25 |
wandb.login(key=os.getenv("WANDB_API_KEY"))
|
26 |
wandb.init(project="billion-row-analysis", name="benchmarking")
|
27 |
+
dataset = load_dataset("AnnsKhan/jan_2024_nyc", split="train")
|
28 |
parquet_path = "jan_2024.parquet"
|
29 |
+
if not os.path.exists(parquet_path):
|
30 |
+
dataset.to_pandas().to_parquet(parquet_path) # Save to disk
|
31 |
os.environ["MODIN_ENGINE"] = "dask"
|
32 |
|
33 |
# Initialize FastAPI app
|