AnnsKhan committed on
Commit
edcd8c8
·
1 Parent(s): b30b471
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -17,17 +17,17 @@ from PIL import Image
17
  import numpy as np
18
  import matplotlib
19
  import wandb
20
- #from datasets import load_dataset
21
 
22
  # Load dataset once at the start to avoid redundant requests
23
  # dataset = load_dataset("Chendi/NYC_TAXI_FARE_CLEANED")
24
 
25
  wandb.login(key=os.getenv("WANDB_API_KEY"))
26
  wandb.init(project="billion-row-analysis", name="benchmarking")
27
- #dataset = load_dataset("AnnsKhan/jan_2024_nyc", split="train")
28
  parquet_path = "jan_2024.parquet"
29
- # if not os.path.exists(parquet_path):
30
- # dataset.to_pandas().to_parquet(parquet_path) # Save to disk
31
  os.environ["MODIN_ENGINE"] = "dask"
32
 
33
  # Initialize FastAPI app
 
17
  import numpy as np
18
  import matplotlib
19
  import wandb
20
+ from datasets import load_dataset
21
 
22
  # Load dataset once at the start to avoid redundant requests
23
  # dataset = load_dataset("Chendi/NYC_TAXI_FARE_CLEANED")
24
 
25
  wandb.login(key=os.getenv("WANDB_API_KEY"))
26
  wandb.init(project="billion-row-analysis", name="benchmarking")
27
+ dataset = load_dataset("AnnsKhan/jan_2024_nyc", split="train")
28
  parquet_path = "jan_2024.parquet"
29
+ if not os.path.exists(parquet_path):
30
+ dataset.to_pandas().to_parquet(parquet_path) # Save to disk
31
  os.environ["MODIN_ENGINE"] = "dask"
32
 
33
  # Initialize FastAPI app