GenAIDevTOProd committed on
Commit
7b8fa4f
·
verified ·
1 Parent(s): cc97202

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -18,12 +18,12 @@ HF_TOKEN = os.environ.get("RedditSemanticSearch")
18
  # Function to stream JSONL Reddit files from HF Hub
19
  from datasets import load_dataset
20
 
21
- # Load full Reddit dataset (assumes it's pre-split by subreddit or has a field)
22
- dataset = load_dataset("HuggingFaceGECLM/REDDIT_comments", split="askscience", "gaming", "technology", "todayilearned", "programming")
23
-
24
  # Define target subreddits
25
  target_subreddits = ["askscience", "gaming", "technology", "todayilearned", "programming"]
26
 
 
 
 
27
  # Filter only relevant subreddits
28
  dataset = dataset.filter(lambda x: x["subreddit"] in target_subreddits)
29
 
 
18
  # Function to stream JSONL Reddit files from HF Hub
19
  from datasets import load_dataset
20
 
 
 
 
21
  # Define target subreddits
22
  target_subreddits = ["askscience", "gaming", "technology", "todayilearned", "programming"]
23
 
24
+ # Load one streaming split per target subreddit (assumes the dataset is pre-split by subreddit)
25
+ dataset_splits = [load_dataset("HuggingFaceGECLM/REDDIT_comments", split=sub, streaming=True) for sub in target_subreddits]
26
+
27
  # Filter only relevant subreddits
28
  dataset = dataset.filter(lambda x: x["subreddit"] in target_subreddits)
29