Update praw_newgest_df2024.py
Browse files
- praw_newgest_df2024.py +7 -14
praw_newgest_df2024.py
CHANGED
|
@@ -152,16 +152,6 @@ def get_comments_and_upload(df, dataset_repo_id):
|
|
| 152 |
# Fetch comments for the current submission
|
| 153 |
comments_df = get_comments(reddit, row['id'])
|
| 154 |
|
| 155 |
-
# # Prepare data for the current submission’s comments
|
| 156 |
-
# comments_data = [{
|
| 157 |
-
# 'comment_id': comment.id,
|
| 158 |
-
# 'comment_content': comment.body,
|
| 159 |
-
# 'comment_created': comment.created,
|
| 160 |
-
# 'submission_id': row['id']
|
| 161 |
-
# } for comment in comments]
|
| 162 |
-
|
| 163 |
-
# Create a DataFrame for the current submission's comments
|
| 164 |
-
# comments_df = pd.DataFrame(comments_data, columns=['comment_id', 'comment_content', 'comment_created', 'submission_id'])
|
| 165 |
if len(comments_df) == 0:
|
| 166 |
print(f"No comments found for {row['id']}")
|
| 167 |
# continue
|
|
@@ -218,13 +208,16 @@ def main():
|
|
| 218 |
print(f"Dataset {dataset_repo_id} will be created.")
|
| 219 |
# If the dataset doesn't exist, create it and then upload the CSV file
|
| 220 |
# api.create_repo(repo_id=dataset_repo_id, repo_type="dataset")
|
|
|
|
|
|
|
|
|
|
| 221 |
|
| 222 |
-
df_24.to_csv(
|
| 223 |
-
csv_file_path =
|
| 224 |
|
| 225 |
api.upload_file(
|
| 226 |
-
path_or_fileobj=
|
| 227 |
-
path_in_repo="
|
| 228 |
repo_id=dataset_repo_id,
|
| 229 |
repo_type="dataset"
|
| 230 |
)
|
|
|
|
| 152 |
# Fetch comments for the current submission
|
| 153 |
comments_df = get_comments(reddit, row['id'])
|
| 154 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
if len(comments_df) == 0:
|
| 156 |
print(f"No comments found for {row['id']}")
|
| 157 |
# continue
|
|
|
|
| 208 |
print(f"Dataset {dataset_repo_id} will be created.")
|
| 209 |
# If the dataset doesn't exist, create it and then upload the CSV file
|
| 210 |
# api.create_repo(repo_id=dataset_repo_id, repo_type="dataset")
|
| 211 |
+
|
| 212 |
+
today_date = datetime.now().strftime('%Y%m%d')
|
| 213 |
+
filename = f"df_24_{today_date}.csv"
|
| 214 |
|
| 215 |
+
df_24.to_csv(filename, index=False)
|
| 216 |
+
# csv_file_path = filename
|
| 217 |
|
| 218 |
api.upload_file(
|
| 219 |
+
path_or_fileobj= filename,
|
| 220 |
+
path_in_repo=f"submissions/{filename}",
|
| 221 |
repo_id=dataset_repo_id,
|
| 222 |
repo_type="dataset"
|
| 223 |
)
|