Spaces:

mangoesai
/

Refresh_Praw_pinecone_dataset

Sleeping

Vera-ZWY committed on Nov 19, 2024

Commit

449695a

verified ·

1 Parent(s): 513d672

Update praw_newgest_df2024.py

Files changed (1) hide show

praw_newgest_df2024.py CHANGED Viewed

@@ -152,16 +152,6 @@ def get_comments_and_upload(df, dataset_repo_id):
         # Fetch comments for the current submission
         comments_df = get_comments(reddit, row['id'])
-        # # Prepare data for the current submission’s comments
-        # comments_data = [{
-        #     'comment_id': comment.id,
-        #     'comment_content': comment.body,
-        #     'comment_created': comment.created,
-        #     'submission_id': row['id']
-        # } for comment in comments]
-        # Create a DataFrame for the current submission's comments
-        # comments_df = pd.DataFrame(comments_data, columns=['comment_id', 'comment_content', 'comment_created', 'submission_id'])
         if len(comments_df) == 0:
             print(f"No comments found for {row['id']}")
             # continue
@@ -218,13 +208,16 @@ def main():
                 print(f"Dataset {dataset_repo_id} will be created.")
                 # If the dataset doesn't exist, create it and then upload the CSV file
                 # api.create_repo(repo_id=dataset_repo_id, repo_type="dataset")
-            df_24.to_csv("df_24.csv", index=False)
-            csv_file_path = "df_24.csv"
             api.upload_file(
-                path_or_fileobj= csv_file_path,
-                path_in_repo="df_24_newest.csv",
                 repo_id=dataset_repo_id,
                 repo_type="dataset"
             )

         # Fetch comments for the current submission
         comments_df = get_comments(reddit, row['id'])
         if len(comments_df) == 0:
             print(f"No comments found for {row['id']}")
             # continue
                 print(f"Dataset {dataset_repo_id} will be created.")
                 # If the dataset doesn't exist, create it and then upload the CSV file
                 # api.create_repo(repo_id=dataset_repo_id, repo_type="dataset")
+            today_date = datetime.now().strftime('%Y%m%d')
+            filename = f"df_24_{today_date}.csv"
+            df_24.to_csv(filename, index=False)
+            # csv_file_path = filename
             api.upload_file(
+                path_or_fileobj= filename,
+                path_in_repo=f"submissions/{filename}",
                 repo_id=dataset_repo_id,
                 repo_type="dataset"
             )