bluuebunny committed on
Commit
e8bb333
·
verified ·
1 Parent(s): 727da25

Update update_embeddings.py

Browse files
Files changed (1) hide show
  1. update_embeddings.py +10 -1
update_embeddings.py CHANGED
@@ -174,8 +174,17 @@ previous_embeddings = pd.read_parquet(previous_embed)
174
  # Find papers that are not in the previous embeddings
175
  new_papers = arxiv_metadata_split[~arxiv_metadata_split['id'].isin(previous_embeddings['id'])]
176
 
 
 
 
 
 
 
 
 
 
177
  # Create a column for embeddings
178
- print(f"Creating new embeddings for: {len(new_papers)} entries")
179
  new_papers["vector"] = new_papers["abstract"].progress_apply(embed)
180
 
181
  # Rename columns
 
174
  # Find papers that are not in the previous embeddings
175
  new_papers = arxiv_metadata_split[~arxiv_metadata_split['id'].isin(previous_embeddings['id'])]
176
 
177
+ # Number of new papers
178
+ num_new_papers = len(new_papers)
179
+
180
+ # What if there are no new papers?
181
+ if num_new_papers == 0:
182
+ print(f"No new papers found for year: {year}")
183
+ print("Exiting")
184
+ sys.exit()
185
+
186
  # Create a column for embeddings
187
+ print(f"Creating new embeddings for: {num_new_papers} entries")
188
  new_papers["vector"] = new_papers["abstract"].progress_apply(embed)
189
 
190
  # Rename columns