Henry65
/

RepoSim4Py

@@ -179,26 +179,26 @@ class RepoPipeline(Pipeline):
                 # Code embeddings
                 tqdm.write(f"[*] Generating code embeddings for {repo_name}")
                 code_embeddings = self.generate_embeddings(repo_info["codes"], max_length)
-                info["code_embeddings"] = code_embeddings.numpy()
-                info["mean_code_embedding"] = torch.mean(code_embeddings, dim=0).numpy()
                 # Doc embeddings
                 tqdm.write(f"[*] Generating doc embeddings for {repo_name}")
                 doc_embeddings = self.generate_embeddings(repo_info["docs"], max_length)
-                info["doc_embeddings"] = doc_embeddings.numpy()
-                info["mean_doc_embedding"] = torch.mean(doc_embeddings, dim=0).numpy()
                 # Requirement embeddings
                 tqdm.write(f"[*] Generating requirement embeddings for {repo_name}")
                 requirement_embeddings = self.generate_embeddings(repo_info["requirements"], max_length)
-                info["requirement_embeddings"] = requirement_embeddings.numpy()
-                info["mean_requirement_embedding"] = torch.mean(requirement_embeddings, dim=0).numpy()
                 # Readme embeddings
                 tqdm.write(f"[*] Generating readme embeddings for {repo_name}")
                 readme_embeddings = self.generate_embeddings(repo_info["readmes"], max_length)
-                info["readme_embeddings"] = readme_embeddings.numpy()
-                info["mean_readme_embedding"] = torch.mean(readme_embeddings, dim=0).numpy()
                 progress_bar.update(1)
                 model_outputs.append(info)

                 # Code embeddings
                 tqdm.write(f"[*] Generating code embeddings for {repo_name}")
                 code_embeddings = self.generate_embeddings(repo_info["codes"], max_length)
+                info["code_embeddings"] = code_embeddings.cpu().numpy()
+                info["mean_code_embedding"] = torch.mean(code_embeddings, dim=0).cpu().numpy()
                 # Doc embeddings
                 tqdm.write(f"[*] Generating doc embeddings for {repo_name}")
                 doc_embeddings = self.generate_embeddings(repo_info["docs"], max_length)
+                info["doc_embeddings"] = doc_embeddings.cpu().numpy()
+                info["mean_doc_embedding"] = torch.mean(doc_embeddings, dim=0).cpu().numpy()
                 # Requirement embeddings
                 tqdm.write(f"[*] Generating requirement embeddings for {repo_name}")
                 requirement_embeddings = self.generate_embeddings(repo_info["requirements"], max_length)
+                info["requirement_embeddings"] = requirement_embeddings.cpu().numpy()
+                info["mean_requirement_embedding"] = torch.mean(requirement_embeddings, dim=0).cpu().numpy()
                 # Readme embeddings
                 tqdm.write(f"[*] Generating readme embeddings for {repo_name}")
                 readme_embeddings = self.generate_embeddings(repo_info["readmes"], max_length)
+                info["readme_embeddings"] = readme_embeddings.cpu().numpy()
+                info["mean_readme_embedding"] = torch.mean(readme_embeddings, dim=0).cpu().numpy()
                 progress_bar.update(1)
                 model_outputs.append(info)