from github import Github  # PyGithub
from urllib.parse import urlparse
import os
from dotenv import load_dotenv

# Load GITHUB_TOKEN from a local .env file.
load_dotenv()

def extract_repo_name(url):
    # Take the first two path segments, so URLs like
    # https://github.com/owner/repo/tree/main still resolve to (owner, repo).
    parts = urlparse(url).path.strip("/").split("/")
    return parts[0], parts[1].removesuffix(".git")

def is_important_file(path):
    ignored_dirs = {"node_modules", "test", "__tests__", ".git", "build", ".next", ".vscode"}
    important_dirs = {"src", "components", "pages", "hooks", "controller", "service"}
    important_exts = (".py", ".js", ".ts", ".jsx", ".tsx", ".java")
    # Match whole path segments rather than substrings, so e.g.
    # "src/latest.py" is not skipped just because "test" appears in the name.
    parts = path.split("/")
    if any(part in ignored_dirs for part in parts):
        return False
    if path.endswith(important_exts) and any(part in important_dirs for part in parts):
        return True
    # Also keep source files at the repo root or one level deep.
    return path.count("/") <= 1 and path.endswith(important_exts)

| def get_filtered_file_contents(repo, path=""): | |
| contents = repo.get_contents(path) | |
| all_files = {} | |
| for content in contents: | |
| if content.type == "dir": | |
| all_files.update(get_filtered_file_contents(repo, content.path)) | |
| else: | |
| if is_important_file(content.path): | |
| try: | |
| file_data = content.decoded_content.decode("utf-8") | |
| all_files[content.path] = file_data | |
| except Exception as e: | |
| all_files[content.path] = f"Unable to read: {e}" | |
| return all_files | |
def analyze_repo(repo_url):
    # Authenticate with the token loaded from .env via load_dotenv() above.
    g = Github(os.getenv("GITHUB_TOKEN"))
    owner, name = extract_repo_name(repo_url)
    repo = g.get_repo(f"{owner}/{name}")
    return {
        "name": repo.name,
        "description": repo.description,
        "topics": repo.get_topics(),
        "files": get_filtered_file_contents(repo),
        "url": repo.html_url,
    }
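

# A minimal usage sketch, not part of the original script: the repo URL is a
# placeholder, and printing the dict is just one way to inspect the result.
if __name__ == "__main__":
    result = analyze_repo("https://github.com/owner/repo")
    print(result["name"], "-", result["description"])
    for file_path in result["files"]:
        print(" ", file_path)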