# Spaces:
# Sleeping
# Sleeping
import os
from urllib.parse import urlparse

from dotenv import load_dotenv
from github import Github

# Read a local .env file (if any) into the process environment at import time,
# so that os.getenv("GITHUB_TOKEN") can pick up a token defined there.
load_dotenv()
def extract_repo_name(url):
    """Return the ``(owner, repository)`` pair named by a GitHub repository URL.

    The owner and repository are read from the first two path segments, so a
    link that points inside a repository (``.../owner/repo/tree/main``) still
    resolves to the repository itself. A trailing ``.git`` on the repository
    name is dropped, and a scheme-less ``github.com/owner/repo`` is accepted.

    Args:
        url: Repository URL, e.g. ``https://github.com/owner/repo``.

    Returns:
        A tuple ``(owner, name)`` of strings.

    Raises:
        IndexError: If the URL path holds fewer than two segments.
    """
    parsed = urlparse(url.strip())
    segments = [part for part in parsed.path.split("/") if part]

    # Without a scheme, urlparse leaves the host name as the first path
    # segment; discard it so owner/repo line up in positions 0 and 1.
    if (
        not parsed.netloc
        and segments
        and segments[0].lower() in ("github.com", "www.github.com")
    ):
        segments = segments[1:]

    if len(segments) < 2:
        raise IndexError(f"cannot find owner/repository in URL: {url!r}")

    owner, name = segments[0], segments[1]
    # Clone URLs end in ".git", which is not part of the repository name.
    if name.endswith(".git"):
        name = name[: -len(".git")]
    return owner, name
def is_important_file(path):
    """Decide whether a repository file path should be included in the analysis.

    A path is accepted when it contains none of the ignored names, ends with
    one of the recognised source extensions, and either contains one of the
    "important" directory names or has at most one ``/`` in it.

    Args:
        path: Repository-relative file path using ``/`` separators.

    Returns:
        ``True`` if the file should be kept, ``False`` otherwise.
    """
    ignored_dirs = ("node_modules", "test", "__tests__", ".git", "build", ".next", ".vscode")
    important_dirs = ("src", "components", "pages", "hooks", "controller", "service")
    important_exts = (".py", ".js", ".ts", ".jsx", ".tsx", ".java")

    # NOTE(review): both name checks are plain substring tests on the whole
    # path, not per-directory comparisons -- e.g. "src/latest.py" is rejected
    # because it contains "test". Kept as-is to preserve existing behaviour.
    if any(ignored in path for ignored in ignored_dirs):
        return False

    # str.endswith accepts a tuple, so one call covers every extension.
    if not path.endswith(important_exts):
        return False

    if any(dir_ in path for dir_ in important_dirs):
        return True

    # Otherwise only keep source files with at most one "/" in their path.
    return path.count("/") <= 1
def get_filtered_file_contents(repo, path=""):
    """Recursively collect the text of every important file under ``path``.

    Directories are walked depth-first via ``repo.get_contents``; each
    non-directory entry is kept only if ``is_important_file`` accepts its path.

    Args:
        repo: Repository object exposing ``get_contents(path)`` (the object
            returned by ``Github.get_repo`` in this module).
        path: Repository-relative path to start from; ``""`` is the root.

    Returns:
        Dict mapping file path to its UTF-8 decoded text. If a file cannot be
        read or decoded, its value is the string ``"Unable to read: <error>"``.
    """
    contents = repo.get_contents(path)
    # get_contents yields a single object, not a list, when ``path`` names a
    # file; normalise so the loop below handles both shapes.
    if not isinstance(contents, list):
        contents = [contents]

    all_files = {}
    for content in contents:
        if content.type == "dir":
            all_files.update(get_filtered_file_contents(repo, content.path))
            continue
        if not is_important_file(content.path):
            continue
        try:
            all_files[content.path] = content.decoded_content.decode("utf-8")
        except Exception as e:
            # Deliberately best-effort: one unreadable file must not abort the
            # whole walk, so the failure is recorded in place of the content.
            all_files[content.path] = f"Unable to read: {e}"
    return all_files
def analyze_repo(repo_url):
    """Fetch metadata and filtered source files for a GitHub repository.

    The access token is read from the ``GITHUB_TOKEN`` environment variable.
    When it is unset or blank, an unauthenticated client is used instead of
    failing on the missing value.

    Args:
        repo_url: URL of the repository, e.g. ``https://github.com/owner/repo``.

    Returns:
        Dict with the keys ``"name"``, ``"description"``, ``"topics"``,
        ``"files"`` (path -> text, from ``get_filtered_file_contents``) and
        ``"url"``.
    """
    # os.getenv returns None when the variable is missing; guard before strip().
    token = (os.getenv("GITHUB_TOKEN") or "").strip()
    g = Github(token) if token else Github()

    owner, name = extract_repo_name(repo_url)
    repo = g.get_repo(f"{owner}/{name}")
    info = {
        "name": repo.name,
        "description": repo.description,
        "topics": repo.get_topics(),
        "files": get_filtered_file_contents(repo),
        "url": repo.html_url,
    }
    return info