"""Fetch and filter the contents of a GitHub repository for analysis.

Uses PyGithub to walk a repository tree and collect the text of files
that look relevant (source code, config, docs), skipping vendored and
generated directories.
"""

import os
from urllib.parse import urlparse

from dotenv import load_dotenv
from github import Github

# Load GITHUB_TOKEN (and anything else) from a local .env file at import time.
load_dotenv()

# Directory names that mark a path as noise: vendored deps, build output, VCS.
# Matched as whole path segments, not substrings.
_IGNORED_DIRS = frozenset([
    "node_modules", "test", "__tests__", ".git", "build", ".next", ".vscode",
])

# Directory names that mark a path as likely project source. Matched as
# whole path segments, not substrings.
_IMPORTANT_DIRS = frozenset([
    "src", "components", "pages", "hooks", "controller", "service",
    "frontend", "backend", "db", "database", "api", "ui", "models",
    "config", "scripts", "static", "templates", "utils", "lib",
    "middleware", "tests", "styles", "public",
])

# Suffixes accepted by str.endswith — mostly extensions, plus a few bare
# filenames (Dockerfile, Makefile). Deduplicated from the original list.
_IMPORTANT_SUFFIXES = (
    ".py", ".js", ".ts", ".jsx", ".tsx", ".java", ".go", ".html", ".php",
    ".dart", ".cpp", ".c", ".h", ".hpp", ".css", ".scss", ".json",
    ".yaml", ".yml", ".md", ".sh", ".bat", ".ps1", ".ipynb", ".onnx",
    ".pb", ".tflite", ".gradle", ".jar", ".war", ".env.example",
    "Dockerfile", "Makefile",
)


def extract_repo_name(url):
    """Return ``(owner, repo)`` parsed from a GitHub repository URL.

    Accepts URLs like ``https://github.com/owner/repo`` and tolerates a
    trailing ``.git`` suffix or trailing slash.

    Raises:
        ValueError: if the URL path does not contain at least two segments.
    """
    path = urlparse(url).path.strip("/")
    segments = path.split("/")
    if len(segments) < 2:
        # Original code raised an opaque IndexError here; fail clearly instead.
        raise ValueError(f"Not a valid GitHub repository URL: {url!r}")
    owner, name = segments[-2], segments[-1]
    # "owner/repo.git" clone URLs should resolve to the same repo.
    if name.endswith(".git"):
        name = name[:-4]
    return owner, name


def is_important_file(path):
    """Return True if *path* looks like a file worth including in analysis.

    A path is rejected if any of its directory segments is in
    ``_IGNORED_DIRS``. It is accepted when it has an important suffix and
    either lives under a recognized source directory or sits at the
    repository root (at most one directory deep).

    Note: segments are compared exactly, so ``tests/`` is kept while
    ``test/`` is ignored (the original substring check made the two
    indistinguishable and silently dropped all ``tests/`` files).
    """
    segments = path.split("/")
    # Only directory components matter for dir checks, not the filename.
    dir_segments = set(segments[:-1])

    if dir_segments & _IGNORED_DIRS:
        return False
    if not path.endswith(_IMPORTANT_SUFFIXES):
        return False
    # Under a known source directory, or shallow enough to be a root file.
    return bool(dir_segments & _IMPORTANT_DIRS) or path.count("/") <= 1


def get_filtered_file_contents(repo, path=""):
    """Recursively collect ``{path: text}`` for important files in *repo*.

    Walks the repository tree starting at *path* via the GitHub contents
    API. Files that fail to decode are kept with an ``"Unable to read"``
    placeholder rather than aborting the whole walk (best-effort by design).
    """
    contents = repo.get_contents(path)
    all_files = {}
    for content in contents:
        if content.type == "dir":
            all_files.update(get_filtered_file_contents(repo, content.path))
        elif is_important_file(content.path):
            try:
                all_files[content.path] = content.decoded_content.decode("utf-8")
            except Exception as e:
                # Binary blobs / bad encodings / API hiccups: record, don't crash.
                all_files[content.path] = f"Unable to read: {e}"
    return all_files


def analyze_repo(repo_url):
    """Return a summary dict for the repository at *repo_url*.

    The dict contains ``name``, ``description``, ``topics``, ``files``
    (filtered file contents, see :func:`get_filtered_file_contents`) and
    ``url``.

    Raises:
        RuntimeError: if the GITHUB_TOKEN environment variable is not set.
        ValueError: if *repo_url* is not a valid repository URL.
    """
    token = os.getenv("GITHUB_TOKEN")
    if not token:
        # Original code raised AttributeError on None.strip(); be explicit.
        raise RuntimeError("GITHUB_TOKEN environment variable is not set")
    g = Github(token.strip())
    owner, name = extract_repo_name(repo_url)
    repo = g.get_repo(f"{owner}/{name}")
    return {
        "name": repo.name,
        "description": repo.description,
        "topics": repo.get_topics(),
        "files": get_filtered_file_contents(repo),
        "url": repo.html_url,
    }