import os
import json
import pandas as pd
import requests
from pathlib import Path
from huggingface_hub import hf_hub_download
from datasets import load_dataset

# Constants
REPO_ID = "danielrosehill/ifvi_valuefactors_deriv"
DATASET_ID = "danielrosehill/ifvi_valuefactors_deriv"
HF_API_URL = "https://huggingface.co/api/datasets/danielrosehill/ifvi_valuefactors_deriv/tree/main/data"

def get_hf_directory_structure(path=""):
    """
    Get the directory structure from Hugging Face API
    """
    url = f"{HF_API_URL}/{path}" if path else HF_API_URL
    response = requests.get(url)
    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error fetching directory structure: {response.status_code}")
        return []

def download_file_from_hf(file_path):
    """
    Download a file from Hugging Face
    """
    try:
        # Convert local path to HF path
        hf_path = file_path.split('/data/')[1] if '/data/' in file_path else file_path
        downloaded_file = hf_hub_download(repo_id=REPO_ID, filename=f"data/{hf_path}")
        return downloaded_file
    except Exception as e:
        print(f"Error downloading file: {e}")
        return None

def load_dataset_from_hf(path=None):
    """
    Load dataset from Hugging Face
    """
    try:
        # If path is specified, load specific file, otherwise load the whole dataset
        if path:
            dataset = load_dataset(DATASET_ID, data_files={"data": path}, split="data")
        else:
            dataset = load_dataset(DATASET_ID)
        return dataset
    except Exception as e:
        print(f"Error loading dataset from HF: {e}")
        return None

def load_json_data(file_path):
    """
    Load JSON data from a file
    """
    try:
        # Check if file exists locally
        if os.path.exists(file_path):
            with open(file_path, 'r') as f:
                data = json.load(f)
            return data
        else:
            # Try to load from HF datasets
            try:
                # Extract the path relative to the dataset
                relative_path = file_path.split('/data/')[1] if '/data/' in file_path else file_path
                dataset = load_dataset_from_hf(f"data/{relative_path}")
                if dataset:
                    # Convert dataset to list/dict format
                    return dataset.to_dict()
            except Exception as dataset_error:
                print(f"Error loading from dataset: {dataset_error}")
            
            # Fallback to direct download
            downloaded_file = download_file_from_hf(file_path)
            if downloaded_file:
                with open(downloaded_file, 'r') as f:
                    data = json.load(f)
                return data
            else:
                # Fallback to sample data
                sample_data_path = os.path.join(os.getcwd(), "data/sample_data.json")
                if os.path.exists(sample_data_path):
                    print(f"Using sample data for {file_path}")
                    with open(sample_data_path, 'r') as f:
                        return json.load(f)
                return None
    except Exception as e:
        print(f"Error loading JSON data: {e}")
        return None

def load_csv_data(file_path):
    """
    Load CSV data from a file
    """
    try:
        # Check if file exists locally
        if os.path.exists(file_path):
            return pd.read_csv(file_path)
        else:
            # Try to load from HF datasets
            try:
                # Extract the path relative to the dataset
                relative_path = file_path.split('/data/')[1] if '/data/' in file_path else file_path
                dataset = load_dataset_from_hf(f"data/{relative_path}")
                if dataset:
                    # Convert dataset to pandas DataFrame
                    return pd.DataFrame(dataset)
            except Exception as dataset_error:
                print(f"Error loading from dataset: {dataset_error}")
            
            # Fallback to direct download
            downloaded_file = download_file_from_hf(file_path)
            if downloaded_file:
                return pd.read_csv(downloaded_file)
            else:
                return None
    except Exception as e:
        print(f"Error loading CSV data: {e}")
        return None

def get_continents():
    """
    Get list of continents
    """
    continents_path = os.path.join(os.getcwd(), "data/by-region/continental")
    if os.path.exists(continents_path):
        return [d for d in os.listdir(continents_path) if os.path.isdir(os.path.join(continents_path, d))]
    else:
        # Try to get from HF API
        try:
            dir_structure = get_hf_directory_structure("by-region/continental")
            continents = [item['path'].split('/')[-1] for item in dir_structure 
                         if item['type'] == 'directory']
            if continents:
                return sorted(continents)
        except Exception as e:
            print(f"Error getting continents from HF API: {e}")
        
        # Fallback to hardcoded list
        return ["Africa", "Asia", "Europe", "North America", "Oceania", "South America"]

def get_countries(continent):
    """
    Get list of countries for a continent
    """
    countries_dir = f"data/by-region/continental/{continent}"
    countries_path = os.path.join(os.getcwd(), countries_dir)
    
    if os.path.exists(countries_path):
        countries = [os.path.splitext(f)[0] for f in os.listdir(countries_path) 
                    if os.path.isfile(os.path.join(countries_path, f)) and f.endswith('.json')]
        return sorted(countries)
    else:
        # Try to get from HF API
        try:
            dir_structure = get_hf_directory_structure(f"by-region/continental/{continent}")
            countries = [os.path.splitext(item['path'].split('/')[-1])[0] for item in dir_structure 
                        if item['type'] == 'file' and item['path'].endswith('.json')]
            return sorted(countries)
        except Exception as e:
            print(f"Error getting countries from HF API: {e}")
            return []

def get_impact_types():
    """
    Get list of impact types
    """
    impact_types_path = os.path.join(os.getcwd(), "data/by-impact-type")
    if os.path.exists(impact_types_path):
        impact_types = [d for d in os.listdir(impact_types_path) 
                       if os.path.isdir(os.path.join(impact_types_path, d))]
        # Add single JSON files
        impact_types.extend([os.path.splitext(f)[0] for f in os.listdir(impact_types_path)
                           if os.path.isfile(os.path.join(impact_types_path, f)) and f.endswith('.json')])
        return sorted(impact_types)
    else:
        # Try to get from HF API
        try:
            dir_structure = get_hf_directory_structure("by-impact-type")
            impact_types = []
            
            # Add directories
            impact_types.extend([item['path'].split('/')[-1] for item in dir_structure 
                               if item['type'] == 'directory'])
            
            # Add JSON files (without extension)
            impact_types.extend([os.path.splitext(item['path'].split('/')[-1])[0] for item in dir_structure 
                               if item['type'] == 'file' and item['path'].endswith('.json')])
            
            if impact_types:
                return sorted(impact_types)
        except Exception as e:
            print(f"Error getting impact types from HF API: {e}")
        
        # Fallback to hardcoded list
        return ["air-pollution", "GHG_Impacts", "waste", "water-consumption", 
                "economic", "ecosystem", "health", "social"]

def get_impact_type_data(impact_type):
    """
    Get data for an impact type
    """
    # Map friendly names to directory/file names
    impact_map = {
        "Air Pollution": "air-pollution",
        "GHG Impacts": "GHG_Impacts",
        "Waste": "waste",
        "Water Consumption": "water-consumption",
        "Economic": "economic",
        "Ecosystem": "ecosystem",
        "Health": "health",
        "Social": "social"
    }
    
    # If impact_type is already a directory/file name, use it directly
    impact_dir = impact_map.get(impact_type, impact_type)
    
    # Check if it's a file or directory
    base_path = os.path.join(os.getcwd(), "data/by-impact-type")
    impact_path = os.path.join(base_path, impact_dir)
    
    if os.path.isfile(f"{impact_path}.json"):
        # It's a single JSON file
        return load_json_data(f"{impact_path}.json")
    elif os.path.isdir(impact_path):
        # It's a directory, find all JSON files
        json_files = [f for f in os.listdir(impact_path) if f.endswith('.json')]
        if json_files:
            # Return the first JSON file for now
            return load_json_data(os.path.join(impact_path, json_files[0]))
    
    # If not found locally, try to get from HF
    try:
        # Try to load from HF datasets
        dataset_path = f"data/by-impact-type/{impact_dir}.json"
        dataset = load_dataset_from_hf(dataset_path)
        if dataset:
            return dataset.to_dict()
        
        # Check if it's a single file
        downloaded_file = download_file_from_hf(f"data/by-impact-type/{impact_dir}.json")
        if downloaded_file:
            return load_json_data(downloaded_file)
        
        # Check if it's a directory
        dir_structure = get_hf_directory_structure(f"by-impact-type/{impact_dir}")
        if dir_structure:
            json_files = [item['path'] for item in dir_structure if item['path'].endswith('.json')]
            if json_files:
                downloaded_file = download_file_from_hf(f"data/{json_files[0]}")
                if downloaded_file:
                    return load_json_data(downloaded_file)
    except Exception as e:
        print(f"Error getting impact type data from HF: {e}")
    
    return None

def get_country_data(continent, country):
    """
    Get data for a country
    """
    if not continent or not country:
        return None
    
    file_path = os.path.join(os.getcwd(), f"data/by-region/continental/{continent}/{country}.json")
    data = load_json_data(file_path)
    
    if not data:
        # Try to get from HF datasets
        try:
            dataset_path = f"data/by-region/continental/{continent}/{country}.json"
            dataset = load_dataset_from_hf(dataset_path)
            if dataset:
                return dataset.to_dict()
        except Exception as dataset_error:
            print(f"Error loading from dataset: {dataset_error}")
        
        # Fallback to direct download
        try:
            downloaded_file = download_file_from_hf(f"data/by-region/continental/{continent}/{country}.json")
            if downloaded_file:
                data = load_json_data(downloaded_file)
        except Exception as e:
            print(f"Error getting country data from HF: {e}")
    
    return data

def create_summary_stats(data):
    """
    Create summary statistics for data
    """
    if not data:
        return "No data available for summary statistics."
    
    # Convert data to DataFrame if it's a list
    if isinstance(data, list):
        df = pd.DataFrame(data)
    else:
        # If it's already a DataFrame or another format, try to convert
        df = pd.DataFrame(data)
    
    if 'ValueFactor' in df.columns:
        stats = {
            "Count": len(df),
            "Mean Value Factor": f"${df['ValueFactor'].mean():.2f}",
            "Median Value Factor": f"${df['ValueFactor'].median():.2f}",
            "Min Value Factor": f"${df['ValueFactor'].min():.2f}",
            "Max Value Factor": f"${df['ValueFactor'].max():.2f}",
            "Standard Deviation": f"${df['ValueFactor'].std():.2f}"
        }
        
        # Create categories summary if available
        if 'Category' in df.columns:
            categories = df['Category'].unique()
            stats["Number of Categories"] = len(categories)
            stats["Categories"] = ", ".join(categories[:5]) + ("..." if len(categories) > 5 else "")
        
        # Create impacts summary if available
        if 'Impact' in df.columns:
            impacts = df['Impact'].unique()
            stats["Number of Impact Types"] = len(impacts)
            stats["Impact Types"] = ", ".join(impacts[:5]) + ("..." if len(impacts) > 5 else "")
        
        return "\n".join([f"**{k}**: {v}" for k, v in stats.items()])
    else:
        return "Value Factor data not available for summary statistics."