"""Loaders for the IFVI value-factors dataset hosted on Hugging Face.

Each public function tries several retrieval strategies in order (datasets
library, direct hub download, repo file listing, raw tree API) and falls back
to deterministic sample data or a hardcoded list when every strategy fails,
so the app keeps working offline or when the hub is unreachable.
"""

import json
import os
import zlib

import pandas as pd

# The hub/network libraries are optional at import time: every use below is
# wrapped in try/except with a local fallback, so the module stays importable
# (and the sample-data path usable) even when they are not installed.
try:
    import requests
except ImportError:
    requests = None
try:
    from datasets import load_dataset
except ImportError:
    load_dataset = None
try:
    # NOTE: huggingface_hub.list_repo_files is intentionally NOT imported here;
    # it would be shadowed by the module-level function of the same name below.
    from huggingface_hub import hf_hub_download
except ImportError:
    hf_hub_download = None

# Constants
DATASET_ID = "danielrosehill/ifvi_valuefactors_deriv"
REPO_ID = "danielrosehill/ifvi_valuefactors_deriv"
DATA_DIR = "data"
CONTINENTAL_DIR = "by-region/continental"
IMPACT_TYPE_DIR = "by-impact-type"
HF_API_URL = "https://huggingface.co/api/datasets/danielrosehill/ifvi_valuefactors_deriv/tree/main"

# Standard continent list used by several fallbacks.
_STANDARD_CONTINENTS = ["Africa", "Asia", "Europe", "North America", "Oceania", "South America"]


def is_space_environment():
    """Check if we're running in a Hugging Face Space (SPACE_ID env var set)."""
    return os.environ.get('SPACE_ID') is not None


def _stable_value(seed):
    """Deterministic pseudo-random value factor in [10, 999.01] derived from *seed*.

    Uses zlib.crc32 instead of hash() so sample data is reproducible across
    processes (str hash() is salted per run via PYTHONHASHSEED).
    """
    return round(10 + 990 * (zlib.crc32(seed.encode('utf-8')) % 1000) / 1000, 2)


def _coerce_value(value):
    """Best-effort conversion of a raw 'Value' field to a number.

    Accepts ints/floats as-is and strings with thousands separators
    ("1,234.5"); anything unparseable becomes 0 so one bad row cannot
    abort processing of a whole file.
    """
    if isinstance(value, (int, float)):
        return value
    if isinstance(value, str):
        try:
            return float(value.replace(',', ''))
        except ValueError:
            return 0
    return 0


def get_sample_data():
    """Return deterministic sample records used when remote loading fails.

    Produces one record per (country, impact type, category) combination:
    10 countries x 4 impact types x 5 categories = 200 records, each with
    the keys consumed by the visualization layer.
    """
    print("[DEBUG] Using sample data")
    countries = [
        "United States", "China", "Germany", "Brazil", "South Africa",
        "India", "Japan", "Australia", "France", "Canada",
    ]
    impact_categories = {
        "air-pollution": ["PM2.5", "NOx", "SOx", "VOCs", "Ammonia"],
        "GHG_Impacts": ["CO2", "Methane", "N2O", "HFCs", "PFCs"],
        "waste": ["Municipal Solid Waste", "Hazardous Waste", "E-waste", "Plastic", "Organic"],
        "water-consumption": ["Surface Water", "Groundwater", "Rainwater", "Wastewater", "Desalinated"],
    }
    return [
        {
            'territory': country,
            'Category': category,
            'Impact': impact_type,
            'ValueFactor': _stable_value(f"{country}_{impact_type}_{category}"),
            'Unit': 'USD',
            'Location': country,
        }
        for country in countries
        for impact_type, categories in impact_categories.items()
        for category in categories
    ]


def _sample_impact_records(impact_type):
    """Deterministic stand-in records for an impact type with an unrecognized layout."""
    countries = ["United States", "China", "Germany", "Brazil", "India"]
    categories = (["CO2", "Methane", "N2O"] if impact_type == "GHG_Impacts"
                  else ["Category1", "Category2", "Category3"])
    return [
        {
            'territory': country,
            'Category': category,
            'Impact': impact_type,
            'ValueFactor': _stable_value(f"{country}_{impact_type}_{category}"),
            'Unit': 'USD',
            'Location': country,
        }
        for country in countries
        for category in categories
    ]


def _records_from_country_payload(raw_data, country):
    """Flatten a country JSON payload into visualization records.

    Handles the canonical {'territory': ..., 'data': [...]} layout first,
    then falls back to legacy {category: {impact: value}} / {category: value}
    shapes. Returns a (possibly empty) list of record dicts.
    """
    records = []
    if (isinstance(raw_data, dict) and 'territory' in raw_data
            and 'data' in raw_data and isinstance(raw_data['data'], list)):
        for item in raw_data['data']:
            if isinstance(item, dict):
                records.append({
                    'territory': country,
                    'Category': item.get('Category', 'Unknown'),
                    'Impact': item.get('Impact', 'Unknown'),
                    'ValueFactor': _coerce_value(item.get('Value', '0')),
                    'Unit': item.get('Units', 'USD'),
                    'Location': item.get('Location', country),
                })
    elif isinstance(raw_data, dict):
        for key, value in raw_data.items():
            if isinstance(value, dict):
                for sub_key, sub_value in value.items():
                    records.append({
                        'territory': country,
                        'Category': key,
                        'Impact': sub_key,
                        'ValueFactor': _coerce_value(sub_value),
                        'Unit': 'USD',
                        'Location': country,
                    })
            elif isinstance(value, (int, float)):
                records.append({
                    'territory': country,
                    'Category': key,
                    'Impact': key,
                    'ValueFactor': value,
                    'Unit': 'USD',
                    'Location': country,
                })
    return records


def _records_from_impact_payload(raw_data, impact_type):
    """Flatten an impact-type JSON payload into visualization records.

    Supports: a plain list of record dicts; the canonical
    {'territory': ..., 'data': [...]} layout (with a deterministic sample
    substitute when 'data' is not a list, e.g. GHG_Impacts.json); and legacy
    {country: {category: value}} / {country: value} shapes.
    """
    if isinstance(raw_data, list):
        return [
            {
                'territory': item.get('territory', 'Unknown'),
                'Category': item.get('Category', impact_type),
                'Impact': item.get('Impact', impact_type),
                'ValueFactor': _coerce_value(item.get('ValueFactor', 0)),
                'Unit': item.get('Unit', 'USD'),
                'Location': item.get('Location', item.get('territory', 'Unknown')),
            }
            for item in raw_data if isinstance(item, dict)
        ]

    records = []
    if not isinstance(raw_data, dict):
        return records

    if 'territory' in raw_data and 'data' in raw_data:
        if isinstance(raw_data['data'], list):
            for item in raw_data['data']:
                if isinstance(item, dict):
                    records.append({
                        'territory': item.get('territory', 'Global'),
                        'Category': item.get('Category', 'Unknown'),
                        'Impact': impact_type,
                        'ValueFactor': _coerce_value(item.get('Value', '0')),
                        'Unit': item.get('Units', 'USD'),
                        'Location': item.get('Location', 'Global'),
                    })
        else:
            # e.g. GHG_Impacts.json has a different structure we cannot parse.
            print(f"[DEBUG] Impact data has unusual structure. Creating sample data for {impact_type}")
            records.extend(_sample_impact_records(impact_type))
    else:
        for country, country_data in raw_data.items():
            if isinstance(country_data, dict):
                for category, value in country_data.items():
                    records.append({
                        'territory': country,
                        'Category': category,
                        'Impact': impact_type,
                        'ValueFactor': _coerce_value(value),
                        'Unit': 'USD',
                        'Location': country,
                    })
            else:
                records.append({
                    'territory': country,
                    'Category': impact_type,
                    'Impact': impact_type,
                    'ValueFactor': _coerce_value(country_data),
                    'Unit': 'USD',
                    'Location': country,
                })
    return records


def _download(filename):
    """Download *filename* from the dataset repo and return the local path.

    Always passes repo_type="dataset" — without it hf_hub_download resolves
    against the model hub and 404s for this repository.
    """
    return hf_hub_download(repo_id=REPO_ID, filename=filename, repo_type="dataset")


def _read_data_file(local_path, remote_path):
    """Parse a downloaded .json or .csv file; return None for other extensions."""
    if remote_path.endswith('.json'):
        with open(local_path, 'r') as f:
            data = json.load(f)
        print(f"[DEBUG] Loaded JSON data with {len(data)} items")
        return data
    if remote_path.endswith('.csv'):
        data = pd.read_csv(local_path).to_dict('records')
        print(f"[DEBUG] Loaded CSV data with {len(data)} items")
        return data
    return None


def get_hf_directory_structure(path):
    """Fetch one directory level of the dataset repo via the HF tree API.

    Returns the parsed JSON listing (a list of {'type', 'path', ...} dicts)
    or None on any error.
    """
    try:
        url = f"https://huggingface.co/api/datasets/{REPO_ID}/tree/main/{path}"
        print(f"[DEBUG] Requesting directory structure from: {url}")
        # Explicit timeout so a hung API call cannot freeze the app.
        response = requests.get(url, timeout=30)
        if response.status_code != 200:
            print(f"[DEBUG] Error fetching directory structure: {response.status_code}")
            return None
        return response.json()
    except Exception as e:
        print(f"[DEBUG] Error in get_hf_directory_structure: {str(e)}")
        return None


def list_repo_files(repo_id, path_prefix=None):
    """List files in a dataset repository, optionally filtered by prefix.

    Returns [] on any error (missing library, network failure, ...).
    """
    try:
        print(f"[DEBUG] Listing files in repo: {repo_id}")
        from huggingface_hub import HfApi
        api = HfApi()
        files = api.list_repo_files(repo_id, repo_type="dataset")
        if path_prefix:
            files = [f for f in files if f.startswith(path_prefix)]
        return files
    except Exception as e:
        print(f"[DEBUG] Error in list_repo_files: {str(e)}")
        return []


def load_dataset_direct(path, fallback_to_sample=True):
    """Load a JSON/CSV file from the dataset repo, trying four strategies.

    Returns the parsed content (list of dicts for CSV / datasets loads),
    sample data when every strategy fails and *fallback_to_sample* is True,
    otherwise None.
    """
    print(f"[DEBUG] Attempting to load dataset from path: {path}")

    # Method 1: the datasets library.
    try:
        print(f"[DEBUG] Method 1: Trying to load dataset using datasets library: {DATASET_ID}, path: {path}")
        dataset = load_dataset(DATASET_ID, data_files={"data": path}, split="data")
        if dataset:
            print(f"[DEBUG] Successfully loaded dataset with {len(dataset)} items")
            return dataset.to_list()
    except Exception as e:
        print(f"[DEBUG] Method 1 Error: {str(e)}")

    # Method 2: direct hub download of the exact path.
    try:
        print(f"[DEBUG] Method 2: Trying to download file directly: {REPO_ID}, filename: {path}")
        file_path = _download(path)
        print(f"[DEBUG] Successfully downloaded file to: {file_path}")
        data = _read_data_file(file_path, path)
        if data is not None:
            return data
    except Exception as e2:
        print(f"[DEBUG] Method 2 Error: {str(e2)}")

    # Method 3: list all repo files and download any that match.
    try:
        print(f"[DEBUG] Method 3: Trying to list files in repo: {REPO_ID}")
        files = list_repo_files(REPO_ID)
        matching_files = [f for f in files if f == path or f.endswith(path)]
        if matching_files:
            print(f"[DEBUG] Found matching files: {matching_files}")
            for file_path in matching_files:
                try:
                    downloaded_file = _download(file_path)
                    print(f"[DEBUG] Successfully downloaded file to: {downloaded_file}")
                    data = _read_data_file(downloaded_file, file_path)
                    if data is not None:
                        return data
                except Exception as e3:
                    print(f"[DEBUG] Error downloading matching file {file_path}: {str(e3)}")
        else:
            print(f"[DEBUG] No matching files found for {path}")
    except Exception as e4:
        print(f"[DEBUG] Method 3 Error: {str(e4)}")

    # Method 4: walk the raw tree API and grab any JSON file in the directory.
    try:
        print(f"[DEBUG] Method 4: Trying to use HF API directly for path: {path}")
        # The tree API path excludes the leading 'data/'; strip only the prefix.
        api_path = path[len('data/'):] if path.startswith('data/') else path
        dir_structure = get_hf_directory_structure(api_path)
        if dir_structure:
            print(f"[DEBUG] Found directory structure with {len(dir_structure)} items")
            json_files = [item for item in dir_structure
                          if item['type'] == 'file' and item['path'].endswith('.json')]
            if json_files:
                print(f"[DEBUG] Found JSON files: {json_files}")
                for file_info in json_files:
                    try:
                        downloaded_file = _download(file_info['path'])
                        print(f"[DEBUG] Successfully downloaded file to: {downloaded_file}")
                        with open(downloaded_file, 'r') as f:
                            data = json.load(f)
                        print(f"[DEBUG] Loaded JSON data with {len(data)} items")
                        return data
                    except Exception as e5:
                        print(f"[DEBUG] Error downloading JSON file {file_info['path']}: {str(e5)}")
    except Exception as e6:
        print(f"[DEBUG] Method 4 Error: {str(e6)}")

    if fallback_to_sample:
        print(f"[DEBUG] All methods failed. Using sample data for {path}")
        sample_data = get_sample_data()
        print(f"[DEBUG] Sample data contains {len(sample_data)} items")
        return sample_data
    return None


def get_continents_space():
    """Get the list of continents, falling back to the standard six."""
    print(f"[DEBUG] Attempting to get continents from dataset: {DATASET_ID}")

    # Method 1: derive continent directory names from the full file listing.
    try:
        print(f"[DEBUG] Method 1: Trying to list files in repo")
        files = list_repo_files(REPO_ID)
        marker = f"{DATA_DIR}/{CONTINENTAL_DIR}/"
        continent_dirs = set()
        for file_path in files:
            if marker in file_path:
                parts = file_path.split('/')
                # data/by-region/continental/<continent>/<file> -> index 3
                if len(parts) > 4:
                    continent_dirs.add(parts[3])
        if continent_dirs:
            continents = sorted(continent_dirs)
            print(f"[DEBUG] Successfully got continents from file list: {continents}")
            return continents
    except Exception as e:
        print(f"[DEBUG] Method 1 Error: {str(e)}")

    # Method 2: read subdirectories via the tree API.
    try:
        print(f"[DEBUG] Method 2: Trying to get continents from directory structure")
        dir_structure = get_hf_directory_structure(f"{DATA_DIR}/{CONTINENTAL_DIR}")
        if dir_structure:
            continents = [item['path'].split('/')[-1] for item in dir_structure
                          if item['type'] == 'directory']
            if continents:
                print(f"[DEBUG] Successfully got continents from directory structure: {continents}")
                return sorted(continents)
    except Exception as e:
        print(f"[DEBUG] Method 2 Error: {str(e)}")

    # Method 3: probe one known file; if it exists, assume the standard layout.
    try:
        print(f"[DEBUG] Method 3: Trying to download specific continent files")
        file_path = _download(f"{DATA_DIR}/{CONTINENTAL_DIR}/Africa/Algeria.json")
        if file_path:
            print(f"[DEBUG] Successfully confirmed Africa exists")
            return list(_STANDARD_CONTINENTS)
    except Exception as e:
        print(f"[DEBUG] Method 3 Error: {str(e)}")

    print(f"[DEBUG] All methods failed. Using hardcoded list of continents")
    return list(_STANDARD_CONTINENTS)


def _countries_from_listing(files, continent):
    """Extract sorted country names from repo file paths under a continent dir."""
    # Trailing slash prevents 'Africa' from matching a sibling like 'AfricaX'.
    prefix = f"{DATA_DIR}/{CONTINENTAL_DIR}/{continent}/"
    return sorted(
        os.path.splitext(f.split('/')[-1])[0]
        for f in files
        if f.startswith(prefix) and f.endswith('.json')
    )


def get_countries_space(continent):
    """Get the list of countries for *continent*; sample lists as last resort."""
    print(f"[DEBUG] Attempting to get countries for {continent} from dataset: {DATASET_ID}")

    # Method 1: derive country names from the full file listing.
    try:
        print(f"[DEBUG] Method 1: Trying to list files in repo")
        countries = _countries_from_listing(list_repo_files(REPO_ID), continent)
        if countries:
            print(f"[DEBUG] Successfully got countries from file list: {countries}")
            return countries
    except Exception as e:
        print(f"[DEBUG] Method 1 Error: {str(e)}")

    # Method 2: read the continent directory via the tree API.
    try:
        print(f"[DEBUG] Method 2: Trying to get countries from directory structure")
        dir_structure = get_hf_directory_structure(f"{DATA_DIR}/{CONTINENTAL_DIR}/{continent}")
        if dir_structure:
            countries = [os.path.splitext(item['path'].split('/')[-1])[0]
                         for item in dir_structure
                         if item['type'] == 'file' and item['path'].endswith('.json')]
            if countries:
                print(f"[DEBUG] Successfully got countries from directory structure: {countries}")
                return sorted(countries)
    except Exception as e:
        print(f"[DEBUG] Method 2 Error: {str(e)}")

    # Method 3: probe one known country file, then re-try the full listing.
    try:
        print(f"[DEBUG] Method 3: Trying to download a specific country file")
        probe_countries = {
            "Africa": "Algeria",
            "Asia": "China",
            "Europe": "France",
            "North America": "United States",
            "Oceania": "Australia",
            "South America": "Brazil",
        }
        probe = probe_countries.get(continent)
        if probe:
            file_path = _download(f"{DATA_DIR}/{CONTINENTAL_DIR}/{continent}/{probe}.json")
            if file_path:
                print(f"[DEBUG] Successfully confirmed {probe} exists in {continent}")
                try:
                    countries = _countries_from_listing(list_repo_files(REPO_ID), continent)
                    if countries:
                        print(f"[DEBUG] Successfully got countries from file list after confirming sample: {countries}")
                        return countries
                except Exception as inner_e:
                    print(f"[DEBUG] Error listing countries after confirming sample: {str(inner_e)}")
    except Exception as e:
        print(f"[DEBUG] Method 3 Error: {str(e)}")

    print(f"[DEBUG] All methods failed. Using sample countries for {continent}")
    sample_countries = {
        "Africa": ["Algeria", "Egypt", "South Africa", "Kenya", "Nigeria", "Morocco"],
        "Asia": ["China", "India", "Japan", "South Korea", "Indonesia", "Thailand"],
        "Europe": ["France", "Germany", "United Kingdom", "Italy", "Spain", "Netherlands"],
        "North America": ["United States", "Canada", "Mexico", "Panama", "Costa Rica"],
        "Oceania": ["Australia", "New Zealand", "Fiji", "Papua New Guinea"],
        "South America": ["Brazil", "Argentina", "Chile", "Colombia", "Peru", "Venezuela"],
    }
    return sample_countries.get(continent, ["Sample Country 1", "Sample Country 2"])


def get_impact_types_space():
    """Get the list of impact types; hardcoded list as last resort."""
    print(f"[DEBUG] Attempting to get impact types from dataset: {DATASET_ID}")

    # Method 1: read the impact-type directory via the tree API.
    try:
        print(f"[DEBUG] Method 1: Trying to get impact types from directory structure")
        dir_structure = get_hf_directory_structure(f"{DATA_DIR}/{IMPACT_TYPE_DIR}")
        if dir_structure:
            impact_types = [item['path'].split('/')[-1] for item in dir_structure
                            if item['type'] == 'directory']
            impact_types += [os.path.splitext(item['path'].split('/')[-1])[0]
                             for item in dir_structure
                             if item['type'] == 'file' and item['path'].endswith('.json')]
            if impact_types:
                print(f"[DEBUG] Successfully got impact types from directory structure: {impact_types}")
                return sorted(impact_types)
    except Exception as e:
        print(f"[DEBUG] Method 1 Error: {str(e)}")

    # Method 2: derive names from the full file listing.
    try:
        print(f"[DEBUG] Method 2: Trying to list files in repo")
        files = list_repo_files(REPO_ID)
        marker = f"{DATA_DIR}/{IMPACT_TYPE_DIR}/"
        impact_type_dirs = set()
        impact_type_files = set()
        for file_path in files:
            if marker in file_path:
                parts = file_path.split('/')
                # data/by-impact-type/<entry>[/...] -> the entry is parts[2].
                # (The original indexed parts[3], which is a file *inside* an
                # impact directory, and never detected top-level .json files.)
                if len(parts) > 3:
                    impact_type_dirs.add(parts[2])
                elif len(parts) == 3 and parts[2].endswith('.json'):
                    impact_type_files.add(os.path.splitext(parts[2])[0])
        impact_types = sorted(impact_type_dirs | impact_type_files)
        if impact_types:
            print(f"[DEBUG] Successfully got impact types from file list: {impact_types}")
            return impact_types
    except Exception as e:
        print(f"[DEBUG] Method 2 Error: {str(e)}")

    # Method 3: last-ditch attempt to download the directory path as a file and
    # scrape names from its lines (kept from the original; usually fails fast).
    try:
        print(f"[DEBUG] Method 3: Trying to download directory listing")
        file_path = _download(f"{DATA_DIR}/{IMPACT_TYPE_DIR}")
        impact_types = []
        with open(file_path, 'r') as f:
            for line in f:
                line = line.strip()
                if line.endswith('.json'):
                    impact_types.append(os.path.splitext(line.split('/')[-1])[0])
                elif '/' in line and not line.endswith('/'):
                    candidate = line.split('/')[-1]
                    if candidate and candidate not in impact_types:
                        impact_types.append(candidate)
        if impact_types:
            print(f"[DEBUG] Successfully got impact types from downloaded directory: {impact_types}")
            return sorted(impact_types)
    except Exception as e:
        print(f"[DEBUG] Method 3 Error: {str(e)}")

    print(f"[DEBUG] All methods failed. Using hardcoded list of impact types")
    return ["air-pollution", "GHG_Impacts", "waste", "water-consumption",
            "economic", "ecosystem", "health", "social"]


def get_country_data_space(continent, country):
    """Get visualization records for one country; sample data as last resort.

    Returns None for missing/empty *continent* or *country*.
    """
    if not continent or not country:
        print(f"[DEBUG] Invalid input for get_country_data_space: {continent}, {country}")
        return None

    path = f"{DATA_DIR}/{CONTINENTAL_DIR}/{continent}/{country}.json"
    print(f"[DEBUG] Trying to load country data: {path}")

    # Method 1: direct hub download.
    try:
        print(f"[DEBUG] Method 1: Trying to download file directly")
        file_path = _download(path)
        if file_path:
            print(f"[DEBUG] Successfully downloaded country file: {file_path}")
            with open(file_path, 'r') as f:
                raw_data = json.load(f)
            print(f"[DEBUG] Successfully loaded country data with {len(raw_data)} items")
            processed_data = _records_from_country_payload(raw_data, country)
            print(f"[DEBUG] Processed data into {len(processed_data)} records")
            if processed_data:
                return processed_data
            print(f"[DEBUG] No valid records found in the data. Using sample data.")
            return get_sample_data()
    except Exception as e:
        print(f"[DEBUG] Method 1 Error: {str(e)}")

    # Method 2: the datasets library.
    try:
        print(f"[DEBUG] Method 2: Trying to load using datasets library")
        dataset = load_dataset(DATASET_ID, data_files=[path], split="train", streaming=False)
        if dataset:
            print(f"[DEBUG] Successfully loaded country data using datasets library")
            raw_data = next(iter(dataset))
            processed_data = _records_from_country_payload(raw_data, country)
            print(f"[DEBUG] Processed data into {len(processed_data)} records")
            if processed_data:
                return processed_data
            print(f"[DEBUG] No valid records found in the data. Using sample data.")
            return get_sample_data()
    except Exception as e:
        print(f"[DEBUG] Method 2 Error: {str(e)}")

    print(f"[DEBUG] All methods failed. Using sample data for {continent}/{country}")
    return get_sample_data()


def get_impact_data_space(impact_type):
    """Get visualization records for one impact type; sample data as last resort.

    Returns None for a missing/empty *impact_type*.
    """
    if not impact_type:
        print(f"[DEBUG] Invalid input for get_impact_data_space: {impact_type}")
        return None

    # Extension point for impact types whose directory name differs from the
    # UI name; currently an identity mapping.
    impact_map = {
        "air-pollution": "air-pollution",
        "GHG_Impacts": "GHG_Impacts",
        "waste": "waste",
        "water-consumption": "water-consumption",
    }
    impact_dir = impact_map.get(impact_type, impact_type)

    # Method 1: single <impact>.json file.
    try:
        print(f"[DEBUG] Method 1: Trying to load as a single file")
        path = f"{DATA_DIR}/{IMPACT_TYPE_DIR}/{impact_dir}.json"
        file_path = _download(path)
        if file_path:
            print(f"[DEBUG] Successfully downloaded impact file: {file_path}")
            with open(file_path, 'r') as f:
                raw_data = json.load(f)
            print(f"[DEBUG] Successfully loaded impact data with {len(raw_data)} items")
            processed_data = _records_from_impact_payload(raw_data, impact_type)
            print(f"[DEBUG] Processed impact data into {len(processed_data)} records")
            if processed_data:
                return processed_data
            print(f"[DEBUG] No valid records found in the impact data. Using sample data.")
            return get_sample_data()
    except Exception as e:
        print(f"[DEBUG] Method 1 Error: {str(e)}")

    # Method 2: first JSON file inside an <impact>/ directory.
    try:
        print(f"[DEBUG] Method 2: Trying to find files in directory")
        files = list_repo_files(REPO_ID)
        dir_path = f"{DATA_DIR}/{IMPACT_TYPE_DIR}/{impact_dir}"
        impact_files = [f for f in files if f.startswith(dir_path) and f.endswith('.json')]
        if impact_files:
            print(f"[DEBUG] Found {len(impact_files)} impact files in directory")
            file_path = _download(impact_files[0])
            if file_path:
                print(f"[DEBUG] Successfully downloaded impact file: {file_path}")
                with open(file_path, 'r') as f:
                    raw_data = json.load(f)
                print(f"[DEBUG] Successfully loaded impact data with {len(raw_data)} items")
                processed_data = _records_from_impact_payload(raw_data, impact_type)
                print(f"[DEBUG] Processed impact data into {len(processed_data)} records")
                if processed_data:
                    return processed_data
                print(f"[DEBUG] No valid records found in the impact data. Using sample data.")
                return get_sample_data()
    except Exception as e:
        print(f"[DEBUG] Method 2 Error: {str(e)}")

    # Method 3: the datasets library.
    try:
        print(f"[DEBUG] Method 3: Trying to load using datasets library")
        path = f"{DATA_DIR}/{IMPACT_TYPE_DIR}/{impact_dir}.json"
        dataset = load_dataset(DATASET_ID, data_files=[path], split="train", streaming=False)
        if dataset:
            print(f"[DEBUG] Successfully loaded impact data using datasets library")
            raw_data = next(iter(dataset))
            processed_data = _records_from_impact_payload(raw_data, impact_type)
            print(f"[DEBUG] Processed impact data into {len(processed_data)} records")
            if processed_data:
                return processed_data
            print(f"[DEBUG] No valid records found in the impact data. Using sample data.")
            return get_sample_data()
    except Exception as e:
        print(f"[DEBUG] Method 3 Error: {str(e)}")

    print(f"[DEBUG] All methods failed. Using sample data for impact type: {impact_type}")
    return get_sample_data()