import json
import datetime
import random
import re
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Optional

import requests

class ProxyFinder:
    """Finds and validates proxies from various online sources"""
    
    def __init__(self, verbose: bool = False):
        """Initialize ProxyFinder with optional verbose logging"""
        self.verbose = verbose
        self.api: Dict[str, List[str]] = {
            'socks4': [
                "https://api.proxyscrape.com/?request=displayproxies&proxytype=socks4&timeout=10000&country=all&simplified=true",
                "https://www.proxy-list.download/api/v1/get?type=socks4",
                "https://api.openproxylist.xyz/socks4.txt",
                'https://openproxy.space/list/socks4',
                'https://proxyspace.pro/socks4.txt',
                "https://sunny9577.github.io/proxy-scraper/generated/socks4_proxies.txt",
                'https://cdn.rei.my.id/proxy/SOCKS4',
                "https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt", 
                "https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS4_RAW.txt",
                'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks4.txt'
            ],
            'socks5': [
                "https://api.proxyscrape.com/v2/?request=getproxies&protocol=socks5&timeout=10000&country=all&simplified=true",
                "https://www.proxy-list.download/api/v1/get?type=socks5",
                "https://api.openproxylist.xyz/socks5.txt",
                'https://openproxy.space/list/socks5',
                'https://spys.me/socks.txt',
                'https://proxyspace.pro/socks5.txt',
                "https://sunny9577.github.io/proxy-scraper/generated/socks5_proxies.txt",
                'https://cdn.rei.my.id/proxy/SOCKS5',
                'https://raw.githubusercontent.com/manuGMG/proxy-365/main/SOCKS5.txt',
                "https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks5.txt",
                "https://raw.githubusercontent.com/hookzof/socks5_list/master/proxy.txt"
            ],
            'http': [
                'https://raw.githubusercontent.com/officialputuid/KangProxy/KangProxy/http/http.txt',
                "https://github.com/TheSpeedX/PROXY-List/raw/refs/heads/master/http.txt",
                "https://api.proxyscrape.com/?request=displayproxies&proxytype=http&timeout=10000&country=all&simplified=true",
                "https://www.proxy-list.download/api/v1/get?type=http",
                "https://api.openproxylist.xyz/http.txt",
                'https://openproxy.space/list/http',
                'https://proxyspace.pro/http.txt',
                "https://sunny9577.github.io/proxy-scraper/generated/http_proxies.txt",
                'https://cdn.rei.my.id/proxy/HTTP',
                'https://raw.githubusercontent.com/UptimerBot/proxy-list/master/proxies/http.txt',
                'https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/http.txt'
            ],
            'https': [
                'https://raw.githubusercontent.com/Firdoxx/proxy-list/main/https',
                'https://raw.githubusercontent.com/roosterkid/openproxylist/main/HTTPS_RAW.txt',
                'https://raw.githubusercontent.com/aslisk/proxyhttps/main/https.txt',
                'https://raw.githubusercontent.com/ErcinDedeoglu/proxies/main/proxies/https.txt',
                'https://raw.githubusercontent.com/zloi-user/hideip.me/main/https.txt',
                'https://raw.githubusercontent.com/vakhov/fresh-proxy-list/master/https.txt',
                'https://raw.githubusercontent.com/Vann-Dev/proxy-list/main/proxies/https.txt'
            ],
            'mixed': [
                'https://raw.githubusercontent.com/jetkai/proxy-list/main/online-proxies/txt/proxies.txt',
                'https://raw.githubusercontent.com/mertguvencli/http-proxy-list/main/proxy-list/data.txt',
                'https://raw.githubusercontent.com/a2u/free-proxy-list/master/free-proxy-list.txt',
                'https://raw.githubusercontent.com/mishakorzik/Free-Proxy/main/proxy.txt',
                'http://rootjazz.com/proxies/proxies.txt',
                'https://multiproxy.org/txt_all/proxy.txt',
                'https://proxy-spider.com/api/proxies.example.txt'
            ]
        }
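        # Note: 'mixed' sources are not a standalone proxy type; get_proxies()
        # splits whatever they return across the requested types.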
        self.proxy_dict: Dict[str, List[str]] = {'socks4': [], 'socks5': [], 'http': [], 'https': []}
        self.max_workers = 20  # Maximum workers for parallel requests
    
    def log(self, *args):
        """Log messages if verbose mode is enabled"""
        if self.verbose:
            print(*args)
            
    def extract_proxy(self, line: str) -> Optional[str]:
        """
        Extract the first IP:port occurrence from a line, e.g.
        '1.2.3.4:8080 US elite' -> '1.2.3.4:8080'.
        The regex only checks the shape of an address; it does not verify
        that each octet is in the 0-255 range.
        """
        match = re.search(r'(\d{1,3}(?:\.\d{1,3}){3}:\d{2,5})', line)
        return match.group(1) if match else None

    def fetch_from_url(self, url: str, proxy_type: str) -> List[str]:
        """
        Fetch proxies from a given URL; proxy_type is used only for logging.
        Returns the extracted IP:port strings (not yet checked for liveness).
        """
        proxy_list = []
        try:
            r = requests.get(url, timeout=5)
            if r.status_code == requests.codes.ok:
                for line in r.text.splitlines():
                    proxy = self.extract_proxy(line)
                    if proxy:
                        proxy_list.append(proxy)
                self.log(f"Got {len(proxy_list)} {proxy_type} proxies from {url}")
            return proxy_list
        except Exception as e:
            self.log(f"Failed to retrieve from {url}: {str(e)}")
            return []

    def fetch_proxies_parallel(self, proxy_type: str) -> List[str]:
        """
        Fetch proxies in parallel for a specific type from all sources.
        """
        if proxy_type not in self.api:
            return []
            
        all_proxies = []
        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            futures = [executor.submit(self.fetch_from_url, url, proxy_type) 
                      for url in self.api[proxy_type]]
            for future in futures:
                all_proxies.extend(future.result())
                
        return list(set(all_proxies))  # Remove duplicates

    def get_geonode_proxies(self) -> Dict[str, List[str]]:
        """
        Retrieves proxies from geonode API
        """
        result = {'http': [], 'socks4': [], 'socks5': [], 'https': []}
        try:
            url = 'https://proxylist.geonode.com/api/proxy-list?limit=500&sort_by=lastChecked&sort_type=desc'
            response = requests.get(url, timeout=10)
            
            if response.status_code == 200:
                data = response.json()
                for p in data.get('data', []):
                    for protocol in p.get('protocols', []):
                        protocol = protocol.lower()
                        # Map 'https' to 'http' in our dictionary
                        if protocol == 'https':
                            result['http'].append(f"{p['ip']}:{p['port']}")
                        elif protocol in result:
                            result[protocol].append(f"{p['ip']}:{p['port']}")
                
                self.log(f"Got {sum(len(v) for v in result.values())} proxies from GeoNode")
        except Exception as e:
            self.log(f"Failed to fetch from GeoNode: {str(e)}")
        
        return result

    def get_checkerproxy_archive(self) -> Dict[str, List[str]]:
        """
        Gets proxies from checkerproxy.net archive
        """
        result = {'http': [], 'socks5': []}
        
        for q in range(5):  # Check only the last 5 days to keep this fast
            day = datetime.date.today() - datetime.timedelta(days=q)
            formatted_date = day.isoformat()  # zero-padded YYYY-MM-DD
            
            try:
                r = requests.get(f'https://checkerproxy.net/api/archive/{formatted_date}', timeout=5)
                if r.text != '[]':
                    json_result = json.loads(r.text)
                    for entry in json_result:
                        # Skip private/internal address ranges (RFC 1918)
                        if re.match(r"(?:10\.|192\.168\.|172\.(?:1[6-9]|2\d|3[01])\.)", entry.get('ip', '')):
                            continue
                            
                        addr = entry.get('addr')
                        if not addr:
                            continue
                            
                        # In the archive format, types 1 and 2 are HTTP(S) and type 4 is SOCKS5
                        proxy_kind = entry.get('type')
                        if proxy_kind in (1, 2):
                            result['http'].append(addr)
                        elif proxy_kind == 4:
                            result['socks5'].append(addr)
                    
                    self.log(f"Got {len(result['http'])} http and {len(result['socks5'])} socks5 proxies from CheckerProxy for {formatted_date}")
            except Exception as e:
                self.log(f"Failed to get archive for {formatted_date}: {str(e)}")
                
        return result

    def get_proxies(self, proxy_types: Optional[List[str]] = None) -> Dict[str, List[str]]:
        """
        Get proxies of the specified types. If None, all four types are fetched.
        Returns a dictionary mapping each proxy type to its list of proxies.
        """
        if proxy_types is None:
            proxy_types = ['http', 'https', 'socks4', 'socks5']
            
        # Reset proxy dictionary
        self.proxy_dict = {'socks4': [], 'socks5': [], 'http': [], 'https': []}
        
        self.log("Starting proxy retrieval process")
        
        # Fetch from regular sources in parallel for each type
        for ptype in proxy_types:
            if ptype in self.api:
                self.log(f"Processing {ptype} proxy sources")
                proxies = self.fetch_proxies_parallel(ptype)
                self.proxy_dict[ptype].extend(proxies)
        
        # Add proxies from GeoNode
        geonode_proxies = self.get_geonode_proxies()
        for ptype, proxies in geonode_proxies.items():
            if ptype in proxy_types:
                self.proxy_dict[ptype].extend(proxies)
        
        # Add proxies from CheckerProxy
        checker_proxies = self.get_checkerproxy_archive()
        for ptype, proxies in checker_proxies.items():
            if ptype in proxy_types:
                self.proxy_dict[ptype].extend(proxies)
                
        # Process "mixed" sources if any proxy type is requested
        if proxy_types:
            self.log("Processing mixed proxy sources")
            for url in self.api.get('mixed', []):
                try:
                    proxies = self.fetch_from_url(url, 'mixed')
                    # Distribute mixed proxies evenly among the requested types;
                    # the last chunk absorbs any remainder.
                    if proxies:
                        chunks = len(proxy_types)  # guaranteed > 0 inside `if proxy_types:`
                        chunk_size = len(proxies) // chunks
                        for i, ptype in enumerate(proxy_types):
                            start = i * chunk_size
                            end = start + chunk_size if i < chunks - 1 else len(proxies)
                            self.proxy_dict[ptype].extend(proxies[start:end])
                except Exception as e:
                    self.log(f"Failed to process mixed proxy source: {str(e)}")
        
        # Remove duplicates for all types
        for key in self.proxy_dict:
            original_count = len(self.proxy_dict[key])
            self.proxy_dict[key] = list(set(self.proxy_dict[key]))
            new_count = len(self.proxy_dict[key])
            self.log(f"Removed {original_count - new_count} duplicate {key} proxies")
            
        self.log("Proxy retrieval process completed")
        return self.proxy_dict

    def get_random_proxy(self, proxy_type: Optional[str] = None) -> Optional[str]:
        """
        Return a random proxy of the specified type, or from any type if
        proxy_type is None. Returns None if nothing matching is available.
        """
        if proxy_type:
            candidates = self.proxy_dict.get(proxy_type, [])
        else:
            # Pool every stored proxy type together
            candidates = [p for plist in self.proxy_dict.values() for p in plist]
        return random.choice(candidates) if candidates else None
        
    def get_random_proxies(self, count: int = 10, proxy_type: Optional[str] = None) -> List[str]:
        """
        Return up to `count` random proxies of the specified type, or drawn
        from all types if proxy_type is None.
        """
        if proxy_type:
            proxies = self.proxy_dict.get(proxy_type, [])
        else:
            # Pool every stored proxy type together
            proxies = [p for plist in self.proxy_dict.values() for p in plist]
                
        # Get random proxies up to count or as many as available
        if not proxies:
            return []
            
        return random.sample(proxies, min(count, len(proxies)))
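
    # The class docstring promises validation, so below is a minimal liveness
    # check, offered as a hedged sketch: it assumes PySocks is installed
    # (pip install "requests[socks]") for the socks4/socks5 schemes, and it
    # uses http://httpbin.org/ip as an arbitrary test endpoint.
    def check_proxy(self, proxy: str, proxy_type: str = 'http', timeout: float = 5.0) -> bool:
        """Return True if the proxy answers a simple GET within the timeout."""
        scheme = 'http' if proxy_type in ('http', 'https') else proxy_type
        proxies = {'http': f'{scheme}://{proxy}', 'https': f'{scheme}://{proxy}'}
        try:
            r = requests.get('http://httpbin.org/ip', proxies=proxies, timeout=timeout)
            return r.status_code == requests.codes.ok
        except requests.RequestException:
            return False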

if __name__ == "__main__":
    # Example usage
    finder = ProxyFinder(verbose=True)
    proxies = finder.get_proxies(['http', 'socks5'])
    
    print("\nSummary:")
    for ptype, proxy_list in proxies.items():
        print(f"{ptype}: {len(proxy_list)} proxies")
    
    print("\nRandom HTTP proxy:", finder.get_random_proxy('http'))
    print("\nRandom SOCKS5 proxies:", finder.get_random_proxies(5, 'socks5'))