import json
import datetime
import requests
import re
import random
import time
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Optional, Set, Tuple, Union


class ProxyFinder:
    """Finds and collects proxies from various online sources.

    Proxies ("ip:port" strings) are scraped from public plain-text lists,
    the GeoNode API, and the checkerproxy.net archive, then de-duplicated
    and stored per protocol in ``self.proxy_dict``.
    """

    # RFC 1918 private ranges: 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16.
    # NOTE: the previous pattern ("172\.") also discarded *public* 172.x.x.x
    # addresses; only 172.16-31.x.x is actually private.
    _PRIVATE_IP_RE = re.compile(r"(?:10\.|192\.168\.|172\.(?:1[6-9]|2\d|3[01])\.)")

    def __init__(self, verbose: bool = False):
        """Initialize ProxyFinder with optional verbose logging.

        Args:
            verbose: When True, progress messages are printed via ``log``.
        """
        self.verbose = verbose
        # Plain-text proxy-list endpoints, grouped by protocol. 'mixed'
        # sources contain proxies of unknown/assorted protocols.
        self.api: Dict[str, List[str]] = {
            'socks4': [
                "https://api.proxyscrape.com/?request=displayproxies&proxytype=socks4&timeout=10000&country=all&simplified=true",
                "https://www.proxy-list.download/api/v1/get?type=socks4",
                "https://api.openproxylist.xyz/socks4.txt",
                'https://openproxy.space/list/socks4',
                'https://proxyspace.pro/socks4.txt',
                "https://sunny9577.github.io/proxy-scraper/generated/socks4_proxies.txt",
                'https://cdn.rei.my.id/proxy/SOCKS4',
                "https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt",
                "https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS4_RAW.txt",
                'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks4.txt'
            ],
            'socks5': [
                "https://api.proxyscrape.com/v2/?request=getproxies&protocol=socks5&timeout=10000&country=all&simplified=true",
                "https://www.proxy-list.download/api/v1/get?type=socks5",
                "https://api.openproxylist.xyz/socks5.txt",
                'https://openproxy.space/list/socks5',
                'https://spys.me/socks.txt',
                'https://proxyspace.pro/socks5.txt',
                "https://sunny9577.github.io/proxy-scraper/generated/socks5_proxies.txt",
                'https://cdn.rei.my.id/proxy/SOCKS5',
                'https://raw.githubusercontent.com/manuGMG/proxy-365/main/SOCKS5.txt',
                "https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks5.txt",
                "https://raw.githubusercontent.com/hookzof/socks5_list/master/proxy.txt"
            ],
            'http': [
                'https://raw.githubusercontent.com/officialputuid/KangProxy/KangProxy/http/http.txt',
                "https://github.com/TheSpeedX/PROXY-List/raw/refs/heads/master/http.txt",
                "https://api.proxyscrape.com/?request=displayproxies&proxytype=http&timeout=10000&country=all&simplified=true",
                "https://www.proxy-list.download/api/v1/get?type=http",
                "https://api.openproxylist.xyz/http.txt",
                'https://openproxy.space/list/http',
                'https://proxyspace.pro/http.txt',
                "https://sunny9577.github.io/proxy-scraper/generated/http_proxies.txt",
                'https://cdn.rei.my.id/proxy/HTTP',
                'https://raw.githubusercontent.com/UptimerBot/proxy-list/master/proxies/http.txt',
                'https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/http.txt'
            ],
            'https': [
                'https://raw.githubusercontent.com/Firdoxx/proxy-list/main/https',
                'https://raw.githubusercontent.com/roosterkid/openproxylist/main/HTTPS_RAW.txt',
                'https://raw.githubusercontent.com/aslisk/proxyhttps/main/https.txt',
                'https://raw.githubusercontent.com/ErcinDedeoglu/proxies/main/proxies/https.txt',
                'https://raw.githubusercontent.com/zloi-user/hideip.me/main/https.txt',
                'https://raw.githubusercontent.com/vakhov/fresh-proxy-list/master/https.txt',
                'https://raw.githubusercontent.com/Vann-Dev/proxy-list/main/proxies/https.txt'
            ],
            'mixed': [
                'https://github.com/jetkai/proxy-list/blob/main/online-proxies/txt/proxies.txt',
                'https://raw.githubusercontent.com/mertguvencli/http-proxy-list/main/proxy-list/data.txt',
                'https://raw.githubusercontent.com/a2u/free-proxy-list/master/free-proxy-list.txt',
                'https://raw.githubusercontent.com/mishakorzik/Free-Proxy/main/proxy.txt',
                'http://rootjazz.com/proxies/proxies.txt',
                'https://multiproxy.org/txt_all/proxy.txt',
                'https://proxy-spider.com/api/proxies.example.txt'
            ]
        }
        # Collected proxies, keyed by protocol; filled by get_proxies().
        self.proxy_dict: Dict[str, List[str]] = {'socks4': [], 'socks5': [], 'http': [], 'https': []}
        self.max_workers = 20  # Maximum workers for parallel requests

    def log(self, *args) -> None:
        """Log messages if verbose mode is enabled."""
        if self.verbose:
            print(*args)

    def extract_proxy(self, line: str) -> Optional[str]:
        """Extract the first IP:port occurrence from *line*.

        Returns:
            The matched "ip:port" string, or None if the line has none.
            (Octets are not range-validated; e.g. 999.1.1.1:80 matches.)
        """
        match = re.search(r'(\d{1,3}(?:\.\d{1,3}){3}:\d{2,5})', line)
        return match.group(1) if match else None

    def fetch_from_url(self, url: str, proxy_type: str) -> List[str]:
        """Fetch proxies from a single plain-text source URL.

        Args:
            url: Source URL returning one proxy per line.
            proxy_type: Label used only for logging.

        Returns:
            List of extracted "ip:port" strings (empty on any failure).
        """
        collected: List[str] = []
        try:
            response = requests.get(url, timeout=5)
            if response.status_code == requests.codes.ok:
                for line in response.text.splitlines():
                    proxy = self.extract_proxy(line)
                    if proxy:
                        collected.append(proxy)
            self.log(f"Got {len(collected)} {proxy_type} proxies from {url}")
            return collected
        except Exception as e:
            # Best-effort scraping: a dead source is logged, not fatal.
            self.log(f"Failed to retrieve from {url}: {str(e)}")
            return []

    def fetch_proxies_parallel(self, proxy_type: str) -> List[str]:
        """Fetch proxies of one type from all its sources concurrently.

        Returns:
            De-duplicated list of proxies (empty for unknown types).
        """
        if proxy_type not in self.api:
            return []
        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            futures = [executor.submit(self.fetch_from_url, url, proxy_type)
                       for url in self.api[proxy_type]]
            merged: List[str] = []
            for future in futures:
                merged.extend(future.result())
        return list(set(merged))  # Remove duplicates

    def get_geonode_proxies(self) -> Dict[str, List[str]]:
        """Retrieve proxies from the GeoNode public API.

        Returns:
            Dict of proxy lists keyed by protocol. Entries advertised as
            'https' are filed under 'http' (the 'https' list stays empty).
        """
        result: Dict[str, List[str]] = {'http': [], 'socks4': [], 'socks5': [], 'https': []}
        try:
            url = 'https://proxylist.geonode.com/api/proxy-list?limit=500&sort_by=lastChecked&sort_type=desc'
            response = requests.get(url, timeout=10)
            if response.status_code == 200:
                for entry in response.json().get('data', []):
                    for protocol in entry.get('protocols', []):
                        protocol = protocol.lower()
                        # Map 'https' to 'http' in our dictionary
                        if protocol == 'https':
                            result['http'].append(f"{entry['ip']}:{entry['port']}")
                        elif protocol in result:
                            result[protocol].append(f"{entry['ip']}:{entry['port']}")
            self.log(f"Got {sum(len(v) for v in result.values())} proxies from GeoNode")
        except Exception as e:
            self.log(f"Failed to fetch from GeoNode: {str(e)}")
        return result

    def get_checkerproxy_archive(self) -> Dict[str, List[str]]:
        """Fetch proxies from the checkerproxy.net daily archive.

        Scans the last 5 days. Archive 'type' codes 1/2 are treated as
        HTTP(S) and 4 as SOCKS5; private-range IPs are skipped.

        Returns:
            Dict with 'http' and 'socks5' proxy lists.
        """
        result: Dict[str, List[str]] = {'http': [], 'socks5': []}
        for offset in range(5):  # Check only last 5 days to be faster
            day = datetime.date.today() - datetime.timedelta(days=offset)
            # isoformat() zero-pads to YYYY-MM-DD (e.g. 2024-03-05); the
            # unpadded f'{day.month}' form missed single-digit months/days.
            # NOTE(review): assumes the archive endpoint wants ISO dates —
            # confirm against checkerproxy.net.
            formatted_date = day.isoformat()
            try:
                r = requests.get(f'https://checkerproxy.net/api/archive/{formatted_date}', timeout=5)
                if r.text != '[]':
                    for entry in r.json():
                        # Skip internal (RFC 1918) IPs
                        if self._PRIVATE_IP_RE.match(entry['ip']):
                            continue
                        addr = entry.get('addr')
                        if not addr:
                            continue
                        if entry['type'] in (1, 2):
                            result['http'].append(addr)
                        elif entry['type'] == 4:
                            result['socks5'].append(addr)
                    self.log(f"Got {len(result['http'])} http and {len(result['socks5'])} socks5 proxies from CheckerProxy for {formatted_date}")
            except Exception as e:
                self.log(f"Failed to get archive for {formatted_date}: {str(e)}")
        return result

    def get_proxies(self, proxy_types: Optional[List[str]] = None) -> Dict[str, List[str]]:
        """Collect proxies of the requested types from all sources.

        Args:
            proxy_types: Protocols to fetch ('http', 'https', 'socks4',
                'socks5'); None means all. Unknown names are ignored
                (previously they raised KeyError in the mixed-source loop).

        Returns:
            ``self.proxy_dict`` — de-duplicated lists keyed by protocol.
        """
        if proxy_types is None:
            proxy_types = ['http', 'https', 'socks4', 'socks5']
        # Drop unrecognized protocol names so a typo cannot crash us below.
        proxy_types = [p for p in proxy_types if p in ('http', 'https', 'socks4', 'socks5')]

        # Reset proxy dictionary
        self.proxy_dict = {'socks4': [], 'socks5': [], 'http': [], 'https': []}
        self.log("Starting proxy retrieval process")

        # Fetch from regular sources in parallel for each type
        for ptype in proxy_types:
            if ptype in self.api:
                self.log(f"Processing {ptype} proxy sources")
                self.proxy_dict[ptype].extend(self.fetch_proxies_parallel(ptype))

        # Add proxies from GeoNode
        for ptype, proxies in self.get_geonode_proxies().items():
            if ptype in proxy_types:
                self.proxy_dict[ptype].extend(proxies)

        # Add proxies from CheckerProxy
        for ptype, proxies in self.get_checkerproxy_archive().items():
            if ptype in proxy_types:
                self.proxy_dict[ptype].extend(proxies)

        # Process "mixed" sources if any proxy type is requested
        if proxy_types:
            self.log("Processing mixed proxy sources")
            for url in self.api.get('mixed', []):
                try:
                    proxies = self.fetch_from_url(url, 'mixed')
                    if proxies:
                        # Distribute mixed proxies equally among requested
                        # types; the last type absorbs the remainder.
                        chunks = len(proxy_types)
                        chunk_size = len(proxies) // chunks if chunks > 0 else 0
                        for i, ptype in enumerate(proxy_types):
                            start = i * chunk_size
                            end = start + chunk_size if i < chunks - 1 else len(proxies)
                            self.proxy_dict[ptype].extend(proxies[start:end])
                except Exception as e:
                    self.log(f"Failed to process mixed proxy source: {str(e)}")

        # Remove duplicates for all types
        for key in self.proxy_dict:
            original_count = len(self.proxy_dict[key])
            self.proxy_dict[key] = list(set(self.proxy_dict[key]))
            new_count = len(self.proxy_dict[key])
            self.log(f"Removed {original_count - new_count} duplicate {key} proxies")

        self.log("Proxy retrieval process completed")
        return self.proxy_dict

    def get_random_proxy(self, proxy_type: Optional[str] = None) -> Optional[str]:
        """Return one random proxy of *proxy_type*, or from any type if None.

        Returns:
            An "ip:port" string, or None when nothing is available.
        """
        if proxy_type and proxy_type in self.proxy_dict and self.proxy_dict[proxy_type]:
            return random.choice(self.proxy_dict[proxy_type])
        elif not proxy_type:
            # Combine all proxy types and get a random one
            all_proxies: List[str] = []
            for ptype in self.proxy_dict:
                all_proxies.extend(self.proxy_dict[ptype])
            if all_proxies:
                return random.choice(all_proxies)
        return None

    def get_random_proxies(self, count: int = 10, proxy_type: Optional[str] = None) -> List[str]:
        """Return up to *count* distinct random proxies.

        Args:
            count: Maximum number of proxies to return.
            proxy_type: Restrict to one protocol; None pools all types.

        Returns:
            Random sample of proxies (possibly shorter than *count*).
        """
        if proxy_type and proxy_type in self.proxy_dict:
            proxies = self.proxy_dict[proxy_type]
        else:
            # Combine all proxy types
            proxies = []
            for ptype in self.proxy_dict:
                proxies.extend(self.proxy_dict[ptype])
        # Get random proxies up to count or as many as available
        if not proxies:
            return []
        return random.sample(proxies, min(count, len(proxies)))


if __name__ == "__main__":
    # Example usage
    finder = ProxyFinder(verbose=True)
    proxies = finder.get_proxies(['http', 'socks5'])
    print("\nSummary:")
    for ptype, proxy_list in proxies.items():
        print(f"{ptype}: {len(proxy_list)} proxies")
    print("\nRandom HTTP proxy:", finder.get_random_proxy('http'))
    print("\nRandom SOCKS5 proxies:", finder.get_random_proxies(5, 'socks5'))