|
import json
|
|
import datetime
|
|
import requests
|
|
import re
|
|
import random
|
|
import time
|
|
from concurrent.futures import ThreadPoolExecutor
|
|
from typing import Dict, List, Optional, Set, Tuple, Union
|
|
|
|
class ProxyFinder:
    """Finds and validates proxies from various online sources.

    Proxies are scraped from plain-text list endpoints (per protocol), the
    GeoNode API, and the checkerproxy.net archive, then deduplicated into
    ``self.proxy_dict`` keyed by protocol ('socks4', 'socks5', 'http', 'https').
    """

    def __init__(self, verbose: bool = False):
        """Initialize ProxyFinder.

        Args:
            verbose: When True, progress/diagnostic messages are printed.
        """
        self.verbose = verbose
        # Plain-text proxy-list endpoints, keyed by protocol.  Each URL returns
        # one proxy per line (possibly with extra text around the ip:port).
        # 'mixed' sources do not declare a protocol; get_proxies() distributes
        # their proxies across the requested types.
        self.api: Dict[str, List[str]] = {
            'socks4': [
                'https://api.proxyscrape.com/?request=displayproxies&proxytype=socks4&timeout=10000&country=all&simplified=true',
                'https://www.proxy-list.download/api/v1/get?type=socks4',
                'https://api.openproxylist.xyz/socks4.txt',
                'https://openproxy.space/list/socks4',
                'https://proxyspace.pro/socks4.txt',
                'https://sunny9577.github.io/proxy-scraper/generated/socks4_proxies.txt',
                'https://cdn.rei.my.id/proxy/SOCKS4',
                'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt',
                'https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS4_RAW.txt',
                'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks4.txt',
            ],
            'socks5': [
                'https://api.proxyscrape.com/v2/?request=getproxies&protocol=socks5&timeout=10000&country=all&simplified=true',
                'https://www.proxy-list.download/api/v1/get?type=socks5',
                'https://api.openproxylist.xyz/socks5.txt',
                'https://openproxy.space/list/socks5',
                'https://spys.me/socks.txt',
                'https://proxyspace.pro/socks5.txt',
                'https://sunny9577.github.io/proxy-scraper/generated/socks5_proxies.txt',
                'https://cdn.rei.my.id/proxy/SOCKS5',
                'https://raw.githubusercontent.com/manuGMG/proxy-365/main/SOCKS5.txt',
                'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks5.txt',
                'https://raw.githubusercontent.com/hookzof/socks5_list/master/proxy.txt',
            ],
            'http': [
                'https://raw.githubusercontent.com/officialputuid/KangProxy/KangProxy/http/http.txt',
                'https://github.com/TheSpeedX/PROXY-List/raw/refs/heads/master/http.txt',
                'https://api.proxyscrape.com/?request=displayproxies&proxytype=http&timeout=10000&country=all&simplified=true',
                'https://www.proxy-list.download/api/v1/get?type=http',
                'https://api.openproxylist.xyz/http.txt',
                'https://openproxy.space/list/http',
                'https://proxyspace.pro/http.txt',
                'https://sunny9577.github.io/proxy-scraper/generated/http_proxies.txt',
                'https://cdn.rei.my.id/proxy/HTTP',
                'https://raw.githubusercontent.com/UptimerBot/proxy-list/master/proxies/http.txt',
                'https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/http.txt',
            ],
            'https': [
                'https://raw.githubusercontent.com/Firdoxx/proxy-list/main/https',
                'https://raw.githubusercontent.com/roosterkid/openproxylist/main/HTTPS_RAW.txt',
                'https://raw.githubusercontent.com/aslisk/proxyhttps/main/https.txt',
                'https://raw.githubusercontent.com/ErcinDedeoglu/proxies/main/proxies/https.txt',
                'https://raw.githubusercontent.com/zloi-user/hideip.me/main/https.txt',
                'https://raw.githubusercontent.com/vakhov/fresh-proxy-list/master/https.txt',
                'https://raw.githubusercontent.com/Vann-Dev/proxy-list/main/proxies/https.txt',
            ],
            'mixed': [
                'https://github.com/jetkai/proxy-list/blob/main/online-proxies/txt/proxies.txt',
                'https://raw.githubusercontent.com/mertguvencli/http-proxy-list/main/proxy-list/data.txt',
                'https://raw.githubusercontent.com/a2u/free-proxy-list/master/free-proxy-list.txt',
                'https://raw.githubusercontent.com/mishakorzik/Free-Proxy/main/proxy.txt',
                'http://rootjazz.com/proxies/proxies.txt',
                'https://multiproxy.org/txt_all/proxy.txt',
                'https://proxy-spider.com/api/proxies.example.txt',
            ],
        }
        # Accumulated results, keyed by protocol.
        self.proxy_dict: Dict[str, List[str]] = {'socks4': [], 'socks5': [], 'http': [], 'https': []}
        # Thread-pool size used when fetching sources in parallel.
        self.max_workers = 20

    def log(self, *args) -> None:
        """Print *args to stdout if verbose mode is enabled."""
        if self.verbose:
            print(*args)

    def extract_proxy(self, line: str) -> Optional[str]:
        """Extract the first ip:port occurrence from a line.

        The pattern is deliberately loose: octet ranges are not validated
        (e.g. '999.1.1.1:80' would match) and the port must be 2-5 digits.

        Returns:
            The matched 'ip:port' string, or None if the line contains none.
        """
        match = re.search(r'(\d{1,3}(?:\.\d{1,3}){3}:\d{2,5})', line)
        return match.group(1) if match else None

    def fetch_from_url(self, url: str, proxy_type: str) -> List[str]:
        """Fetch and parse a single plain-text proxy-list URL.

        Args:
            url: Proxy-list endpoint returning one proxy per line.
            proxy_type: Protocol label, used only for logging.

        Returns:
            List of extracted 'ip:port' strings; empty on a non-OK response
            or on any error (failures are logged, never raised, so one dead
            source does not abort the whole scrape).
        """
        proxy_list: List[str] = []
        try:
            r = requests.get(url, timeout=5)
            if r.status_code == requests.codes.ok:
                for line in r.text.splitlines():
                    proxy = self.extract_proxy(line)
                    if proxy:
                        proxy_list.append(proxy)
            self.log(f"Got {len(proxy_list)} {proxy_type} proxies from {url}")
            return proxy_list
        except Exception as e:
            self.log(f"Failed to retrieve from {url}: {str(e)}")
        return []

    def fetch_proxies_parallel(self, proxy_type: str) -> List[str]:
        """Fetch proxies of one type from all of its sources in parallel.

        Args:
            proxy_type: One of the keys of ``self.api``.

        Returns:
            Deduplicated list of proxies (empty if the type is unknown).
        """
        if proxy_type not in self.api:
            return []

        all_proxies: List[str] = []
        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            futures = [executor.submit(self.fetch_from_url, url, proxy_type)
                       for url in self.api[proxy_type]]
            for future in futures:
                all_proxies.extend(future.result())

        # Different sources overlap heavily; deduplicate before returning.
        return list(set(all_proxies))

    def get_geonode_proxies(self) -> Dict[str, List[str]]:
        """Retrieve proxies from the GeoNode proxy-list API.

        Returns:
            Dict keyed by protocol.  NOTE(review): entries advertised as
            'https' are appended to the 'http' bucket (preserving the original
            behaviour -- presumably because https-capable proxies also work as
            http); the 'https' bucket is never filled by this method.
        """
        result: Dict[str, List[str]] = {'http': [], 'socks4': [], 'socks5': [], 'https': []}
        try:
            url = 'https://proxylist.geonode.com/api/proxy-list?limit=500&sort_by=lastChecked&sort_type=desc'
            response = requests.get(url, timeout=10)

            if response.status_code == 200:
                data = response.json()
                for p in data.get('data', []):
                    addr = f"{p['ip']}:{p['port']}"
                    for protocol in p.get('protocols', []):
                        protocol = protocol.lower()
                        if protocol == 'https':
                            result['http'].append(addr)
                        elif protocol in result:
                            result[protocol].append(addr)

            self.log(f"Got {sum(len(v) for v in result.values())} proxies from GeoNode")
        except Exception as e:
            self.log(f"Failed to fetch from GeoNode: {str(e)}")

        return result

    def get_checkerproxy_archive(self) -> Dict[str, List[str]]:
        """Collect proxies from the checkerproxy.net archive (last 5 days).

        Returns:
            Dict with 'http' and 'socks5' proxy lists (the archive entries
            are only mapped to those two buckets).
        """
        result: Dict[str, List[str]] = {'http': [], 'socks5': []}

        for offset in range(5):
            day = datetime.date.today() - datetime.timedelta(days=offset)
            # NOTE(review): month/day are intentionally not zero-padded
            # (e.g. '2024-3-5'); the archive API appears to accept this form.
            # Confirm before switching to day.isoformat().
            formatted_date = f'{day.year}-{day.month}-{day.day}'

            try:
                r = requests.get(f'https://checkerproxy.net/api/archive/{formatted_date}', timeout=5)
                if r.text != '[]':
                    json_result = json.loads(r.text)
                    for entry in json_result:
                        # Skip RFC 1918 private addresses.  (The previous
                        # pattern '172\.' rejected every 172.x.x.x address;
                        # only 172.16.0.0-172.31.255.255 is actually private.)
                        if re.match(r"10\.|192\.168\.|172\.(1[6-9]|2\d|3[01])\.", entry['ip']):
                            continue

                        addr = entry.get('addr')
                        if not addr:
                            continue

                        # Archive 'type' codes: 1 and 2 are treated as http,
                        # 4 as socks5 (mirrors the original mapping).
                        if entry['type'] in [1, 2]:
                            result['http'].append(addr)
                        if entry['type'] == 4:
                            result['socks5'].append(addr)

                self.log(f"Got {len(result['http'])} http and {len(result['socks5'])} socks5 proxies from CheckerProxy for {formatted_date}")
            except Exception as e:
                self.log(f"Failed to get archive for {formatted_date}: {str(e)}")

        return result

    def get_proxies(self, proxy_types: Optional[List[str]] = None) -> Dict[str, List[str]]:
        """Fetch proxies of the requested types from all configured sources.

        Args:
            proxy_types: Protocols to fetch ('http', 'https', 'socks4',
                'socks5').  None means all four.  Unknown entries are ignored
                (previously an unknown entry raised KeyError while
                distributing mixed-source proxies).

        Returns:
            ``self.proxy_dict``: deduplicated proxy lists per protocol.
        """
        if proxy_types is None:
            proxy_types = ['http', 'https', 'socks4', 'socks5']
        # Drop unknown protocol names up front so every later index is safe.
        proxy_types = [pt for pt in proxy_types if pt in self.proxy_dict]

        # Reset accumulated results for a fresh run.
        self.proxy_dict = {'socks4': [], 'socks5': [], 'http': [], 'https': []}

        self.log("Starting proxy retrieval process")

        # 1) Dedicated per-protocol sources, each fetched in parallel.
        for ptype in proxy_types:
            if ptype in self.api:
                self.log(f"Processing {ptype} proxy sources")
                self.proxy_dict[ptype].extend(self.fetch_proxies_parallel(ptype))

        # 2) GeoNode API.
        for ptype, proxies in self.get_geonode_proxies().items():
            if ptype in proxy_types:
                self.proxy_dict[ptype].extend(proxies)

        # 3) checkerproxy.net archive.
        for ptype, proxies in self.get_checkerproxy_archive().items():
            if ptype in proxy_types:
                self.proxy_dict[ptype].extend(proxies)

        # 4) Mixed sources: protocol unknown, so split each list evenly across
        #    the requested types; the last chunk absorbs the remainder.
        if proxy_types:
            self.log("Processing mixed proxy sources")
            for url in self.api.get('mixed', []):
                try:
                    proxies = self.fetch_from_url(url, 'mixed')

                    if proxies:
                        chunks = len(proxy_types)
                        chunk_size = len(proxies) // chunks
                        for i, ptype in enumerate(proxy_types):
                            start = i * chunk_size
                            end = start + chunk_size if i < chunks - 1 else len(proxies)
                            self.proxy_dict[ptype].extend(proxies[start:end])
                except Exception as e:
                    self.log(f"Failed to process mixed proxy source: {str(e)}")

        # Deduplicate each bucket (ordering is not preserved by set()).
        for key in self.proxy_dict:
            original_count = len(self.proxy_dict[key])
            self.proxy_dict[key] = list(set(self.proxy_dict[key]))
            new_count = len(self.proxy_dict[key])
            self.log(f"Removed {original_count - new_count} duplicate {key} proxies")

        self.log("Proxy retrieval process completed")
        return self.proxy_dict

    def get_random_proxy(self, proxy_type: Optional[str] = None) -> Optional[str]:
        """Return one random proxy of the specified type.

        Args:
            proxy_type: Protocol to draw from; None draws from all types.

        Returns:
            A random 'ip:port' string, or None when none are available (or
            the given type is unknown/empty).
        """
        if proxy_type and proxy_type in self.proxy_dict and self.proxy_dict[proxy_type]:
            return random.choice(self.proxy_dict[proxy_type])
        elif not proxy_type:
            all_proxies = []
            for ptype in self.proxy_dict:
                all_proxies.extend(self.proxy_dict[ptype])
            if all_proxies:
                return random.choice(all_proxies)
        return None

    def get_random_proxies(self, count: int = 10, proxy_type: Optional[str] = None) -> List[str]:
        """Return up to *count* random proxies without replacement.

        Args:
            count: Maximum number of proxies to return.
            proxy_type: Protocol to draw from; None (or an unknown type)
                draws from all types combined.

        Returns:
            Random sample; shorter than *count* when fewer are available,
            empty when none are.
        """
        if proxy_type and proxy_type in self.proxy_dict:
            proxies = self.proxy_dict[proxy_type]
        else:
            proxies = []
            for ptype in self.proxy_dict:
                proxies.extend(self.proxy_dict[ptype])

        if not proxies:
            return []

        return random.sample(proxies, min(count, len(proxies)))
|
|
if __name__ == "__main__":
    # Demo run: scrape HTTP and SOCKS5 proxies, then show a short summary
    # and a few random picks.
    proxy_finder = ProxyFinder(verbose=True)
    results = proxy_finder.get_proxies(['http', 'socks5'])

    print("\nSummary:")
    for kind in results:
        print(f"{kind}: {len(results[kind])} proxies")

    print("\nRandom HTTP proxy:", proxy_finder.get_random_proxy('http'))
    print("\nRandom SOCKS5 proxies:", proxy_finder.get_random_proxies(5, 'socks5'))
|
|
|