# PyscoutAI / proxy_finder.py
import json
import datetime
import requests
import re
import random
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Optional
class ProxyFinder:
"""Finds and validates proxies from various online sources"""
def __init__(self, verbose: bool = False):
"""Initialize ProxyFinder with optional verbose logging"""
self.verbose = verbose
self.api: Dict[str, List[str]] = {
'socks4': [
"https://api.proxyscrape.com/?request=displayproxies&proxytype=socks4&timeout=10000&country=all&simplified=true",
"https://www.proxy-list.download/api/v1/get?type=socks4",
"https://api.openproxylist.xyz/socks4.txt",
'https://openproxy.space/list/socks4',
'https://proxyspace.pro/socks4.txt',
"https://sunny9577.github.io/proxy-scraper/generated/socks4_proxies.txt",
'https://cdn.rei.my.id/proxy/SOCKS4',
"https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt",
"https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS4_RAW.txt",
'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks4.txt'
],
'socks5': [
"https://api.proxyscrape.com/v2/?request=getproxies&protocol=socks5&timeout=10000&country=all&simplified=true",
"https://www.proxy-list.download/api/v1/get?type=socks5",
"https://api.openproxylist.xyz/socks5.txt",
'https://openproxy.space/list/socks5',
'https://spys.me/socks.txt',
'https://proxyspace.pro/socks5.txt',
"https://sunny9577.github.io/proxy-scraper/generated/socks5_proxies.txt",
'https://cdn.rei.my.id/proxy/SOCKS5',
'https://raw.githubusercontent.com/manuGMG/proxy-365/main/SOCKS5.txt',
"https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks5.txt",
"https://raw.githubusercontent.com/hookzof/socks5_list/master/proxy.txt"
],
'http': [
'https://raw.githubusercontent.com/officialputuid/KangProxy/KangProxy/http/http.txt',
"https://github.com/TheSpeedX/PROXY-List/raw/refs/heads/master/http.txt",
"https://api.proxyscrape.com/?request=displayproxies&proxytype=http&timeout=10000&country=all&simplified=true",
"https://www.proxy-list.download/api/v1/get?type=http",
"https://api.openproxylist.xyz/http.txt",
'https://openproxy.space/list/http',
'https://proxyspace.pro/http.txt',
"https://sunny9577.github.io/proxy-scraper/generated/http_proxies.txt",
'https://cdn.rei.my.id/proxy/HTTP',
'https://raw.githubusercontent.com/UptimerBot/proxy-list/master/proxies/http.txt',
'https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/http.txt'
],
'https': [
'https://raw.githubusercontent.com/Firdoxx/proxy-list/main/https',
'https://raw.githubusercontent.com/roosterkid/openproxylist/main/HTTPS_RAW.txt',
'https://raw.githubusercontent.com/aslisk/proxyhttps/main/https.txt',
'https://raw.githubusercontent.com/ErcinDedeoglu/proxies/main/proxies/https.txt',
'https://raw.githubusercontent.com/zloi-user/hideip.me/main/https.txt',
'https://raw.githubusercontent.com/vakhov/fresh-proxy-list/master/https.txt',
'https://raw.githubusercontent.com/Vann-Dev/proxy-list/main/proxies/https.txt'
],
'mixed': [
'https://github.com/jetkai/proxy-list/blob/main/online-proxies/txt/proxies.txt',
'https://raw.githubusercontent.com/mertguvencli/http-proxy-list/main/proxy-list/data.txt',
'https://raw.githubusercontent.com/a2u/free-proxy-list/master/free-proxy-list.txt',
'https://raw.githubusercontent.com/mishakorzik/Free-Proxy/main/proxy.txt',
'http://rootjazz.com/proxies/proxies.txt',
'https://multiproxy.org/txt_all/proxy.txt',
'https://proxy-spider.com/api/proxies.example.txt'
]
}
self.proxy_dict: Dict[str, List[str]] = {'socks4': [], 'socks5': [], 'http': [], 'https': []}
self.max_workers = 20 # Maximum workers for parallel requests
def log(self, *args):
"""Log messages if verbose mode is enabled"""
if self.verbose:
print(*args)
def extract_proxy(self, line: str) -> Optional[str]:
"""
Extracts the first occurrence of an IP:port from a line.
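        For illustration, a line such as "1.2.3.4:8080 US elite" yields "1.2.3.4:8080".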
"""
match = re.search(r'(\d{1,3}(?:\.\d{1,3}){3}:\d{2,5})', line)
if match:
return match.group(1)
return None
def fetch_from_url(self, url: str, proxy_type: str) -> List[str]:
"""
Fetches proxies from a given URL for the specified type.
        Returns a list of extracted ip:port strings (not yet validated).
"""
proxy_list = []
try:
r = requests.get(url, timeout=5)
if r.status_code == requests.codes.ok:
for line in r.text.splitlines():
proxy = self.extract_proxy(line)
if proxy:
proxy_list.append(proxy)
self.log(f"Got {len(proxy_list)} {proxy_type} proxies from {url}")
return proxy_list
except Exception as e:
self.log(f"Failed to retrieve from {url}: {str(e)}")
return []
def fetch_proxies_parallel(self, proxy_type: str) -> List[str]:
"""
Fetch proxies in parallel for a specific type from all sources.
"""
if proxy_type not in self.api:
return []
all_proxies = []
with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
futures = [executor.submit(self.fetch_from_url, url, proxy_type)
for url in self.api[proxy_type]]
for future in futures:
all_proxies.extend(future.result())
return list(set(all_proxies)) # Remove duplicates
def get_geonode_proxies(self) -> Dict[str, List[str]]:
"""
        Retrieves proxies from the GeoNode API.
"""
result = {'http': [], 'socks4': [], 'socks5': [], 'https': []}
try:
url = 'https://proxylist.geonode.com/api/proxy-list?limit=500&sort_by=lastChecked&sort_type=desc'
response = requests.get(url, timeout=10)
if response.status_code == 200:
data = response.json()
for p in data.get('data', []):
for protocol in p.get('protocols', []):
protocol = protocol.lower()
# Map 'https' to 'http' in our dictionary
if protocol == 'https':
result['http'].append(f"{p['ip']}:{p['port']}")
elif protocol in result:
result[protocol].append(f"{p['ip']}:{p['port']}")
self.log(f"Got {sum(len(v) for v in result.values())} proxies from GeoNode")
except Exception as e:
self.log(f"Failed to fetch from GeoNode: {str(e)}")
return result
def get_checkerproxy_archive(self) -> Dict[str, List[str]]:
"""
        Gets proxies from the checkerproxy.net daily archive.
"""
result = {'http': [], 'socks5': []}
        for q in range(5):  # Check only the last 5 days to keep this fast
            day = datetime.date.today() - datetime.timedelta(days=q)
            formatted_date = day.isoformat()  # zero-padded YYYY-MM-DD
try:
r = requests.get(f'https://checkerproxy.net/api/archive/{formatted_date}', timeout=5)
if r.text != '[]':
json_result = json.loads(r.text)
for i in json_result:
# Skip internal IPs
if re.match(r"172\.|192\.168\.|10\.", i['ip']):
continue
addr = i.get('addr')
if not addr:
continue
if i['type'] in [1, 2]:
result['http'].append(addr)
if i['type'] == 4:
result['socks5'].append(addr)
self.log(f"Got {len(result['http'])} http and {len(result['socks5'])} socks5 proxies from CheckerProxy for {formatted_date}")
except Exception as e:
self.log(f"Failed to get archive for {formatted_date}: {str(e)}")
return result
    def get_proxies(self, proxy_types: Optional[List[str]] = None) -> Dict[str, List[str]]:
"""
Get proxies of the specified types. If None, get all types.
Returns a dictionary with proxy lists for each type.
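        Example result shape (illustrative values):
            {'socks4': [...], 'socks5': [...], 'http': ['1.2.3.4:8080', ...], 'https': [...]}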
"""
if proxy_types is None:
proxy_types = ['http', 'https', 'socks4', 'socks5']
# Reset proxy dictionary
self.proxy_dict = {'socks4': [], 'socks5': [], 'http': [], 'https': []}
self.log("Starting proxy retrieval process")
# Fetch from regular sources in parallel for each type
for ptype in proxy_types:
if ptype in self.api:
self.log(f"Processing {ptype} proxy sources")
proxies = self.fetch_proxies_parallel(ptype)
self.proxy_dict[ptype].extend(proxies)
# Add proxies from GeoNode
geonode_proxies = self.get_geonode_proxies()
for ptype, proxies in geonode_proxies.items():
if ptype in proxy_types:
self.proxy_dict[ptype].extend(proxies)
# Add proxies from CheckerProxy
checker_proxies = self.get_checkerproxy_archive()
for ptype, proxies in checker_proxies.items():
if ptype in proxy_types:
self.proxy_dict[ptype].extend(proxies)
# Process "mixed" sources if any proxy type is requested
if proxy_types:
self.log("Processing mixed proxy sources")
for url in self.api.get('mixed', []):
try:
proxies = self.fetch_from_url(url, 'mixed')
# Distribute mixed proxies equally among requested types
if proxies:
chunks = len(proxy_types)
chunk_size = len(proxies) // chunks if chunks > 0 else 0
for i, ptype in enumerate(proxy_types):
start = i * chunk_size
end = start + chunk_size if i < chunks - 1 else len(proxies)
self.proxy_dict[ptype].extend(proxies[start:end])
except Exception as e:
self.log(f"Failed to process mixed proxy source: {str(e)}")
# Remove duplicates for all types
for key in self.proxy_dict:
original_count = len(self.proxy_dict[key])
self.proxy_dict[key] = list(set(self.proxy_dict[key]))
new_count = len(self.proxy_dict[key])
self.log(f"Removed {original_count - new_count} duplicate {key} proxies")
self.log("Proxy retrieval process completed")
return self.proxy_dict
    def get_random_proxy(self, proxy_type: Optional[str] = None) -> Optional[str]:
"""
Returns a random proxy of the specified type.
If type is None, returns a random proxy from any type.
"""
if proxy_type and proxy_type in self.proxy_dict and self.proxy_dict[proxy_type]:
return random.choice(self.proxy_dict[proxy_type])
elif not proxy_type:
# Combine all proxy types and get a random one
all_proxies = []
for ptype in self.proxy_dict:
all_proxies.extend(self.proxy_dict[ptype])
if all_proxies:
return random.choice(all_proxies)
return None
    def get_random_proxies(self, count: int = 10, proxy_type: Optional[str] = None) -> List[str]:
"""
Returns a list of random proxies of the specified type.
If type is None, returns random proxies from any type.
"""
if proxy_type and proxy_type in self.proxy_dict:
proxies = self.proxy_dict[proxy_type]
else:
# Combine all proxy types
proxies = []
for ptype in self.proxy_dict:
proxies.extend(self.proxy_dict[ptype])
# Get random proxies up to count or as many as available
if not proxies:
return []
return random.sample(proxies, min(count, len(proxies)))
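

# Illustrative helper (not part of the original class): a minimal sketch of how an
# "ip:port" string returned by ProxyFinder could be passed to requests via its
# `proxies=` argument. Assumes the usual "scheme://ip:port" form; the socks4/socks5
# schemes require the optional requests[socks] extra (PySocks) to be installed.
def as_requests_proxies(proxy: str, proxy_type: str = 'http') -> Dict[str, str]:
    """Wrap an "ip:port" string in a requests-style proxies mapping."""
    scheme = 'http' if proxy_type in ('http', 'https') else proxy_type
    proxy_url = f"{scheme}://{proxy}"
    return {'http': proxy_url, 'https': proxy_url}
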
if __name__ == "__main__":
# Example usage
finder = ProxyFinder(verbose=True)
proxies = finder.get_proxies(['http', 'socks5'])
print("\nSummary:")
for ptype, proxy_list in proxies.items():
print(f"{ptype}: {len(proxy_list)} proxies")
print("\nRandom HTTP proxy:", finder.get_random_proxy('http'))
print("\nRandom SOCKS5 proxies:", finder.get_random_proxies(5, 'socks5'))