 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import json
import datetime
import requests
import re
import random
import time
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Optional, Set, Tuple, Union

class ProxyFinder:
    """Scrapes proxy addresses (``ip:port``) from public sources and keeps
    them grouped by protocol (``socks4``, ``socks5``, ``http``, ``https``).

    Typical usage::

        finder = ProxyFinder(verbose=True)
        proxies = finder.get_proxies(['http', 'socks5'])
        one = finder.get_random_proxy('http')
    """

    def __init__(self, verbose: bool = False):
        """Initialize ProxyFinder with optional verbose logging.

        Args:
            verbose: When True, progress messages are printed to stdout.
        """
        self.verbose = verbose
        # Plain-text proxy-list endpoints, keyed by protocol. The 'mixed'
        # sources publish lists whose protocol is unknown.
        self.api: Dict[str, List[str]] = {
            'socks4': [
                "https://api.proxyscrape.com/?request=displayproxies&proxytype=socks4&timeout=10000&country=all&simplified=true",
                "https://www.proxy-list.download/api/v1/get?type=socks4",
                "https://api.openproxylist.xyz/socks4.txt",
                'https://openproxy.space/list/socks4',
                'https://proxyspace.pro/socks4.txt',
                "https://sunny9577.github.io/proxy-scraper/generated/socks4_proxies.txt",
                'https://cdn.rei.my.id/proxy/SOCKS4',
                "https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt",
                "https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS4_RAW.txt",
                'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks4.txt'
            ],
            'socks5': [
                "https://api.proxyscrape.com/v2/?request=getproxies&protocol=socks5&timeout=10000&country=all&simplified=true",
                "https://www.proxy-list.download/api/v1/get?type=socks5",
                "https://api.openproxylist.xyz/socks5.txt",
                'https://openproxy.space/list/socks5',
                'https://spys.me/socks.txt',
                'https://proxyspace.pro/socks5.txt',
                "https://sunny9577.github.io/proxy-scraper/generated/socks5_proxies.txt",
                'https://cdn.rei.my.id/proxy/SOCKS5',
                'https://raw.githubusercontent.com/manuGMG/proxy-365/main/SOCKS5.txt',
                "https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks5.txt",
                "https://raw.githubusercontent.com/hookzof/socks5_list/master/proxy.txt"
            ],
            'http': [
                'https://raw.githubusercontent.com/officialputuid/KangProxy/KangProxy/http/http.txt',
                "https://github.com/TheSpeedX/PROXY-List/raw/refs/heads/master/http.txt",
                "https://api.proxyscrape.com/?request=displayproxies&proxytype=http&timeout=10000&country=all&simplified=true",
                "https://www.proxy-list.download/api/v1/get?type=http",
                "https://api.openproxylist.xyz/http.txt",
                'https://openproxy.space/list/http',
                'https://proxyspace.pro/http.txt',
                "https://sunny9577.github.io/proxy-scraper/generated/http_proxies.txt",
                'https://cdn.rei.my.id/proxy/HTTP',
                'https://raw.githubusercontent.com/UptimerBot/proxy-list/master/proxies/http.txt',
                'https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/http.txt'
            ],
            'https': [
                'https://raw.githubusercontent.com/Firdoxx/proxy-list/main/https',
                'https://raw.githubusercontent.com/roosterkid/openproxylist/main/HTTPS_RAW.txt',
                'https://raw.githubusercontent.com/aslisk/proxyhttps/main/https.txt',
                'https://raw.githubusercontent.com/ErcinDedeoglu/proxies/main/proxies/https.txt',
                'https://raw.githubusercontent.com/zloi-user/hideip.me/main/https.txt',
                'https://raw.githubusercontent.com/vakhov/fresh-proxy-list/master/https.txt',
                'https://raw.githubusercontent.com/Vann-Dev/proxy-list/main/proxies/https.txt'
            ],
            'mixed': [
                'https://github.com/jetkai/proxy-list/blob/main/online-proxies/txt/proxies.txt',
                'https://raw.githubusercontent.com/mertguvencli/http-proxy-list/main/proxy-list/data.txt',
                'https://raw.githubusercontent.com/a2u/free-proxy-list/master/free-proxy-list.txt',
                'https://raw.githubusercontent.com/mishakorzik/Free-Proxy/main/proxy.txt',
                'http://rootjazz.com/proxies/proxies.txt',
                'https://multiproxy.org/txt_all/proxy.txt',
                'https://proxy-spider.com/api/proxies.example.txt'
            ]
        }
        # Accumulated results of the most recent get_proxies() call.
        self.proxy_dict: Dict[str, List[str]] = {'socks4': [], 'socks5': [], 'http': [], 'https': []}
        self.max_workers = 20  # Maximum workers for parallel requests

    def log(self, *args):
        """Log messages to stdout if verbose mode is enabled."""
        if self.verbose:
            print(*args)

    def extract_proxy(self, line: str) -> Optional[str]:
        """Extract the first ``ip:port`` occurrence from a line.

        Args:
            line: Arbitrary text, possibly containing a proxy address.

        Returns:
            The matched ``ip:port`` string, or None if no match is found.
            Note: octet values are not range-checked (e.g. 999.1.1.1 matches).
        """
        match = re.search(r'(\d{1,3}(?:\.\d{1,3}){3}:\d{2,5})', line)
        if match:
            return match.group(1)
        return None

    def fetch_from_url(self, url: str, proxy_type: str) -> List[str]:
        """Fetch proxies from a given URL for the specified type.

        Network or parse failures are logged (when verbose) and swallowed,
        returning an empty list, so one dead source never aborts a run.

        Args:
            url: Plain-text proxy-list endpoint to download.
            proxy_type: Label used only for logging.

        Returns:
            A list of ``ip:port`` strings (possibly empty).
        """
        proxy_list: List[str] = []
        try:
            r = requests.get(url, timeout=5)
            if r.status_code == requests.codes.ok:
                for line in r.text.splitlines():
                    proxy = self.extract_proxy(line)
                    if proxy:
                        proxy_list.append(proxy)
                self.log(f"Got {len(proxy_list)} {proxy_type} proxies from {url}")
            return proxy_list
        except Exception as e:
            # Best-effort scraping: any failure for a single source is non-fatal.
            self.log(f"Failed to retrieve from {url}: {str(e)}")
            return []

    def fetch_proxies_parallel(self, proxy_type: str) -> List[str]:
        """Fetch proxies in parallel for a specific type from all sources.

        Args:
            proxy_type: One of the keys of ``self.api``.

        Returns:
            Deduplicated list of proxies; empty if the type is unknown.
        """
        if proxy_type not in self.api:
            return []

        all_proxies: List[str] = []
        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            futures = [executor.submit(self.fetch_from_url, url, proxy_type)
                      for url in self.api[proxy_type]]
            for future in futures:
                all_proxies.extend(future.result())

        return list(set(all_proxies))  # Remove duplicates

    def get_geonode_proxies(self) -> Dict[str, List[str]]:
        """Retrieve proxies from the GeoNode API.

        Returns:
            Dict mapping each protocol to the proxies found. GeoNode's
            'https' protocol entries are filed under 'http' deliberately.
        """
        result: Dict[str, List[str]] = {'http': [], 'socks4': [], 'socks5': [], 'https': []}
        try:
            url = 'https://proxylist.geonode.com/api/proxy-list?limit=500&sort_by=lastChecked&sort_type=desc'
            response = requests.get(url, timeout=10)

            if response.status_code == 200:
                data = response.json()
                for p in data.get('data', []):
                    for protocol in p.get('protocols', []):
                        protocol = protocol.lower()
                        # Map 'https' to 'http' in our dictionary
                        if protocol == 'https':
                            result['http'].append(f"{p['ip']}:{p['port']}")
                        elif protocol in result:
                            result[protocol].append(f"{p['ip']}:{p['port']}")

                self.log(f"Got {sum(len(v) for v in result.values())} proxies from GeoNode")
        except Exception as e:
            self.log(f"Failed to fetch from GeoNode: {str(e)}")

        return result

    def get_checkerproxy_archive(self) -> Dict[str, List[str]]:
        """Get proxies from the checkerproxy.net daily archive.

        Only the last 5 days are queried to keep the scan fast.

        Returns:
            Dict with 'http' and 'socks5' proxy lists. CheckerProxy type
            codes 1 and 2 are treated as http, type 4 as socks5.
        """
        result: Dict[str, List[str]] = {'http': [], 'socks5': []}

        for q in range(5):  # Check only last 5 days to be faster
            day = datetime.date.today() + datetime.timedelta(days=-q)
            formatted_date = f'{day.year}-{day.month}-{day.day}'

            try:
                r = requests.get(f'https://checkerproxy.net/api/archive/{formatted_date}', timeout=5)
                if r.text != '[]':
                    json_result = json.loads(r.text)
                    for i in json_result:
                        # Skip private-range IPs (RFC 1918): 10/8, 172.16/12,
                        # 192.168/16. The previous filter matched the whole
                        # 172.0.0.0/8 block, discarding valid public proxies.
                        if re.match(r"(?:10\.|192\.168\.|172\.(?:1[6-9]|2[0-9]|3[01])\.)", i['ip']):
                            continue

                        addr = i.get('addr')
                        if not addr:
                            continue

                        if i['type'] in [1, 2]:
                            result['http'].append(addr)
                        if i['type'] == 4:
                            result['socks5'].append(addr)

                    self.log(f"Got {len(result['http'])} http and {len(result['socks5'])} socks5 proxies from CheckerProxy for {formatted_date}")
            except Exception as e:
                self.log(f"Failed to get archive for {formatted_date}: {str(e)}")

        return result

    def get_proxies(self, proxy_types: Optional[List[str]] = None) -> Dict[str, List[str]]:
        """Get proxies of the specified types. If None, get all types.

        Fetches from per-type sources, GeoNode, CheckerProxy and the
        'mixed' sources, then deduplicates each bucket.

        Args:
            proxy_types: Subset of ['http', 'https', 'socks4', 'socks5'];
                None means all four.

        Returns:
            ``self.proxy_dict`` — a dict of deduplicated proxy lists.
        """
        if proxy_types is None:
            proxy_types = ['http', 'https', 'socks4', 'socks5']

        # Reset proxy dictionary so repeated calls don't accumulate stale entries
        self.proxy_dict = {'socks4': [], 'socks5': [], 'http': [], 'https': []}

        self.log("Starting proxy retrieval process")

        # Fetch from regular sources in parallel for each type
        for ptype in proxy_types:
            if ptype in self.api:
                self.log(f"Processing {ptype} proxy sources")
                proxies = self.fetch_proxies_parallel(ptype)
                self.proxy_dict[ptype].extend(proxies)

        # Add proxies from GeoNode
        geonode_proxies = self.get_geonode_proxies()
        for ptype, proxies in geonode_proxies.items():
            if ptype in proxy_types:
                self.proxy_dict[ptype].extend(proxies)

        # Add proxies from CheckerProxy
        checker_proxies = self.get_checkerproxy_archive()
        for ptype, proxies in checker_proxies.items():
            if ptype in proxy_types:
                self.proxy_dict[ptype].extend(proxies)

        # Process "mixed" sources if any proxy type is requested
        if proxy_types:
            self.log("Processing mixed proxy sources")
            for url in self.api.get('mixed', []):
                try:
                    proxies = self.fetch_from_url(url, 'mixed')
                    # Distribute mixed proxies equally among requested types
                    if proxies:
                        chunks = len(proxy_types)
                        chunk_size = len(proxies) // chunks if chunks > 0 else 0
                        for i, ptype in enumerate(proxy_types):
                            # Guard against unknown caller-supplied types,
                            # which previously raised KeyError here.
                            if ptype not in self.proxy_dict:
                                continue
                            start = i * chunk_size
                            # The final chunk absorbs the division remainder
                            end = start + chunk_size if i < chunks - 1 else len(proxies)
                            self.proxy_dict[ptype].extend(proxies[start:end])
                except Exception as e:
                    self.log(f"Failed to process mixed proxy source: {str(e)}")

        # Remove duplicates for all types
        for key in self.proxy_dict:
            original_count = len(self.proxy_dict[key])
            self.proxy_dict[key] = list(set(self.proxy_dict[key]))
            new_count = len(self.proxy_dict[key])
            self.log(f"Removed {original_count - new_count} duplicate {key} proxies")

        self.log("Proxy retrieval process completed")
        return self.proxy_dict

    def get_random_proxy(self, proxy_type: Optional[str] = None) -> Optional[str]:
        """Return a random proxy of the specified type.

        Args:
            proxy_type: Protocol key, or None to draw from all types.

        Returns:
            A random ``ip:port`` string, or None when nothing is available
            (or the requested type is unknown).
        """
        if proxy_type and proxy_type in self.proxy_dict and self.proxy_dict[proxy_type]:
            return random.choice(self.proxy_dict[proxy_type])
        elif not proxy_type:
            # Combine all proxy types and get a random one
            all_proxies: List[str] = []
            for ptype in self.proxy_dict:
                all_proxies.extend(self.proxy_dict[ptype])
            if all_proxies:
                return random.choice(all_proxies)
        return None

    def get_random_proxies(self, count: int = 10, proxy_type: Optional[str] = None) -> List[str]:
        """Return a list of random proxies of the specified type.

        Args:
            count: Maximum number of proxies to return.
            proxy_type: Protocol key, or None to draw from all types.

        Returns:
            Up to ``count`` distinct proxies (fewer if not enough are
            available); empty list when none exist.
        """
        if proxy_type and proxy_type in self.proxy_dict:
            proxies = self.proxy_dict[proxy_type]
        else:
            # Combine all proxy types
            proxies = []
            for ptype in self.proxy_dict:
                proxies.extend(self.proxy_dict[ptype])

        # Get random proxies up to count or as many as available
        if not proxies:
            return []

        return random.sample(proxies, min(count, len(proxies)))

if __name__ == "__main__":
    # Demo: fetch http + socks5 proxies, print a per-type summary,
    # then show a couple of random picks.
    proxy_finder = ProxyFinder(verbose=True)
    fetched = proxy_finder.get_proxies(['http', 'socks5'])

    print("\nSummary:")
    for kind, bucket in fetched.items():
        print(f"{kind}: {len(bucket)} proxies")

    print("\nRandom HTTP proxy:", proxy_finder.get_random_proxy('http'))
    print("\nRandom SOCKS5 proxies:", proxy_finder.get_random_proxies(5, 'socks5'))