import argparse
import re

# dependencies
import bs4
import httpx
import user_agent


def main():
    """Entry point: parse CLI options, scrape spys.one, print one proxy per line."""
    base_url = 'https://spys.one/en/http-proxy-list/'
    # Use the client as a context manager so the connection pool is always
    # closed (the original leaked it).
    with httpx.Client(
        headers={
            'Host': 'spys.one',
            'User-Agent': user_agent.generate_user_agent(),
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5'
        },
        timeout=30
    ) as client:
        protocol, number, ssl, anonymity, date = return_args()
        result = scrape(client, base_url, protocol, number, ssl, anonymity, date)
        for i in result:
            print(i)


def return_args():
    """Parse CLI arguments and map them to the site's form-field index values.

    Returns:
        tuple: (protocol, number, ssl, anonymity, date) where the first four
        are integer indexes expected by the spys.one select boxes and ``date``
        is a bool toggling last-check info in the output.
    """
    parser = argparse.ArgumentParser(description='Proxy-scraper for spys.one')
    parser.add_argument('-p', '--protocol', choices=['all', 'http', 'socks'],
                        default='all', help='select protocol (default is all)')
    parser.add_argument('-n', '--number', choices=['30', '50', '100', '200', '300', '500'],
                        default='30', help='select number of proxys to show (default is 30)')
    parser.add_argument('-s', '--ssl', choices=['all', '1', '0'],
                        default='all', help='turn ssl on/off (default is all)')
    parser.add_argument('-a', '--anonymity', choices=['all', 'a+h', 'noa', 'anm', 'hia'],
                        default='all', help='select level of anonymity (default is all)')
    parser.add_argument('-d', '--date', action='store_true',
                        help='shows when proxy was last checked (default is off)')
    args = parser.parse_args()

    # Each mapping translates a human-readable choice into the numeric
    # position used by the corresponding <select> element on the site.
    protocol_form_values = {'all': 0, 'http': 1, 'socks': 2}
    number_form_values = {'30': 0, '50': 1, '100': 2, '200': 3, '300': 4, '500': 5}
    ssl_form_values = {'all': 0, '1': 1, '0': 2}
    anonymity_form_values = {'all': 0, 'a+h': 1, 'noa': 2, 'anm': 3, 'hia': 4}
    return (protocol_form_values[args.protocol],
            number_form_values[args.number],
            ssl_form_values[args.ssl],
            anonymity_form_values[args.anonymity],
            args.date)


def scrape(client, base_url, protocol, number, ssl, anonymity, date=False):
    """Fetch the proxy list page twice and yield formatted proxy strings.

    The first request obtains the form's hidden fields (session tokens);
    the second submits the user's filter selections.
    """
    # post instead of get to simulate first visit without form data
    response = client.post(base_url)
    soup = bs4.BeautifulSoup(response.content, 'lxml')
    form_data = return_form_data(soup, protocol, number, ssl, anonymity)
    response = client.post(base_url, data=form_data).text
    yield from parse_proxy_results(response, date)


def return_form_data(soup, protocol=0, number=0, ssl=0, anonymity=0):
    """Build the POST payload from the page's form, overriding filter fields.

    Hidden <input> values (e.g. session token) are copied verbatim; each
    <select> defaults to its first <option> and is then overridden with the
    caller's chosen indexes.
    """
    form_data = {}
    form = soup.select_one('form')
    for input_tag in form.find_all('input'):
        form_data[input_tag['name']] = input_tag.get('value', '')
    for select_tag in form.find_all('select'):
        form_data[select_tag['name']] = select_tag.option['value']
    # Site-specific field names: xpp = page size, xf1 = anonymity,
    # xf2 = ssl, xf5 = protocol.
    form_data['xpp'] = number
    form_data['xf1'] = anonymity
    form_data['xf2'] = ssl
    form_data['xf5'] = protocol
    return form_data


def parse_proxy_results(response, date):
    """Yield 'ip:port' strings (optionally with last-check info) from raw HTML.

    The site obfuscates port numbers: an inline <script> defines integer
    variables (some XOR-chained), and each port digit is emitted as an
    expression like ``(a^b)``. We rebuild that variable table ("cypher")
    and evaluate the XORs ourselves.
    """
    SCRIPT_PATTERN = r'javascript">(.*?);<'
    PROXY_PATTERN = r'spy14>(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}).*?"\+(.*?)\)<'
    DATE_TIME_PATTERN = r'spy14>(\d{2}-\w{3}-\d{4}) (\d{2}:\d{2}) ([(]\d{1,2} \w{4,5} \w{3}[)])'

    # building cypher to decrypt port number
    cypher = {}
    script = re.search(SCRIPT_PATTERN, response)
    key_value_list = script.group(1).split(';')
    for key_value in key_value_list:
        key, value = key_value.split('=')
        if value.isdigit():
            cypher[key] = int(value)
        else:
            # Value is of the form "N^other_var"; other_var is always
            # defined earlier in the script, so it is already in cypher.
            part1, part2 = value.split('^')
            cypher[key] = int(part1) ^ cypher[part2]

    proxy_matches = re.findall(PROXY_PATTERN, response)
    date_time_matches = re.findall(DATE_TIME_PATTERN, response)
    for proxy_data, date_time_data in zip(proxy_matches, date_time_matches):
        proxy, port_script = proxy_data
        last_check_date, last_check_time, since = date_time_data
        # decrypting port number using cypher: each '+'-separated term is
        # "(a^b)"; strip the parens and XOR the two table entries.
        port_list = []
        for crypted_digit in port_script.split('+'):
            stripped_crypted_digit = crypted_digit[1:-1]
            xor1, xor2 = stripped_crypted_digit.split('^')
            # cypher values are already ints; the original's extra int()
            # casts were redundant.
            digit = str(cypher[xor1] ^ cypher[xor2])
            port_list.append(digit)
        port = ''.join(port_list)

        if date:
            yield f'{proxy}:{port} {last_check_date} {last_check_time} {since}'
        else:
            yield f'{proxy}:{port}'


if __name__ == '__main__':
    main()