diff --git a/NO15/ipporxy.txt b/NO15/ipporxy.txt
new file mode 100644
index 0000000000000000000000000000000000000000..24757cab71a5ba8401e3d1ba0238034a2e9acee6
--- /dev/null
+++ b/NO15/ipporxy.txt
@@ -0,0 +1 @@
+27.14.86.171:8000
diff --git "a/NO15/\344\273\243\347\220\206IP.py" "b/NO15/\344\273\243\347\220\206IP.py"
new file mode 100644
index 0000000000000000000000000000000000000000..9934e4317f7f3cc781ce97599843e73e5cf890af
--- /dev/null
+++ "b/NO15/\344\273\243\347\220\206IP.py"
@@ -0,0 +1,230 @@
+
+import requests
+from lxml import etree
+import random
+import telnetlib  # note: telnetlib was removed in Python 3.13, so run this on 3.12 or earlier
+
+# Proxy check: try to open a raw TCP connection to each ip:port
+
+
+def check_ip_port(ip_port):
+    for item in ip_port:
+        ip = item["ip"]
+        port = item["port"]
+
+        try:
+            tn = telnetlib.Telnet(ip, port=port, timeout=3)
+        except Exception:
+            print('[-] ip:{}:{}'.format(ip, port))
+        else:
+            tn.close()
+            print('[+] ip:{}:{}'.format(ip, port))
+            with open('ipporxy.txt', 'a') as f:
+                f.write(ip + ':' + port + '\n')
+    print("Batch check finished")
+
+# Second way to check proxies: send a real request through each one
+
+
+def check_proxy(ip_port):
+    for item in ip_port:
+        ip = item["ip"]
+        port = item["port"]
+        # url = 'https://api.ipify.org/?format=json'
+        url = "http://icanhazip.com/"
+        # free proxies are almost always plain HTTP, so use the http:// scheme for both keys
+        proxies = {
+            "http": "http://{}:{}".format(ip, port),
+            "https": "http://{}:{}".format(ip, port),
+        }
+        try:
+            # res = requests.get(url, proxies=proxies, timeout=3).json()
+            res = requests.get(url, proxies=proxies, timeout=3)
+            # if 'ip' in res:
+            #     print(res['ip'])
+            if res.status_code == 200:
+                print(res.text.strip())
+
+        except Exception as e:
+            print(e)
+
+
+# Scrape proxies from www.89ip.cn
+def ip89(pagesize):
+    url_format = "https://www.89ip.cn/index_{}.html"
+    for page in range(1, pagesize + 1):
+        url = url_format.format(page)
+        text = get_html(url)
+        ip_xpath = '//tbody/tr/td[1]/text()'
+        port_xpath = '//tbody/tr/td[2]/text()'
+        ret = format_html(text, ip_xpath, port_xpath)
+        # check whether the proxies are usable
+        check_ip_port(ret)
+        # check_proxy(ret)
+
+
+# Scrape proxies from www.66ip.cn
+def ip66(pagesize):
+    url_format = "http://www.66ip.cn/{}.html"
+    for page in range(1, pagesize + 1):
+        url = url_format.format(page)
+        text = get_html(url)
+        ip_xpath = '//table/tr[position()>1]/td[1]/text()'
+        port_xpath = '//table/tr[position()>1]/td[2]/text()'
+        ret = format_html(text, ip_xpath, port_xpath)
+        check_ip_port(ret)
+
+
+# Scrape proxies from proxy.ip3366.net
+def ip3366(pagesize):
+    url_format = "https://proxy.ip3366.net/free/?action=china&page={}"
+    for page in range(1, pagesize + 1):
+        url = url_format.format(page)
+        text = get_html(url)
+        ip_xpath = '//td[@data-title="IP"]/text()'
+        port_xpath = '//td[@data-title="PORT"]/text()'
+        ret = format_html(text, ip_xpath, port_xpath)
+        check_ip_port(ret)
+
+
+# Scrape proxies from ip.ihuan.me (fixed page tokens)
+def ip_huan():
+    urls = ["https://ip.ihuan.me/?page=b97827cc", "https://ip.ihuan.me/?page=4ce63706", "https://ip.ihuan.me/?page=5crfe930",
+            "https://ip.ihuan.me/?page=f3k1d581", "https://ip.ihuan.me/?page=ce1d45977", "https://ip.ihuan.me/?page=881aaf7b5"]
+    for url in urls:
+        text = get_html(url)
+        ip_xpath = '//tbody/tr/td[1]/a/text()'
+        port_xpath = '//tbody/tr/td[2]/text()'
+        ret = format_html(text, ip_xpath, port_xpath)
+        check_ip_port(ret)
+
+
+# Scrape proxies from www.kuaidaili.com
+def ip_kuai(pagesize):
+    url_format = "https://www.kuaidaili.com/free/inha/{}/"
+    for page in range(1, pagesize + 1):
+        url = url_format.format(page)
+        text = get_html(url)
+        ip_xpath = '//td[@data-title="IP"]/text()'
+        port_xpath = '//td[@data-title="PORT"]/text()'
+        ret = format_html(text, ip_xpath, port_xpath)
+        check_ip_port(ret)
+
+
+# Scrape proxies from ip.jiangxianli.com
+def ip_jiangxi(pagesize):
+    url_format = "https://ip.jiangxianli.com/?page={}"
+    for page in range(1, pagesize + 1):
+        url = url_format.format(page)
+        text = get_html(url)
+        ip_xpath = '//tbody/tr[position()!=7]/td[1]/text()'
+        port_xpath = '//tbody/tr[position()!=7]/td[2]/text()'
+        ret = format_html(text, ip_xpath, port_xpath)
+        check_ip_port(ret)
+
+
+# Scrape proxies from www.kxdaili.com
+def ip_kaixin(pagesize):
+    url_format = "http://www.kxdaili.com/dailiip/1/{}.html"
+    for page in range(1, pagesize + 1):
+        url = url_format.format(page)
+        text = get_html(url)
+        ip_xpath = '//tbody/tr/td[1]/text()'
+        port_xpath = '//tbody/tr/td[2]/text()'
+        ret = format_html(text, ip_xpath, port_xpath)
+        check_ip_port(ret)
+
+
+# Scrape proxies from www.nimadaili.com (first column already holds "ip:port")
+def ip_nima(pagesize):
+    url_format = "http://www.nimadaili.com/putong/{}/"
+    for page in range(1, pagesize + 1):
+        url = url_format.format(page)
+        text = get_html(url)
+        ip_xpath = '//tbody/tr/td[1]/text()'
+        ret = format_html_ext(text, ip_xpath)
+        check_ip_port(ret)
+
+
+# Parse pages whose cell text is already "ip:port"
+def format_html_ext(text, ip_xpath):
+    # list of IP/port dicts to return
+    ret = []
+    if not text:
+        return ret
+    html = etree.HTML(text)
+    ips = html.xpath(ip_xpath)
+
+    print(ips)
+
+    for ip in ips:
+        item_dict = {
+            "ip": ip.split(":")[0],
+            "port": ip.split(":")[1]
+        }
+        ret.append(item_dict)
+
+    return ret
+
+
+# Parse pages where IP and port sit in separate cells
+def format_html(text, ip_xpath, port_xpath):
+    # list of IP/port dicts to return
+    ret = []
+    if not text:
+        return ret
+    html = etree.HTML(text)
+    ips = html.xpath(ip_xpath)
+    ports = html.xpath(port_xpath)
+    # debug output; remove before a production run
+    print(ips, ports)
+    ip_port = zip(ips, ports)
+    for ip, port in ip_port:
+        item_dict = {
+            "ip": ip.strip(),
+            "port": port.strip()
+        }
+        ret.append(item_dict)
+
+    return ret
+
+
+# Fetch a page and return its HTML text, or None on failure
+def get_html(url):
+    headers = get_headers()
+    try:
+        res = requests.get(url, headers=headers, timeout=5)
+        return res.text
+    except Exception as e:
+        print("Request failed", e)
+        return None
+
+
+# Build request headers with a randomly chosen crawler User-Agent
+def get_headers():
+    uas = [
+        "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)",
+        "Mozilla/5.0 (compatible; Baiduspider-render/2.0; +http://www.baidu.com/search/spider.html)",
+        "Baiduspider-image+(+http://www.baidu.com/search/spider.htm)",
+        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 YisouSpider/5.0 Safari/537.36",
+        "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
+        "Mozilla/5.0 (compatible; Googlebot-Image/1.0; +http://www.google.com/bot.html)",
+        "Sogou web spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07)",
+        "Sogou News Spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07)",
+        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0);",
+        "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)",
+        "Sosospider+(+http://help.soso.com/webspider.htm)",
+        "Mozilla/5.0 (compatible; Yahoo! Slurp China; http://misc.yahoo.com.cn/help.html)"
+    ]
+    ua = random.choice(uas)
+    headers = {
+        "user-agent": ua,
+        "referer": "https://www.baidu.com"
+    }
+    return headers
+
+
+def run():
+    ip89(10)
+    ip66(10)
+    ip3366(2)
+    ip_huan()
+    ip_kuai(4)
+    ip_jiangxi(4)
+    ip_kaixin(10)
+    ip_nima(5)
+
+
+if __name__ == "__main__":
+    run()
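
For reference, a minimal sketch (not part of the diff above) of how the proxies saved by check_ip_port might be consumed later. It assumes ipporxy.txt holds one "ip:port" entry per line; the target URL https://httpbin.org/ip and the variable names are illustrative, not part of the script.

import random
import requests

# Assumption: ipporxy.txt holds one "ip:port" per line, as written by check_ip_port.
with open("ipporxy.txt") as f:
    candidates = [line.strip() for line in f if line.strip()]

proxy = random.choice(candidates)
proxies = {
    "http": "http://{}".format(proxy),
    "https": "http://{}".format(proxy),
}
try:
    # httpbin.org/ip echoes the caller's IP, which should now be the proxy's address.
    res = requests.get("https://httpbin.org/ip", proxies=proxies, timeout=5)
    print(res.status_code, res.text)
except Exception as e:
    print("proxy request failed:", e)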