#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Download proxy listing pages from www.veryhuo.com and save "ip:port" lines.

Created on 2013-4-3

@author: zdh
"""
import re
from contextlib import closing

try:
    from urllib.request import build_opener  # Python 3
except ImportError:
    from urllib2 import build_opener  # Python 2

# Letter -> digit substitution applied to every scraped entry.
PORT_CIPHER = {
    'd': '0', 'c': '1', 'k': '2', 'z': '3', 'm': '4',
    'b': '5', 'w': '6', 'i': '7', 'r': '8', 'l': '9',
}

# A port expression containing any of these letters is discarded and only
# the IP is kept for that entry.
UNDECODABLE_LETTERS = 'exaf'

USER_AGENT_HEADER = (
    'User-Agent',
    'Mozilla/5.0 (Windows NT 5.1; rv:14.0) Gecko/20100101 Firefox/14.0.1',
)

PAGE_URL_TEMPLATE = "http://www.veryhuo.com/res/ip/page_{}.php"
DEFAULT_OUTPUT_PATH = r'E:/ip.txt'
DEFAULT_PAGES = tuple(range(1, 11))

# Compiled once instead of on every page.
IP_RE = re.compile(r'<td>(.*)<S')
PORT_RE = re.compile(r'\"(\+.*?\+.)\)')


def decode_entry(ip, raw_port):
    """Build one output entry from an IP and a raw '+'-separated port string.

    Args:
        ip: Text captured by ``IP_RE``.
        raw_port: Text captured by ``PORT_RE``, e.g. ``"+r+d+r+d"``.

    Returns:
        ``"<ip>:<port>"`` with the ``PORT_CIPHER`` letters replaced by
        digits, or just the (substituted) IP text when the port contains any
        letter from ``UNDECODABLE_LETTERS``.
    """
    port = raw_port.replace('+', '')
    if any(letter in port for letter in UNDECODABLE_LETTERS):
        entry = ip
    else:
        entry = ip + ':' + port
    # Keys are letters and values are digits, so the replacement order
    # cannot affect the result.
    for letter, digit in PORT_CIPHER.items():
        entry = entry.replace(letter, digit)
    return entry


def extract_proxies(html):
    """Return the decoded entries found in one listing page.

    IPs and ports are matched independently and paired by position.
    ``zip`` stops at the shorter list, so a page where fewer ports than IPs
    are matched no longer raises ``IndexError``.

    Args:
        html: Decoded page text.

    Returns:
        A list of strings as produced by ``decode_entry``.
    """
    ips = IP_RE.findall(html)
    ports = PORT_RE.findall(html)
    return [decode_entry(ip, port) for ip, port in zip(ips, ports)]


def main(output_path=DEFAULT_OUTPUT_PATH, pages=DEFAULT_PAGES):
    """Fetch each listing page and write its entries, one per line.

    Every entry is also printed, followed by the number of entries found on
    each page.

    Args:
        output_path: File to (over)write; defaults to the original
            hard-coded location.
        pages: Iterable of page numbers to fetch; defaults to pages 1-10.
    """
    opener = build_opener()
    opener.addheaders = [USER_AGENT_HEADER]

    # ``with`` guarantees the file is closed even if a request fails midway.
    with open(output_path, 'w') as out:
        for page in pages:
            url = PAGE_URL_TEMPLATE.format(page)
            with closing(opener.open(url)) as response:
                html = response.read().decode('GBK')

            entries = extract_proxies(html)
            for entry in entries:
                out.write(entry + '\n')
                print(entry)
            print(len(entries))


if __name__ == '__main__':
    main()

# This script comes from the "zhangdh开放空间" blog; please keep this attribution:
# http://linuxblind.blog.51cto.com/7616603/1711147
# Original article: http://linuxblind.blog.51cto.com/7616603/1711147