标签:down dir dal 入门 mozilla white range src file
import re

# Demo: match a dotted-quad IPv4 address. Each octet is one of
#   [01]{0,1}\d{0,1}\d  -> 0-199 (optionally zero-padded)
#   2[0-4]\d            -> 200-249
#   25[0-5]             -> 250-255
# The first three octets are each followed by a literal dot.
# The result is bound to a name so it can be inspected; in an interactive
# session the bare expression would simply echo the match object.
ip_match = re.search(
    r'(([01]{0,1}\d{0,1}\d|2[0-4]\d|25[0-5])\.){3}([01]{0,1}\d{0,1}\d|2[0-4]\d|25[0-5])',
    '192.168.1.1',
)
>>> p = re.compile('\d+')
>>> p.findall('3只小甲鱼,15条腿,多出的3条在哪里?')
['3', '15', '3']
>>> import re
>>> p = re.compile('[a-z]+')
>>> p
re.compile('[a-z]+')
>>> p.match("")
>>> print(p.match(""))
None
>>> m = p.match('fishc')
>>> m
<_sre.SRE_Match object; span=(0, 5), match='fishc'>
>>> m.group()
'fishc'
>>> m.start()
0
>>> m.end()
5
>>> m.span()
(0, 5)
- 设置了编译标志符
# Numeric character reference (e.g. "&#65;", "&#x41;", "&#0101;") written with
# re.VERBOSE so that each part of the pattern can carry its own comment.
# The pattern MUST span several lines: under re.VERBOSE a "#" outside a
# character class starts a comment that runs to the end of the line, so on a
# single line everything after the first comment would be ignored.
charref = re.compile(r"""
 &[#]                # 开始数字引用
 (
     0[0-7]+         # 八进制格式
   | [0-9]+          # 十进制格式
   | x[0-9a-fA-F]+   # 十六进制格式
 )
 ;                   # 结尾分号
""", re.VERBOSE)
- 未设置编译标志符
# Same numeric-character-reference pattern in compact form, without
# re.VERBOSE: "&#" followed by an octal (leading 0), decimal, or hexadecimal
# (leading x) number, terminated by ";".
charref = re.compile(
    "&#(0[0-7]+|[0-9]+|x[0-9a-fA-F]+);"
)
import re
import urllib.request

# Dotted-quad IPv4 address. Each octet is 0-199 (optionally zero-padded),
# 200-249 or 250-255. The look-arounds stop the pattern from matching inside
# a longer run of digits (e.g. "56.1.1.1" out of "256.1.1.1").
IP_PATTERN = re.compile(
    r'(?<!\d)'
    r'(?:(?:[01]?\d?\d|2[0-4]\d|25[0-5])\.){3}'
    r'(?:[01]?\d?\d|2[0-4]\d|25[0-5])'
    r'(?!\d)'
)


def open_url(url):
    """Fetch *url* with a browser-like User-Agent and return the body as text.

    The response body is decoded as UTF-8. Errors raised by ``urllib`` or by
    the decoding step propagate to the caller.
    """
    req = urllib.request.Request(url)
    req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36')
    # Use the response as a context manager so the connection is always closed.
    with urllib.request.urlopen(req, timeout=30) as page:
        return page.read().decode('utf-8')


def get_img(html):
    """Print every IPv4 address found in *html*, one per line.

    Despite its name (kept for compatibility) this function extracts IP
    addresses, not images.
    """
    for each in IP_PATTERN.findall(html):
        print(each)


if __name__ == '__main__':
    url = "http://www.xicidaili.com/"
    get_img(open_url(url))
import os
import re
import urllib.request

USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'

# src attribute of an <img> tag ending in ".jpg". The character class keeps
# the match inside a single attribute value so a non-jpg image can never
# swallow the following tag.
IMG_SRC_PATTERN = re.compile(r'''img src=["']([^"'<>]*?\.jpg)''')


def save_imgs(folder, img_addrs):
    """Download every URL in *img_addrs* into the directory *folder*.

    Each file is named after the last path segment of its URL. *folder* is
    created if it does not exist yet.
    """
    os.makedirs(folder, exist_ok=True)
    for each in img_addrs:
        filename = each.split('/')[-1]
        # Download first so a failed request does not leave an empty file.
        img = url_open(each)
        with open(os.path.join(folder, filename), 'wb') as f:
            f.write(img)


def url_open(url):
    """Fetch *url* with a browser-like User-Agent and return the raw bytes."""
    req = urllib.request.Request(url)
    req.add_header('User-Agent', USER_AGENT)
    # Open the Request object (not the bare URL) so the header is really sent.
    with urllib.request.urlopen(req, timeout=30) as response:
        return response.read()


def get_page(url):
    """Return the current page number shown on the page at *url*, as a string.

    The number is the first run of 1-4 digits enclosed in square brackets,
    e.g. ``[123]`` yields ``'123'``.

    Raises:
        ValueError: if the page contains no such marker.
    """
    html = url_open(url).decode('utf-8')
    match = re.search(r'\[(\d{1,4})\]', html)
    if match is None:
        raise ValueError('no "[page number]" marker found in ' + url)
    return match.group(1)


def find_imgs(url):
    """Return the list of ``.jpg`` image URLs referenced by the page at *url*.

    Addresses containing ``'lanya'`` are skipped. Protocol-relative addresses
    (``//host/path``) are turned into ``http:`` URLs; other addresses are
    returned unchanged. The resulting list is also printed.
    """
    html = url_open(url).decode('utf-8')
    img_addrs = []
    for src in IMG_SRC_PATTERN.findall(html):
        if 'lanya' in src:
            continue
        if src.startswith('//'):
            src = 'http:' + src
        img_addrs.append(src)
    print(img_addrs)
    return img_addrs


def download_mm(folder='ooxx', pages=4, star=0):
    """Download the images of *pages* consecutive pages into *folder*.

    Args:
        folder: target directory, created when missing.
        pages: number of pages to walk through.
        star: page number to count down from; ``0`` means "ask the site for
            its current page number".

    The counter is decremented before each fetch, so the first page fetched
    is ``star - 1`` (or one below the site's current page).
    """
    os.makedirs(folder, exist_ok=True)
    url = 'http://jandan.net/ooxx/'  # "ooxx" picture board
    # url = 'http://jandan.net/pic/'  # alternative: "pic" board
    page_num = star if star != 0 else int(get_page(url))
    for _ in range(pages):
        page_num -= 1
        if page_num < 1:
            # Ran out of pages before reaching the requested count.
            break
        page_url = url + 'page-' + str(page_num) + '#comments'
        img_addrs = find_imgs(page_url)
        save_imgs(folder, img_addrs)
        print(page_url)


if __name__ == '__main__':
    download_mm()
标签:down dir dal 入门 mozilla white range src file
原文地址:http://www.cnblogs.com/tcheng/p/6886387.html