码迷,mamicode.com
首页 > 编程语言 > 详细

爬取乌云上所有人民币和乌云符号的漏洞(python脚本)

时间:2015-11-24 20:25:34      阅读:262      评论:0      收藏:0      [点我收藏+]

标签:

  1 import httplib
  2 from HTMLParser import HTMLParser
  3 import urlparse
  4 import urllib
  5 from bs4 import BeautifulSoup
  6 import re
  7 from time import sleep
  8 ‘‘‘
  9 usage: 
 10 
 11 
 12 input your wooyun cookies,then just go crawl!!
 13 
 14 author: Elliott
 15 
 16 
 17 ‘‘‘
 18 
 19 
 20 
 21 
 22 domain = wooyun.org
 23 cookies = ‘‘  # !!!!here   input your wooyun cookies
 24 user_agent = Mozilla/5.0 (Windows NT 6.1; WOW64; rv:29.0) Gecko/20100101 Firefox/29.0
 25 
 26 
 27 
 28 def countnumber():  # the function to get page num
 29     global domain
 30     global cookies
 31     global user_agent
 32     conn = httplib.HTTPConnection(domain)
 33     conn.request(GET,/user.php?action=openbugs,‘‘,{Cookie:cookies,User-Agent:user_agent,Referer:http://wooyun.org/index.php,Host:wooyun.org})
 34     content = conn.getresponse()
 35     content = content.read()
 36     soup = BeautifulSoup(content)
 37     tag = soup.find_all(p,attrs={class:page})
 38     if len(tag) == 0:
 39         tag = None
 40     else:
 41         tag = str(tag[0])
 42     pattern = re.compile(>.*<a class=\"current\">)
 43     result = pattern.findall(tag)
 44     if len(result) == 0:
 45         result = None
 46     else:
 47         result = str(result[0])
 48     number = filter(str.isdigit, result)
 49     num = number[5:]  #get then total page number
 50     return int(num)
 51 
 52 
 53 def dealthepage(content):
 54     global domain
 55     global cookies
 56     global user_agent
 57     conn = httplib.HTTPConnection(domain)
 58     soup = BeautifulSoup(content)
 59     k = soup.find_all(a)
 60     item = k[27:47]
 61     pattern = re.compile(href=\"(.+?)\")
 62     hreaf = []
 63     for i in range(len(item)):
 64     ss = pattern.findall(str(item[i]))
 65         if len(ss) == 0:
 66             break
 67     hreaf.append(str(ss[0]))
 68     for i in hreaf:
 69         #sleep(0.5)
 70     conn.request(GET,i,‘‘,{Cookie:cookies,User-Agent:user_agent,Referer:http://wooyun.org/index.php,Host:wooyun.org})
 71     content2 = conn.getresponse()
 72     content2 = content2.read()
 73     soup2 = BeautifulSoup(content2)
 74     imgtag = soup2.find_all(class_=credit)
 75     ‘‘‘may be $ or cloud‘‘‘
 76     if len(imgtag) != 0:
 77         findcloud = re.compile(src=\"\/images\/credit\.png\")
 78         findmoney = re.compile(src=\"\/images\/m(.?)\.png\")
 79         cloudnum = findcloud.findall(content2)
 80         moneylevel = findmoney.findall(content2)
 81         cloud = 0
 82         money = 0
 83         if len(cloudnum) != 0:
 84             if len(cloudnum) == 1:
 85                 cloud = 1
 86             if len(cloudnum) == 2:
 87                 cloud = 2
 88             if len(cloudnum) == 3:
 89                 cloud = 3
 90         if len(moneylevel) != 0:
 91             if len(moneylevel) == 1:
 92                 money = 1
 93             if len(moneylevel) == 2:
 94                 money = 2
 95             if len(moneylevel) == 3:
 96                 money = 3
 97         title = soup2.findAll(attrs={"class":"wybug_title"})
 98         if len(title) == 0:
 99             title = No Title
100         else:
101             title = str(title[0])
102         deltag = re.compile(r<[^>]+>)
103         title = deltag.sub(‘‘,title)
104         author = soup2.findAll(attrs={"class":"wybug_author"})
105         if len(author) == 0:
106             author = No name
107         else:
108             author = str(author[0])
109         author = deltag.sub(‘‘,author)
110         date = soup2.findAll(attrs={"class":"wybug_date"})
111         if len(date) == 0:
112             date = No time
113         else:
114             date = str(date[0])
115         date = deltag.sub(‘‘,date)
116         link = "http://www.wooyun.org"+i
117         link = str(link)
118         f = open("test.html","a+")
119         s = "<tr><td>level:cloud="+str(cloud)+"money="+str(money)+"</td><th>"+date+"</th><td><a href=‘"+link+"‘>"+title+"</a></td><th>"+author+"</th></tr><br>"
120         f.write(s)
121         f.close
122 
123 
124 
125 
126 
127 
if __name__ == '__main__':
    num = countnumber()  # total number of listing pages
    # Fetch each listing page (pNO is 1-based) and process its bug links.
    for i in range(num):
        conn = httplib.HTTPConnection(domain)
        conn.request('GET', '/user.php?action=openbugs&pNO=' + str(i + 1), '',
                     {'Cookie': cookies,
                      'User-Agent': user_agent,
                      'Referer': 'http://wooyun.org/index.php',
                      'Host': 'wooyun.org'})
        content = conn.getresponse().read()
        dealthepage(content)
136 
137 
138 
139 
140 
141     

 

 

 

附章效果图:技术分享

爬取乌云上所有人民币和乌云符号的漏洞(python脚本)

标签:

原文地址:http://www.cnblogs.com/elliottc/p/4992700.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!