码迷,mamicode.com
首页 > Windows程序 > 详细

从0开始学爬虫10之urllib和requests库与github/api的交互

时间:2019-07-23 13:35:50      阅读:141      评论:0      收藏:0      [点我收藏+]

标签:else   use   reason   pre   ati   url   domain   交互   ssi   

urllib库的使用

# coding=utf-8
import urllib2
import urllib

# httpbin模拟的环境
URL_IP="http://10.11.0.215:8080"
URL_GET = "http://10.11.0.215:8080/get"


def use_simple_urllib2():
    """Fetch URL_IP with urllib2 and print the response headers and body."""
    response = urllib2.urlopen(URL_IP)
    try:
        print('>>>> Response Headers:')
        print(response.info())
        print('>>>>Response Body:')
        # read() returns the whole body in one call, which is what joining
        # every line from readlines() produced.
        print(response.read())
    finally:
        # Release the connection even if printing raises.
        response.close()


def use_params_urllib2():
    """Send a GET request with query parameters via urllib2 and print the result.

    Prints the encoded query string, then the response headers, status code
    and body returned for URL_GET.
    """
    # Build the query string.
    params = urllib.urlencode({'param1': 'hello', 'param2': 'world'})
    print('Request Params:')
    print(params)
    # Send the request. The query string is joined directly instead of being
    # %-formatted into the URL, so a literal '%' in URL_GET cannot break it.
    response = urllib2.urlopen('?'.join([URL_GET, params]))
    # Handle the response.
    try:
        print('>>>Response Headers:')
        print(response.info())
        print('>>>Status code')
        print(response.getcode())
        print('>>>Response Body')
        print(response.read())
    finally:
        # Release the connection even if printing raises.
        response.close()


if __name__ == '__main__':
    # Only the parameterised demo runs by default; uncomment the next two
    # lines to run the simple demo as well.
    # print('>>>Use simple urllib2')
    # use_simple_urllib2()
    print('>>>Use params urllib2')
    use_params_urllib2()

requests库的简单使用

# coding=utf-8

import requests

URL_IP="http://10.11.0.215:8080/ip"
URL_GET="http://10.11.0.215:8080/get"


def use_simple_requests():
    """GET URL_IP with requests and print its headers, status code and text."""
    resp = requests.get(URL_IP)
    # Each label is printed immediately before the attribute it describes;
    # attributes are looked up one at a time, in the listed order.
    sections = (
        (">>>Response Headers:", "headers"),
        (">>>Response Code:", "status_code"),
        ("Response Body:", "text"),
    )
    for label, attr in sections:
        print(label)
        print(getattr(resp, attr))


def use_params_requests():
    """GET URL_GET with query parameters via requests and print the result.

    Prints the response headers, the status code and reason phrase, and the
    body decoded as JSON.
    """
    # Send the same demo parameters as use_params_urllib2; previously this
    # function issued the request without any parameters despite its name.
    params = {"param1": "hello", "param2": "world"}
    response = requests.get(URL_GET, params=params)
    print(">>>Response Headers:")
    print(response.headers)
    print(">>>Response Code:")
    print(response.status_code)
    print(response.reason)
    print("Response Body:")
    # json() raises if the body is not valid JSON.
    print(response.json())


if __name__ == "__main__":
    # The simple demo is disabled; uncomment the next two lines to run it.
    # print("simple requests:")
    # use_simple_requests()
    print("params requests:")
    use_params_requests()

requests和github api的互动

# coding=utf-8
import json
import requests
from requests import exceptions


URL = "https://api.github.com"


def build_uri(endpoint):
    """Return the full API URL for *endpoint* by joining it onto URL.

    A leading '/' on *endpoint* is dropped so the result never contains a
    doubled slash after the host.
    """
    return '/'.join([URL, endpoint.lstrip('/')])


def better_print(json_str):
    """Return *json_str* re-serialised as JSON indented by four spaces."""
    parsed = json.loads(json_str)
    pretty = json.dumps(parsed, indent=4)
    return pretty


def request_method():
    """GET the user/public_emails endpoint with basic auth and pretty-print it."""
    # Alternative calls kept for reference:
    # response = requests.get(build_uri('users/reblue520'))
    # response = requests.get(build_uri('user/emails'), auth=('reblue520', 'reblue520'))
    # NOTE(review): credentials are hard-coded here; they should be supplied
    # from outside the source file.
    response = requests.get(build_uri('user/public_emails'), auth=('reblue520', 'reblue520'))
    print(better_print(response.text))


def params_request():
    """GET the users endpoint with a ``since`` query parameter.

    Prints the pretty-printed body, the request headers that were sent, and
    the final URL including the encoded query string.
    """
    response = requests.get(build_uri('users'), params={'since': 11})
    print(better_print(response.text))
    print(response.request.headers)
    print(response.url)


def json_request():
    """POST a JSON list of e-mail addresses to the user/emails endpoint.

    Prints the pretty-printed response body, the request headers and body
    that were sent, and the response status code.
    """
    # Alternative kept for reference: update the user profile (the e-mail
    # must already be verified).
    # response = requests.patch(build_uri('user'), auth=('reblue520', 'reblue520'), json={'name': 'hellojack2019', 'email': 'reblue520@163.com'})
    # NOTE(review): credentials are hard-coded here; they should be supplied
    # from outside the source file.
    response = requests.post(
        build_uri('user/emails'),
        auth=('reblue520', 'Reblue0225520'),
        json=['hellojack2019@163.com'],
    )
    print(better_print(response.text))
    print(response.request.headers)
    print(response.request.body)
    print(response.status_code)


def timeout_request():
    """Demonstrate API error handling for timeouts and HTTP error statuses.

    Requests user/emails with a 10 second timeout. A timeout or an HTTP error
    status is reported by printing the exception; otherwise the status code
    and body are printed.
    """
    try:
        response = requests.get(build_uri('user/emails'), timeout=10)
        # Turn an error status into an HTTPError so it is handled below.
        response.raise_for_status()
    except (exceptions.Timeout, exceptions.HTTPError) as e:
        # Print the exception itself: the ``message`` attribute is deprecated
        # and does not exist on Python 3 exceptions.
        print(e)
    else:
        print(response.status_code)
        print(response.text)


def hard_requests():
    """Build a request by hand, inspect it, then send it through a Session.

    Prints the prepared request's body and headers before sending, and the
    response status code, the headers actually sent, and the response text
    afterwards.
    """
    from requests import Request, Session

    headers = {'User-Agent': 'fake1.3.4'}
    # NOTE(review): credentials are hard-coded here; they should be supplied
    # from outside the source file.
    req = Request(
        'GET',
        build_uri('user/emails'),
        auth=('reblue520', 'Reblue0225520'),
        headers=headers,
    )
    prepped = req.prepare()
    print(prepped.body)
    print(prepped.headers)

    # The context manager closes the session's connections when done.
    with Session() as s:
        resp = s.send(prepped, timeout=5)
        print(resp.status_code)
        print(resp.request.headers)
        print(resp.text)


if __name__ == '__main__':
    # Uncomment whichever demo should run; only hard_requests runs by default.
    # request_method()
    # params_request()
    # json_request()
    # timeout_request()
    hard_requests()

 response响应的常用api

响应的基本API
In [1]: import requests                                                                                                                                                                                              

In [2]: response = requests.get("https://api.github.com")                                                                                                                                                            

In [3]: response.status_code                                                                                                                                                                                         
Out[3]: 200

In [4]: response.reason                                                                                                                                                                                              
Out[4]: 'OK'

In [5]: response.headers                                                                                                                                                                                             
Out[5]: {'Date': 'Sat, 20 Jul 2019 03:48:51 GMT', 'Content-Type': 'application/json; charset=utf-8', 'Transfer-Encoding': 'chunked', 'Server': 'GitHub.com', 'Status': '200 OK', 'X-RateLimit-Limit': '60', 'X-RateLimit-Remaining': '47', 'X-RateLimit-Reset': '1563598131', 'Cache-Control': 'public, max-age=60, s-maxage=60', 'Vary': 'Accept, Accept-Encoding', 'ETag': 'W/"7dc470913f1fe9bb6c7355b50a0737bc"', 'X-GitHub-Media-Type': 'github.v3; format=json', 'Access-Control-Expose-Headers': 'ETag, Link, Location, Retry-After, X-GitHub-OTP, X-RateLimit-Limit, X-RateLimit-Remaining, X-RateLimit-Reset, X-OAuth-Scopes, X-Accepted-OAuth-Scopes, X-Poll-Interval, X-GitHub-Media-Type', 'Access-Control-Allow-Origin': '*', 'Strict-Transport-Security': 'max-age=31536000; includeSubdomains; preload', 'X-Frame-Options': 'deny', 'X-Content-Type-Options': 'nosniff', 'X-XSS-Protection': '1; mode=block', 'Referrer-Policy': 'origin-when-cross-origin, strict-origin-when-cross-origin', 'Content-Security-Policy': "default-src 'none'", 'Content-Encoding': 'gzip', 'X-GitHub-Request-Id': '33D9:591B:9D084B:CF860E:5D328F23'}

In [6]: response.url                                                                                                                                                                                                 
Out[6]: 'https://api.github.com/'

In [7]: response.history                                                                                                                                                                                             
Out[7]: []

In [8]: response = requests.get("http://api.github.com")                                                                                                                                                             

In [9]: response.history                                                                                                                                                                                             
Out[9]: [<Response [301]>]

In [10]: response = requests.get("https://api.github.com")                                                                                                                                                           

In [11]: response.elapsed                                                                                                                                                                                            
Out[11]: datetime.timedelta(microseconds=459174)

In [12]: response.request                                                                                                                                                                                            
Out[12]: <PreparedRequest [GET]>

In [13]: response.request.headers                                                                                                                                                                                    
Out[13]: {'User-Agent': 'python-requests/2.22.0', 'Accept-Encoding': 'gzip, deflate', 'Accept': '*/*', 'Connection': 'keep-alive'}

In [14]: response.encoding                                                                                                                                                                                           
Out[14]: 'utf-8'

In [15]: response.raw.read(10)                                                                                                                                                                                       
Out[15]: b''

In [16]: response.content                                                                                                                                                                                            
Out[16]: b{"current_user_url":"https://api.github.com/user","current_user_authorizations_html_url":"https://github.com/settings/connections/applications{/client_id}","authorizations_url":"https://api.github.com/authorizations","code_search_url":"https://api.github.com/search/code?q={query}{&page,per_page,sort,order}","commit_search_url":"https://api.github.com/search/commits?q={query}{&page,per_page,sort,order}","emails_url":"https://api.github.com/user/emails","emojis_url":"https://api.github.com/emojis","events_url":"https://api.github.com/events","feeds_url":"https://api.github.com/feeds","followers_url":"https://api.github.com/user/followers","following_url":"https://api.github.com/user/following{/target}","gists_url":"https://api.github.com/gists{/gist_id}","hub_url":"https://api.github.com/hub","issue_search_url":"https://api.github.com/search/issues?q={query}{&page,per_page,sort,order}","issues_url":"https://api.github.com/issues","keys_url":"https://api.github.com/user/keys","notifications_url":"https://api.github.com/notifications","organization_repositories_url":"https://api.github.com/orgs/{org}/repos{?type,page,per_page,sort}","organization_url":"https://api.github.com/orgs/{org}","public_gists_url":"https://api.github.com/gists/public","rate_limit_url":"https://api.github.com/rate_limit","repository_url":"https://api.github.com/repos/{owner}/{repo}","repository_search_url":"https://api.github.com/search/repositories?q={query}{&page,per_page,sort,order}","current_user_repositories_url":"https://api.github.com/user/repos{?type,page,per_page,sort}","starred_url":"https://api.github.com/user/starred{/owner}{/repo}","starred_gists_url":"https://api.github.com/gists/starred","team_url":"https://api.github.com/teams","user_url":"https://api.github.com/users/{user}","user_organizations_url":"https://api.github.com/user/orgs","user_repositories_url":"https://api.github.com/users/{user}/repos{?type,page,per_page,sort}","user_search_url":"https://api.github.com/se
arch/users?q={query}{&page,per_page,sort,order}"}

In [17]: response.json()                                                                                                                                                                                             
Out[17]: 
{'current_user_url': 'https://api.github.com/user',
 'current_user_authorizations_html_url': 'https://github.com/settings/connections/applications{/client_id}',
 'authorizations_url': 'https://api.github.com/authorizations',
 'code_search_url': 'https://api.github.com/search/code?q={query}{&page,per_page,sort,order}',
 'commit_search_url': 'https://api.github.com/search/commits?q={query}{&page,per_page,sort,order}',
 'emails_url': 'https://api.github.com/user/emails',
 'emojis_url': 'https://api.github.com/emojis',
 'events_url': 'https://api.github.com/events',
 'feeds_url': 'https://api.github.com/feeds',
 'followers_url': 'https://api.github.com/user/followers',
 'following_url': 'https://api.github.com/user/following{/target}',
 'gists_url': 'https://api.github.com/gists{/gist_id}',
 'hub_url': 'https://api.github.com/hub',
 'issue_search_url': 'https://api.github.com/search/issues?q={query}{&page,per_page,sort,order}',
 'issues_url': 'https://api.github.com/issues',
 'keys_url': 'https://api.github.com/user/keys',
 'notifications_url': 'https://api.github.com/notifications',
 'organization_repositories_url': 'https://api.github.com/orgs/{org}/repos{?type,page,per_page,sort}',
 'organization_url': 'https://api.github.com/orgs/{org}',
 'public_gists_url': 'https://api.github.com/gists/public',
 'rate_limit_url': 'https://api.github.com/rate_limit',
 'repository_url': 'https://api.github.com/repos/{owner}/{repo}',
 'repository_search_url': 'https://api.github.com/search/repositories?q={query}{&page,per_page,sort,order}',
 'current_user_repositories_url': 'https://api.github.com/user/repos{?type,page,per_page,sort}',
 'starred_url': 'https://api.github.com/user/starred{/owner}{/repo}',
 'starred_gists_url': 'https://api.github.com/gists/starred',
 'team_url': 'https://api.github.com/teams',
 'user_url': 'https://api.github.com/users/{user}',
 'user_organizations_url': 'https://api.github.com/user/orgs',
 'user_repositories_url': 'https://api.github.com/users/{user}/repos{?type,page,per_page,sort}',
 'user_search_url': 'https://api.github.com/search/users?q={query}{&page,per_page,sort,order}'}

 

从0开始学爬虫10之urllib和requests库与github/api的交互

标签:else   use   reason   pre   ati   url   domain   交互   ssi   

原文地址:https://www.cnblogs.com/reblue520/p/11230814.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!