吴裕雄 python 爬虫（1）

时间：2018-12-20 17:02:43 阅读：209 评论：0 收藏：0 [点我收藏+]

标签：class findall none 爬虫概念 not mat img int

from urllib.parse import urlparse

# Split a URL into its components with urlparse() and print each one.
# The URL literal must use plain ASCII quotes; typographic quotes are a
# SyntaxError in Python.
url = "http://www.pm25x.com/city/beijing.htm"
o = urlparse(url)
print(o)  # the whole parse result

print("scheme={}".format(o.scheme))  # http
print("netloc={}".format(o.netloc))  # www.pm25x.com
print("port={}".format(o.port))      # None (the URL carries no explicit port)
print("path={}".format(o.path))      # /city/beijing.htm
print("query={}".format(o.query))    # empty string (the URL has no "?..." part)

技术分享图片

import requests

# Download a page and print its body decoded as GBK.
url = "http://www.wsbookshow.com/"
# requests applies no timeout by default, so an unresponsive server would
# block forever; 10 seconds is an arbitrary upper bound for this demo.
html = requests.get(url, timeout=10)
# Override the response encoding before reading .text so it is decoded as GBK.
html.encoding = "GBK"
print(html.text)

技术分享图片

import requests

# Download a page and count how many of its lines contain a keyword.
url = "http://www.wsbookshow.com/"
# requests applies no timeout by default; 10 seconds is an arbitrary upper
# bound so the script cannot hang on an unresponsive server.
html = requests.get(url, timeout=10)
# Override the response encoding before reading .text so it is decoded as GBK.
html.encoding = "gbk"

htmllist = html.text.splitlines()
# Counts matching LINES, not occurrences: a line that contains the keyword
# several times still adds only 1.
n = sum(1 for row in htmllist if "新概念" in row)
print("找到 {} 次!".format(n))

技术分享图片

import re

# Compile the pattern once, then try to match it at the START of the string.
pat = re.compile(r"[a-z]+")

m = pat.match("tem12po")
print(m)  # a match object covering "tem"

# match() returns None when the pattern does not match; None is a singleton,
# so test it by identity rather than with ==.
if m is not None:
    print(m.group())  # tem
    print(m.start())  # 0
    print(m.end())    # 3
    print(m.span())   # (0, 3)

技术分享图片

import re

# Same match as with a compiled pattern, but using the module-level
# re.match() helper: the pattern is passed directly alongside the string.
m = re.match(r"[a-z]+", "tem12po")
print(m)  # a match object covering "tem"

# re.match() returns None on failure; test for it by identity.
if m is not None:
    print(m.group())  # tem
    print(m.start())  # 0
    print(m.end())    # 3
    print(m.span())   # (0, 3)

技术分享图片

import re

pat = re.compile(r"[a-z]+")

# Unlike match(), search() scans the whole string, so the leading "3" is
# skipped and the first run of lowercase letters is found.
m = pat.search("3tem12po")
print(m)  # match object with span=(1, 4), match='tem'

# search() returns None when nothing matches; test for it by identity.
if m is not None:
    print(m.group())  # tem
    print(m.start())  # 1
    print(m.end())    # 4
    print(m.span())   # (1, 4)

技术分享图片

import re

pat = re.compile(r"[a-z]+")

# findall() returns every non-overlapping match as a list of strings
# (not match objects); the digits "12" split the input into two runs.
m = pat.findall("tem12po")
print(m)  # ['tem', 'po']

技术分享图片

import re

import requests

# Download a page and print every substring that looks like an e-mail address.
# Raw string: in a normal literal "\." is an invalid escape sequence and
# triggers a warning; the compiled pattern itself is unchanged.
regex = re.compile(r"[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+")
url = "http://www.wsbookshow.com/"
# requests applies no timeout by default; 10 seconds is an arbitrary upper
# bound so the script cannot hang on an unresponsive server.
html = requests.get(url, timeout=10)
# findall() yields the matches in page order; duplicates are not removed.
emails = regex.findall(html.text)
for email in emails:
    print(email)

技术分享图片

吴裕雄 python 爬虫（1）

标签：class findall none 爬虫概念 not mat img int

原文地址：https://www.cnblogs.com/tszr/p/10149919.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)

周排行