# Web-scraper study notes (personal reference — layout was rough in the original).
# Reformatted from a blog paste: removed the embedded line numbers, replaced the
# smart quotes (‘…‘) with real string delimiters, and fixed the bugs noted below.

import requests
from bs4 import BeautifulSoup  # note the capital S: BeautifulSoup, not Beautifulsoup

# --- Fetching a page ---------------------------------------------------------
res = requests.get('http://...')  # placeholder URL; note this is a GET request
res.encoding = 'utf-8'            # prevent garbled Chinese text (mojibake)
print(res.text)                   # FIX: .text is a property, not a method — no ()
# print(type(res))

# --- Parsing sample HTML -----------------------------------------------------
html_sample = '...'  # placeholder HTML snippet
soup = BeautifulSoup(html_sample, 'html.parser')  # specify the parser explicitly
print(soup.text)

# Use select() to find all <h1> elements
header = soup.select('h1')
print(header)           # [<h1 id="title">Hello World</h1>]
print(header[0])        # <h1 id="title">Hello World</h1>
print(header[0].text)   # Hello World

# Use select() to find all <a> elements
alink = soup.select('a')
print(alink)  # [<a class="link" href="#">This is link1</a>, <a class="link" href="#">This is link2</a>]
for link in alink:
    print(link.text)  # This is link1 / This is link2

# Use select() to find elements by id (prefix the id with '#')
alink = soup.select('#title')
print(alink)  # [<h1 id="title">Hello World</h1>]

# Use select() to find elements by class (prefix the class with '.')
for link in soup.select('.link'):
    print(link)  # <a class="link" href="#">This is link1</a> ...

# Read the href attribute of every <a> tag — tags expose attributes like a dict
alinks = soup.select('a')
for link in alinks:
    print(link)
    print(link['href'])  # FIX: the attribute is spelled 'href', not 'herf'

# --- Attributes behave like a dictionary -------------------------------------
a = '<a href="#" qoo=123 abc=456> This is a link</a>'
soup = BeautifulSoup(a, 'html.parser')
print(soup.select('a'))            # [<a abc="456" href="#" qoo="123"> This is a link</a>]
print(soup.select('a')[0])         # <a abc="456" href="#" qoo="123"> This is a link</a>
print(soup.select('a')[0]['abc'])  # 456