标签:att font html cti strong nta htm container hello
CSS选择器
1、初始化
from pyquery import PyQuery as pq

## Sample markup used by the string-initialization example.
html = '''
<div>
<ul>
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0 "><a href="link5.html">fifth item</a></li>
</ul>
</div>
'''

## Initialize from a string of HTML.
doc = pq(html)
print(doc('li'))

## Initialize from a URL (the page is fetched when the object is built).
doc = pq(url='https://cuiqingcai.com')
print(doc('title'))

## Initialize from a local file.
doc = pq(filename='test.html')
print(doc('li'))
2、CSS选择器
## CSS selectors
from pyquery import PyQuery as pq

## NOTE(review): relies on an `html` string defined elsewhere in the file; the
## selector below only matches if that markup has an element with
## id="container" containing a class="list" element -- confirm which sample
## document is in scope here.
doc = pq(html)

## In a selector, "#" targets an id and "." targets a class.
print(doc('#container .list li'))
3、查找节点
from pyquery import PyQuery as pq

## Sample markup: a #container div wrapping a .list of five <li> items.
html = '''
<div id="container">
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0 "><a href="link5.html">fifth item</a></li>
</ul>
</div>
'''

## Finding nodes
doc = pq(html)
items = doc('.list')
print(items)

## find() searches all descendant nodes.
lis = items.find('a')
print(lis)

## children() searches direct child nodes only.
lis = items.children('.active')
print(lis)

## parent() returns the direct parent node.
container = items.parent()
print(container)

## parents() returns the ancestor nodes.
ancestor = items.parents()
print(ancestor)

## siblings() returns sibling nodes, optionally filtered by a selector.
li = doc('.item-0.active')
print(li.siblings('.active'))
4、遍历
from pyquery import PyQuery as pq

## `html` is the sample document string defined earlier in the file.
doc = pq(html)

## items() returns a generator; walk it with a for-in loop to handle each
## matched node individually.
lis = doc('li').items()
for li in lis:
    print(li)
5、获取内容
from pyquery import PyQuery as pq

## `html` is the sample document string defined earlier in the file.
doc = pq(html)
a = doc('.item-0 a')
print(a)

## Read the href attribute of the <a> node.
## attr() only returns the value for the first matched node.
print(a.attr('href'))
print(a.attr.href)

## Iterate over the matches to collect every attribute value.
for item in a.items():
    print(item.attr('href'))

## text() returns text only; the text of multiple nodes is joined with spaces.
print(a.text())  # third item fifth item

## html() returns the inner HTML (child nodes included) of the first <a> node.
print(a.html())  # <span class="bold">third item</span>
6、节点操作
## Node manipulation: remove()
from pyquery import PyQuery as pq

html = '''
<div class="wrap">
Hello World
<p>呱呱呱</p>
</div>
'''

## Goal: obtain only the "Hello World" text of the wrapper.
doc = pq(html)
wrap = doc('.wrap')

## Drop the <p> node first so its text is not included in wrap.text().
wrap.find('p').remove()
print(wrap.text())
7、伪类选择器
from pyquery import PyQuery as pq

## Sample markup: a #container div wrapping a .list of five <li> items.
html = '''
<div id="container">
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0 "><a href="link5.html">fifth item</a></li>
</ul>
</div>
'''

doc = pq(html)

## The first <li> node.
li = doc('li:first-child')
print(li)

## The last <li> node.
li = doc('li:last-child')
print(li)

## The second <li> node.
li = doc('li:nth-child(2)')
print(li)

## <li> nodes whose text contains "second".
li = doc('li:contains(second)')
print(li)
标签:att font html cti strong nta htm container hello
原文地址:https://www.cnblogs.com/motoharu/p/12557447.html