标签:
# Three ways to build a PyQuery document: from a markup string,
# from a local file (filename=...), and from a URL (url=...).
from pyquery import PyQuery as pq

doc_1 = pq("<html><head><body><h1>Hello</h1></body></head></html>")
# NOTE(review): "file_path" looks like a placeholder -- substitute a real path.
doc_2 = pq(filename="file_path")
doc_3 = pq(url="http://www.baidu.com")
# Select elements by calling the document with a CSS selector.
from pyquery import PyQuery as pq

doc_1 = pq("<html><head><body><h1>Hello</h1></body></head></html>")

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(doc_1('head'))  # <head><body><h1>Hello</h1></body></head>
print(doc_1('h1'))  # <h1>Hello</h1>
# Read a selection's inner markup with .html() and its text with .text().
from pyquery import PyQuery as pq

doc_1 = pq("<html><head><body><h1>Hello</h1></body></head></html>")

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(doc_1('head').html())  # <body><h1>Hello</h1></body>
print(doc_1('head').text())  # Hello
# Pick one element out of several matches by index with .eq(n).
from pyquery import PyQuery as pq

doc_1 = pq("<html><head><body><p>test1</p><p>test2</p></body></head></html>")

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(doc_1('p').eq(0).html())  # test1
print(doc_1('p').eq(1).html())  # test2
# Select by class ('.name') and by id ('#name').
from pyquery import PyQuery as pq

doc_1 = pq("<html><head><body><div class='div1'><p id='2'>test1</p></div></body></head></html>")

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(doc_1('.div1').html())  # <p id="2">test1</p>
print(doc_1('#2').html())  # test1
# Read an attribute value with .attr(name).
from pyquery import PyQuery as pq

doc_1 = pq("<html><head><body><div class='div1'><a href='http://abc.com'>test1</a></div></body></head></html>")

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(doc_1('div').attr('class'))  # div1
print(doc_1('a').attr('href'))  # http://abc.com
# Change an attribute value with .attr(name, value).
from pyquery import PyQuery as pq

doc_1 = pq("<html><head><body><div class='div1'><a href='http://abc.com'>test1</a></div></body></head></html>")

# Sets the href attribute to the Google URL. The two-argument form returns
# the selection itself, so printing it shows the updated element
# (expected: <a href="http://www.google.com">test1</a>).
print(doc_1('a').attr('href', 'http://www.google.com'))
# Search the descendants of a selection with .find(selector).
from pyquery import PyQuery as pq

# The original markup left <div> unclosed; the closing </div> is restored so
# the document is well-formed like the other examples.
doc_1 = pq("<html><head><body><div class='div1'><a href='http://abc.com'>test1</a></div></body></head></html>")

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(doc_1('div').find('a'))  # <a href="http://abc.com">test1</a>
# List the direct children of a selection, optionally filtered by a selector.
from pyquery import PyQuery as pq

doc_1 = pq("<html><head><body><div class='div1'><p id='1'>test1</p><a href='http://abc.com'>test2</a></div></body></head></html>")

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(doc_1('div').children())  # <p id="1">test1</p><a href="http://abc.com">test2</a>
print(doc_1('div').children('a'))  # <a href="http://abc.com">test2</a>
# Walk up to the ancestors of a selection, optionally filtered by a selector.
from pyquery import PyQuery as pq

doc_1 = pq("<html><head><body><div class='div1'><p id='1'>test1</p><a href='http://abc.com'>test2</a></div></body></head></html>")

# Single-argument print(...) behaves the same on Python 2 and Python 3.
# Without a selector every ancestor of <a> is returned, so the output
# contains all of the HTML content.
print(doc_1('a').parents())
print(doc_1('a').parents('div'))  # <div class="div1"><p id="1">test1</p><a href="http://abc.com">test2</a></div>
# Add a CSS class to the selected element with .addClass(name).
from pyquery import PyQuery as pq

doc_1 = pq("<html><head><body><div><p id='1'>test1</p><a href='http://abc.com'>test2</a></div></body></head></html>")

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(doc_1('div').addClass('div1'))  # <div class="div1"><p id="1">test1</p><a href="http://abc.com">test2</a></div>
# Test for a CSS class with .hasClass(name), before and after adding it.
from pyquery import PyQuery as pq

doc_1 = pq("<html><head><body><div><p id='1'>test1</p><a href='http://abc.com'>test2</a></div></body></head></html>")

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(doc_1('div').hasClass('div1'))  # False
print(doc_1('div').addClass('div1'))  # <div class="div1"><p id="1">test1</p><a href="http://abc.com">test2</a></div>
# The class was added on the line above, so the check now succeeds
# (the original note incorrectly said False here).
print(doc_1('div').hasClass('div1'))  # True
标签:
原文地址:http://blog.csdn.net/shuaijiasanshao/article/details/51350505