标签:new lis 支持 null ref span 注册 [] http
import cn.wanghaomiao.xpath.exception.NoSuchAxisException;
import cn.wanghaomiao.xpath.exception.XpathSyntaxErrorException;
import cn.wanghaomiao.xpath.model.JXDocument;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import java.io.File;
import java.io.IOException;
import java.util.List;
/**
 * Small demo that evaluates an XPath expression against an HTML document using
 * JsoupXpath ({@link JXDocument}) and prints every match to standard output.
 *
 * Created by Administrator on 2017/5/27.
 */
public class JsoupXpathTest {

    // Example expressions, written against http://www.cnblogs.com/ :
    //   "//a/@href"
    //   "//div[@id='paging_block']/div/a[text()='Next >']/@href"
    //   "//div[@id='paging_block']/div/a[text()*='Next']/@href"
    //   "//h1/text()"
    //   "//h1/allText()"
    //   "//h1//text()"
    //   "//div/a"
    //   "//div[@id='post_list']/div[position()<3]/div/h3/allText()"
    //   "//div[@id='post_list']/div[first()]/div/h3/allText()"
    //   "//div[@id='post_list']/div[1]/div/h3/allText()"
    //   "//div[@id='post_list']/div[last()]/div/h3/allText()"
    // Entries with more than 1000 comments (only to demonstrate a complex XPath;
    // predicates can be nested freely, which makes for a more thorough test):
    //   "//div[@id='post_list']/div[./div/div/span[@class='article_view']/a/num()>1000]/div/h3/allText()"
    // Axis support:
    //   "//div[@id='post_list']/div[self::div/div/div/span[@class='article_view']/a/num()>1000]/div/h3/allText()"
    //   "//div[@id='post_list']/div[2]/div/p/preceding-sibling::h3/allText()"
    //   "//div[@id='post_list']/div[2]/div/p/preceding-sibling::h3/allText()|//div[@id='post_list']/div[1]/div/h3/allText()"

    /** Local HTML file parsed when no path is given on the command line. */
    private static final String DEFAULT_HTML_PATH = "D:\\Test\\It桔子\\2.html";

    /**
     * Expression evaluated when none is given on the command line.
     *
     * NOTE(review): string values are quoted with plain ASCII apostrophes. The
     * typographic U+2018 quotes this literal used to carry look like a
     * copy/paste artifact; confirm against the XPath dialect JsoupXpath accepts.
     *
     * Other expressions that were tried with this demo:
     *   "//div[text()='工商注册']/text()"
     *   "//div[@id='post_list']"
     *   "//span[@class='details_1221_d05_d02_s01']/text()|//span[@class='details_1221_d05_d02_s02']/text()"
     *   "//span[@class='details_1221_d05_d02_s01']|//span[@class='details_1221_d05_d02_s02']"
     *   "//div[@id='post_list']/div[./div/div/span[@class='article_view']/a/num()>1000]/div/h3/allText()"
     */
    private static final String DEFAULT_XPATH =
            "//div/span[text()='获投信息']/parent::*/following-sibling::*[1]/div[1]/div[2]/table[1]/tr[position()>=1]";

    /**
     * Parses a local HTML file as UTF-8, runs an XPath expression over it and
     * prints the matches.
     *
     * @param args optional overrides: {@code args[0]} is the HTML file path,
     *             {@code args[1]} is the XPath expression; defaults are used
     *             for whichever is missing
     * @throws IOException if the HTML file cannot be read
     * @throws IllegalArgumentException if the XPath expression is rejected as
     *             syntactically invalid
     */
    public static void main(String[] args) throws IOException {
        String htmlPath = args.length > 0 ? args[0] : DEFAULT_HTML_PATH;
        String xpath = args.length > 1 ? args[1] : DEFAULT_XPATH;

        // Local data source. To query a live page instead, build the document
        // with Jsoup.connect("http://www.cnblogs.com/").get().
        // Another local sample that was used: "D:\\Test\\228.html".
        Document doc = Jsoup.parse(new File(htmlPath), "UTF-8");
        JXDocument jxDocument = new JXDocument(doc);

        List<Object> matches;
        try {
            matches = jxDocument.sel(xpath);
        } catch (XpathSyntaxErrorException e) {
            // Fail with the real cause instead of continuing with a null result
            // list, which previously surfaced as a NullPointerException below.
            throw new IllegalArgumentException("Invalid XPath expression: " + xpath, e);
        }

        printMatches(matches);
    }

    /**
     * Prints each match; for {@link Element} matches the sibling index is
     * printed on its own line first.
     */
    private static void printMatches(List<Object> matches) {
        // Defensive: not visible here whether sel() can return null.
        if (matches == null) {
            return;
        }
        for (Object match : matches) {
            if (match instanceof Element) {
                System.out.println(((Element) match).siblingIndex());
            }
            System.out.println(match);
        }
    }
}
标签:new lis 支持 null ref span 注册 [] http
原文地址:http://www.cnblogs.com/tangyongathuse/p/6932631.html