标签:new lis 支持 null ref span 注册 [] http
import cn.wanghaomiao.xpath.exception.NoSuchAxisException;
import cn.wanghaomiao.xpath.exception.XpathSyntaxErrorException;
import cn.wanghaomiao.xpath.model.JXDocument;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import java.io.File;
import java.io.IOException;
import java.util.List;
/**
 * Manual demo that evaluates an XPath expression against an HTML document
 * using JsoupXpath ({@link JXDocument}) on top of jsoup, and prints every
 * result to standard output.
 *
 * <p>Created by Administrator on 2017/5/27.
 */
public class JsoupXpathTest {

    // Example expressions (written against http://www.cnblogs.com/):
    //   "//a/@href"
    //   "//div[@id='paging_block']/div/a[text()='Next >']/@href"
    //   "//div[@id='paging_block']/div/a[text()*='Next']/@href"
    //   "//h1/text()"
    //   "//h1/allText()"
    //   "//h1//text()"
    //   "//div/a"
    //   "//div[@id='post_list']/div[position()<3]/div/h3/allText()"
    //   "//div[@id='post_list']/div[first()]/div/h3/allText()"
    //   "//div[@id='post_list']/div[1]/div/h3/allText()"
    //   "//div[@id='post_list']/div[last()]/div/h3/allText()"
    //
    // Find entries with more than 1000 comments (purely to demonstrate a
    // complex XPath; predicates can be nested arbitrarily, which makes for
    // more thorough testing):
    //   "//div[@id='post_list']/div[./div/div/span[@class='article_view']/a/num()>1000]/div/h3/allText()"
    //
    // Axis support:
    //   "//div[@id='post_list']/div[self::div/div/div/span[@class='article_view']/a/num()>1000]/div/h3/allText()"
    //   "//div[@id='post_list']/div[2]/div/p/preceding-sibling::h3/allText()"
    //   "//div[@id='post_list']/div[2]/div/p/preceding-sibling::h3/allText()|//div[@id='post_list']/div[1]/div/h3/allText()"

    /**
     * Parses a local HTML file, runs the XPath expression against it and
     * prints each result. For results that are jsoup {@link Element}s the
     * element's sibling index is printed first.
     *
     * @param args unused
     * @throws IOException if the local HTML file cannot be read or parsed
     */
    public static void main(String[] args) throws IOException {
        // Alternative expressions kept for manual experimentation:
        //   "//div[text()='工商注册']/text()"
        //   "//div[@id='post_list']"
        //   "//span[@class='details_1221_d05_d02_s01']/text()|//span[@class='details_1221_d05_d02_s02']/text()"
        //   "//span[@class='details_1221_d05_d02_s01']|//span[@class='details_1221_d05_d02_s02']"
        //   "//div[@id='post_list']/div[./div/div/span[@class='article_view']/a/num()>1000]/div/h3/allText()"
        //
        // String literals inside an XPath must be delimited by ASCII quotes
        // (' or "); typographic quotes are not valid delimiters.
        String xpath = "//div/span[text()='获投信息']/parent::*/following-sibling::*[1]/div[1]/div[2]/table[1]/tr[position()>=1]";

        // From a URL data source (disabled):
        /*Document doc = Jsoup.connect("http://www.cnblogs.com/").get();
        JXDocument jxDocument = new JXDocument(doc);
        List<Object> rs = null;
        try {
            rs = jxDocument.sel(xpath);
        } catch (XpathSyntaxErrorException e) {
            e.printStackTrace();
        }
        for (Object o:rs){
            if (o instanceof Element){
                int index = ((Element) o).siblingIndex();
                System.out.println(index);
            }
            System.out.println("\n"+o.toString()+"\n");
        }*/

        // From a local data source.
        // Alternative input: new File("D:\\Test\\228.html")
        Document doc1 = Jsoup.parse(new File("D:\\Test\\It桔子\\2.html"), "UTF-8");
        JXDocument jxDocument = new JXDocument(doc1);

        List<Object> rs1;
        try {
            rs1 = jxDocument.sel(xpath);
        } catch (XpathSyntaxErrorException e) {
            // Stop here: without a result list there is nothing to print, and
            // falling through would only raise a NullPointerException that
            // hides the real cause.
            System.err.println("Invalid XPath expression: " + xpath);
            e.printStackTrace();
            return;
        }

        for (Object o : rs1) {
            if (o instanceof Element) {
                // Position of this element within its parent's child-node list.
                int index = ((Element) o).siblingIndex();
                System.out.println(index);
            }
            System.out.println(o.toString());
        }
    }
}
标签:new lis 支持 null ref span 注册 [] http
原文地址:http://www.cnblogs.com/tangyongathuse/p/6932631.html