码迷,mamicode.com
首页 > 编程语言 > 详细

JsoupXpathTest.java

时间:2017-06-02 12:38:32      阅读:293      评论:0      收藏:0      [点我收藏+]

标签:new   lis   支持   null   ref   span   注册   []   http   

import cn.wanghaomiao.xpath.exception.NoSuchAxisException;
import cn.wanghaomiao.xpath.exception.XpathSyntaxErrorException;
import cn.wanghaomiao.xpath.model.JXDocument;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

import java.io.File;
import java.io.IOException;
import java.util.List;

/**
 * Command-line demo that evaluates an XPath expression against an HTML document
 * using jsoup and JsoupXpath ({@link JXDocument}) and prints every match.
 *
 * <p>Usage: {@code JsoupXpathTest [htmlFile [xpath]]}. Both arguments are optional;
 * when omitted, the built-in default file path and XPath expression are used.
 *
 * <p>Created by Administrator on 2017/5/27.
 */

public class JsoupXpathTest {
    // Sample expressions collected by the original author, using http://www.cnblogs.com/ as the example page:
    //   "//a/@href";
    //   "//div[@id='paging_block']/div/a[text()='Next >']/@href";
    //   "//div[@id='paging_block']/div/a[text()*='Next']/@href";
    //   "//h1/text()";
    //   "//h1/allText()";
    //   "//h1//text()";
    //   "//div/a";
    //   "//div[@id='post_list']/div[position()<3]/div/h3/allText()";
    //   "//div[@id='post_list']/div[first()]/div/h3/allText()";
    //   "//div[@id='post_list']/div[1]/div/h3/allText()";
    //   "//div[@id='post_list']/div[last()]/div/h3/allText()";
    // Find entries with more than 1000 comments (only to demonstrate a complex XPath;
    // predicates can be nested in various ways, which makes for more thorough testing):
    //   "//div[@id='post_list']/div[./div/div/span[@class='article_view']/a/num()>1000]/div/h3/allText()";
    // Axis support:
    //   "//div[@id='post_list']/div[self::div/div/div/span[@class='article_view']/a/num()>1000]/div/h3/allText()";
    //   "//div[@id='post_list']/div[2]/div/p/preceding-sibling::h3/allText()";
    //   "//div[@id='post_list']/div[2]/div/p/preceding-sibling::h3/allText()|//div[@id='post_list']/div[1]/div/h3/allText()";

    /** Local HTML file parsed when no path is supplied on the command line. */
    private static final String DEFAULT_HTML_PATH = "D:\\Test\\It桔子\\2.html";

    /**
     * XPath evaluated when none is supplied on the command line. String literals inside the
     * expression use plain ASCII apostrophes; typographic quotes are not valid XPath delimiters.
     */
    private static final String DEFAULT_XPATH =
            "//div/span[text()='获投信息']/parent::*/following-sibling::*[1]/div[1]/div[2]/table[1]/tr[position()>=1]";

    // Other expressions the original author experimented with:
    //   "//div[text()='工商注册']/text()"
    //   "//div[@id='post_list']"
    //   "//span[@class='details_1221_d05_d02_s01']/text()|//span[@class='details_1221_d05_d02_s02']/text()"
    //   "//span[@class='details_1221_d05_d02_s01']|//span[@class='details_1221_d05_d02_s02']"
    //   "//div[@id='post_list']/div[./div/div/span[@class='article_view']/a/num()>1000]/div/h3/allText()"

    /**
     * Parses an HTML file as UTF-8, evaluates an XPath expression against it and prints the matches.
     * For each match that is an {@link Element}, its sibling index is printed on the line before
     * the match itself.
     *
     * @param args optional overrides: {@code args[0]} is the HTML file path,
     *             {@code args[1]} is the XPath expression
     * @throws IOException if the HTML file cannot be read
     */
    public static void main(String[] args) throws IOException {
        String htmlPath = (args != null && args.length > 0) ? args[0] : DEFAULT_HTML_PATH;
        String xpath = (args != null && args.length > 1) ? args[1] : DEFAULT_XPATH;

        // Local data source. To query a live page instead, build the document with
        // Jsoup.connect("http://www.cnblogs.com/").get() and pass it to JXDocument the same way.
        Document document = Jsoup.parse(new File(htmlPath), "UTF-8");
        JXDocument jxDocument = new JXDocument(document);

        List<Object> matches;
        try {
            matches = jxDocument.sel(xpath);
        } catch (XpathSyntaxErrorException e) {
            // Stop here: there is no result list to iterate, and continuing would
            // dereference null in the loop below.
            System.err.println("Invalid XPath expression: " + xpath);
            e.printStackTrace();
            return;
        }
        if (matches == null) {
            // Defensive guard in case the library returns null instead of an empty list.
            return;
        }

        for (Object match : matches) {
            if (match instanceof Element) {
                int index = ((Element) match).siblingIndex();
                System.out.println(index);
            }
            System.out.println(match.toString());
        }
    }
}

  

JsoupXpathTest.java

标签:new   lis   支持   null   ref   span   注册   []   http   

原文地址:http://www.cnblogs.com/tangyongathuse/p/6932631.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!