码迷,mamicode.com
首页 > Web开发 > 详细

Htmlunit使用

时间:2015-04-11 16:28:38      阅读:217      评论:0      收藏:0      [点我收藏+]

标签:

import com.gargoylesoftware.htmlunit.WebClient;import com.gargoylesoftware.htmlunit.html.HtmlPage;import com.gargoylesoftware.htmlunit.BrowserVersion;import com.gargoylesoftware.htmlunit.html.HtmlDivision;import com.gargoylesoftware.htmlunit.html.HtmlAnchor;import com.gargoylesoftware.htmlunit.*;import com.gargoylesoftware.htmlunit.WebClientOptions;import com.gargoylesoftware.htmlunit.html.HtmlInput;import com.gargoylesoftware.htmlunit.html.HtmlBody;import java.util.List;public class helloHtmlUnit{    public static void main(String[] args) throws Exception{
        String str;        //创建一个webclient
        WebClient webClient = new WebClient();        //htmlunit 对css和javascript的支持不好,所以请关闭之
        webClient.getOptions().setJavaScriptEnabled(false);
        webClient.getOptions().setCssEnabled(false);        //获取页面
        HtmlPage page = webClient.getPage("http://www.baidu.com/");        //获取页面的TITLE
        str = page.getTitleText();
        System.out.println(str);        //获取页面的XML代码
        str = page.asXml();
        System.out.println(str);        //获取页面的文本
        str = page.asText();
        System.out.println(str);        //关闭webclient        webClient.closeAllWindows();
    }
}

3.2 使用不同版本的浏览器打开

import com.gargoylesoftware.htmlunit.WebClient;import com.gargoylesoftware.htmlunit.html.HtmlPage;import com.gargoylesoftware.htmlunit.BrowserVersion;import com.gargoylesoftware.htmlunit.html.HtmlDivision;import com.gargoylesoftware.htmlunit.html.HtmlAnchor;import com.gargoylesoftware.htmlunit.*;import com.gargoylesoftware.htmlunit.WebClientOptions;import com.gargoylesoftware.htmlunit.html.HtmlInput;import com.gargoylesoftware.htmlunit.html.HtmlBody;import java.util.List;public class helloHtmlUnit{    public static void main(String[] args) throws Exception{
        String str;        //使用FireFox读取网页
        WebClient webClient = new WebClient(BrowserVersion.FIREFOX_24);        //htmlunit 对css和javascript的支持不好,所以请关闭之
        webClient.getOptions().setJavaScriptEnabled(false);
        webClient.getOptions().setCssEnabled(false);
        HtmlPage page = webClient.getPage("http://www.baidu.com/");
        str = page.getTitleText();
        System.out.println(str);        //关闭webclient        webClient.closeAllWindows();
    }
}

3.3 找到页面中特定的元素

public class helloHtmlUnit{    public static void main(String[] args) throws Exception{        //创建webclient
        WebClient webClient = new WebClient(BrowserVersion.CHROME);        //htmlunit 对css和javascript的支持不好,所以请关闭之
        webClient.getOptions().setJavaScriptEnabled(false);
        webClient.getOptions().setCssEnabled(false);
        HtmlPage page = (HtmlPage)webClient.getPage("http://www.baidu.com/");        //通过id获得"百度一下"按钮
        HtmlInput btn = (HtmlInput)page.getHtmlElementById("su");
        System.out.println(btn.getDefaultValue());        //关闭webclient        webClient.closeAllWindows();
    }
}

3.4 元素检索

public class helloHtmlUnit{    public static void main(String[] args) throws Exception{        //创建webclient
        WebClient webClient = new WebClient(BrowserVersion.CHROME);        //htmlunit 对css和javascript的支持不好,所以请关闭之
        webClient.getOptions().setJavaScriptEnabled(false);
        webClient.getOptions().setCssEnabled(false);
        HtmlPage page = (HtmlPage)webClient.getPage("http://www.baidu.com/");        //查找所有div
        List<?> hbList = page.getByXPath("//div");
        HtmlDivision hb = (HtmlDivision)hbList.get(0);
        System.out.println(hb.toString());        //查找并获取特定input
        List<?> inputList = page.getByXPath("//input[@id=‘su‘]");
        //List links = (List) page.getByXPath ("//*[@id=\"groups_tab\"]/div[1]/ul/li[1]/a"); 
        HtmlInput input = (HtmlInput)inputList.get(0);
        System.out.println(input.toString());        //关闭webclient        webClient.closeAllWindows();
    }
}

3.5 提交搜索

public class helloHtmlUnit{    public static void main(String[] args) throws Exception{        //创建webclient
        WebClient webClient = new WebClient(BrowserVersion.CHROME);        //htmlunit 对css和javascript的支持不好,所以请关闭之
        webClient.getOptions().setJavaScriptEnabled(false);
        webClient.getOptions().setCssEnabled(false);
        HtmlPage page = (HtmlPage)webClient.getPage("http://www.baidu.com/");        //获取搜索输入框并提交搜索内容
        HtmlInput input = (HtmlInput)page.getHtmlElementById("kw");
        System.out.println(input.toString());
        input.setValueAttribute("雅蠛蝶");
        System.out.println(input.toString());        //获取搜索按钮并点击
        HtmlInput btn = (HtmlInput)page.getHtmlElementById("su");
        HtmlPage page2 = btn.click();        //输出新页面的文本        System.out.println(page2.asText());
    }
}


Htmlunit使用

标签:

原文地址:http://my.oschina.net/u/852445/blog/398931

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!