标签:java 网页抓取
常用的HTTP请求方式有GET、POST两种。这里使用GET方式,请求"http://baidu.com"。
引入 Apache 的 HttpClient 包,下载地址:http://hc.apache.org/httpcomponents-client-4.5.x/download.html
先贴代码
package Test;

import java.io.IOException;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

/**
 * Minimal page-fetch example: issues an HTTP GET with Apache HttpClient and
 * prints the returned HTML to standard output.
 */
public class Test1 {

    /**
     * Creates a default HTTP client, fetches {@code http://baidu.com} and prints
     * the page content.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // try-with-resources closes the client (and its connection manager)
        // even when the request fails.
        try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
            // GET request targeting Baidu.
            HttpGet hg = new HttpGet("http://baidu.com");
            try {
                // Execute the request and print the HTML that comes back.
                httpGetMethod(httpClient, hg);
            } finally {
                // Release the request's connection while the client is still open,
                // regardless of how the call above ended.
                hg.releaseConnection();
            }
        } catch (IOException e) {
            // Only reachable when closing the client itself fails.
            e.printStackTrace();
        }
    }

    /**
     * Executes the given GET request and prints the response body to standard output.
     *
     * <p>Protocol and I/O failures are not propagated; their stack traces are printed.
     * A response without a body is printed as an empty string.
     *
     * @param httpClient client used to execute the request
     * @param hg         the GET request to execute
     */
    public static void httpGetMethod(CloseableHttpClient httpClient, HttpGet hg) {
        // Closing the response releases the underlying connection on every path,
        // including a missing entity or a failure while reading the body.
        try (CloseableHttpResponse resp = httpClient.execute(hg)) {
            // Entity carrying the HTML payload; may be null when there is no body.
            HttpEntity entity = resp.getEntity();
            // "gbk" is passed as EntityUtils' default charset, i.e. the fallback used
            // when the response itself does not declare one.
            String entityString = entity == null ? "" : EntityUtils.toString(entity, "gbk");
            System.out.println("获取的网页内容:\n" + entityString);
        } catch (ClientProtocolException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
本文出自 “爱技术,爱生活” 博客,请务必保留此出处http://youmiao.blog.51cto.com/6833914/1727852
标签:java 网页抓取
原文地址:http://youmiao.blog.51cto.com/6833914/1727852