首先,源代码下载地址:http://down.51cto.com/data/2270088
然后,如果有问题,或者是积分不够,可以发送邮件到我的邮箱whsgzcy@foxmail.com,我可以把源程序分享给大家;如果对用google抓取接口不熟悉的话,也可以直接找我,我的手机号码是:15656098064,很高兴能和各位同行分享。如果是打电话要我上厕所,我可是要骂人的呦~。
最后,直接上代码。
package com.iwant.download2geodata;

import java.io.BufferedReader;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

import javax.naming.InitialContext;

import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;

import com.iwant.download2geodata.data.ShopInfo;
import com.iwant.download2geodata.data.ShopList;
import com.iwant.download2geodata.data.TemplateData;

import jxl.Workbook;
import jxl.write.Label;
import jxl.write.WritableSheet;
import jxl.write.WritableWorkbook;
import net.sf.json.JSONArray;
import net.sf.json.JSONException;
import net.sf.json.JSONObject;

/**
 * Crawls paged POI search results from the AMap web endpoint and exports the
 * collected shops to an Excel (.xls) file.
 *
 * <p>The request URL is assembled as {@code murl + pagernum + nurl}; only the
 * page number changes between requests. Pages are fetched until a request
 * fails, a page reports {@code count == 0}, or a page carries an empty list.
 * Whatever was collected up to that point is then written to
 * {@link #outputFile} in a single pass.
 *
 * @author whsgzcy
 */
public class GeoGetDataThread extends Thread {

    /** Name of the single sheet written by {@link #run()}. */
    private static final String SHEET_NAME = "sheet1";

    /** Header row of the exported sheet; column order matches {@link #fillShopRow}. */
    private static final String[] COLUMN_TITLES = {
        "店铺名称", "店主", "店铺简介", "店铺电话", "地址", "经度", "纬度",
        "101充电桩", "10有充电桩", "照片", "是否提供上门服务", "图片链接"
    };

    /** Page number of the next request; incremented after every fetched page. */
    public int pagernum = 1;

    /** URL part placed before the page number. */
    public String murl = "http://ditu.amap.com/service/poiInfo?query_type=TQUERY&pagesize=20&pagenum=";

    /** URL part placed after the page number. */
    public String nurl = "&qii=true&cluster_state=5&need_utd=true&utd_sceneid=1000&"
            + "div=PC1000&addr_poi_merge=true&is_classify=true&"
            + "city=321200&geoobj=119.848676%7C32.40107%7C119.93279%7C32.527222&keywords=%E7%94%B5%E5%8A%A8%E8%BD%A6";

    /** Destination of the exported workbook. */
    public String outputFile = "C:/Users/suzhe/Desktop/test2.xls";

    /**
     * Pause between two page requests, in milliseconds. The default of 0
     * disables throttling; set a positive value (for example 5000) to lower
     * the request rate.
     */
    public long pageDelayMillis = 0L;

    /** Workbook most recently created or loaded by this instance. */
    private HSSFWorkbook workbook = null;
    private ShopList shopList = new ShopList();
    private List<ShopInfo> shopInfoList = new ArrayList<ShopInfo>();

    /**
     * Fetches every result page, then writes all collected shops to
     * {@link #outputFile}. Nothing is written when no shop was collected.
     */
    @Override
    public void run() {
        fetchAllPages();
        shopList.setmShopInfoList(shopInfoList);
        if (shopInfoList.isEmpty()) {
            return;
        }
        writeShopsToExcel(outputFile, SHEET_NAME, COLUMN_TITLES, shopList.getmShopInfoList());
    }

    /**
     * Requests pages starting at {@link #pagernum} and appends the parsed
     * shops to {@code shopInfoList}. Stops on the first failed request, on a
     * page with {@code count == 0}, on an empty list, on malformed JSON, or
     * when the thread is interrupted while throttling.
     */
    private void fetchAllPages() {
        try {
            for (;; pagernum++) {
                String url = murl + pagernum + nurl;
                System.out.println(url);

                JSONObject page = HttpRequestUtil.getJsonObject(url);
                if (page == null) {
                    // getJsonObject signals any request/parse failure with null.
                    System.err.println("Request failed, stopping at page " + pagernum);
                    return;
                }

                int count = page.getInt("count");
                if (0 == count) {
                    return;
                }
                System.out.println("count = " + count);

                JSONArray dataArray = page.getJSONArray("data");
                JSONArray listArray = dataArray.getJSONObject(0).getJSONArray("list");
                if (listArray.size() == 0) {
                    // A page without entries means there is nothing left to fetch;
                    // without this guard the loop would never terminate.
                    return;
                }
                for (int i = 0; i < listArray.size(); i++) {
                    shopInfoList.add(parseShop(listArray.getJSONObject(i)));
                    System.out.println("数据读写中");
                }

                if (!pauseBeforeNextPage()) {
                    return;
                }
            }
        } catch (JSONException e) {
            // Unexpected response shape: keep what was collected so far.
            e.printStackTrace();
        }
    }

    /**
     * Sleeps for {@link #pageDelayMillis} when throttling is enabled.
     *
     * @return {@code false} if the thread was interrupted and crawling should stop
     */
    private boolean pauseBeforeNextPage() {
        if (pageDelayMillis <= 0) {
            return true;
        }
        try {
            Thread.sleep(pageDelayMillis);
            return true;
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            return false;
        }
    }

    /**
     * Converts one entry of the response's {@code list} array into a
     * {@link ShopInfo}. Missing keys are stored as empty strings so that one
     * incomplete entry does not abort the whole crawl.
     */
    private static ShopInfo parseShop(JSONObject line) {
        ShopInfo shopInfo = new ShopInfo();
        shopInfo.setName(stringOrEmpty(line, "name"));
        shopInfo.setTel(stringOrEmpty(line, "tel"));
        shopInfo.setAddress(stringOrEmpty(line, "address"));
        shopInfo.setLongitude(stringOrEmpty(line, "longitude"));
        shopInfo.setLatitude(stringOrEmpty(line, "latitude"));

        String picInfo = "";
        if (line.has("templateData")) {
            picInfo = stringOrEmpty(line.getJSONObject("templateData"), "pic_info");
        }
        TemplateData template = new TemplateData();
        template.setPic_info(picInfo);

        // Each shop owns its own list; the picture of a shop is element 0.
        List<TemplateData> templates = new ArrayList<TemplateData>();
        templates.add(template);
        shopInfo.setTemplateData(templates);
        return shopInfo;
    }

    /** Returns the string stored under {@code key}, or {@code ""} when the key is absent. */
    private static String stringOrEmpty(JSONObject object, String key) {
        return object.has(key) ? object.getString(key) : "";
    }

    /**
     * Builds a fresh workbook containing the header row plus one row per shop
     * and writes it to {@code fileDir} once.
     */
    private void writeShopsToExcel(String fileDir, String sheetName, String[] titles, List<ShopInfo> shops) {
        try {
            workbook = new HSSFWorkbook();
            Sheet sheet = workbook.createSheet(sheetName);
            writeHeader(sheet, titles);
            for (int i = 0; i < shops.size(); i++) {
                // Row 0 is the header, so shop i goes to row i + 1.
                fillShopRow(sheet.createRow(i + 1), shops.get(i), i + 1);
            }
            saveWorkbook(workbook, fileDir);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes the twelve export columns of one shop.
     *
     * @param row       target row
     * @param shop      shop to export
     * @param rowNumber 1-based position of the shop, used for the picture file name
     */
    private static void fillShopRow(Row row, ShopInfo shop, int rowNumber) {
        row.createCell(0).setCellValue(shop.getName());
        row.createCell(1).setCellValue("无");
        row.createCell(2).setCellValue("无");
        row.createCell(3).setCellValue(shop.getTel());
        row.createCell(4).setCellValue(shop.getAddress());
        row.createCell(5).setCellValue(shop.getLongitude());
        row.createCell(6).setCellValue(shop.getLatitude());
        row.createCell(7).setCellValue("0");
        row.createCell(8).setCellValue("0");

        String picInfo = firstPicInfo(shop);
        boolean hasPicture = picInfo.length() > 0;
        row.createCell(9).setCellValue(hasPicture ? "taizhou/" + rowNumber + ".jpg" : "");
        row.createCell(10).setCellValue("暂不提供上门服务");
        row.createCell(11).setCellValue(picInfo);
    }

    /** Returns the picture info of the shop's first template entry, never {@code null}. */
    private static String firstPicInfo(ShopInfo shop) {
        List<TemplateData> templates = shop.getTemplateData();
        if (templates == null || templates.isEmpty()) {
            return "";
        }
        String picInfo = templates.get(0).getPic_info();
        return picInfo == null ? "" : picInfo;
    }

    /**
     * Loads an existing workbook, appends one row holding the placeholder
     * values "111", "222" and "333" in the first three columns, and saves the
     * workbook back to the same file.
     *
     * <p>Nothing is written when the file cannot be read, or when the sheet or
     * its header row (row 0) does not exist.
     *
     * @param fileDir   path of the .xls file to update
     * @param sheetName name of the sheet to append to
     */
    public void writeToExcel(String fileDir, String sheetName) {
        HSSFWorkbook loaded;
        FileInputStream in = null;
        try {
            in = new FileInputStream(new File(fileDir));
            loaded = new HSSFWorkbook(in);
        } catch (IOException e) {
            // Do not continue with a missing or stale workbook.
            e.printStackTrace();
            return;
        } finally {
            closeQuietly(in);
        }
        workbook = loaded;

        HSSFSheet sheet = workbook.getSheet(sheetName);
        HSSFRow titleRow = (sheet == null) ? null : sheet.getRow(0);
        if (titleRow == null) {
            System.err.println("Sheet '" + sheetName + "' or its header row is missing in " + fileDir);
            return;
        }

        int rowCount = sheet.getLastRowNum() + 1; // index of the first free row
        int columnCount = titleRow.getLastCellNum();
        try {
            Row row = sheet.createRow(rowCount);
            if (columnCount > 0) {
                row.createCell(0).setCellValue("111");
                row.createCell(1).setCellValue("222");
                row.createCell(2).setCellValue("333");
            }
            saveWorkbook(workbook, fileDir);
        } catch (Exception e) {
            // Best effort, as before: report and return without throwing.
            e.printStackTrace();
        }
    }

    /**
     * Creates a new Excel file containing a single sheet with a header row.
     *
     * @param fileDir   path of the .xls file to create
     * @param sheetName name of the sheet to create
     * @param titleRow  header cells, written to row 0 in order
     */
    public void createExcel(String fileDir, String sheetName, String titleRow[]) {
        workbook = new HSSFWorkbook();
        // A workbook without any sheet produces a file that Excel refuses to open.
        Sheet sheet = workbook.createSheet(sheetName);
        try {
            writeHeader(sheet, titleRow);
            saveWorkbook(workbook, fileDir);
        } catch (Exception e) {
            // Best effort, as before: report and return without throwing.
            e.printStackTrace();
        }
    }

    /** Writes {@code titles} into row 0 of {@code sheet}, one cell per entry. */
    private static void writeHeader(Sheet sheet, String[] titles) {
        Row header = sheet.createRow(0);
        for (int i = 0; i < titles.length; i++) {
            header.createCell(i).setCellValue(titles[i]);
        }
    }

    /** Writes {@code book} to {@code fileDir}, always closing the stream. */
    private static void saveWorkbook(HSSFWorkbook book, String fileDir) throws IOException {
        FileOutputStream out = new FileOutputStream(fileDir);
        try {
            book.write(out);
        } finally {
            out.close();
        }
    }

    /** Closes {@code resource} if it is non-null, ignoring close failures. */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ignored) {
            // Nothing useful can be done when closing a read-side stream fails.
        }
    }

    /**
     * Prints a UTF-8 text file to standard output line by line.
     *
     * @param filePath path of the file to print
     */
    public static void readTxtFile(String filePath) {
        BufferedReader bufferedReader = null;
        try {
            String encoding = "UTF-8";
            File file = new File(filePath);
            if (!file.isFile()) {
                System.out.println("找不到指定的文件");
                return;
            }
            bufferedReader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(file), encoding));
            String lineTxt;
            while ((lineTxt = bufferedReader.readLine()) != null) {
                System.out.println(lineTxt);
            }
        } catch (Exception e) {
            System.out.println("读取文件内容出错");
            e.printStackTrace();
        } finally {
            closeQuietly(bufferedReader);
        }
    }

    public static void main(String[] args) {
        GeoGetDataThread dg = new GeoGetDataThread();
        dg.start();
    }
}
package com.iwant.download2geodata;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;

import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.params.HttpConnectionParams;
import org.apache.http.params.HttpParams;

import net.sf.json.JSONObject;

/**
 * Minimal HTTP helper for fetching JSON documents.
 *
 * @author whsgzcy
 */
public class HttpRequestUtil {

    /** Maximum time to wait for the TCP connection to be established. */
    private static final int CONNECT_TIMEOUT_MILLIS = 5000;

    /** Maximum time to wait for data once connected, so a stalled server cannot hang the caller. */
    private static final int READ_TIMEOUT_MILLIS = 15000;

    /**
     * Performs a plain GET request and parses the response body as a JSON object.
     *
     * @param url absolute URL to request
     * @return the parsed response, or {@code null} when the request or the
     *         parsing fails for any reason (the failure is printed to stderr)
     */
    public static JSONObject getJsonObject(String url) {
        HttpClient httpClient = new DefaultHttpClient();
        BufferedReader bufferedReader = null;
        try {
            HttpParams httpParams = httpClient.getParams();
            HttpConnectionParams.setConnectionTimeout(httpParams, CONNECT_TIMEOUT_MILLIS);
            HttpConnectionParams.setSoTimeout(httpParams, READ_TIMEOUT_MILLIS);

            HttpResponse response = httpClient.execute(new HttpGet(url));
            if (response.getEntity() == null) {
                System.err.println("Empty response body for " + url);
                return null;
            }

            bufferedReader = new BufferedReader(
                    new InputStreamReader(response.getEntity().getContent(), "utf-8"));
            StringBuilder builder = new StringBuilder();
            for (String s = bufferedReader.readLine(); s != null; s = bufferedReader.readLine()) {
                builder.append(s);
            }
            return JSONObject.fromObject(builder.toString());
        } catch (Exception e) {
            // Callers rely on null as the single failure signal.
            e.printStackTrace();
            return null;
        } finally {
            if (bufferedReader != null) {
                try {
                    bufferedReader.close();
                } catch (IOException ignored) {
                    // The body has been consumed or abandoned; nothing to recover.
                }
            }
            // Release the connection held by this one-shot client.
            httpClient.getConnectionManager().shutdown();
        }
    }
}
package com.iwant.download2geodata.data;

import java.io.Serializable;
import java.util.List;

/**
 * Mutable bean describing one shop: its name, phone number, address,
 * coordinates and attached template (picture) data.
 *
 * @author whsgzcy
 */
public class ShopInfo implements Serializable {

    private String name;
    private String tel;
    private String address;
    /** Longitude, kept as the raw string. */
    private String longitude;
    /** Latitude, kept as the raw string. */
    private String latitude;
    /** Picture-related template entries. */
    private List<TemplateData> templateData;

    /** @return the shop name */
    public String getName() {
        return this.name;
    }

    /** @param name the shop name */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the phone number */
    public String getTel() {
        return this.tel;
    }

    /** @param tel the phone number */
    public void setTel(String tel) {
        this.tel = tel;
    }

    /** @return the street address */
    public String getAddress() {
        return this.address;
    }

    /** @param address the street address */
    public void setAddress(String address) {
        this.address = address;
    }

    /** @return the longitude string */
    public String getLongitude() {
        return this.longitude;
    }

    /** @param longitude the longitude string */
    public void setLongitude(String longitude) {
        this.longitude = longitude;
    }

    /** @return the latitude string */
    public String getLatitude() {
        return this.latitude;
    }

    /** @param latitude the latitude string */
    public void setLatitude(String latitude) {
        this.latitude = latitude;
    }

    /** @return the template entries, exactly as last set */
    public List<TemplateData> getTemplateData() {
        return this.templateData;
    }

    /** @param templateData the template entries; stored by reference */
    public void setTemplateData(List<TemplateData> templateData) {
        this.templateData = templateData;
    }
}
package com.iwant.download2geodata.data;

import java.io.Serializable;
import java.util.List;

/**
 * Serializable holder for a list of {@link ShopInfo} entries.
 *
 * @author whsgzcy
 */
public class ShopList implements Serializable {

    /** The wrapped shops; {@code null} until a list is set. */
    private List<ShopInfo> mShopInfoList;

    /** @param mShopInfoList the shops to hold; stored by reference */
    public void setmShopInfoList(List<ShopInfo> mShopInfoList) {
        this.mShopInfoList = mShopInfoList;
    }

    /** @return the shops, exactly as last set */
    public List<ShopInfo> getmShopInfoList() {
        return this.mShopInfoList;
    }
}
package com.iwant.download2geodata.data;

import java.io.Serializable;

/**
 * Serializable holder for the {@code pic_info} value of a shop entry.
 *
 * @author whsgzcy
 */
public class TemplateData implements Serializable {

    /** Raw {@code pic_info} string; {@code null} until set. */
    private String pic_info;

    /** @param pic_info the picture info string to store */
    public void setPic_info(String pic_info) {
        this.pic_info = pic_info;
    }

    /** @return the picture info string, exactly as last set */
    public String getPic_info() {
        return this.pic_info;
    }
}
很简单,懂java的应该都能看懂。在此提出一个设计思路:代码中默认没有启用每次请求之间的线程休眠(sleep),休眠的作用是放慢请求频率,防止我的IP被屏蔽。需要注意的是,sleep只会让抓取线程自己暂停,主线程上的逻辑不受影响,会照常执行。我是一次性抓取通过,就懒得加了,有兴趣的同学可以将这块加上,thanks a lot。
本文出自 “7851921” 博客,请务必保留此出处http://7861921.blog.51cto.com/7851921/1883764
原文地址:http://7861921.blog.51cto.com/7851921/1883764