码迷,mamicode.com
首页 > Web开发 > 详细

jsoup抓取借书记录

时间:2015-12-21 23:17:51      阅读:298      评论:0      收藏:0      [点我收藏+]

标签:

  1 package tushuguan; 
  2 
  3 import java.io.IOException;  
  4 import java.util.ArrayList;  
  5 import java.util.HashMap;  
  6 import java.util.Iterator;  
  7 import java.util.List;  
  8 import java.util.Set;  
  9   
 10 import org.apache.http.Header;  
 11 import org.apache.http.HeaderElement;  
 12 import org.apache.http.HttpEntity;  
 13 import org.apache.http.HttpResponse;  
 14 import org.apache.http.NameValuePair;  
 15 import org.apache.http.ParseException;  
 16 import org.apache.http.client.ClientProtocolException;  
 17 import org.apache.http.client.entity.UrlEncodedFormEntity;  
 18 import org.apache.http.client.methods.HttpGet;  
 19 import org.apache.http.client.methods.HttpPost;  
 20 import org.apache.http.client.params.ClientPNames;  
 21 import org.apache.http.impl.client.DefaultHttpClient;  
 22 import org.apache.http.message.BasicNameValuePair;  
 23 import org.apache.http.util.EntityUtils;  
 24 import org.jsoup.Jsoup;  
 25 import org.jsoup.nodes.Document;  
 26 import org.jsoup.nodes.Element;  
 27 import org.jsoup.select.Elements;  
 28   
 29 public class tushuguan {  
 30    private static String LoginUrl = "http://222.200.98.171:81/login.aspx";  
 31    private static String Host = "http://222.200.98.171:81";  
 32    private static String mainUrl = "";  
 33    private static String borrowedBooksUrl = "";  
 34    private static String cookie = "";  
 35    private static String location = "";  
 36  
 37    /** 
 38     * @param args 
 39     */  
 40    public static void main(String[] args) {  
 41        // TODO Auto-generated method stub  
 42        getMyBorrowedBooks();  
 43    }  
 44  
 45    public static void getMyBorrowedBooks() {  
 46        try {  
 47            Document document = Jsoup.parse(login());  
 48            Elements elements1 = document  
 49                    .getElementsContainingOwnText("当前借阅情况和续借");// 通过text关键字找到所要的<a>标签  
 50            String url = elements1.first().attr("href");  
 51            for(int i=1;i<=4;i++){
 52            borrowedBooksUrl = "http://222.200.98.171:81/user/bookborrowedhistory.aspx?page="+i;// 取值和mainUrl进行拼凑组织借阅情况地址  
 53            System.out.println("链接如下:"+borrowedBooksUrl);
 54            getBookBorrowedData(getHtml(borrowedBooksUrl));  
 55            }
 56  
 57        } catch (IOException e) {  
 58            // TODO Auto-generated catch block  
 59            e.printStackTrace();  
 60        }  
 61    }  
 62  
 63    /** 
 64     * 获取借书情况具体数据(List<BookEntity>) 
 65     *  
 66     * @param src 
 67     * @return List<BookEntity> 
 68     */  
 69    private static List<BookEntity> getBookBorrowedData(String src) {  
 70        List<BookEntity> data = new ArrayList<BookEntity>();  
 71        Document document = Jsoup.parse(src);  
 72        Element element = document.select("[id=UserMasterRight]").first()  
 73                .getElementsByTag("table").first();  
 74        Elements elements2 = element.getElementsByTag("tr");  
 75        for (Element temp2 : elements2) {  
 76            Elements elements3 = temp2.getElementsByTag("td");  
 77            BookEntity entity = new tushuguan().new BookEntity()  
 78                    .setIsFullData(elements3.get(4).text())  
 79                    .setData2Return(elements3.get(1).text())  
 80                    .setName(elements3.get(2).text())  
 81                    .setData2Borrowed(elements3.get(0).text());  
 82            data.add(entity);  
 83  
 84        }  
 85        data.remove(0);  
 86        System.out.println("借书情况\n");  
 87  
 88        for (BookEntity temp : data) {  
 89            System.out.println(temp.getName() + "\n" + temp.getData2Borrowed()  
 90                    + "\n" + temp.getData2Return() + "\n"  
 91                    + temp.getIsFullData());  
 92        }  
 93        return data;  
 94  
 95    }  
 96  
 97    /** 
 98     * 图书馆登陆 
 99     *  
100     * @param context 
101     * @return 返回登陆后的界面Html代码 
102     * @throws ClientProtocolException 
103     * @throws IOException 
104     */  
105    public static String login() throws ClientProtocolException, IOException {  
106        List<NameValuePair> parmasList = new ArrayList<NameValuePair>();  
107        parmasList = initLoginParmas("3113003802", "092137");  
108        HttpPost post = new HttpPost(LoginUrl);  
109        post.getParams().setParameter(ClientPNames.HANDLE_REDIRECTS, false);  
110        // 阻止自动重定向,目的是获取第一个ResponseHeader的Cookie和Location  
111        post.setHeader("Content-Type",  
112                "application/x-www-form-urlencoded;charset=gbk");  
113        // 设置编码为GBK  
114        post.setEntity(new UrlEncodedFormEntity(parmasList, "GBK"));  
115        HttpResponse response = new DefaultHttpClient().execute(post);  
116        cookie = response.getFirstHeader("Set-Cookie").getValue();  
117        // 取得cookie并保存起来  
118        // System.out.println("cookie= " + cookie);  
119        location = response.getFirstHeader("Location").getValue();  
120        // 重定向地址,目的是连接到主页  
121        mainUrl = Host + location;  
122        // 构建主页地址  
123        String html = getHtml(mainUrl);  
124        return html;  
125  
126    }  
127  
128    /** 
129     * 获取网页HTML源代码 
130     *  
131     * @param url 
132     * @return  
133     * @throws ParseException 
134     * @throws IOException 
135     */  
136  
137    private static String getHtml(String url) throws ParseException,  
138            IOException {  
139        // TODO Auto-generated method stub  
140        HttpGet get = new HttpGet(url);  
141        if ("" != cookie) {  
142            get.addHeader("Cookie", cookie);  
143        }  
144        HttpResponse httpResponse = new DefaultHttpClient().execute(get);  
145        HttpEntity entity = httpResponse.getEntity();  
146        return EntityUtils.toString(entity);  
147    }  
148  
149    /** 
150     * 初始化参数 
151     *  
152     * @param userName 
153     * @param passWord 
154     * @return  
155     * @throws ParseException 
156     * @throws IOException 
157     */  
158    public static List<NameValuePair> initLoginParmas(String userName,  
159            String passWord) throws ParseException, IOException {  
160        List<NameValuePair> parmasList = new ArrayList<NameValuePair>();  
161        HashMap<String, String> parmasMap = getLoginFormData(LoginUrl);  
162        Set<String> keySet = parmasMap.keySet();  
163  
164        for (String temp : keySet) {  
165            if (temp.contains("Username")) {  
166                parmasMap.put(temp, userName);  
167            } else if (temp.contains("txtPas")) {  
168                parmasMap.put(temp, passWord);  
169            }  
170        }  
171  
172        Set<String> keySet2 = parmasMap.keySet();  
173        System.out.println("表单内容:");  
174        for (String temp : keySet2) {  
175            System.out.println(temp + " = " + parmasMap.get(temp));  
176        }  
177        for (String temp : keySet2) {  
178            parmasList.add(new BasicNameValuePair(temp, parmasMap.get(temp)));  
179        }  
180  
181        // System.out.println("initParams \n" + parmasMap);  
182  
183        return parmasList;  
184  
185    }  
186  
187    /** 
188     * 获取登录表单input内容 
189     *  
190     * @param url 
191     * @return  
192     * @throws IOException 
193     * @throws ParseException 
194     */  
195    public static HashMap<String, String> getLoginFormData(String url)  
196            throws ParseException, IOException {  
197        Document document = Jsoup.parse(getHtml(url));  
198        Elements element1 = document.getElementsByTag("form");// 找出所有form表单  
199        Element element = element1.select("[method=post]").first();// 筛选出提交方法为post的表单  
200        Elements elements = element.select("input[name]");// 把表单中带有name属性的input标签取出  
201        HashMap<String, String> parmas = new HashMap<String, String>();  
202        for (Element temp : elements) {  
203            parmas.put(temp.attr("name"), temp.attr("value"));// 把所有取出的input,取出其name,放入Map中  
204        }  
205        return parmas;  
206    }  
207  
208    class BookEntity {  
209        /** 
210         * 书名 
211         *  
212         */  
213        private String name;  
214        /** 
215         * 可借数 
216         */  
217        private String leandableNum;  
218        /** 
219         * 索引号 
220         */  
221        private String callNumber;  
222        /** 
223         * 作者 
224         */  
225        private String writer;  
226        /** 
227         * 出版社 
228         */  
229        private String publisher;  
230        /** 
231         * 还书时间 
232         */  
233        private String data2Return;  
234        /** 
235         * 借书时间 
236         */  
237        private String data2Borrowed;  
238        /** 
239         * 是否续满 
240         */  
241        private String isFullData;  
242  
243        public BookEntity() {  
244  
245        }  
246  
247        public String getName() {  
248            return name;  
249        }  
250  
251        public String getLeandableNum() {  
252            return leandableNum;  
253        }  
254  
255        public String getCallNumber() {  
256            return callNumber;  
257        }  
258  
259        public String getWriter() {  
260            return writer;  
261        }  
262  
263        public String getPublisher() {  
264            return publisher;  
265        }  
266  
267        public BookEntity setName(String name) {  
268            this.name = name;  
269            return this;  
270        }  
271  
272        public BookEntity setLeandableNum(String leandableNum) {  
273            this.leandableNum = leandableNum;  
274            return this;  
275        }  
276  
277        public BookEntity setCallNumber(String callNumber) {  
278            this.callNumber = callNumber;  
279            return this;  
280        }  
281  
282        public BookEntity setWriter(String writer) {  
283            this.writer = writer;  
284            return this;  
285        }  
286  
287        public BookEntity setPublisher(String publisher) {  
288            this.publisher = publisher;  
289            return this;  
290        }  
291  
292        public String getData2Return() {  
293            return data2Return;  
294        }  
295  
296        public String getData2Borrowed() {  
297            return data2Borrowed;  
298        }  
299  
300        public String getIsFullData() {  
301            return isFullData;  
302        }  
303  
304        public BookEntity setData2Return(String data2Return) {  
305            this.data2Return = data2Return;  
306            return this;  
307        }  
308  
309        public BookEntity setData2Borrowed(String data2Borrowed) {  
310            this.data2Borrowed = data2Borrowed;  
311            return this;  
312        }  
313  
314        public BookEntity setIsFullData(String isFullData) {  
315            this.isFullData = isFullData;  
316            return this;  
317        }  
318  
319    }  
320  
321 }  

技术分享

结果如下:

表单内容:
__VIEWSTATE = /wEPDwULLTE0MjY3MDAxNzcPZBYCZg9kFgoCAQ8PFgIeCEltYWdlVXJsBRt+XGltYWdlc1xoZWFkZXJvcGFjNGdpZi5naWZkZAICDw8WAh4EVGV4dAUt5bm/5Lic5bel5Lia5aSn5a2m5Zu+5Lmm6aaG5Lmm55uu5qOA57Si57O757ufZGQCAw8PFgIfAQUcMjAxNeW5tDEy5pyIMjHml6UgIOaYn+acn+S4gGRkAgQPZBYEZg9kFgQCAQ8WAh4LXyFJdGVtQ291bnQCCBYSAgEPZBYCZg8VAwtzZWFyY2guYXNweAAM55uu5b2V5qOA57SiZAICD2QWAmYPFQMTcGVyaV9uYXZfY2xhc3MuYXNweAAM5YiG57G75a+86IiqZAIDD2QWAmYPFQMOYm9va19yYW5rLmFzcHgADOivu+S5puaMh+W8lWQCBA9kFgJmDxUDCXhzdGIuYXNweAAM5paw5Lmm6YCa5oqlZAIFD2QWAmYPFQMUcmVhZGVycmVjb21tZW5kLmFzcHgADOivu+iAheiNkOi0rWQCBg9kFgJmDxUDE292ZXJkdWVib29rc19mLmFzcHgADOaPkOmGkuacjeWKoWQCBw9kFgJmDxUDEnVzZXIvdXNlcmluZm8uYXNweAAP5oiR55qE5Zu+5Lmm6aaGZAIID2QWAmYPFQMbaHR0cDovL2xpYnJhcnkuZ2R1dC5lZHUuY24vAA/lm77kuabppobpppbpobVkAgkPZBYCAgEPFgIeB1Zpc2libGVoZAIDDxYCHwJmZAIBD2QWBAIDD2QWBAIBDw9kFgIeDGF1dG9jb21wbGV0ZQUDb2ZmZAIHDw8WAh8BZWRkAgUPZBYGAgEPEGRkFgFmZAIDDxBkZBYBZmQCBQ8PZBYCHwQFA29mZmQCBQ8PFgIfAQWlAUNvcHlyaWdodCAmY29weTsyMDA4LTIwMDkuIFNVTENNSVMgT1BBQyA0LjAxIG9mIFNoZW56aGVuIFVuaXZlcnNpdHkgTGlicmFyeS4gIEFsbCByaWdodHMgcmVzZXJ2ZWQuPGJyIC8+54mI5p2D5omA5pyJ77ya5rex5Zyz5aSn5a2m5Zu+5Lmm6aaGIEUtbWFpbDpzenVsaWJAc3p1LmVkdS5jbmRkZBFPBFe3T/k7AJVSx8iKDmNVbdHT
ctl00$ContentPlaceHolder1$txtPas_Lib = 092137
ctl00$ContentPlaceHolder1$btnLogin_Lib = 登录
ctl00$ContentPlaceHolder1$txtlogintype = 0
ctl00$ContentPlaceHolder1$txtUsername_Lib = 3113003802
__EVENTVALIDATION = /wEWBQKs47i8AwKOmK5RApX9wcYGAsP9wL8JAqW86pcIDebecgohSzUlmvgecvTU4k49zAw=
链接如下:http://222.200.98.171:81/user/bookborrowedhistory.aspx?page=1
借书情况

回乡记 [专著]/贺雪峰主编
2015-09-01
2015-10-15
A3226253
土木工程CAD基础 [专著]:AutoCAD软件基础教程=CAD foundation of civil engineering:AutoCAD software basic course/邓芃主编
2015-07-20
2015-10-15
A3138201
李光耀传 [专著]/凌翔著
2015-07-20
2015-10-15
A3210306
工程CAD基础理论与上机操作习题集 [专著]/于奕峰,杨松林主编
2015-07-20
2015-10-15
A3258522
消失的17岁 [专著]/(美) 诺瓦·伦·苏玛著=17 & gone/Nova Ren Suma;刘丽洁译
2015-06-03
2015-09-01
A3213437
汤姆叔叔的小屋 [专著]=Uncle tom‘s cabin:插图·中文导读英文版/(美)比彻·斯托夫人著;王勋,纪飞等编译
2015-03-27
2015-06-01
A3002490
商务口译 [专著]=Business interpreting/刘建珠主编
2015-03-27
2015-06-01
A3003500
2014年季度精选集 [汇编]·春季卷/《读者·乡土人文版》编辑部主编
2015-03-27
2015-06-01
A3210150
可口可乐不规则营销 [专著]/(美)洛威尔著;龙文元译
2015-03-17
2015-06-16
A1501833
工程经济学 [专著]/关罡, 郝彤主编
2015-03-17
2015-04-29
A3109697
链接如下:http://222.200.98.171:81/user/bookborrowedhistory.aspx?page=2
借书情况

讴歌母爱 关注人生 [专著]:冰心小说全集/冰 心著
2015-03-03
2015-06-01
A5143376
林徽因小说:九十九度中/林徽因[著];陈学勇编选
2015-03-03
2015-06-01
A5188772
骆驼祥子·黑白李 [专著]/老舍著
2015-03-03
2015-06-01
A0957524
1937年的爱情 [专著]/叶兆言著
2014-11-27
2015-01-10
A1509614
理工大风流往事 [专著]/zt著
2014-11-27
2014-12-16
A1847222
酒殇 [专著]:一个酒业王国的兴衰/杨小凡著
2014-11-27
2015-01-10
A1948680
那时年少 [专著]/一草著
2014-11-27
2014-12-16
A2992422
不能承受的生命之轻 [专著]/(捷克斯洛伐克)米兰·昆德拉(Milan Kundera)著=L‘insoutenable legerete de l‘etre/许钧译
2014-11-18
2015-01-10
A0520872
读者精华本 [汇编]/万文海主编
2014-11-18
2015-01-10
A1547276
谁在让子弹飞 [专著]/曹保印著
2014-11-18
2014-12-16
A3147373
链接如下:http://222.200.98.171:81/user/bookborrowedhistory.aspx?page=3
借书情况

孤独是不人道的 [专著]/郭鹏著
2014-11-18
2014-12-16
A3147367
且听风吟 [专著]/(日)村上春树著;林少华译
2014-09-23
2014-11-10
A2516969
可怕的巧合 [专著]/石岩编著
2014-09-23
2014-11-13
A3158433
你好,总统 [专著]:乌戈·查韦斯与他的委内瑞拉=Comandante:inside Hugo Chavez‘s venezuela/(英)洛里·卡洛尔(Rory Carroll)著;徐天鹏译
2014-09-23
2014-11-13
A3129490
肝胆相照 [专著]:吴孟超传/方鸿辉著
2014-09-23
2014-11-10
A3139385
林徽因经典作品 [专著]:你是人间的四月天九十九度中/林徽因著
2014-05-19
2014-07-10
A2386519
梁思成的山河岁月 [专著]/林与舟编著
2014-05-19
2014-05-27
A1210449
人物中国 [汇编]/龚莉主编;《人物中国》编委会编
2014-05-19
2014-07-10
A2603584
百年大案追踪 [专著]/郭学德,崔爱鹏,李海涛著
2014-04-24
2014-06-11
A0283139
聚焦名人名案 [专著]/窦欣平,叶知秋著
2014-04-24
2014-06-11
A0547714
链接如下:http://222.200.98.171:81/user/bookborrowedhistory.aspx?page=4
借书情况

孙子兵法经典故事 [专著]/李济生编著
2014-04-24
2014-06-17
A0565277
危险游戏 [汇编]:典型犯罪案例评说/郭春孚,张翔鹰主编
2014-04-24
2014-06-03
A1360621
家庭常用药物手册 [专著]/白禾夏主编
2014-03-17
2014-03-25
A0483737
药用观赏植物栽培与利用 [专著]/张永清编著
2014-03-17
2014-04-13
A0614935
排毒不如无毒 [专著]:远离生活中的有毒物质/(美) 黛布拉·林恩·戴德著 ;常媛译=Toxic free: how to protect your health and home from the chemicals that are making you sick
2014-03-17
2014-03-25
A3116154
新版以案说法 [专著]/曾宪义总主编
2014-02-27
2014-03-17
A1595640
飞去的诗人:徐志摩传 [专著]/展望之,张方晦著
2014-02-25
2014-03-20
A8152588
高四凶猛 [专著]/耿萧著
2014-02-25
2014-02-27
A0547642

其实我是转载改了点东西而已:http://my.oschina.net/dfsfsdf/blog/116279?fromerr=jQsroe5A

jsoup抓取借书记录

标签:

原文地址:http://www.cnblogs.com/w1570631036/p/5065140.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!