标签:bsp escape decode epo ons nec htm out group
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-lang3 <dependency> <groupId>org.apache.commons</groupId> <artifactId>commons-lang3</artifactId> <version>3.4</version> </dependency> --> public static String getNextPage(String web) throws Exception { HttpComponentsClientHttpRequestFactory factory=new HttpComponentsClientHttpRequestFactory(); // factory.setConnectTimeout(60000); // factory.set String regx="上一页</a>)(<a.*?href=[\\\"‘]?(((http|https)?://)?/?[^\\\"‘]+)[\\\"‘]?.*?>(.+)</a>"; RestTemplate template=new RestTemplate(); URI uri=new URI(URLDecoder.decode(web,"utf-8")); String stri = template.getForObject(uri, String.class); Pattern pattern=Pattern.compile(regx); Matcher matcher = pattern.matcher(stri); matcher.find(); String group = matcher.group(); group = group.substring(group.indexOf("href=\"/") + 7, group.indexOf("\" title=\"")); group="http://www.youbianku.com/"+group; group= StringEscapeUtils.unescapeHtml4(group); return group; }
标签:bsp escape decode epo ons nec htm out group
原文地址:http://www.cnblogs.com/wangyang108/p/6010145.html