标签:coding sea ati 形参 findall apache ESS int dom
使用Spring Data ElasticSearch+Jsoup操作集群数据存储
1、使用Jsoup爬取京东商城的商品数据
1)获取商品名称、价格以及商品地址,并封装为一个Product对象,代码截图:
2)创建Product实体类,完成对索引、类型、映射以及文档的配置,代码截图:
3)将爬取到的商品对象存储到集群中,代码截图:
4)完成对商品信息的查询、分页、删除和更新操作,代码截图:
applicationContext.xml
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Spring application context for the Elasticsearch demo. It enables component
  scanning, declares a transport client for the cluster, exposes an
  ElasticsearchTemplate bean and scans for Spring Data repositories.
-->
<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xmlns:context="http://www.springframework.org/schema/context"
       xmlns:elasticsearch="http://www.springframework.org/schema/data/elasticsearch"
       xsi:schemaLocation="http://www.springframework.org/schema/beans
                           http://www.springframework.org/schema/beans/spring-beans.xsd
                           http://www.springframework.org/schema/context
                           http://www.springframework.org/schema/context/spring-context.xsd
                           http://www.springframework.org/schema/data/elasticsearch
                           http://www.springframework.org/schema/data/elasticsearch/spring-elasticsearch.xsd">

    <!-- Enable component scanning for the application packages. -->
    <context:component-scan base-package="com.elasticsearch"/>

    <!--
      Cluster connection settings. The node list is kept on a single line with no
      whitespace: the value is a comma-separated list of host:port pairs, and a
      line break inside the attribute would put padding in front of a host name.
      NOTE(review): some library versions may trim this padding; confirm before
      relying on it.
    -->
    <elasticsearch:transport-client id="esClient"
                                    cluster-name="my-cluster"
                                    cluster-nodes="127.0.0.1:9301,127.0.0.1:9302,127.0.0.1:9303"/>

    <!-- ElasticsearchTemplate bean, built on top of the transport client above. -->
    <bean id="elasticsearchTemplate"
          class="org.springframework.data.elasticsearch.core.ElasticsearchTemplate">
        <constructor-arg name="client" ref="esClient"/>
    </bean>

    <!--
      Scan for repository interfaces (the counterpart of MyBatis mappers, which
      operate on the data directly) in the given package, named BeanMapper by
      convention. The entity ("Bean") corresponds to a type (table) in Elasticsearch.
    -->
    <elasticsearch:repositories base-package="com.elasticsearch.mapper"/>
</beans>
pom.xml
<?xml version="1.0" encoding="UTF-8"?>

<!-- Maven build for the Spring Data Elasticsearch + Jsoup demo project. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.elasticsearch</groupId>
  <artifactId>eshm0430</artifactId>
  <version>1.0-SNAPSHOT</version>

  <name>eshm0430</name>
  <!-- FIXME change it to the project's website -->
  <url>http://www.example.com</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <!-- Aligned with the maven-compiler-plugin configuration below (Java 8);
         the code uses java.util.Optional, which does not exist in Java 7. -->
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
  </properties>

  <dependencies>
    <!--
      Spring Data Elasticsearch. An earlier revision declared an exclusion for
      transport-netty4-client, but its artifactId was typed with non-ASCII
      hyphens (U+2010), so it never matched and only produced a Maven warning.
      It is dropped instead of corrected: the project connects through the
      transport client, which presumably needs that artifact at runtime.
      NOTE(review): confirm before re-adding an exclusion.
    -->
    <dependency>
      <groupId>org.springframework.data</groupId>
      <artifactId>spring-data-elasticsearch</artifactId>
      <version>3.1.9.RELEASE</version>
    </dependency>

    <!--
      junit and spring-test were each declared twice (4.11/test + 4.12/compile and
      5.1.5.RELEASE/test + 5.2.5.RELEASE/compile). Duplicate declarations trigger a
      "must be unique" warning; only the later declaration of each is kept, on the
      assumption that it is the one Maven resolved.
      NOTE(review): switch both to test scope if the test classes live under
      src/test, and consider aligning spring-test with the Spring version pulled
      in by spring-data-elasticsearch.
    -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.12</version>
      <scope>compile</scope>
    </dependency>
    <dependency>
      <groupId>org.springframework</groupId>
      <artifactId>spring-test</artifactId>
      <version>5.2.5.RELEASE</version>
      <scope>compile</scope>
    </dependency>

    <!-- jsoup: HTML fetching and parsing for the crawler -->
    <dependency>
      <groupId>org.jsoup</groupId>
      <artifactId>jsoup</artifactId>
      <version>1.11.3</version>
    </dependency>
  </dependencies>

  <build>
    <pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
      <plugins>
        <!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
        <plugin>
          <artifactId>maven-clean-plugin</artifactId>
          <version>3.1.0</version>
        </plugin>
        <!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
        <plugin>
          <artifactId>maven-resources-plugin</artifactId>
          <version>3.0.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-compiler-plugin</artifactId>
          <version>3.8.0</version>
        </plugin>
        <plugin>
          <artifactId>maven-surefire-plugin</artifactId>
          <version>2.22.1</version>
        </plugin>
        <plugin>
          <artifactId>maven-jar-plugin</artifactId>
          <version>3.0.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-install-plugin</artifactId>
          <version>2.5.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-deploy-plugin</artifactId>
          <version>2.8.2</version>
        </plugin>
        <!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
        <plugin>
          <artifactId>maven-site-plugin</artifactId>
          <version>3.7.1</version>
        </plugin>
        <plugin>
          <artifactId>maven-project-info-reports-plugin</artifactId>
          <version>3.0.0</version>
        </plugin>
      </plugins>
    </pluginManagement>
    <plugins>
      <!-- Compile for Java 8. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <configuration>
          <source>8</source>
          <target>8</target>
        </configuration>
      </plugin>
    </plugins>
  </build>
</project>
Product
1 package com.elasticsearch.entity; 2 import org.springframework.data.annotation.Id; 3 import org.springframework.data.elasticsearch.annotations.Document; 4 import org.springframework.data.elasticsearch.annotations.Field; 5 import org.springframework.data.elasticsearch.annotations.FieldType; 6 7 @Document(indexName = "my-index3", type = "Product") 8 public class Product { 9 @Id 10 11 @Field(type = FieldType.Long,index = false,store = true) 12 private Long id; 13 14 @Field(type = FieldType.Text,index = true,store = true,analyzer = "ik_max_word") 15 private String pname; 16 17 @Field(type = FieldType.Text,index = true,store = true,analyzer = "ik_max_word") 18 private String pprice; 19 20 @Field(type = FieldType.Text,index = true,store = true,analyzer = "ik_max_word") 21 private String padress; 22 23 public Long getId() { 24 return id; 25 } 26 27 public void setId(Long id) { 28 this.id = id; 29 } 30 31 public String getPname() { 32 return pname; 33 } 34 35 public void setPname(String pname) { 36 this.pname = pname; 37 } 38 39 public String getPprice() { 40 return pprice; 41 } 42 43 public void setPprice(String pprice) { 44 this.pprice = pprice; 45 } 46 47 public String getPadress() { 48 return padress; 49 } 50 51 public void setPadress(String padress) { 52 this.padress = padress; 53 } 54 55 @Override 56 public String toString() { 57 return "Product{" + 58 "id=" + id + 59 ", pname=‘" + pname + ‘\‘‘ + 60 ", pprice=‘" + pprice + ‘\‘‘ + 61 ", padress=‘" + padress + ‘\‘‘ + 62 ‘}‘; 63 } 64 }
ProductMapper
package com.elasticsearch.mapper;

import com.elasticsearch.entity.Product;
import org.springframework.data.domain.Pageable;
import org.springframework.data.elasticsearch.repository.ElasticsearchCrudRepository;
import org.springframework.stereotype.Repository;

import java.util.List;

/**
 * Repository for {@link Product} documents keyed by a {@code Long} id.
 *
 * <p>The CRUD operations come from {@link ElasticsearchCrudRepository}; this
 * interface only adds a paged lookup by product name.
 */
@Repository
public interface ProductMapper extends ElasticsearchCrudRepository<Product, Long> {

    /**
     * Finds products by name, one page at a time.
     *
     * @param pname    the product name to search for
     * @param pageable the page to return (page index and size)
     * @return the products on the requested page
     */
    List<Product> findByPname(String pname, Pageable pageable);
}
ProductService
package com.elasticsearch.service;

import com.elasticsearch.entity.Product;
import org.springframework.data.domain.Pageable;

import java.util.List;
import java.util.Optional;

/**
 * Service-level operations on {@link Product} documents: create, look up,
 * delete, update and paged search by name.
 */
public interface ProductService {

    /**
     * Adds a new product document.
     *
     * @param product the product to store
     */
    void save(Product product);

    /**
     * Looks up a product by its document id.
     *
     * @param id the document id
     * @return the product, or an empty {@code Optional} when none is found
     */
    Optional<Product> findById(Long id);

    /**
     * Deletes the product with the given id.
     *
     * @param id the document id
     */
    void deleteById(Long id);

    /**
     * Updates the document identified by the id carried in {@code product}.
     *
     * @param product the product holding the id and the new field values
     */
    void updateById(Product product);

    /**
     * Searches products by name and returns one page of results.
     *
     * @param pname    the product name to search for
     * @param pageable the page to return (page index and size)
     * @return the products on the requested page
     */
    List<Product> findByPname(String pname, Pageable pageable);
}
ProductServiceImp
package com.elasticsearch.service.Imp;

import com.elasticsearch.entity.Product;
import com.elasticsearch.mapper.ProductMapper;
import com.elasticsearch.service.ProductService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.domain.Pageable;
import org.springframework.stereotype.Service;

import java.util.List;
import java.util.Optional;

/**
 * {@link ProductService} implementation that forwards every call to
 * {@link ProductMapper}. Registered as the Spring bean {@code "ProductService"}.
 */
@Service("ProductService")
public class ProductServiceImp implements ProductService {

    /** Repository that performs the actual Elasticsearch operations. */
    @Autowired
    private ProductMapper productMapper;

    /** Stores the given product through the repository. */
    @Override
    public void save(Product product) {
        productMapper.save(product);
    }

    /** Returns the repository's lookup result for the given id. */
    @Override
    public Optional<Product> findById(Long id) {
        return productMapper.findById(id);
    }

    /** Deletes the document with the given id through the repository. */
    @Override
    public void deleteById(Long id) {
        productMapper.deleteById(id);
    }

    /**
     * Updates by saving the product again.
     * NOTE(review): presumably relies on {@code save} overwriting the stored
     * document that has the same id; confirm against the repository contract.
     */
    @Override
    public void updateById(Product product) {
        productMapper.save(product);
    }

    /** Returns one page of products matching the given name. */
    @Override
    public List<Product> findByPname(String pname, Pageable pageable) {
        return productMapper.findByPname(pname, pageable);
    }
}
SpringDataESTest
package com.elasticsearch;

import com.elasticsearch.entity.Product;
import com.elasticsearch.service.ProductService;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.elasticsearch.core.ElasticsearchTemplate;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;

import java.io.IOException;
import java.util.List;
import java.util.Optional;

/**
 * Tests that run against the Spring context in {@code applicationContext.xml}.
 * They create the index, crawl a JD.com search page into {@link Product}
 * documents, and then query, delete, update and page through those documents.
 */
@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration(locations = "classpath:applicationContext.xml")
public class SpringDataESTest {

    /**
     * JD.com search URL; the keyword is the URL-encoded form of "手机".
     * Alternative URL that was used earlier:
     * https://search.jd.com/Search?keyword=%E6%89%8B%E6%9C%BA&enc=utf-8&wq=%E6%89%8B%E6%9C%BA&pvid=4cbce742a5634b66996fa09045840c0e
     */
    private static final String SEARCH_URL =
            "https://search.jd.com/Search?keyword=%E6%89%8B%E6%9C%BA&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&wq=%E6%89%8B%E6%9C%BA&psort=3&click=0";

    @Autowired
    private ElasticsearchTemplate elasticsearchTemplate;

    @Autowired
    private ProductService productService;

    /**
     * Creates the empty index for {@link Product} and then puts its mapping.
     * The annotation is {@code org.junit.Test}; do not create a class of your
     * own named {@code Test}.
     */
    @Test
    public void createIndex() {
        // Create the empty index.
        elasticsearchTemplate.createIndex(Product.class);
        // Add the mapping.
        elasticsearchTemplate.putMapping(Product.class);
    }

    /**
     * Crawls the search page and saves one {@link Product} per result item,
     * numbering the documents from 1 in page order.
     *
     * @throws IOException if the page cannot be fetched. The exception is
     *                     propagated so the test fails instead of passing
     *                     silently after a printed stack trace.
     */
    @Test
    public void createDocument() throws IOException {
        Document doc = Jsoup.connect(SEARCH_URL).get();

        // Every result is a "ul > li" whose class is gl-item, so a class selector finds them all.
        Elements items = doc.select(".gl-item");

        long id = 0;
        for (Element li : items) {
            // 1) Image address (not stored): class p-img -> img tag -> src attribute,
            //    i.e. li.select(".p-img").select("img").attr("src").

            // 2) Price: class p-price -> i tag -> text content, e.g. <i>12324</i>.
            String pprice = li.select(".p-price").select("i").text();
            System.out.println(pprice);

            // 3) Name: class "p-name p-name-type-2" -> em tag -> text content.
            //    The same value is read with two equivalent selectors and both are printed.
            String pname = li.select(".p-name").select(".p-name-type-2").select("em").text();
            String pname2 = li.select("div[class='p-name p-name-type-2']").select("em").text();
            System.out.println(pname);
            System.out.println(pname2);

            // 4) Product address: href of the link inside the p-img container.
            String padress = li.select(".p-img").select("a").attr("href");
            System.out.println(padress);

            id++;
            Product product = new Product();
            product.setId(id);
            product.setPname(pname);
            product.setPprice(pprice);
            product.setPadress(padress);

            productService.save(product);
        }
    }

    /** Looks up the document with id 1 and prints it; fails with a clear message when it is missing. */
    @Test
    public void getDocumentById() {
        Optional<Product> byId = productService.findById(1L);
        Product product = byId.orElseThrow(
                () -> new AssertionError("No Product document found for id 1"));
        System.out.println("根据id查询" + product);
    }

    /** Deletes the document with id 30. */
    @Test
    public void deleteDocumentById() {
        productService.deleteById(30L);
    }

    /** Updates the document with id 29 by saving a product that carries the same id. */
    @Test
    public void updateDocumentById() {
        Product product = new Product();
        product.setId(29L);
        product.setPprice("2");
        product.setPname("根据id更新的名字");
        product.setPadress("更新的");
        productService.updateById(product);
        System.out.println("更新后的文件" + product);
    }

    /** Searches by product name and prints the first page (page 0, 10 entries per page). */
    @Test
    public void getDocumentByPnameAndPage() {
        List<Product> byPnameAndPage = productService.findByPname("华为", PageRequest.of(0, 10));
        System.out.println(byPnameAndPage);
    }
}
1
// System.out.println(hs);
使用Spring Data ElasticSearch+Jsoup操作集群数据存储
标签:coding sea ati 形参 findall apache ESS int dom
原文地址:https://www.cnblogs.com/caoxinfang/p/12828662.html