标签:lucene
--------------------------------------------------整个项目周期就只有一个IndexReader
1.// IndexReader的设计
private static IndexReader reader = null;search.close();
try { if (reader == null) { reader = IndexReader.open(directory); //reader = IndexReader.open(directory,false); //不设置为只读的reader } else { // 如果Index索引改变了将返回一个新的reader,否则将返回null IndexReader read = IndexReader.openIfChanged(reader); if (read != null) { //把原来的reader给close()掉 reader.close(); reader = read; } } return new IndexSearcher(reader); } catch (CorruptIndexException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } return null;
// reader.deleteDocuments(new Term("id","1")); // reader.close();有时候整个项目周期中只有一个IndexWriter
/**
 * Returns a searcher backed by the shared {@code reader}.
 *
 * The reader is opened on first use. On later calls it is swapped for a fresh one
 * only when {@code IndexReader.openIfChanged} hands back a non-null reader; the
 * previous reader is closed before the swap.
 *
 * @return a new IndexSearcher over the shared reader, or {@code null} when opening
 *         or refreshing the reader failed (the stack trace is printed)
 */
public IndexSearcher getSearch() {
    IndexSearcher result = null;
    try {
        if (reader != null) {
            IndexReader refreshed = IndexReader.openIfChanged(reader);
            if (refreshed != null) {
                // Release the stale reader before adopting the refreshed one.
                reader.close();
                reader = refreshed;
            }
        } else {
            reader = IndexReader.open(directory);
        }
        result = new IndexSearcher(reader);
    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
    return result;
}
// Term lookup: search `field` for the single term `name`, keeping at most `num` hits.
IndexSearcher searcher = getSearch();
TopDocs hits = searcher.search(new TermQuery(new Term(field, name)), num);
// Load the stored document behind every returned hit.
for (int i = 0; i < hits.scoreDocs.length; i++) {
    Document doc = searcher.doc(hits.scoreDocs[i].doc);
}
searcher.close();
// String range search on `field` from `start` to `end`; both include flags are true.
IndexSearcher searcher = getSearch();
TopDocs hits = searcher.search(new TermRangeQuery(field, start, end, true, true), num);
System.out.println("一共查询了:" + hits.totalHits);
IndexSearcher search = getSearch(); // true表示闭区间 Query query = NumericRangeQuery.newIntRange(field, start, end,true, true); TopDocs tds = search.search(query, num); System.out.println("一共查询了:" + tds.totalHits);
public void SearchByTermRange(String field, String start, String end,int num) { try { IndexSearcher search = getSearch(); // 范围查询 // true表示闭区间(是否包含开始字符和结束字符,默认为true) Query query = new TermRangeQuery(field, start, end, true, true); TopDocs tds = search.search(query, num); System.out.println("一共查询了:" + tds.totalHits); for (ScoreDoc sdc : tds.scoreDocs) { Document doc = search.doc(sdc.doc); System.out.println(sdc.doc + doc.get("name") + "[" + doc.get("email") + "," + doc.get("id") + "," + doc.get("attach") + "]"); } search.close(); } catch (IOException e) { e.printStackTrace(); } }
// Prefix search: builds a PrefixQuery from the term (field, value).
Query query = new PrefixQuery(new Term(field, value));
// Wildcard search: builds a WildcardQuery from the term (field, value).
Query query = new WildcardQuery(new Term(field, value)); // Usage:
sutil.SearchByWildCard("name", "l*", 3);在传入的value中可以使用通配符? 和 *
BooleanQuery query = new BooleanQuery(); // Occur.Must表必须 //Occur.SHOULD表示可有可无 //Occur.MUST_NOT表示必须没有 query.add(new TermQuery(new Term("name", "lili")), Occur.MUST); query.add(new TermQuery(new Term("content", "hello")), Occur.MUST);
PhraseQuery query = new PhraseQuery(); // setSlop()设置跳数,及两个单词之间有几个单词 query.setSlop(1); // 设置field字段,即哪两个单词 // 第一个term query.add(new Term("content", "i")); // 产生距离后的第二个term query.add(new Term("content", "basketball"));
// Fuzzy search on the "name" field for the misspelled term "mirk".
Query query=new FuzzyQuery(new Term("name", "mirk"));
//基于字符串操作 public void SearchByQueryParse(Query query,int num){ try { IndexSearcher search = getSearch(); TopDocs tds = search.search(query, num); System.out.println("一共查询了:" + tds.totalHits); for (ScoreDoc sdc : tds.scoreDocs) { Document doc = search.doc(sdc.doc); System.out.println(sdc.doc + doc.get("name") + "[" + doc.get("email") + "," + doc.get("id") + "," + doc.get("attach") + "]"); } search.close(); } catch (IOException e) { e.printStackTrace(); } }
//使用query查询(创建queryparser,再通过queryparser创建query) // 1.创建Parse对象(设置默认搜索域为content) QueryParser parse = new QueryParser(Version.LUCENE_35, "content",new StandardAnalyzer(Version.LUCENE_35)); // 改变空格的默认操作(改为AND型) parse.setDefaultOperator(Operator.AND); // 开启第一个字符的通配符匹配(*xxx,?xxx),默认关闭,因为效率比较低 parse.setAllowLeadingWildcard(true); // 2.通过parse生成query(搜索content域中包含有like的) Query query = parse.parse("like"); // 能够一直加条件(空格默认就是OR) query = parse.parse("basketball i"); // 改变搜索域(域:值) query = parse.parse("name:mark"); // 同样能进行*或?的通配符匹配(通配符默认不能放在首位) query = parse.parse("name:*i"); // name中不包含mark,但是content中包含basketball(-和+必须放在域说明的前面) query = parse.parse("- name:mark + basketball"); // id的1~3(TO表示一个闭区间,TO必须是大写的) query = parse.parse("id:[1 TO 3]"); // {}表示1~3的开区间匹配 query = parse.parse("id:{1 TO 3}"); // name域值是lili或mark,默认域值是game query = parse.parse("name:(lili OR mark) AND game"); // 两个‘’号表示短语匹配 query = parse.parse("'i like basketball'"); // 表示i basketball之间有一个单词遗漏的匹配 query = parse.parse("\"i basketball\"~1"); // 加个~就能模糊查询mark query = parse.parse("name:mirk~"); // 没有办法匹配数字范围(自己扩展parse) query = parse.parse("attach:[1 TO 3]"); sutil.SearchByQueryParse(query, 5);
/**
 * Prints one page of results by re-running the query and slicing the hit array.
 *
 * @param query     query string, parsed against the "content" field
 * @param pageIndex 1-based page number
 * @param pageSize  number of hits per page
 * @throws IllegalArgumentException if pageIndex or pageSize is smaller than 1
 */
public void searchPage(String query, int pageIndex, int pageSize) {
    if (pageIndex < 1 || pageSize < 1) {
        throw new IllegalArgumentException(
                "pageIndex and pageSize must be >= 1, got " + pageIndex + " and " + pageSize);
    }
    IndexSearcher searcher = null;
    try {
        Directory dir = FileIndexUtils.getDirectory();
        searcher = getSearcher(dir);
        QueryParser parser = new QueryParser(Version.LUCENE_35, "content",
                new StandardAnalyzer(Version.LUCENE_35));
        Query q = parser.parse(query);
        int start = (pageIndex - 1) * pageSize;
        int end = pageIndex * pageSize;
        // Fetch exactly as many hits as the requested page needs, so pages past a
        // fixed cap stay reachable.
        TopDocs tds = searcher.search(q, end);
        ScoreDoc[] sds = tds.scoreDocs;
        // The last page may be short (or empty): never index past the hits we got.
        int last = Math.min(end, sds.length);
        for (int i = start; i < last; i++) {
            Document doc = searcher.doc(sds[i].doc);
            System.out.println(sds[i].doc + ":" + doc.get("path") + "-->" + doc.get("filename"));
        }
    } catch (org.apache.lucene.queryParser.ParseException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Close the searcher on every path, including parse and I/O failures.
        if (searcher != null) {
            try {
                searcher.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
private ScoreDoc getLastScoreDoc(int pageIndex,int pageSize,Query query,IndexSearcher searcher) throws IOException { if(pageIndex==1)return null;//如果是第一页就返回空 int num = pageSize*(pageIndex-1);//获取上一页的数量 //每次只取上面所有的元素 TopDocs tds = searcher.search(query, num); return tds.scoreDocs[num-1]; } public void searchPageByAfter(String query,int pageIndex,int pageSize) { try { Directory dir = FileIndexUtils.getDirectory(); IndexSearcher searcher = getSearcher(dir); QueryParser parser = new QueryParser(Version.LUCENE_35,"content",new StandardAnalyzer(Version.LUCENE_35)); Query q = parser.parse(query); //先获取上一页的最后一个元素 ScoreDoc lastSd = getLastScoreDoc(pageIndex, pageSize, q, searcher); //通过最后一个元素搜索下页的pageSize个元素 TopDocs tds = searcher.searchAfter(lastSd,q, pageSize); for(ScoreDoc sd:tds.scoreDocs) { Document doc = searcher.doc(sd.doc); System.out.println(sd.doc+":"+doc.get("path")+"-->"+doc.get("filename")); } searcher.close(); } catch (org.apache.lucene.queryParser.ParseException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } }
代码片段
package test.lucene.index;

import java.io.IOException;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeFilter;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

/**
 * Demo of the Lucene 3.5 query types against a small in-memory index of six documents.
 *
 * Every public Search* method builds one kind of query, runs it and prints the hits
 * to standard output.
 */
public class SearchUtil {
    /*
     * Sample data: six documents, one entry per array position.
     */
    private String[] ids = { "1", "2", "3", "4", "5", "6" };
    private String[] emails = { "aa@qq.com", "bb@sina.com", "cc@163.com",
            "dd@google.com", "ee@baidu.com", "ff@heima.com" };
    private String[] contents = { "hello boy,i like pingpang", "like boy",
            "xx bye i like swim", "hehe, i like basketball",
            "dd fsfs, i like movie", "hello xxx,i like game" };
    private int[] attachs = { 2, 3, 1, 4, 5, 5 };
    private String[] names = { "lili", "wangwu", "lisi", "jack", "tom", "mark" };
    // Boost per e-mail domain; nothing in this class fills it, so index() always
    // falls back to the default boost.
    private Map<String, Float> scores = new HashMap<String, Float>();

    private Directory directory;
    // Shared reader, opened on first use and refreshed by getSearch().
    private IndexReader reader;

    /** Creates the utility on top of an empty in-memory directory. */
    public SearchUtil() {
        directory = new RAMDirectory();
    }

    /**
     * Rebuilds the index: deletes all existing documents, then adds the six sample
     * documents. I/O failures are printed, and the writer is always closed.
     */
    public void index() {
        IndexWriter writer = null;
        try {
            writer = new IndexWriter(directory, new IndexWriterConfig(
                    Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
            writer.deleteAll();
            for (int i = 0; i < ids.length; i++) {
                Document document = new Document();
                document.add(new Field("id", ids[i], Field.Store.YES,
                        Field.Index.NOT_ANALYZED_NO_NORMS));
                // Indexed as a single token (not analyzed).
                document.add(new Field("email", emails[i], Field.Store.YES,
                        Field.Index.NOT_ANALYZED));
                document.add(new Field("content", contents[i], Field.Store.NO,
                        Field.Index.ANALYZED));
                document.add(new Field("name", names[i], Field.Store.YES,
                        Field.Index.NOT_ANALYZED));
                // Numeric field, so that NumericRangeQuery can be used on it.
                document.add(new NumericField("attach", Field.Store.YES, true)
                        .setIntValue(attachs[i]));
                // Document boost: looked up by e-mail domain, 0.5 when the domain is unknown.
                String et = emails[i].substring(emails[i].lastIndexOf("@") + 1);
                if (scores.containsKey(et)) {
                    document.setBoost(scores.get(et));
                } else {
                    document.setBoost(0.5f);
                }
                writer.addDocument(document);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (writer != null) {
                try {
                    writer.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Returns a searcher backed by the shared reader.
     *
     * The reader is opened on first use; afterwards it is replaced only when
     * IndexReader.openIfChanged returns a non-null reader, and the old reader is
     * closed before the swap.
     *
     * @return a new IndexSearcher, or {@code null} if the reader could not be opened
     *         or refreshed (the stack trace is printed)
     */
    public IndexSearcher getSearch() {
        try {
            if (reader == null) {
                reader = IndexReader.open(directory);
            } else {
                IndexReader read = IndexReader.openIfChanged(reader);
                if (read != null) {
                    reader.close();
                    reader = read;
                }
            }
            return new IndexSearcher(reader);
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Exact term match.
     *
     * @param field field to search
     * @param name  term that must match exactly
     * @param num   maximum number of hits to fetch
     */
    public void SearchByTerm(String field, String name, int num) {
        searchAndPrint(new TermQuery(new Term(field, name)), num);
    }

    /**
     * String range query with both bounds inclusive.
     *
     * @param field field to search
     * @param start lower bound
     * @param end   upper bound
     * @param num   maximum number of hits to fetch
     */
    public void SearchByTermRange(String field, String start, String end, int num) {
        searchAndPrint(new TermRangeQuery(field, start, end, true, true), num);
    }

    /**
     * Integer range query with both bounds inclusive.
     *
     * @param field numeric field to search
     * @param start lower bound
     * @param end   upper bound
     * @param num   maximum number of hits to fetch
     */
    public void SearchByNumricRange(String field, int start, int end, int num) {
        searchAndPrint(NumericRangeQuery.newIntRange(field, start, end, true, true), num);
    }

    /**
     * Prefix query.
     *
     * @param field field to search
     * @param value prefix the term must start with
     * @param num   maximum number of hits to fetch
     */
    public void SearchByPrefix(String field, String value, int num) {
        searchAndPrint(new PrefixQuery(new Term(field, value)), num);
    }

    /**
     * Wildcard query.
     *
     * @param field field to search
     * @param value pattern, may contain the wildcards ? and *
     * @param num   maximum number of hits to fetch
     */
    public void SearchByWildCard(String field, String value, int num) {
        searchAndPrint(new WildcardQuery(new Term(field, value)), num);
    }

    /**
     * Boolean query: name must be "lili" and content must contain "hello".
     *
     * @param num maximum number of hits to fetch
     */
    public void SearchByBoolean(int num) {
        BooleanQuery query = new BooleanQuery();
        // Occur.MUST: required; Occur.SHOULD: optional; Occur.MUST_NOT: must not match.
        query.add(new TermQuery(new Term("name", "lili")), Occur.MUST);
        query.add(new TermQuery(new Term("content", "hello")), Occur.MUST);
        searchAndPrint(query, num);
    }

    /**
     * Phrase query on content: "i" followed by "basketball" with a slop of 1.
     *
     * @param num maximum number of hits to fetch
     */
    public void SearchByPhrase(int num) {
        PhraseQuery query = new PhraseQuery();
        // Slop: how many words may sit between the two terms.
        query.setSlop(1);
        query.add(new Term("content", "i"));
        query.add(new Term("content", "basketball"));
        searchAndPrint(query, num);
    }

    /**
     * Fuzzy query on name for the misspelled term "mirk".
     *
     * @param num maximum number of hits to fetch
     */
    public void SearchByFuzzy(int num) {
        searchAndPrint(new FuzzyQuery(new Term("name", "mirk")), num);
    }

    /**
     * Runs an already-built query, typically one produced by a QueryParser.
     *
     * @param query query to execute
     * @param num   maximum number of hits to fetch
     */
    public void SearchByQueryParse(Query query, int num) {
        searchAndPrint(query, num);
    }

    /**
     * Executes the query and prints the total hit count followed by one line per hit.
     * Does nothing when no searcher is available; the searcher is closed on every path.
     */
    private void searchAndPrint(Query query, int num) {
        IndexSearcher search = getSearch();
        if (search == null) {
            // getSearch() has already printed why the index could not be opened.
            return;
        }
        try {
            TopDocs tds = search.search(query, num);
            System.out.println("一共查询了:" + tds.totalHits);
            for (ScoreDoc sdc : tds.scoreDocs) {
                Document doc = search.doc(sdc.doc);
                System.out.println(sdc.doc + doc.get("name") + "["
                        + doc.get("email") + "," + doc.get("id") + ","
                        + doc.get("attach") + "]");
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                search.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
package test.lucene.index; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.queryParser.QueryParser.Operator; import org.apache.lucene.search.Query; import org.apache.lucene.util.Version; import org.junit.Before; import org.junit.Test; public class SearchTest { private SearchUtil sutil; @Before public void init() throws Exception { sutil = new SearchUtil(); } @Test public void searchByterm() { sutil.index(); sutil.SearchByTerm("name", "mark", 3); } @Test public void searchByRangeTerm() { sutil.index(); sutil.SearchByTermRange("id", "1", "3", 10); // 查询name以a开头和s结尾的 sutil.SearchByTermRange("name", "a", "s", 10); // 由于attach是数字类型,使用termrange无法查询 sutil.SearchByTermRange("attach", "1", "5", 10); } @Test public void searchByNumricRange() { sutil.index(); // 由于attach是数字类型,使用NumricRange进行查询 sutil.SearchByNumricRange("attach", 2, 5, 10); } @Test public void searchByPrefix() { sutil.index(); // 前缀搜索 sutil.SearchByPrefix("name", "l", 3); } @Test public void searchByWildCard() { sutil.index(); // 通配符搜索 sutil.SearchByWildCard("name", "l*", 3); } @Test public void searchByBoolean() { sutil.index(); // 多条件查询 sutil.SearchByBoolean(3); } @Test public void searchByPhrase() { sutil.index(); // 短语查询 sutil.SearchByPhrase(5); } @Test public void searchByFuzzy() { sutil.index(); // 模糊查询 sutil.SearchByFuzzy(5); } @Test public void searchByqueryParse() throws Exception { sutil.index(); // 1.创建Parse对象(设置默认搜索域为content) QueryParser parse = new QueryParser(Version.LUCENE_35, "content", new StandardAnalyzer(Version.LUCENE_35)); // 改变空格的默认操作(改为AND型) parse.setDefaultOperator(Operator.AND); // 开启第一个字符的通配符匹配(*xxx,?xxx),默认关闭,因为效率比较低 parse.setAllowLeadingWildcard(true); // 2.通过parse生成query(搜索content域中包含有like的) Query query = parse.parse("like"); // 能够一直加条件(空格默认就是OR) query = parse.parse("basketball i"); // 改变搜索域(域:值) query = parse.parse("name:mark"); // 
同样能进行*或?的通配符匹配(通配符默认不能放在首位) query = parse.parse("name:*i"); // name中不包含mark,但是content中包含basketball(-和+必须放在域说明的前面) query = parse.parse("- name:mark + basketball"); // id的1~3(TO表示一个闭区间,TO必须是大写的) query = parse.parse("id:[1 TO 3]"); // {}表示1~3的开区间匹配 query = parse.parse("id:{1 TO 3}"); // name域值是lili或mark,默认域值是game query = parse.parse("name:(lili OR mark) AND game"); // 两个‘’号表示短语匹配 query = parse.parse("'i like basketball'"); // 表示i basketball之间有一个单词遗漏的匹配 query = parse.parse("\"i basketball\"~1"); // 加个~就能模糊查询mark query = parse.parse("name:mirk~"); // 没有办法匹配数字范围(自己扩展parse) query = parse.parse("attach:[1 TO 3]"); sutil.SearchByQueryParse(query, 5); } }
标签:lucene
原文地址:http://blog.csdn.net/u010366796/article/details/44872993