标签:lucene 检索
1.创建IndexReader
package com.mzsx.index; import java.io.File; import java.io.IOException; importorg.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; import com.mzsx.write.DirectoryConext; public class IndexReaderContext { privatestatic IndexReader reader = null; privatestatic Directory directory=null; privateIndexReaderContext(){} publicstatic IndexReader getIndexReader(String fileName){ if(reader==null) { directory=DirectoryConext.getDirectory(fileName); synchronized(IndexReaderContext.class){ if(reader==null) { try{ reader=IndexReader.open(directory,false); }catch (CorruptIndexException e) { e.printStackTrace(); }catch (IOException e) { e.printStackTrace(); } } } } returnreader; } publicstatic IndexReader getIndexReader(Directory dir){ if(reader==null) { directory=dir; synchronized(IndexReaderContext.class){ if(reader==null) { try{ reader=IndexReader.open(directory,false); }catch (CorruptIndexException e) { e.printStackTrace(); }catch (IOException e) { e.printStackTrace(); } } } } returnreader; } }
2.创建IndexSearcher
// Build a searcher on top of the already opened index reader.
IndexSearcher searcher=new IndexSearcher(indexReader);
3.创建Term和TermQuery
// A Term pairs a field name with the text to match; TermQuery searches for that term.
Term term=new Term(field,name); TermQuery termQuery=new TermQuery(term);
4.根据TermQuery获取TopDocs
// Fetch at most num hits, then print the value of topDocs.totalHits.
TopDocs topDocs= searcher.search(termQuery, num); int length= topDocs.totalHits; System.out.println("总共查询出来总数:"+length);
5.根据TopDocs获取ScoreDoc和ScoreDoc获取相应文档
// Load and print the stored document behind each hit.
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
for (ScoreDoc scoreDoc : scoreDocs) {
    Document doc = searcher.doc(scoreDoc.doc);
    // Cap the preview at 50 chars; a shorter or missing body is printed as-is
    // instead of throwing from substring().
    String contents = doc.get("contents");
    String preview = (contents != null && contents.length() > 50)
            ? contents.substring(0, 50) : contents;
    System.out.println(doc.get("id") + "---->" + doc.get("filename")
            + "[" + doc.get("fullpath") + "]-->\n" + preview);
}
6.整体
//精确查询 publicvoid searchByTerm(String field,String name,int num) { try{ IndexSearchersearcher=new IndexSearcher(indexReader); Termterm=new Term(field,name); TermQuerytermQuery=new TermQuery(term); TopDocs topDocs= searcher.search(termQuery, num); intlength= topDocs.totalHits; System.out.println("总共查询出来总数:"+length); ScoreDoc[]scoreDocs= topDocs.scoreDocs; for(ScoreDoc scoreDoc : scoreDocs) { Documentdoc=searcher.doc(scoreDoc.doc); System.out.println(doc.get("id")+ "---->" +doc.get("filename") + "[" + doc.get("fullpath") +"]-->\n" + doc.get("contents").substring(0, 50) ); } }catch (IOException e) { e.printStackTrace(); } }
1. TermQuery—单个精确查询
//精确查询 publicvoid searchByTerm(String field,String name,int num) { try{ IndexSearchersearcher=new IndexSearcher(indexReader); Termterm=new Term(field,name); TermQuerytermQuery=new TermQuery(term); TopDocs topDocs= searcher.search(termQuery, num); intlength= topDocs.totalHits; System.out.println("总共查询出来总数:"+length); ScoreDoc[]scoreDocs= topDocs.scoreDocs; for(ScoreDoc scoreDoc : scoreDocs) { Documentdoc=searcher.doc(scoreDoc.doc); System.out.println(doc.get("id")+ "---->" +doc.get("filename") + "[" + doc.get("fullpath") +"]-->\n" + doc.get("contents").substring(0, 50) ); } }catch (IOException e) { e.printStackTrace(); } }
2.TermRangeQuery—查找字符串某个范围
public void searchByTermRange(String field,StringlowerTerm,String upperTerm,int num) { try{ IndexSearchersearcher=new IndexSearcher(indexReader); TermRangeQuery range=new TermRangeQuery(field, lowerTerm,upperTerm, true, true); TopDocs topDocs= searcher.search(range, num); intlength= topDocs.totalHits; System.out.println("总共查询出来总数:"+length); ScoreDoc[]scoreDocs= topDocs.scoreDocs; for(ScoreDoc scoreDoc : scoreDocs) { Documentdoc=searcher.doc(scoreDoc.doc); System.out.println(doc.get("id")+ "---->" +doc.get("filename") + "[" + doc.get("fullpath") +"]-->\n" + doc.get("contents").substring(0, 50) ); } }catch (CorruptIndexException e) { //TODO Auto-generated catch block e.printStackTrace(); }catch (IOException e) { //TODO Auto-generated catch block e.printStackTrace(); } }
3. NumericRangeQuery--查询某个数字的范围
public void searchByNumricRange(String field,longmin,long max,int num) { try{ IndexSearchersearcher=new IndexSearcher(indexReader); NumericRangeQuery range=NumericRangeQuery.newLongRange(field,min, max, true, true); TopDocs topDocs= searcher.search(range, num); intlength= topDocs.totalHits; System.out.println("总共查询出来总数:"+length); ScoreDoc[]scoreDocs= topDocs.scoreDocs; for(ScoreDoc scoreDoc : scoreDocs) { Documentdoc=searcher.doc(scoreDoc.doc); System.out.println(doc.get("id")+ "---->" +doc.get("filename") + "[" + doc.get("fullpath") +"]-->\n" + doc.get("contents").substring(0, 50) ); } }catch (CorruptIndexException e) { e.printStackTrace(); }catch (IOException e) { e.printStackTrace(); } }
4.PrefixQuery—前缀查询
public void searchByPrefix(String field,Stringvalue,int num) { try{ IndexSearchersearcher=new IndexSearcher(indexReader); PrefixQuery range=new PrefixQuery(new Term(field,value)); TopDocs topDocs= searcher.search(range, num); intlength= topDocs.totalHits; System.out.println("总共查询出来总数:"+length); ScoreDoc[]scoreDocs= topDocs.scoreDocs; for(ScoreDoc scoreDoc : scoreDocs) { Documentdoc=searcher.doc(scoreDoc.doc); System.out.println(doc.get("id")+ "---->" +doc.get("filename") + "[" + doc.get("fullpath") +"]-->\n" + doc.get("contents") ); } }catch (CorruptIndexException e) { e.printStackTrace(); }catch (IOException e) { e.printStackTrace(); } }
5. WildcardQuery—通配符查询
//通配符模糊搜索 //在传入的value中可以使用通配符:?和*,?表示匹配一个字符,*表示匹配任意多个字符 publicvoid searchByWildcard(String field,String value,int num) { try{ IndexSearchersearcher=new IndexSearcher(indexReader); WildcardQuery range=new WildcardQuery(newTerm(field,value)); TopDocs topDocs= searcher.search(range, num); intlength= topDocs.totalHits; System.out.println("总共查询出来总数:"+length); ScoreDoc[]scoreDocs= topDocs.scoreDocs; for(ScoreDoc scoreDoc : scoreDocs) { Documentdoc=searcher.doc(scoreDoc.doc); System.out.println(doc.get("id")+ "---->" +doc.get("filename") + "[" + doc.get("fullpath") +"]-->\n" + doc.get("contents").substring(0,40) ); } }catch (CorruptIndexException e) { e.printStackTrace(); }catch (IOException e) { e.printStackTrace(); } }
6.BooleanQuery—联合查询
/**
 * Combined search: builds a {@link BooleanQuery} from two {@code SHOULD} clauses
 * (filename term "凤凰台", contents term "用户") and prints each hit.
 *
 * @param num maximum number of hits to fetch
 */
public void searchByBoolean(int num) {
    try {
        IndexSearcher searcher = new IndexSearcher(indexReader);
        BooleanQuery query = new BooleanQuery();
        query.add(new TermQuery(new Term("filename", "凤凰台")), Occur.SHOULD);
        query.add(new TermQuery(new Term("contents", "用户")), Occur.SHOULD);
        TopDocs topDocs = searcher.search(query, num);
        int length = topDocs.totalHits;
        System.out.println("总共查询出来总数:" + length);
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (ScoreDoc scoreDoc : scoreDocs) {
            Document doc = searcher.doc(scoreDoc.doc);
            // Cap the preview at 40 chars; a shorter or missing body is printed
            // as-is instead of throwing from substring().
            String contents = doc.get("contents");
            String preview = (contents != null && contents.length() > 40)
                    ? contents.substring(0, 40) : contents;
            System.out.println(doc.get("id") + "---->" + doc.get("filename")
                    + "[" + doc.get("fullpath") + "]-->\n" + preview);
        }
    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
7. PhraseQuery--短语检索
/**
 * Phrase search: looks for the filename terms "项目" and "系统" with a slop of 3
 * set on the {@link PhraseQuery}, and prints each hit.
 *
 * @param num maximum number of hits to fetch
 */
public void searchByPhrase(int num) {
    try {
        IndexSearcher searcher = new IndexSearcher(indexReader);
        PhraseQuery query = new PhraseQuery();
        query.add(new Term("filename", "项目"));
        query.setSlop(3);
        query.add(new Term("filename", "系统"));
        TopDocs topDocs = searcher.search(query, num);
        int length = topDocs.totalHits;
        System.out.println("总共查询出来总数:" + length);
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (ScoreDoc scoreDoc : scoreDocs) {
            Document doc = searcher.doc(scoreDoc.doc);
            // Cap the preview at 40 chars; a shorter or missing body is printed
            // as-is instead of throwing from substring().
            String contents = doc.get("contents");
            String preview = (contents != null && contents.length() > 40)
                    ? contents.substring(0, 40) : contents;
            System.out.println(doc.get("id") + "---->" + doc.get("filename")
                    + "[" + doc.get("fullpath") + "]-->\n" + preview);
        }
    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
8.FuzzyQuery--模糊匹配
/**
 * Fuzzy search: runs a {@link FuzzyQuery} for the term "安全" on the
 * {@code contents} field (arguments {@code 0.3f} and {@code 0} are passed through
 * to the query constructor) and prints each hit.
 *
 * @param num maximum number of hits to fetch
 */
public void searchByFuzzy(int num) {
    try {
        IndexSearcher searcher = new IndexSearcher(indexReader);
        // The field was misspelled "contens"; every other query and the printed
        // preview use "contents".
        FuzzyQuery query = new FuzzyQuery(new Term("contents", "安全"), 0.3f, 0);
        TopDocs topDocs = searcher.search(query, num);
        int length = topDocs.totalHits;
        System.out.println("总共查询出来总数:" + length);
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (ScoreDoc scoreDoc : scoreDocs) {
            Document doc = searcher.doc(scoreDoc.doc);
            // Cap the preview at 40 chars; a shorter or missing body is printed
            // as-is instead of throwing from substring().
            String contents = doc.get("contents");
            String preview = (contents != null && contents.length() > 40)
                    ? contents.substring(0, 40) : contents;
            System.out.println(doc.get("id") + "---->" + doc.get("filename")
                    + "[" + doc.get("fullpath") + "]-->\n" + preview);
        }
    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
1.QueryParser简介
Mike | 默认域包含mike |
Mike john 或 Mike OR john | 默认域包含mike或者john |
+mike +address:zhaotong 或 Mike AND address:zhaotong | 默认域是mike并且address是zhaotong |
id:2 | Id域为2 |
Address:Kunming -desc:she 或 Address:Kunming AND NOT desc:she | Address是kunming并且desc不是she |
(mike OR john) AND address:zhaotong | 默认域是mike或者john 并且address是zhaotong |
Desc:"she like" | Desc域是she like |
desc:"happy girl"~5 | 查找happy和girl之间距离小于5的文档 |
J* | 默认域是j开头 |
Johe~ | 模糊搜索johe |
Id:["1" TO "3"] | Id从1到3 |
2.QueryParser实例
public void searchByQueryParse(StringqueryContion,int num) { try{ IndexSearchersearcher=new IndexSearcher(indexReader); QueryParserparser = new QueryParser(Version.LUCENE_35,"contents",analyzer); org.apache.lucene.search.Queryquery=parser.parse(queryContion); TopDocs topDocs= searcher.search(query, num); intlength= topDocs.totalHits; System.out.println("总共查询出来总数:"+length); ScoreDoc[]scoreDocs= topDocs.scoreDocs; for(ScoreDoc scoreDoc : scoreDocs) { Documentdoc=searcher.doc(scoreDoc.doc); System.out.println(doc.get("id")+ "---->" +doc.get("filename") + "[" + doc.get("fullpath") +"]-->\n" + doc.get("contents").substring(0,40) ); } }catch (CorruptIndexException e) { e.printStackTrace(); }catch (IOException e) { e.printStackTrace(); }catch (ParseException e) { e.printStackTrace(); } }
//测试代码 @Test publicvoid searchByQueryParse(){ SearchOperaopera=new SearchOpera("D:/luceneIndex/index", analyzer, true); opera.searchByQueryParse("filename:[aTO z]",10); //opera.searchByQueryParse("filename:{aTO g}",10); //没有办法匹配数字范围(自己扩展Parser) //opera.searchByQueryParse("size:[200TO 13000]",10); //完全匹配 //opera.searchByQueryParse("contents:\"完全是宠溺\"",10); //距离为1匹配 //opera.searchByQueryParse("contents:\"完全宠溺\"~1",10); //模糊查询 //opera.searchByQueryParse("contents:*",10); }
1.第一种方式:再查询
public void searchPage(String queryContion,intpageIndex,int pageSize){ try{ IndexSearchersearcher=new IndexSearcher(indexReader); QueryParserparser = new QueryParser(Version.LUCENE_35,"contents",analyzer); org.apache.lucene.search.Queryquery=parser.parse(queryContion); TopDocs topDocs= searcher.search(query, 500); intlength= topDocs.totalHits; System.out.println("总共查询出来总数:"+length); ScoreDoc[]scoreDocs= topDocs.scoreDocs; intstart = (pageIndex-1)*pageSize; intend = pageIndex*pageSize; for(int i=start;i<end;i++) { Documentdoc=searcher.doc(scoreDocs[i].doc); System.out.println(doc.get("id")+ "---->" +doc.get("filename") + "[" + doc.get("fullpath") +"]-->\n" + doc.get("contents").substring(0,40) ); } }catch (CorruptIndexException e) { e.printStackTrace(); }catch (IOException e) { e.printStackTrace(); }catch (ParseException e) { e.printStackTrace(); } }
//测试代码 @Test publicvoid searchPage(){ SearchOperaopera=new SearchOpera("D:/luceneIndex/index", analyzer, true); opera.searchPage("filename:[aTO z]", 1, 3); }
2.第二种方式:searchAfter
public void searchPageByAfter(String query,intpageIndex,int pageSize) { try{ IndexSearchersearcher=new IndexSearcher(indexReader); QueryParserparser = new QueryParser(Version.LUCENE_35,"content",analyzer); Queryq = parser.parse(query); //先获取上一页的最后一个元素 ScoreDoclastSd = getLastScoreDoc(pageIndex, pageSize, q, searcher); //通过最后一个元素搜索下页的pageSize个元素 TopDocstds = searcher.searchAfter(lastSd,q, pageSize); for(ScoreDocsd:tds.scoreDocs) { Documentdoc = searcher.doc(sd.doc); System.out.println(sd.doc+":"+doc.get("path")+"-->"+doc.get("filename")); } searcher.close(); }catch (org.apache.lucene.queryParser.ParseException e) { e.printStackTrace(); }catch (IOException e) { e.printStackTrace(); } }
/** * 根据页码和分页大小获取上一次的最后一个ScoreDoc */ privateScoreDoc getLastScoreDoc(int pageIndex,int pageSize,Query query,IndexSearchersearch) throws IOException { if(pageIndex==1)returnnull;//如果是第一页就返回空 intnum = pageSize*(pageIndex-1);//获取上一页的数量 TopDocstds = search.search(query, num); returntds.scoreDocs[num-1]; }
//测试代码 @Test publicvoid searchPageByAfter(){ SearchOperaopera=new SearchOpera("D:/luceneIndex/index", analyzer, true); opera.searchPageByAfter("filename:[aTO z]",1,3); }
本文出自 “梦朝思夕” 博客,请务必保留此出处http://qiangmzsx.blog.51cto.com/2052549/1440489
标签:lucene 检索
原文地址:http://qiangmzsx.blog.51cto.com/2052549/1440489