码迷,mamicode.com
首页 > 其他好文 > 详细

第三章 检索

时间:2014-07-20 15:26:11      阅读:313      评论:0      收藏:0      [点我收藏+]

标签:lucene 检索

3.1 搜索的简单实现

1.创建IndexReader

package com.mzsx.index;
 
import java.io.File;
import java.io.IOException;
 
importorg.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
 
import com.mzsx.write.DirectoryConext;
 
public class IndexReaderContext {
         privatestatic IndexReader reader = null;
         privatestatic Directory directory=null;
         privateIndexReaderContext(){}
         
         publicstatic IndexReader getIndexReader(String fileName){
                   if(reader==null) {
                            directory=DirectoryConext.getDirectory(fileName);
                            synchronized(IndexReaderContext.class){
                                     if(reader==null) {
                                               try{
                                                        reader=IndexReader.open(directory,false);
                                               }catch (CorruptIndexException e) {
                                                        e.printStackTrace();
                                               }catch (IOException e) {
                                                        e.printStackTrace();
                                               }
                                     }
                            }
                            
                   }
                   returnreader;
         }
         publicstatic IndexReader getIndexReader(Directory dir){
                   if(reader==null) {
                            directory=dir;
                            synchronized(IndexReaderContext.class){
                                     if(reader==null) {
                                               try{
                                                        reader=IndexReader.open(directory,false);
                                               }catch (CorruptIndexException e) {
                                                        e.printStackTrace();
                                               }catch (IOException e) {
                                                        e.printStackTrace();
                                               }
                                     }
                            }
                            
                   }
                   returnreader;
         }
}

 

2.创建IndexSearcher

IndexSearcher searcher=new IndexSearcher(indexReader);

 

3.创建Term和TermQuery

Term term=new Term(field,name);
TermQuery termQuery=new TermQuery(term);

4.根据TermQuery获取TopDocs

TopDocs  topDocs= searcher.search(termQuery, num);
int length= topDocs.totalHits;
System.out.println("总共查询出来总数:"+length);

5.根据TopDocs获取ScoreDoc,再根据ScoreDoc获取相应文档

ScoreDoc[] scoreDocs= topDocs.scoreDocs;
         for(ScoreDoc scoreDoc : scoreDocs) {
                   Documentdoc=searcher.doc(scoreDoc.doc);
                   System.out.println(doc.get("id")+ "---->"
                            +doc.get("filename") + "[" + doc.get("fullpath")
                            +"]-->\n" + doc.get("contents").substring(0, 50) );
         }

 

6.整体

//精确查询
         publicvoid searchByTerm(String field,String name,int num) {
                   try{
                            IndexSearchersearcher=new IndexSearcher(indexReader);
                            Termterm=new Term(field,name);
                            TermQuerytermQuery=new TermQuery(term);
                            TopDocs  topDocs= searcher.search(termQuery, num);
                            intlength= topDocs.totalHits;
                            System.out.println("总共查询出来总数:"+length);
                            ScoreDoc[]scoreDocs= topDocs.scoreDocs;
                            for(ScoreDoc scoreDoc : scoreDocs) {
                                     Documentdoc=searcher.doc(scoreDoc.doc);
                                     System.out.println(doc.get("id")+ "---->"
                                                        +doc.get("filename") + "[" + doc.get("fullpath")
                                                        +"]-->\n" + doc.get("contents").substring(0, 50) );
                            }
                   }catch (IOException e) {
                            e.printStackTrace();
                   }
         }

 

3.2 其他搜索Query

1. TermQuery—单个精确查询

//精确查询
         publicvoid searchByTerm(String field,String name,int num) {
                   try{
                            IndexSearchersearcher=new IndexSearcher(indexReader);
                            Termterm=new Term(field,name);
                            TermQuerytermQuery=new TermQuery(term);
                            TopDocs  topDocs= searcher.search(termQuery, num);
                            intlength= topDocs.totalHits;
                            System.out.println("总共查询出来总数:"+length);
                            ScoreDoc[]scoreDocs= topDocs.scoreDocs;
                            for(ScoreDoc scoreDoc : scoreDocs) {
                                     Documentdoc=searcher.doc(scoreDoc.doc);
                                     System.out.println(doc.get("id")+ "---->"
                                                        +doc.get("filename") + "[" + doc.get("fullpath")
                                                        +"]-->\n" + doc.get("contents").substring(0, 50) );
                            }
                   }catch (IOException e) {
                            e.printStackTrace();
                   }
         }

 

 

2. TermRangeQuery—查找字符串某个范围

public void searchByTermRange(String field,StringlowerTerm,String upperTerm,int num) {
                   try{
                            IndexSearchersearcher=new IndexSearcher(indexReader);
                            TermRangeQuery  range=new TermRangeQuery(field, lowerTerm,upperTerm, true, true);
                            TopDocs  topDocs= searcher.search(range, num);
                            intlength= topDocs.totalHits;
                            System.out.println("总共查询出来总数:"+length);
                            ScoreDoc[]scoreDocs= topDocs.scoreDocs;
                            for(ScoreDoc scoreDoc : scoreDocs) {
                                     Documentdoc=searcher.doc(scoreDoc.doc);
                                     System.out.println(doc.get("id")+ "---->"
                                                        +doc.get("filename") + "[" + doc.get("fullpath")
                                                        +"]-->\n" + doc.get("contents").substring(0, 50) );
                            }
                   }catch (CorruptIndexException e) {
                            //TODO Auto-generated catch block
                            e.printStackTrace();
                   }catch (IOException e) {
                            //TODO Auto-generated catch block
                            e.printStackTrace();
                   }
         }

 

 

3. NumericRangeQuery—查询某个数字的范围

public void searchByNumricRange(String field,longmin,long max,int num) {
                   try{
                            IndexSearchersearcher=new IndexSearcher(indexReader);
                            NumericRangeQuery  range=NumericRangeQuery.newLongRange(field,min, max, true, true);
                            TopDocs  topDocs= searcher.search(range, num);
                            intlength= topDocs.totalHits;
                            System.out.println("总共查询出来总数:"+length);
                            ScoreDoc[]scoreDocs= topDocs.scoreDocs;
                            for(ScoreDoc scoreDoc : scoreDocs) {
                                     Documentdoc=searcher.doc(scoreDoc.doc);
                                     System.out.println(doc.get("id")+ "---->"
                                                        +doc.get("filename") + "[" + doc.get("fullpath")
                                                        +"]-->\n" + doc.get("contents").substring(0, 50) );
                            }
                   }catch (CorruptIndexException e) {
                            e.printStackTrace();
                   }catch (IOException e) {
                            e.printStackTrace();
                   }
         }

 

 

4. PrefixQuery—前缀查询

public void searchByPrefix(String field,Stringvalue,int num) {
                   try{
                            IndexSearchersearcher=new IndexSearcher(indexReader);
                            PrefixQuery  range=new PrefixQuery(new Term(field,value));
                            TopDocs  topDocs= searcher.search(range, num);
                            intlength= topDocs.totalHits;
                            System.out.println("总共查询出来总数:"+length);
                            ScoreDoc[]scoreDocs= topDocs.scoreDocs;
                            for(ScoreDoc scoreDoc : scoreDocs) {
                                     Documentdoc=searcher.doc(scoreDoc.doc);
                                     System.out.println(doc.get("id")+ "---->"
                                                        +doc.get("filename") + "[" + doc.get("fullpath")
                                                        +"]-->\n" + doc.get("contents") );
                            }
                   }catch (CorruptIndexException e) {
                            e.printStackTrace();
                   }catch (IOException e) {
                            e.printStackTrace();
                   }
         }

 

5. WildcardQuery—通配符查询

//通配符模糊搜索
         //在传入的value中可以使用通配符:?和*,?表示匹配一个字符,*表示匹配任意多个字符
         publicvoid searchByWildcard(String field,String value,int num) {
                   try{
                            IndexSearchersearcher=new IndexSearcher(indexReader);
                            WildcardQuery  range=new WildcardQuery(newTerm(field,value));
                            TopDocs  topDocs= searcher.search(range, num);
                            intlength= topDocs.totalHits;
                            System.out.println("总共查询出来总数:"+length);
                            ScoreDoc[]scoreDocs= topDocs.scoreDocs;
                            for(ScoreDoc scoreDoc : scoreDocs) {
                                     Documentdoc=searcher.doc(scoreDoc.doc);
                                     System.out.println(doc.get("id")+ "---->"
                                                        +doc.get("filename") + "[" + doc.get("fullpath")
                                                        +"]-->\n" + doc.get("contents").substring(0,40) );
                            }
                   }catch (CorruptIndexException e) {
                            e.printStackTrace();
                   }catch (IOException e) {
                            e.printStackTrace();
                   }
         }

 

6. BooleanQuery—联合查询

public void searchByBoolean(int num) {
                   try{
                            IndexSearchersearcher=new IndexSearcher(indexReader);
                            BooleanQueryquery = new BooleanQuery();
                            query.add(newTermQuery(new Term("filename","凤凰台")), Occur.SHOULD);
                            query.add(newTermQuery(new Term("contents","用户")),Occur.SHOULD);
                            TopDocs  topDocs= searcher.search(query, num);
                            intlength= topDocs.totalHits;
                            System.out.println("总共查询出来总数:"+length);
                            ScoreDoc[]scoreDocs= topDocs.scoreDocs;
                            for(ScoreDoc scoreDoc : scoreDocs) {
                                     Documentdoc=searcher.doc(scoreDoc.doc);
                                     System.out.println(doc.get("id")+ "---->"
                                                        +doc.get("filename") + "[" + doc.get("fullpath")
                                                        +"]-->\n" + doc.get("contents").substring(0,40) );
                            }
                   }catch (CorruptIndexException e) {
                            e.printStackTrace();
                   }catch (IOException e) {
                            e.printStackTrace();
                   }
         }

7. PhraseQuery—短语检索

public void searchByPhrase(int num) {
                   try{
                            IndexSearchersearcher=new IndexSearcher(indexReader);
                            PhraseQueryquery = new PhraseQuery();
                            query.add(newTerm("filename","项目"));
                            query.setSlop(3);
                            query.add(newTerm("filename","系统"));
                            TopDocs  topDocs= searcher.search(query, num);
                            intlength= topDocs.totalHits;
                            System.out.println("总共查询出来总数:"+length);
                            ScoreDoc[]scoreDocs= topDocs.scoreDocs;
                            for(ScoreDoc scoreDoc : scoreDocs) {
                                     Documentdoc=searcher.doc(scoreDoc.doc);
                                     System.out.println(doc.get("id")+ "---->"
                                                        +doc.get("filename") + "[" + doc.get("fullpath")
                                                        +"]-->\n" + doc.get("contents").substring(0,40) );
                            }
                   }catch (CorruptIndexException e) {
                            e.printStackTrace();
                   }catch (IOException e) {
                            e.printStackTrace();
                   }
         }

 

 

8. FuzzyQuery—模糊匹配

public void searchByFuzzy(int num) {
                   try{
                            IndexSearchersearcher=new IndexSearcher(indexReader);
                            FuzzyQueryquery = new FuzzyQuery(new Term("contens","安全"),0.3f,0);
                            TopDocs  topDocs= searcher.search(query, num);
                            intlength= topDocs.totalHits;
                            System.out.println("总共查询出来总数:"+length);
                            ScoreDoc[]scoreDocs= topDocs.scoreDocs;
                            for(ScoreDoc scoreDoc : scoreDocs) {
                                     Documentdoc=searcher.doc(scoreDoc.doc);
                                     System.out.println(doc.get("id")+ "---->"
                                                        +doc.get("filename") + "[" + doc.get("fullpath")
                                                        +"]-->\n" + doc.get("contents").substring(0,40) );
                            }
                   }catch (CorruptIndexException e) {
                            e.printStackTrace();
                   }catch (IOException e) {
                            e.printStackTrace();
                   }
         }

 

 

3.3 QueryParser

1. QueryParser简介

Mike

默认域包含mike

Mike  john

Mike  OR  john

默认域包含mike或者john

+mike +address:zhaotong

Mike  AND  address:zhaotong

默认域是mike并且address域是zhaotong

id :2

Id域为2

address:kunming -desc:she

address:kunming AND NOT desc:she

address域是kunming并且desc域不是she

(mike OR john) AND address:zhaotong

默认域是mike或者john,并且address域是zhaotong

desc:"she like"

desc域是she like

desc:"happy girl"~5

查找happy和girl之间距离小于5的文档

J*

默认域是j开头

Johe~

模糊搜索johe

id:["1" TO "3"]

id域在1到3之间

 

2. QueryParser实例

public void searchByQueryParse(StringqueryContion,int num) {
                   try{
                            IndexSearchersearcher=new IndexSearcher(indexReader);
                            QueryParserparser = new QueryParser(Version.LUCENE_35,"contents",analyzer);
                            org.apache.lucene.search.Queryquery=parser.parse(queryContion);
                            TopDocs  topDocs= searcher.search(query, num);
                            intlength= topDocs.totalHits;
                            System.out.println("总共查询出来总数:"+length);
                            ScoreDoc[]scoreDocs= topDocs.scoreDocs;
                            for(ScoreDoc scoreDoc : scoreDocs) {
                                     Documentdoc=searcher.doc(scoreDoc.doc);
                                     System.out.println(doc.get("id")+ "---->"
                                                        +doc.get("filename") + "[" + doc.get("fullpath")
                                                        +"]-->\n" + doc.get("contents").substring(0,40) );
                            }
                   }catch (CorruptIndexException e) {
                            e.printStackTrace();
                   }catch (IOException e) {
                            e.printStackTrace();
                   }catch (ParseException e) {
                            e.printStackTrace();
                   }
         }

 

//测试代码
@Test
         publicvoid searchByQueryParse(){
                   SearchOperaopera=new SearchOpera("D:/luceneIndex/index", analyzer, true);
                   opera.searchByQueryParse("filename:[aTO z]",10);
                   //opera.searchByQueryParse("filename:{aTO g}",10);
                   //没有办法匹配数字范围(自己扩展Parser)
                   //opera.searchByQueryParse("size:[200TO 13000]",10);
                   
                   //完全匹配
                   //opera.searchByQueryParse("contents:\"完全是宠溺\"",10);
                   //距离为1匹配
                   //opera.searchByQueryParse("contents:\"完全宠溺\"~1",10);
                   //模糊查询
                   //opera.searchByQueryParse("contents:*",10);
         }

 

 

3.4 分页搜索

1.第一种方式:再查询

public void searchPage(String queryContion,intpageIndex,int pageSize){
       try{
           IndexSearchersearcher=new IndexSearcher(indexReader);
           QueryParserparser = new QueryParser(Version.LUCENE_35,"contents",analyzer);
           org.apache.lucene.search.Queryquery=parser.parse(queryContion);
           TopDocs  topDocs= searcher.search(query, 500);
           intlength= topDocs.totalHits;
           System.out.println("总共查询出来总数:"+length);
           ScoreDoc[]scoreDocs= topDocs.scoreDocs;
           intstart = (pageIndex-1)*pageSize;
           intend = pageIndex*pageSize;
           for(int i=start;i<end;i++) {
               Documentdoc=searcher.doc(scoreDocs[i].doc);
               System.out.println(doc.get("id")+ "---->"
                                  +doc.get("filename") + "[" + doc.get("fullpath")
                                  +"]-->\n" + doc.get("contents").substring(0,40) );
            }
         }catch (CorruptIndexException e) {
                 e.printStackTrace();
         }catch (IOException e) {
                e.printStackTrace();
         }catch (ParseException e) {
                e.printStackTrace();
         }
}

 

//测试代码
@Test
publicvoid searchPage(){
        SearchOperaopera=new SearchOpera("D:/luceneIndex/index", analyzer, true);
        opera.searchPage("filename:[aTO z]", 1, 3);
}

 

2.第二种方式:searchAfter

public void searchPageByAfter(String query,intpageIndex,int pageSize) {
                   try{
                            IndexSearchersearcher=new IndexSearcher(indexReader);
                            QueryParserparser = new QueryParser(Version.LUCENE_35,"content",analyzer);
                            Queryq = parser.parse(query);
                            //先获取上一页的最后一个元素
                            ScoreDoclastSd = getLastScoreDoc(pageIndex, pageSize, q, searcher);
                            //通过最后一个元素搜索下页的pageSize个元素
                            TopDocstds = searcher.searchAfter(lastSd,q, pageSize);
                            for(ScoreDocsd:tds.scoreDocs) {
                                     Documentdoc = searcher.doc(sd.doc);
                                     System.out.println(sd.doc+":"+doc.get("path")+"-->"+doc.get("filename"));
                            }
                            searcher.close();
                   }catch (org.apache.lucene.queryParser.ParseException e) {
                            e.printStackTrace();
                   }catch (IOException e) {
                            e.printStackTrace();
                   }
         }

   

    /**
          * 根据页码和分页大小获取上一次的最后一个ScoreDoc
          */
         privateScoreDoc getLastScoreDoc(int pageIndex,int pageSize,Query query,IndexSearchersearch) throws IOException {
                   if(pageIndex==1)returnnull;//如果是第一页就返回空
                   intnum = pageSize*(pageIndex-1);//获取上一页的数量
                   TopDocstds = search.search(query, num);
                   returntds.scoreDocs[num-1];
         }

   

 

//测试代码
@Test
         publicvoid searchPageByAfter(){
                   SearchOperaopera=new SearchOpera("D:/luceneIndex/index", analyzer, true);
                   opera.searchPageByAfter("filename:[aTO z]",1,3);
         }

本文出自 “梦朝思夕” 博客,请务必保留此出处http://qiangmzsx.blog.51cto.com/2052549/1440489

第三章 检索

标签:lucene 检索

原文地址:http://qiangmzsx.blog.51cto.com/2052549/1440489

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!