lucene学习笔记(二)

时间：2014-05-29 22:51:15 阅读：317 评论：0 收藏：0 [点我收藏+]

package index;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;

/**
 * Small Lucene 3.5 demo helper that builds an index from a fixed in-memory
 * data set and reports document counts from that index.
 *
 * <p>Each sample record is spread across the parallel arrays below; element
 * {@code i} of every array belongs to the same record.
 */
public class IndexUtil {
    /** Index location used by the no-argument constructor. */
    private static final String DEFAULT_INDEX_PATH = "E:\\lucene20140528\\index";

    private String[] ids = {"1","2","3","4","5","6"};
    private String[] emails = {"aa@qq.org","bb@qq.org","cc@qq.org","dd@qq.org","ee@qq.org","ff@qq.org"};
    private String[] contents = {
            "welcome to visited the space",
            "hello boy",
            "my name is cc",
            "I like football",
            "I like football and I like basketball too",
            "I like movie and swim"
            };
    // Not written to the index by index() in this version of the class.
    private int[] attachs = {2,3,1,4,5,5};
    private String[] names = {"zhangsan","lisi","john","jetty","mike","jake"};
    private Directory directory = null;

    /**
     * Opens the index directory at the default hard-coded location.
     *
     * @throws IOException if the directory cannot be opened
     */
    public IndexUtil() throws IOException{
        this(new File(DEFAULT_INDEX_PATH));
    }

    /**
     * Opens the index directory at the given location.
     *
     * @param indexDir file-system directory that holds (or will hold) the index
     * @throws IOException if the directory cannot be opened
     */
    public IndexUtil(File indexDir) throws IOException{
        directory = FSDirectory.open(indexDir);
    }

    /**
     * Prints the values of {@code numDocs()} and {@code maxDoc()} for the index
     * to standard output.
     *
     * @throws Exception if the index cannot be opened or read
     */
    public void query() throws Exception{
        IndexReader reader = IndexReader.open(directory);
        try {
            // The reader gives direct access to the document counts.
            System.out.println("numDocs"+reader.numDocs());
            System.out.println("maxDocs"+reader.maxDoc());
        } finally {
            // Always release the reader; the original version leaked it.
            reader.close();
        }
    }

    /**
     * Adds one document per sample record to the index.
     *
     * <p>I/O failures while opening the writer or adding documents are printed
     * with {@code printStackTrace()} and not rethrown; only a failure while
     * closing the writer propagates to the caller.
     *
     * @throws Exception if closing the writer fails
     */
    public void index() throws Exception{
        IndexWriter writer = null;
        try {
            writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
            // A Document is comparable to one row of a table,
            // and a Field to one column of that row.
            Document doc = null;
            for (int i = 0; i < ids.length; i++) {
                doc = new Document();
                /*
                 * Field.Store (storage options)
                 *   Store.YES: the value is stored in the index and can be read
                 *              back in full (e.g. with doc.get()).
                 *   Store.NO:  the value is not stored; it can still be indexed,
                 *              but the original text cannot be restored.
                 *
                 * Field.Index (indexing options)
                 *   Index.ANALYZED:              tokenize and index; suits titles, body text, etc.
                 *   Index.NOT_ANALYZED:          index as a single term without tokenizing;
                 *                                suits exact-match values such as IDs or names.
                 *   Index.ANALYZED_NO_NORMS:     tokenize and index, but do NOT store norms
                 *                                (norms carry index-time boost / length
                 *                                normalization information).
                 *   Index.NOT_ANALYZED_NO_NORMS: neither tokenize nor store norms.
                 *   Index.NO:                    do not index the field at all.
                 */
                doc.add(new Field("id",ids[i],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
                doc.add(new Field("emails",emails[i],Field.Store.YES,Field.Index.NOT_ANALYZED));
                doc.add(new Field("content",contents[i],Field.Store.NO,Field.Index.ANALYZED));
                doc.add(new Field("name",names[i],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
                writer.addDocument(doc);
            }
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }finally{
            // Close the writer even when indexing failed part-way through.
            if(writer!=null){
                writer.close();
            }
        }

    }
}

package test;


import index.IndexUtil;

import org.junit.Test;

/**
 * JUnit entry points that drive {@link IndexUtil}: one test builds the index,
 * the other prints its document counts.
 */
public class MyTest {

    /**
     * Creates an {@link IndexUtil} and runs its indexing step.
     *
     * @throws Exception if the utility cannot be created or indexing fails
     */
    @Test
    public void testIndex() throws Exception {
        new IndexUtil().index();
    }

    /**
     * Creates an {@link IndexUtil} and runs its count-reporting step.
     *
     * @throws Exception if the utility cannot be created or the index cannot be read
     */
    @Test
    public void testQuery() throws Exception {
        final IndexUtil indexUtil = new IndexUtil();
        indexUtil.query();
    }
}

lucene学习笔记(二),布布扣,bubuko.com

lucene学习笔记(二)

标签：Lucene c style class blog code

原文地址：http://www.cnblogs.com/mrgong/p/3758080.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)

周排行