标签:Lucene c style class blog code
package index; import java.io.File; import java.io.IOException; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.LockObtainFailedException; import org.apache.lucene.util.Version; public class IndexUtil { private String[] ids = {"1","2","3","4","5","6"}; private String[] emails = {"aa@qq.org","bb@qq.org","cc@qq.org","dd@qq.org","ee@qq.org","ff@qq.org"}; private String[] contents = { "welcome to visited the space", "hello boy", "my name is cc", "I like football", "I like football and I like basketball too", "I like movie and swim" }; private int[] attachs = {2,3,1,4,5,5}; private String[] names = {"zhangsan","lisi","john","jetty","mike","jake"}; private Directory directory = null; public IndexUtil() throws IOException{ directory = FSDirectory.open(new File("E:\\lucene20140528\\index")); } public void query() throws Exception{ IndexReader reader = IndexReader.open(directory); //可以有效获取到文档的数量 System.out.println("numDocs"+reader.numDocs()); System.out.println("maxDocs"+reader.maxDoc()); } public void index() throws Exception{ IndexWriter writer = null; try { writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35))); //文档Document相当于表中的每一条记录 //域Field相当于表中的每一个字段 Document doc = null; for (int i = 0; i < ids.length; i++) { doc = new Document(); /*Field.Store.YES:表示把内容完全存储到索引里面,可以完全的还原(可以用doc.get()) Field.Store.NO:表示这个域的内容不存储到文件中,但是可以被索引。此时内容无法完全还原 Field.Index(索引选项) Index.ANALYZED:进行分词和索引,适合于标题、内容等 Index.NOT_ANALYZED:进行索引、但是不进行分词、例如身份证号,姓名,ID等,适用于精确搜索 Index.ANALYZED_NOT_NORMS:进行分词但是存储norms信息,这些norms信息包含创建索引的时间和权值等 
Index.NOT_ANALYZED_NOT_NORMS:即不进行分词也不存储norms信息 Index.NO:不进行索引 */ doc.add(new Field("id",ids[i],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS)); doc.add(new Field("emails",emails[i],Field.Store.YES,Field.Index.NOT_ANALYZED)); doc.add(new Field("content",contents[i],Field.Store.NO,Field.Index.ANALYZED)); doc.add(new Field("name",names[i],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS)); writer.addDocument(doc); } } catch (CorruptIndexException e) { e.printStackTrace(); } catch (LockObtainFailedException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); }finally{ if(writer!=null){ writer.close(); } } } }
package test;

import index.IndexUtil;

import org.junit.Test;

/**
 * JUnit driver for {@link IndexUtil}: one test builds the sample index, the
 * other prints the document counts read back from it.
 */
public class MyTest {

    /**
     * Creates an {@link IndexUtil} and runs {@link IndexUtil#index()}.
     *
     * @throws Exception if the helper cannot be created or indexing fails
     */
    @Test
    public void testIndex() throws Exception {
        new IndexUtil().index();
    }

    /**
     * Creates an {@link IndexUtil} and runs {@link IndexUtil#query()}.
     *
     * @throws Exception if the helper cannot be created or the query fails
     */
    @Test
    public void testQuery() throws Exception {
        new IndexUtil().query();
    }
}
标签:Lucene c style class blog code
原文地址:http://www.cnblogs.com/mrgong/p/3758080.html