标签:Lucene c style class blog code
好几天没更新了。更新一下,方便自己和大家学习。
这是最基本的代码
package index; import java.io.File; import java.io.IOException; import java.util.HashMap; import java.util.Map; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.StaleReaderException; import org.apache.lucene.index.Term; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.LockObtainFailedException; import org.apache.lucene.util.Version; public class IndexUtil { private String[] ids = {"1","2","3","4","5","6"}; private String[] emails = {"aa@bb.org","bb@qq.org","cc@bb.org","dd@aa.org","ee@qq.org","ff@cc.org"}; private String[] contents = { "welcome to visited the space,I like book", "hello boy,I like pingpeng bail", "my name is cc,I like game", "I like football,I like football", "I like football and I like basketball too", "I like movie and swim" }; private int[] attachs = {2,3,1,4,5,5}; private String[] names = {"zhangsan","lisi","john","jetty","mike","jake"}; private Directory directory = null; private Map<String, Float> scores = new HashMap<String, Float>(); public IndexUtil() throws IOException{ scores.put("cc.org", 2.0f); scores.put("aa.org", 1.5f); directory = FSDirectory.open(new File("E:\\lucene20140528\\index")); } public void query() throws Exception{ IndexReader reader = IndexReader.open(directory); //可以有效获取到文档的数量 System.out.println("numDocs"+reader.numDocs()); System.out.println("maxDocs"+reader.maxDoc()); System.out.println("numDeleteDocs"+reader.numDeletedDocs()); } public void index() throws Exception{ 
IndexWriter writer = null; try { writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35))); writer.deleteAll(); //文档Document相当于表中的每一条记录 //域Field相当于表中的每一个字段 Document doc = null; for (int i = 0; i < ids.length; i++) { doc = new Document(); /*Field.Store.YES:表示把内容完全存储到索引里面,可以完全的还原(可以用doc.get()) Field.Store.NO:表示这个域的内容不存储到文件中,但是可以被索引。此时内容无法完全还原 Field.Index(索引选项) Index.ANALYZED:进行分词和索引,适合于标题、内容等 Index.NOT_ANALYZED:进行索引、但是不进行分词、例如身份证号,姓名,ID等,适用于精确搜索 Index.ANALYZED_NOT_NORMS:进行分词但是存储norms信息,这些norms信息包含创建索引的时间和权值等 Index.NOT_ANALYZED_NOT_NORMS:即不进行分词也不存储norms信息 Index.NO:不进行索引 */ doc.add(new Field("id",ids[i],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS)); doc.add(new Field("emails",emails[i],Field.Store.YES,Field.Index.NOT_ANALYZED)); doc.add(new Field("content",contents[i],Field.Store.YES,Field.Index.ANALYZED)); doc.add(new Field("name",names[i],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS)); String et = emails[i].substring(emails[i].lastIndexOf("@")+1); //加权 if(scores.containsKey(et)){ doc.setBoost(scores.get(et)); }else { doc.setBoost(0.5f); } writer.addDocument(doc); } } catch (CorruptIndexException e) { e.printStackTrace(); } catch (LockObtainFailedException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); }finally{ if(writer!=null){ writer.close(); } } } public void delete(){ IndexWriter writer = null; try { writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_35,new StandardAnalyzer(Version.LUCENE_35))); //参数是一个选项,可以是一个query,也可以是一个term(精确查找的值) writer.deleteDocuments(new Term("id", 1+"")); writer.close(); System.out.println("ole"); } catch (CorruptIndexException e) { e.printStackTrace(); } catch (LockObtainFailedException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } public void forceDelete() throws Exception{ IndexWriter writer = null; writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_35, new 
StandardAnalyzer(Version.LUCENE_35))); writer.forceMergeDeletes(); writer.close(); } public void merge() throws Exception{ IndexWriter writer = null; writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_35,new StandardAnalyzer(Version.LUCENE_35))); //将索引合并为2段,这两段中的删除的数据将会被删除 writer.forceMerge(2); writer.close(); } public void undelete() throws Exception{ //使用IndexReader进行恢复(恢复时必须把indexReader的只读设置为false) IndexReader reader = null; try { reader = IndexReader.open(directory,false); } catch (CorruptIndexException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } reader.undeleteAll(); reader.close(); } public void update() throws Exception{ IndexWriter writer = null; writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_35,new StandardAnalyzer(Version.LUCENE_35))); /** * lucene 并没有提供更新,这里的更新操作就是如下两个操作的合集 * 先删除后添加 */ Document doc = new Document(); doc.add(new Field("id","11",Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS)); doc.add(new Field("emails",emails[0],Field.Store.YES,Field.Index.NOT_ANALYZED)); doc.add(new Field("content",contents[0],Field.Store.NO,Field.Index.ANALYZED)); doc.add(new Field("name",names[0],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS)); writer.updateDocument(new Term("id", "1"), doc); writer.close(); } public void search() throws Exception{ IndexReader reader = IndexReader.open(directory); IndexSearcher searcher = new IndexSearcher(reader); TermQuery query = new TermQuery(new Term("content", "like")); TopDocs tds = searcher.search(query, 10); for (ScoreDoc sd : tds.scoreDocs) { Document doc = searcher.doc(sd.doc); System.out.println(doc.get("name")+":"+doc.get("emails")); } } }
测试类贴上:
package test;

import index.IndexUtil;

import org.junit.Test;

/**
 * JUnit entry points for the {@link IndexUtil} demo. Each test builds a fresh
 * {@code IndexUtil} and invokes exactly one of its operations; nothing is
 * asserted, the output is inspected by hand.
 */
public class MyTest {

    /** Rebuilds the sample index. */
    @Test
    public void testIndex() throws Exception {
        new IndexUtil().index();
    }

    /** Prints the document counts of the index. */
    @Test
    public void testQuery() throws Exception {
        new IndexUtil().query();
    }

    /** Runs the delete operation. */
    @Test
    public void testDelete() throws Exception {
        new IndexUtil().delete();
    }

    /** Runs the undelete operation. */
    @Test
    public void testUnDelete() throws Exception {
        new IndexUtil().undelete();
    }

    /** Runs the force-delete operation. */
    @Test
    public void testForceDelete() throws Exception {
        new IndexUtil().forceDelete();
    }

    /** Runs the merge operation. */
    @Test
    public void testMerge() throws Exception {
        new IndexUtil().merge();
    }

    /** Runs the update operation. */
    @Test
    public void testUpdate() throws Exception {
        new IndexUtil().update();
    }

    /** Runs the sample search and prints the hits. */
    @Test
    public void testSearch() throws Exception {
        new IndexUtil().search();
    }
}
今天看到了一个好用的工具 Luke(每个版本的 Lucene 都会有对应的这么一个工具,用来查看二进制的索引文件,非常不错)。
标签:Lucene c style class blog code
原文地址:http://www.cnblogs.com/mrgong/p/3764776.html