标签:Lucene style blog http java color
package org.itat.test;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Minimal Lucene 3.5 walkthrough: index the files of a folder, then run a
 * keyword query against the resulting on-disk index.
 *
 * @author Administrator
 */
public class HelloLucene {

    /**
     * Builds the index under {@code D:/lucene/index01} from the regular files
     * found directly inside {@code D:\lucene\example}.
     *
     * <p>Each file becomes one document with three fields: {@code content}
     * (read from the file), {@code file_name} and {@code path} (both stored,
     * not analyzed).
     *
     * <p>Note: the index is built incrementally, so running this method again
     * adds the same files to the existing index a second time.
     *
     * <p>I/O failures are printed to standard error and not rethrown.
     */
    public void index() {
        Directory directory = null;
        IndexWriter writer = null;
        try {
            // (1) Open the index directory on disk (a RAMDirectory would keep it in memory).
            directory = FSDirectory.open(new File("D:/lucene/index01"));

            // (2) Create the IndexWriter used to write the index; it must be closed after use.
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_35,
                    new StandardAnalyzer(Version.LUCENE_35));
            writer = new IndexWriter(directory, iwc);

            // (3) Enumerate the files to index. listFiles() returns null when the
            // path is missing, is not a directory, or cannot be read.
            File sourceDir = new File("D:\\lucene\\example");
            File[] candidates = sourceDir.listFiles();
            if (candidates == null) {
                System.err.println("Cannot list source directory: " + sourceDir.getAbsolutePath());
                return;
            }

            for (File file : candidates) {
                // Sub-directories cannot be opened with a FileReader; skip them
                // so that one nested folder does not abort the whole run.
                if (!file.isFile()) {
                    continue;
                }
                addFileDocument(writer, file);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close the writer first so pending changes are written before the
            // directory is released.
            closeAndReport(writer);
            closeAndReport(directory);
        }
    }

    /**
     * Searches the {@code content} field of the index under
     * {@code D:\lucene\index01} for the term {@code England} and prints, for
     * each of the top 10 hits, the document id followed by the stored
     * {@code file_name} and {@code path} values.
     *
     * <p>I/O and query-parse failures are printed to standard error and not
     * rethrown.
     */
    public void searcher() {
        Directory directory = null;
        IndexReader reader = null;
        IndexSearcher searcher = null;
        try {
            // (1) Open the directory the index was written to.
            directory = FSDirectory.open(new File("D:\\lucene\\index01"));

            // (2) Create the IndexReader that reads the index.
            reader = IndexReader.open(directory);

            // (3) Create the IndexSearcher on top of the reader.
            searcher = new IndexSearcher(reader);

            // (4) Build the query: the parser's default field is "content",
            // analyzed the same way as at indexing time.
            QueryParser parser = new QueryParser(Version.LUCENE_35, "content",
                    new StandardAnalyzer(Version.LUCENE_35));
            // Matches documents whose content contains "England".
            Query query = parser.parse("England");

            // (5) Execute the search, keeping at most the 10 best hits.
            TopDocs tds = searcher.search(query, 10);

            // (6) scoreDocs is a public field holding the individual hits.
            ScoreDoc[] sds = tds.scoreDocs;
            for (ScoreDoc sd : sds) {
                // (7) Resolve the hit's document id to the stored Document.
                System.out.println("sd.doc " + sd.doc);
                Document document = searcher.doc(sd.doc);

                // (8) Print the stored field values.
                System.out.println(document.get("file_name") + "][ " + document.get("path"));
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        } finally {
            // (9) Release resources in reverse order of acquisition.
            closeAndReport(searcher);
            closeAndReport(reader);
            closeAndReport(directory);
        }
    }

    /**
     * Adds one document describing {@code file} to the index.
     *
     * @param writer open writer that receives the document
     * @param file   regular file to index
     * @throws IOException if the file cannot be opened or the document cannot be written
     */
    private void addFileDocument(IndexWriter writer, File file) throws IOException {
        FileReader contentReader = new FileReader(file);
        try {
            Document document = new Document();
            // Content is supplied as a Reader and consumed while the document is added.
            document.add(new Field("content", contentReader));
            // Field.Store.YES keeps the value in the index; NOT_ANALYZED indexes
            // it as a single token because names and paths need no tokenizing.
            document.add(new Field("file_name", file.getName(),
                    Field.Store.YES, Field.Index.NOT_ANALYZED));
            document.add(new Field("path", file.getAbsolutePath(),
                    Field.Store.YES, Field.Index.NOT_ANALYZED));
            writer.addDocument(document);
        } finally {
            // Guarantees the file handle is released even when addDocument fails.
            closeAndReport(contentReader);
        }
    }

    /**
     * Closes {@code resource} if it is non-null, printing (not rethrowing) any
     * failure so that the remaining resources can still be closed.
     *
     * @param resource resource to close; may be {@code null}
     */
    private static void closeAndReport(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
测试类的代码:
package org.itat.test; import org.junit.Test; public class TestHelloLucene { @Test public void testIndex(){ HelloLucene hl = new HelloLucene(); hl.index(); } @Test public void testSearcher(){ HelloLucene hl = new HelloLucene(); hl.searcher(); // 0 1 2 3 // 4 5 6 7 // 8 9 10 11 } }
标签:Lucene style blog http java color
原文地址:http://www.cnblogs.com/liwei1419/p/3798435.html