标签:apache java lucene search content
一 DirectoryProvider 提供索引的存储方式
package com;

import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;

/**
 * Factory for the {@link Directory} instances in which an index is stored.
 */
public class DirectoryProvider {

    /**
     * Creates a directory that keeps the index in memory.
     *
     * @return a new {@link RAMDirectory}
     */
    public static Directory createRAMDirectory() {
        Directory inMemory = new RAMDirectory();
        return inMemory;
    }

    /**
     * Opens a directory that keeps the index on disk.
     *
     * @param filePath location of the index on the file system
     * @return the directory opened by {@link FSDirectory#open(Path)}
     * @throws RuntimeException wrapping the {@link IOException} raised while opening
     */
    public static Directory createFSDirectory(String filePath) {
        Path indexLocation = Paths.get(filePath);
        Directory onDisk;
        try {
            onDisk = FSDirectory.open(indexLocation);
        } catch (IOException openFailure) {
            throw new RuntimeException(openFailure);
        }
        return onDisk;
    }
}
二 DocumentLoader 创建文档
package com;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;

/**
 * Singleton that turns the files of a directory into Lucene {@link Document}s.
 *
 * <p>Each document carries two stored text fields: {@code filename} and
 * {@code content}.
 */
public class DocumentLoader {

    /** Directory scanned by {@link #loadDefaultDocuments()}. */
    private static final String DEFAULT_DIRECTORY = "D:\\";

    private static final DocumentLoader documentLoader = new DocumentLoader();

    private DocumentLoader() {
    }

    /**
     * Returns the shared loader instance.
     *
     * @return the singleton {@code DocumentLoader}
     */
    public static DocumentLoader getInstance() {
        return documentLoader;
    }

    /**
     * Loads one document per regular file found directly in the default
     * directory ({@code D:\}).
     *
     * @return the loaded documents, possibly empty
     * @throws IllegalArgumentException if the default directory cannot be listed
     * @throws RuntimeException if a file cannot be read
     */
    public List<Document> loadDefaultDocuments() {
        return loadDocuments(DEFAULT_DIRECTORY);
    }

    /**
     * Loads one document per regular file found directly in the given directory.
     * Subdirectories are skipped; the scan is not recursive.
     *
     * @param directoryPath path of the directory to scan
     * @return the loaded documents, possibly empty
     * @throws IllegalArgumentException if the path is not a directory or cannot be listed
     * @throws RuntimeException if a file cannot be read
     */
    public List<Document> loadDocuments(String directoryPath) {
        File dir = new File(directoryPath);
        File[] files = dir.listFiles();
        // listFiles() returns null when the path is not a directory or an I/O error occurs.
        if (files == null) {
            throw new IllegalArgumentException("Not a readable directory: " + directoryPath);
        }

        List<Document> documents = new ArrayList<Document>(files.length);
        for (File file : files) {
            // A directory cannot be opened by FileReader; skip it instead of aborting the load.
            if (!file.isFile()) {
                continue;
            }
            String filename = file.getName();
            String content = readFileContent(file);

            Document document = new Document();
            document.add(new TextField("filename", filename, Field.Store.YES));
            document.add(new TextField("content", content, Field.Store.YES));
            documents.add(document);
        }
        return documents;
    }

    /**
     * Reads a text file completely, terminating every line with {@code "\r\n"}.
     *
     * <p>The file is decoded by {@link FileReader}, i.e. with the platform
     * default charset.
     *
     * @param file the file to read
     * @return the file content with normalised line terminators
     * @throws RuntimeException wrapping the {@link IOException} if the file
     *         cannot be opened, read or closed
     */
    public String readFileContent(File file) {
        StringBuilder sb = new StringBuilder();
        // try-with-resources closes the reader on every path.
        try (BufferedReader br = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = br.readLine()) != null) {
                sb.append(line).append("\r\n");
            }
        } catch (IOException e) {
            throw new RuntimeException("Failed to read file: " + file, e);
        }
        return sb.toString();
    }
}
三 IndexCreator 创建索引
package com;

import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;

/**
 * Writes a list of documents into an index.
 */
public class IndexCreator {

    /**
     * Adds every document to the index stored in {@code indexDirectory}.
     *
     * <p>No open mode is set on the writer configuration, so the Lucene default
     * applies. NOTE(review): per the Lucene documentation that default is
     * {@code CREATE_OR_APPEND}, so running this repeatedly against the same
     * on-disk directory would add the documents again; call
     * {@code config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)} to overwrite
     * instead.
     *
     * @param analyzer analyzer used to tokenize the document fields
     * @param indexDirectory directory that receives the index
     * @param documents documents to add
     * @return {@code indexDirectory}, for call chaining
     * @throws Exception if the writer cannot be opened, a document cannot be
     *         added, or the writer cannot be closed
     */
    public static Directory createIndex(Analyzer analyzer, Directory indexDirectory, List<Document> documents)
            throws Exception {
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        // try-with-resources closes the writer even when addDocument fails,
        // so it is never left open after an error.
        try (IndexWriter indexWriter = new IndexWriter(indexDirectory, config)) {
            for (Document document : documents) {
                indexWriter.addDocument(document);
            }
        }
        return indexDirectory;
    }
}
四 Searcher 查询器,查询指定文档
package com;

import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.store.Directory;

/**
 * Runs query-parser searches against the index held in a {@link Directory}.
 */
public class Searcher {

    /** Upper bound on the number of hits fetched per query. */
    private static final int MAX_HITS = 1000;

    private final StandardAnalyzer analyzer;
    private final Directory indexDirectory;

    /**
     * @param analyzer analyzer used to parse query strings
     * @param indexDirectory directory containing the index to search
     */
    public Searcher(StandardAnalyzer analyzer, Directory indexDirectory) {
        this.analyzer = analyzer;
        this.indexDirectory = indexDirectory;
    }

    /**
     * Searches the index and returns the stored documents of the top hits.
     *
     * @param queryKey default field the query is evaluated against
     * @param queryValue query text in classic query-parser syntax
     * @return the matching documents in hit order, at most {@code 1000}
     * @throws Exception if the index cannot be opened or read, or the query
     *         text cannot be parsed
     */
    public List<Document> query(String queryKey, String queryValue) throws Exception {
        // try-with-resources closes the reader even when parsing or searching fails.
        try (DirectoryReader directoryReader = DirectoryReader.open(indexDirectory)) {
            IndexSearcher isearcher = new IndexSearcher(directoryReader);
            QueryParser parser = new QueryParser(queryKey, analyzer);
            Query query = parser.parse(queryValue);

            ScoreDoc[] hits = isearcher.search(query, MAX_HITS).scoreDocs;
            List<Document> docs = new ArrayList<Document>(hits.length);
            for (ScoreDoc hit : hits) {
                // Stored fields must be fetched while the reader is still open.
                docs.add(isearcher.doc(hit.doc));
            }
            return docs;
        }
    }
}
五 Main 测试
package com;

import java.io.IOException;
import java.util.List;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.store.Directory;

/**
 * Demo entry point: indexes the default documents in memory and searches them.
 */
public class Main {

    /**
     * Builds an in-memory index from the default documents, searches the
     * {@code filename} field for {@code artist} and prints the hits.
     *
     * @param args unused
     * @throws Exception if indexing or searching fails
     */
    public static void main(String[] args) throws Exception {
        // To keep the index on disk instead, obtain the directory with
        // DirectoryProvider.createFSDirectory("E:\\mine\\j2ee\\Lucene\\src\\indexes").
        // Analyzer and directory are both closeable; release them when the demo ends.
        try (StandardAnalyzer analyzer = new StandardAnalyzer();
             Directory indexDirectory = DirectoryProvider.createRAMDirectory()) {

            List<Document> loadedDocuments = DocumentLoader.getInstance().loadDefaultDocuments();
            IndexCreator.createIndex(analyzer, indexDirectory, loadedDocuments);

            Searcher searcher = new Searcher(analyzer, indexDirectory);
            List<Document> res = searcher.query("filename", "artist");

            printResult(res);
        }
    }

    /**
     * Prints the number of hits followed by the file name and content of each.
     *
     * @param documents documents returned by the search
     */
    private static void printResult(List<Document> documents) {
        System.out.println("found " + documents.size());
        for (Document document : documents) {
            String filename = document.get("filename");
            String content = document.get("content");
            System.out.println("filename:" + filename);
            System.out.println("content:\n" + content);
        }
    }
}
标签:apache java lucene search content
原文地址:http://antlove.blog.51cto.com/10057557/1662434