标签:
1、下载Lucene开发包,请到:http://lucene.apache.org/
2、在MyEclipse环境中部署该开发包:
3、代码编写:
package Lucene;

import java.io.File;
import java.io.FileFilter;
import java.io.FileReader;
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Builds a Lucene index from the plain files found directly inside a data directory.
 *
 * <p>Each accepted file becomes one document with three fields: {@code contents}
 * (read from the file), {@code filename} and {@code fullpath}.
 *
 * @author Administrator
 */
public class Indexer {

    /**
     * Indexes every {@code .txt} file in the hard-coded data directory and prints
     * how many documents were indexed and how long it took.
     *
     * @param args unused
     * @throws Exception if the index cannot be opened, written, or closed
     */
    public static void main(String[] args) throws Exception {
        String indexDir = "E:\\index";      // directory in which the index is created
        String dataDir = "E:\\dataSource";  // directory whose ".txt" files are indexed

        long start = System.currentTimeMillis();
        Indexer indexer = new Indexer(indexDir);
        int numIndexed;
        try {
            numIndexed = indexer.index(dataDir, new TextFilesFilter());
        } finally {
            // Always release the writer, even when indexing fails part-way.
            indexer.close();
        }
        long end = System.currentTimeMillis();

        System.out.println("索引 " + numIndexed + " 文件花费 " + (end - start) + "ms");
    }

    /** Writer that all documents are added to; opened in the constructor. */
    private final IndexWriter writer;

    /**
     * Opens a Lucene {@link IndexWriter} on the given directory.
     *
     * <p>{@code Version.LUCENE_30} is the compatibility version handed to the analyzer;
     * Lucene matches its behaviour to the version supplied here.
     *
     * @param indexDir path of the directory that holds the index
     * @throws IOException if the directory or the writer cannot be opened
     */
    public Indexer(String indexDir) throws IOException {
        Directory dir = FSDirectory.open(new File(indexDir));
        // NOTE(review): the boolean 'true' is presumably the "create" flag, i.e. an
        // existing index in this directory is replaced — confirm against the Lucene 3.0 API.
        writer = new IndexWriter(
                dir,
                new StandardAnalyzer(Version.LUCENE_30),
                true,
                IndexWriter.MaxFieldLength.UNLIMITED);
    }

    /**
     * Closes the underlying index writer.
     *
     * @throws IOException if closing the writer fails
     */
    public void close() throws IOException {
        writer.close();
    }

    /**
     * Indexes the files that sit directly inside {@code dataDir} (no recursion).
     *
     * <p>A file is indexed when it is not a directory, is not hidden, exists, is
     * readable, and is accepted by {@code filter}.
     *
     * @param dataDir path of the directory to scan
     * @param filter  decides which files are indexed; {@code null} accepts every file
     * @return the value of {@code writer.numDocs()} after indexing
     * @throws IOException if {@code dataDir} cannot be listed (missing, not a
     *                     directory, or an I/O error occurred)
     * @throws Exception   if reading a file or adding a document fails
     */
    public int index(String dataDir, FileFilter filter) throws Exception {
        File[] files = new File(dataDir).listFiles();
        if (files == null) {
            // listFiles() signals "not a directory" / I/O failure with null; report it
            // explicitly instead of failing with a NullPointerException in the loop.
            throw new IOException("Cannot list files in data directory: " + dataDir);
        }

        for (File f : files) {
            if (isReadableFile(f) && (filter == null || filter.accept(f))) {
                indexFile(f);
            }
        }
        return writer.numDocs();
    }

    /** Returns true for an existing, visible, readable non-directory entry. */
    private static boolean isReadableFile(File f) {
        return !f.isDirectory() && !f.isHidden() && f.exists() && f.canRead();
    }

    /** Accepts only files whose name ends in {@code .txt} (case-insensitive). */
    private static class TextFilesFilter implements FileFilter {
        @Override
        public boolean accept(File pathname) {
            return pathname.getName().toLowerCase().endsWith(".txt");
        }
    }

    /**
     * Builds the Lucene document that represents one file.
     *
     * @param f the file to describe
     * @return a document with the fields {@code contents}, {@code filename} and
     *         {@code fullpath}
     * @throws Exception if the canonical path cannot be resolved or the file cannot
     *                   be opened
     */
    protected Document getDocument(File f) throws Exception {
        // Resolve the path before opening the reader so that a failure here cannot
        // leave an open FileReader behind.
        String fullPath = f.getCanonicalPath();

        Document doc = new Document();
        // File contents, supplied as a Reader.
        doc.add(new Field("contents", new FileReader(f)));
        // File name, stored and indexed as a single un-analyzed term.
        doc.add(new Field("filename", f.getName(),
                Field.Store.YES, Field.Index.NOT_ANALYZED));
        // Canonical path, stored and indexed as a single un-analyzed term.
        doc.add(new Field("fullpath", fullPath,
                Field.Store.YES, Field.Index.NOT_ANALYZED));
        return doc;
    }

    /**
     * Logs the file being processed and adds its document to the index.
     *
     * @param f the file to index
     * @throws Exception if the document cannot be built or added
     */
    private void indexFile(File f) throws Exception {
        System.out.println("Indexing " + f.getCanonicalPath());
        Document doc = getDocument(f);
        writer.addDocument(doc);
    }
}
这时编译运行代码,如果没出错的话,会出现下面的结果:
Indexing E:\dataSource\1.txt
Indexing E:\dataSource\2.txt
Indexing E:\dataSource\3.txt
Indexing E:\dataSource\4.txt
索引 4 文件花费 259ms
参考:http://biancheng.dnbcw.info/1000wen/448393.html
标签:
原文地址:http://www.cnblogs.com/milantgh/p/4714019.html