标签:
package com.lin.util; import java.io.File; import java.io.FileFilter; import java.io.FileReader; import java.io.IOException; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; import org.wltea.analyzer.lucene.IKAnalyzer; public class Indexer { private IndexWriter writer; /** * 主程序 * * @param indexDir * 索引位置 * @param dataDir * 数据来源 * @throws Exception */ public static void index(String indexDir, String dataDir) throws Exception { if (indexDir == null || dataDir == null) { throw new IllegalArgumentException("请检查你的参数是否正确"); } long start = System.currentTimeMillis(); Indexer indexer = new Indexer(indexDir); int numIndexed; try { numIndexed = indexer.index(dataDir, new TextFilesFilter()); } finally { indexer.close(); } long end = System.currentTimeMillis(); System.out.println("Indexing " + numIndexed + " files took " + (end - start) + " milliseconds"); } /** * 初始化writer(用与建立索引) * * @param indexDir * @throws IOException */ private Indexer(String indexDir) throws IOException { Directory dir = FSDirectory.open(new File(indexDir)); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_2, new IKAnalyzer()); writer = new IndexWriter(dir, config); } /** * * @param dataDir * @param filter * @return 索引的文件数 * @throws IOException */ public int index(String dataDir, FileFilter filter) throws IOException { File[] files = new File(dataDir).listFiles(); for (File f : files) { if (!f.isDirectory() && !f.isHidden() && f.canRead() && f.exists() && (filter == null || filter.accept(f))) { indexFile(f); } } return writer.numDocs(); } private void indexFile(File f) throws IOException { System.out.println("indexing " + f.getCanonicalPath()); Document doc = getDocument(f); writer.addDocument(doc); } @SuppressWarnings("deprecation") protected Document getDocument(File f) throws IOException { Document doc = new Document(); doc.add(new Field("contents", new FileReader(f))); doc.add(new Field("filename", f.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("fullpath", f.getCanonicalPath(), Field.Store.YES, Field.Index.NOT_ANALYZED)); return doc; } /** * 文件过滤器 * * @author zan * */ private static class TextFilesFilter implements FileFilter { public boolean accept(File f) { return f.getName().toLowerCase().endsWith(".txt"); } } public void close() throws IOException { if (writer != null) { writer.close(); } } public static void main(String[] args) throws Exception { Indexer.index("d:\\index", "D:\\Program Files\\TortoiseSVN"); } }
标签:
原文地址:http://blog.csdn.net/hackcoder/article/details/42080877