标签:
内容如标题所示:对上一节的代码进行重构,大体思路如下:
两个工具类StringUtils和TxtUtils。
StringUtils,主要是获取当前系统的换行符:
package com.zhyea.util; public class StringUtils { public static final String NEWLINE = System.getProperty("line.separator"); }
TxtUtils,主要用于读取txt文件。这里使用了一个自定义类FileCharsetDetector来检测文件的编码,其实现可参见原文中的超链接:
package com.zhyea.util;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;

/**
 * Utility methods for working with plain-text (txt) files.
 *
 * @author robin
 */
public class TxtUtils {

    /**
     * Determines the charset name to use when decoding a txt file.
     *
     * <p>Delegates to {@code FileCharsetDetector.checkEncoding(File)}. A detected
     * value of {@code "windows-1252"} is reported as {@code "Unicode"}; any other
     * value is returned unchanged.
     *
     * @param file
     *            txt file to inspect
     * @return the charset name to decode the file with
     * @throws IOException
     *             propagated from the charset detector
     */
    public static String checkEncode(File file) throws IOException {
        String encode = FileCharsetDetector.checkEncoding(file);
        return (encode.equals("windows-1252") ? "Unicode" : encode);
    }

    /**
     * Reads the whole content of a txt file into a string.
     *
     * <p>The file is decoded with the charset returned by
     * {@link #checkEncode(File)} and read line by line. Every line, including the
     * last one, is terminated with {@link StringUtils#NEWLINE}, so the original
     * line endings are not preserved.
     *
     * @param file
     *            txt file to read
     * @return the file content, each line followed by the platform line separator
     * @throws IOException
     *             if the encoding cannot be determined or the file cannot be
     *             opened or read
     */
    public static String readTxt(File file) throws IOException {
        String encode = checkEncode(file);
        // try-with-resources closes both streams on every path. The previous
        // unconditional reader.close() in a finally block threw a
        // NullPointerException (hiding the real failure) whenever the reader
        // had not been created, and leaked the FileInputStream when the
        // charset name was rejected by InputStreamReader.
        try (FileInputStream in = new FileInputStream(file);
                BufferedReader reader = new BufferedReader(new InputStreamReader(in, encode))) {
            StringBuilder builder = new StringBuilder();
            String content;
            while (null != (content = reader.readLine())) {
                builder.append(content).append(StringUtils.NEWLINE);
            }
            return builder.toString();
        }
    }
}
然后是拆分后的Lucene操作类:
package com.zhyea.doggie;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

import com.zhyea.util.TxtUtils;

/**
 * Static helpers wrapping the individual Lucene steps used by the demo:
 * creating an analyzer, writing an index from local text files, opening the
 * index and running a query against it.
 */
public class DoggieLucene {

    /**
     * Maximum number of hits requested from a single search.
     */
    private static final int MAX_HITS = 10000;

    /**
     * Most recently created analyzer, reused while the requested class stays the same.
     */
    private static Analyzer analyzer;

    /**
     * Returns an analyzer of the given class.
     *
     * <p>The previously created instance is returned when it is of exactly the
     * requested class; otherwise a new instance is created through the class's
     * no-argument constructor and remembered for later calls.
     *
     * @param clazz
     *            analyzer class to instantiate
     * @return an analyzer instance of {@code clazz}
     * @throws InstantiationException
     *             if the class cannot be instantiated
     * @throws IllegalAccessException
     *             if the class or its no-argument constructor is not accessible
     */
    public static Analyzer createAnalyzer(Class<?> clazz)
            throws InstantiationException, IllegalAccessException {
        if (null != analyzer && analyzer.getClass().equals(clazz)) {
            return analyzer;
        }
        return analyzer = (Analyzer) clazz.newInstance();
    }

    /**
     * Creates an index writer that stores its index on the file system.
     *
     * @param analyzer
     *            analyzer used by the writer
     * @param indexPath
     *            directory in which the index is stored
     * @return a new index writer for {@code indexPath}
     * @throws IOException
     *             if the index directory or the writer cannot be opened
     */
    public static IndexWriter createIndexWriter(Analyzer analyzer, String indexPath)
            throws IOException {
        // Directory holding the index files.
        Directory dir = FSDirectory.open(new File(indexPath));
        // Writer configuration bound to the given analyzer.
        IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer);
        return new IndexWriter(dir, config);
    }

    /**
     * Indexes every entry of a local directory as one document.
     *
     * <p>Each document gets a stored {@code path} field (the canonical path of
     * the file) and a stored {@code content} field (the text returned by
     * {@code TxtUtils.readTxt}). The writer is committed once after all
     * documents have been added.
     *
     * @param writer
     *            index writer that receives the documents
     * @param localDocPath
     *            directory containing the text files to index
     * @throws IOException
     *             if {@code localDocPath} cannot be listed as a directory, or a
     *             file cannot be read or written to the index
     */
    public static void addLocalDocument(IndexWriter writer, String localDocPath)
            throws IOException {
        File directory = new File(localDocPath);
        // listFiles() returns null when the path is not a directory or cannot
        // be read; fail with a descriptive error instead of a NullPointerException.
        File[] files = directory.listFiles();
        if (null == files) {
            throw new IOException("Cannot list files in directory: " + localDocPath);
        }
        for (File tmp : files) {
            Document doc = new Document();
            doc.add(new StringField("path", tmp.getCanonicalPath(), Field.Store.YES));
            doc.add(new TextField("content", TxtUtils.readTxt(tmp), Field.Store.YES));
            writer.addDocument(doc);
        }
        // Commit once for the whole batch rather than once per document.
        writer.commit();
    }

    /**
     * Opens an index reader on an index stored on the file system.
     *
     * @param indexPath
     *            directory in which the index is stored
     * @return a reader for the index at {@code indexPath}
     * @throws IOException
     *             if the index cannot be opened
     */
    public static IndexReader createIndexReader(String indexPath) throws IOException {
        return DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    }

    /**
     * Creates an index searcher on top of an index reader.
     *
     * @param reader
     *            index reader to search through
     * @return a searcher backed by {@code reader}
     */
    public static IndexSearcher createIndexSearcher(IndexReader reader) {
        return new IndexSearcher(reader);
    }

    /**
     * Runs a query and returns at most {@value #MAX_HITS} hits.
     *
     * @param searcher
     *            searcher to run the query with
     * @param query
     *            query to execute
     * @return the top matching documents
     * @throws IOException
     *             if reading the index fails
     */
    public static TopDocs executeSearch(IndexSearcher searcher, Query query)
            throws IOException {
        return searcher.search(query, MAX_HITS);
    }

    /**
     * Prints the score and the stored {@code path} field of every hit to
     * standard output, one hit per line.
     *
     * @param docs
     *            search result to print
     * @param reader
     *            index reader used to load the stored fields of each hit
     * @throws IOException
     *             if a document cannot be loaded from the index
     */
    public static void showResult(TopDocs docs, IndexReader reader) throws IOException {
        for (ScoreDoc tmp : docs.scoreDocs) {
            Document doc = reader.document(tmp.doc);
            System.out.println(tmp.score + " " + doc.get("path"));
        }
    }
}
拆的比较琐碎了,凑合看吧。
创建索引的类:
package com.zhyea.doggie;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.index.IndexWriter;

import com.zhyea.util.FileUtil;

/**
 * Command-line entry point that builds the Lucene index from the local
 * document directory.
 */
public class IndexTest {

    /** Directory in which the index is stored. */
    String indexPath = "D:\\aqsiqDevelop\\workspace3\\doggie\\WebContent\\index";

    /** Directory containing the text files to index. */
    String docPath = "D:\\aqsiqDevelop\\workspace3\\doggie\\WebContent\\docs";

    /**
     * Builds the index and prints the stack trace of any failure.
     *
     * @param args
     *            ignored
     */
    public static void main(String[] args) {
        try {
            new IndexTest().createIndex();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Indexes every file under {@link #docPath} into {@link #indexPath} using
     * a {@link SmartChineseAnalyzer}, closing the writer afterwards.
     *
     * @throws IOException
     *             if the index cannot be written or a document cannot be read
     * @throws InstantiationException
     *             if the analyzer cannot be instantiated
     * @throws IllegalAccessException
     *             if the analyzer constructor is not accessible
     */
    private void createIndex()
            throws IOException, InstantiationException, IllegalAccessException {
        Analyzer chineseAnalyzer = DoggieLucene.createAnalyzer(SmartChineseAnalyzer.class);
        IndexWriter indexWriter = DoggieLucene.createIndexWriter(chineseAnalyzer, indexPath);
        // The writer exists from here on, so it is always closed.
        try {
            DoggieLucene.addLocalDocument(indexWriter, docPath);
        } finally {
            indexWriter.close();
        }
    }
}
执行搜索的类:
package com.zhyea.doggie;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;

/**
 * Command-line entry point that runs a sample query against the index and
 * prints the hits.
 */
public class SearchTest {

    /** Directory in which the index is stored. */
    String indexPath = "D:\\aqsiqDevelop\\workspace3\\doggie\\WebContent\\index";

    /**
     * Runs the sample search and prints the stack trace of any failure.
     *
     * @param args
     *            ignored
     */
    public static void main(String[] args) {
        try {
            new SearchTest().executeSearch();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Opens the index at {@link #indexPath}, searches the {@code content}
     * field for a fixed term using a {@link SmartChineseAnalyzer}, prints the
     * hits and closes the reader.
     *
     * @throws ParseException
     *             if the query string cannot be parsed
     * @throws IOException
     *             if the index cannot be opened or read
     * @throws InstantiationException
     *             if the analyzer cannot be instantiated
     * @throws IllegalAccessException
     *             if the analyzer constructor is not accessible
     */
    public void executeSearch()
            throws ParseException, IOException, InstantiationException, IllegalAccessException {
        IndexReader indexReader = DoggieLucene.createIndexReader(indexPath);
        // The reader exists from here on, so it is always closed.
        try {
            IndexSearcher indexSearcher = DoggieLucene.createIndexSearcher(indexReader);
            Analyzer chineseAnalyzer = DoggieLucene.createAnalyzer(SmartChineseAnalyzer.class);
            QueryParser parser = new QueryParser("content", chineseAnalyzer);
            Query query = parser.parse("杨过");
            TopDocs hits = DoggieLucene.executeSearch(indexSearcher, query);
            DoggieLucene.showResult(hits, indexReader);
        } finally {
            indexReader.close();
        }
    }
}
OK。
标签:
原文地址:http://www.cnblogs.com/amunote/p/4178542.html