标签:
package com.fxr.test2;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import net.paoding.analysis.analyzer.PaodingAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
public class TestFileIndex {
    /**
     * Builds a fresh Lucene index from the text files found in the data directory.
     *
     * <p>Each regular file becomes one document with two stored, analyzed fields:
     * {@code fileName} (the file's name) and {@code contents} (the file's text).
     * The file name and the text read from it are also echoed to standard output.
     *
     * @param args unused
     * @throws IOException if the data directory cannot be listed, a file cannot be
     *         read, or the index cannot be written
     */
    public static void main(String[] args) throws IOException {
        String dataDir = "f:/lucenedata";
        String indexDir = "f:/ChinesLuceneIndex";
        File[] files = new File(dataDir).listFiles();
        // listFiles() returns null when the path is missing or not a directory.
        if (files == null) {
            throw new IOException("Cannot list data directory: " + dataDir);
        }
        System.out.println(files.length);
        Analyzer analyzer = new PaodingAnalyzer();
        Directory directory = FSDirectory.getDirectory(indexDir);
        try {
            // 'true' creates the index, overwriting any existing one.
            IndexWriter indexWriter =
                    new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
            try {
                for (int i = 0; i < files.length; i++) {
                    // Sub-directories cannot be opened as streams; index regular files only.
                    if (!files[i].isFile()) {
                        continue;
                    }
                    System.out.println(files[i].getName());
                    String contents = readContents(files[i]);
                    Document document = new Document();
                    document.add(new Field("fileName", files[i].getName(), Field.Store.YES, Field.Index.ANALYZED));
                    document.add(new Field("contents", contents, Field.Store.YES, Field.Index.ANALYZED));
                    indexWriter.addDocument(document);
                    System.out.println(contents);
                }
            } finally {
                // Always close the writer so it is released even when indexing fails.
                indexWriter.close();
            }
        } finally {
            directory.close();
        }
    }

    /**
     * Reads a whole file as text, terminating every line with {@code "\n"}.
     *
     * <p>NOTE(review): the file is decoded with the platform default charset, as in
     * the original code; confirm the data files actually use that encoding.
     *
     * @param file the file to read
     * @return the file's text, one {@code "\n"} appended after each line
     * @throws IOException if the file cannot be opened or read
     */
    private static String readContents(File file) throws IOException {
        StringBuilder text = new StringBuilder();
        BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(file.getCanonicalFile())));
        try {
            String line = reader.readLine();
            while (line != null) {
                text.append(line);
                text.append("\n");
                line = reader.readLine();
            }
        } finally {
            // Closing the reader also closes the wrapped file stream.
            reader.close();
        }
        return text.toString();
    }
}
package com.fxr.test2;
import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
public class TestFileSearch {
    /**
     * Searches the index for documents whose {@code contents} field contains the
     * term "中国" and prints, for up to 100 hits, the score followed by the stored
     * {@code fileName} and {@code contents} fields.
     *
     * @param args unused
     * @throws IOException if the index cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        String indexDir = "f:/ChinesLuceneIndex";
        Directory directory = FSDirectory.getDirectory(indexDir);
        try {
            IndexSearcher indexSearcher = new IndexSearcher(directory);
            try {
                TermQuery query = new TermQuery(new Term("contents", "中国"));
                TopDocs topDocs = indexSearcher.search(query, 100);
                ScoreDoc[] hits = topDocs.scoreDocs;
                for (int i = 0; i < hits.length; i++) {
                    Document doc = indexSearcher.doc(hits[i].doc);
                    System.out.print(hits[i].score + " ");
                    System.out.println(doc.get("fileName") + " ");
                    System.out.println(doc.get("contents") + " ");
                }
            } finally {
                // Close the searcher even when the search or document fetch fails.
                indexSearcher.close();
            }
        } finally {
            directory.close();
        }
    }
}
package com.fxr.test2;
import java.io.IOException;
import net.paoding.analysis.analyzer.PaodingAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
public class TestIndex {
    /**
     * Demonstrates Chinese word segmentation by indexing four sample records.
     *
     * <p>Each record becomes one document with the stored, analyzed fields
     * {@code id}, {@code name}, {@code address} and {@code birthday}. Prints
     * "ok!" once the index has been written.
     *
     * @param args unused
     * @throws IOException if the index cannot be created or written
     */
    public static void main(String[] args) throws IOException {
        String[] ids = {"1", "2", "3", "4"};
        String[] names = {"张三", "李逵", "zhangsan", "zhangsun"};
        String[] addresses = {"居住在北京", "居住在南京", "北京海淀", "nanning"};
        String[] birthdays = {"19820720", "19840203", "19770409", "19830130"};
        Analyzer analyzer = new PaodingAnalyzer();
        String indexDir = "f:/ChinesLuceneIndex";
        Directory directory = FSDirectory.getDirectory(indexDir);
        try {
            // true creates or overwrites the current index; false appends to it.
            IndexWriter indexWriter =
                    new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
            try {
                for (int i = 0; i < ids.length; i++) {
                    Document document = new Document();
                    document.add(new Field("id", ids[i], Field.Store.YES, Field.Index.ANALYZED));
                    document.add(new Field("name", names[i], Field.Store.YES, Field.Index.ANALYZED));
                    document.add(new Field("address", addresses[i], Field.Store.YES, Field.Index.ANALYZED));
                    document.add(new Field("birthday", birthdays[i], Field.Store.YES, Field.Index.ANALYZED));
                    indexWriter.addDocument(document);
                }
                indexWriter.optimize();
            } finally {
                // Always close the writer so it is released even when indexing fails.
                indexWriter.close();
            }
        } finally {
            // The directory is closed here as in the other example programs.
            directory.close();
        }
        System.out.println("ok!");
    }
}
============================================
package com.fxr.test2;
import java.io.IOException;
import net.paoding.analysis.analyzer.PaodingAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
public class TestQueryParser {
    /**
     * Parses a boolean query against the {@code address} field and prints, for up
     * to 100 hits, the score followed by the stored {@code id}, {@code name},
     * {@code address} and {@code birthday} fields.
     *
     * @param args unused
     * @throws IOException if the index cannot be opened or read
     * @throws ParseException if the query string cannot be parsed
     */
    public static void main(String[] args) throws IOException, ParseException {
        String indexDir = "f:/ChinesLuceneIndex";
        Analyzer analyzer = new PaodingAnalyzer();
        Directory directory = FSDirectory.getDirectory(indexDir);
        try {
            IndexSearcher indexSearcher = new IndexSearcher(directory);
            try {
                QueryParser parser = new QueryParser("address", analyzer);
                // Boolean operators must be delimited by whitespace; written as
                // ")AND北京" the parser would read "AND北京" as an ordinary term
                // instead of applying the AND operator.
                Query query = parser.parse("(海淀 OR 居住) AND 北京");
                TopDocCollector topdoc = new TopDocCollector(100);
                indexSearcher.search(query, topdoc);
                ScoreDoc[] hits = topdoc.topDocs().scoreDocs;
                for (int i = 0; i < hits.length; i++) {
                    Document doc = indexSearcher.doc(hits[i].doc);
                    System.out.print(hits[i].score + " ");
                    System.out.print(doc.get("id") + " ");
                    System.out.print(doc.get("name") + " ");
                    System.out.print(doc.get("address") + " ");
                    System.out.println(doc.get("birthday") + " ");
                }
            } finally {
                // Close the searcher even when parsing or searching fails.
                indexSearcher.close();
            }
        } finally {
            directory.close();
        }
    }
}
==========================================
package com.fxr.test2;
import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
public class TestSearch {
    /**
     * Runs a wildcard query ({@code *海*}) against the {@code address} field and
     * prints, for up to 100 hits, the score followed by the stored {@code id},
     * {@code name}, {@code address} and {@code birthday} fields.
     *
     * @param args unused
     * @throws IOException if the index cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        String indexDir = "f:/ChinesLuceneIndex";
        Directory dir = FSDirectory.getDirectory(indexDir);
        try {
            IndexSearcher indexSearcher = new IndexSearcher(dir);
            try {
                Term term = new Term("address", "*海*");
                // The same term could instead be wrapped in a TermQuery (exact match)
                // or a PrefixQuery (prefix match); this example uses a wildcard match.
                WildcardQuery wildcardQuery = new WildcardQuery(term);
                TopDocs topDocs = indexSearcher.search(wildcardQuery, 100);
                ScoreDoc[] hits = topDocs.scoreDocs;
                for (int i = 0; i < hits.length; i++) {
                    Document doc = indexSearcher.doc(hits[i].doc);
                    // Separate the score from the id so the two values do not run together.
                    System.out.print(hits[i].score + " ");
                    System.out.print(doc.get("id") + " ");
                    System.out.print(doc.get("name") + " ");
                    System.out.print(doc.get("address") + " ");
                    System.out.println(doc.get("birthday") + " ");
                }
            } finally {
                // Close the searcher even when the search or document fetch fails.
                indexSearcher.close();
            }
        } finally {
            dir.close();
        }
    }
}
标签:
原文地址:http://www.cnblogs.com/aicpcode/p/4303247.html