标签:Lucene blog http java get 使用
使用Lucene来搜索内容时,搜索结果的显示顺序当然是比较重要的.Lucene中内置(built-in)的几种排序方式在大多数情况下并不适合我们使用.要适合自己的应用程序的场景,就只能自定义排序功能,本节我们就来看看在Lucene中如何实现自定义排序功能.
Lucene中的自定义排序功能和Java集合中的自定义排序的实现方法差不多,都要实现一下比较接口.
在Java中只要实现Comparable接口就可以了.但是在Lucene中要实现SortComparatorSource接口和
ScoreDocComparator接口.在了解具体实现方法之前先来看看这两个接口的定义吧.
SortComparatorSource接口的功能是返回一个用来排序ScoreDocs的comparator(Expert: returns a comparator for sorting ScoreDocs).该接口只定义了一个方法.如下:
- public ScoreDocComparator newComparator(IndexReader reader,String fieldname) throws IOException
-
- public ScoreDocComparator newComparator(IndexReader reader,String fieldname) throws IOException
该方法只是创建一个ScoreDocComparator实例用来实现排序,所以我们还要实现ScoreDocComparator接口.来看看ScoreDocComparator接口:它的功能是比较两个ScoreDoc对象来排序(Compares two ScoreDoc objects for sorting),里面定义了两个由Lucene实现的静态实例,如下:
-
- public static final ScoreDocComparator RELEVANCE;
-
- public static final ScoreDocComparator INDEXORDER;
-
- public static final ScoreDocComparator RELEVANCE;
-
- public static final ScoreDocComparator INDEXORDER;
-
有3个方法与排序相关,需要我们实现 分别如下:
-
- public int compare(ScoreDoc i,ScoreDoc j);
-
- public Comparable sortValue(ScoreDoc i);
-
- public int sortType();
-
-
- public int compare(ScoreDoc i,ScoreDoc j);
-
-
- public Comparable sortValue(ScoreDoc i);
-
-
- public int sortType();
看个例子吧!
该例子为Lucene in Action中的一个实现,用来搜索距你最近的餐馆的名字. 餐馆坐标用字符串"x,y"来存储.
-
- package com.nikee.lucene;
-
- import java.io.IOException;
-
- import org.apache.lucene.index.IndexReader;
- import org.apache.lucene.index.Term;
- import org.apache.lucene.index.TermDocs;
- import org.apache.lucene.index.TermEnum;
- import org.apache.lucene.search.ScoreDoc;
- import org.apache.lucene.search.ScoreDocComparator;
- import org.apache.lucene.search.SortComparatorSource;
- import org.apache.lucene.search.SortField;
-
- public class DistanceComparatorSource implements SortComparatorSource {
- private static final long serialVersionUID = 1L;
-
-
- private int x;
- private int y;
-
- public DistanceComparatorSource(int x, int y) {
- this.x = x;
- this.y = y;
- }
-
-
- public ScoreDocComparator newComparator(IndexReader reader, String fieldname) throws IOException {
- return new DistanceScoreDocLookupComparator(reader, fieldname, x, y);
- }
-
-
- private static class DistanceScoreDocLookupComparator implements ScoreDocComparator {
- private float[] distances;
-
-
- public DistanceScoreDocLookupComparator(IndexReader reader, String fieldname, int x, int y) throws IOException {
- System.out.println("fieldName2="+fieldname);
- final TermEnum enumerator = reader.terms(new Term(fieldname, ""));
-
- System.out.println("maxDoc="+reader.maxDoc());
- distances = new float[reader.maxDoc()];
- if (distances.length > 0) {
- TermDocs termDocs = reader.termDocs();
- try {
- if (enumerator.term() == null) {
- throw new RuntimeException("no terms in field " + fieldname);
- }
- int i = 0,j = 0;
- do {
- System.out.println("in do-while :" + i ++);
- Term term = enumerator.term();
- if (term.field() != fieldname)
- break;
-
-
-
- termDocs.seek(enumerator);
- while (termDocs.next()) {
- System.out.println(" in while :" + j ++);
- System.out.println(" in while ,Term :" + term.toString());
-
- String[] xy = term.text().split(",");
- int deltax = Integer.parseInt(xy[0]) - x;
- int deltay = Integer.parseInt(xy[1]) - y;
-
- distances[termDocs.doc()] = (float) Math.sqrt(deltax * deltax + deltay * deltay);
- }
- }
- while (enumerator.next());
- } finally {
- termDocs.close();
- }
- }
- }
-
-
- public int compare(ScoreDoc i, ScoreDoc j) {
- if (distances[i.doc] < distances[j.doc])
- return -1;
- if (distances[i.doc] > distances[j.doc])
- return 1;
- return 0;
- }
-
-
- public Comparable sortValue(ScoreDoc i) {
- return new Float(distances[i.doc]);
- }
-
-
- public int sortType() {
- return SortField.FLOAT;
- }
- }
-
- public String toString() {
- return "Distance from (" + x + "," + y + ")";
- }
- }
-
- package com.nikee.lucene;
-
- import java.io.IOException;
-
- import org.apache.lucene.index.IndexReader;
- import org.apache.lucene.index.Term;
- import org.apache.lucene.index.TermDocs;
- import org.apache.lucene.index.TermEnum;
- import org.apache.lucene.search.ScoreDoc;
- import org.apache.lucene.search.ScoreDocComparator;
- import org.apache.lucene.search.SortComparatorSource;
- import org.apache.lucene.search.SortField;
-
- public class DistanceComparatorSource implements SortComparatorSource {
- private static final long serialVersionUID = 1L;
-
-
- private int x;
- private int y;
-
- public DistanceComparatorSource(int x, int y) {
- this.x = x;
- this.y = y;
- }
-
-
- public ScoreDocComparator newComparator(IndexReader reader, String fieldname) throws IOException {
- return new DistanceScoreDocLookupComparator(reader, fieldname, x, y);
- }
-
-
- private static class DistanceScoreDocLookupComparator implements ScoreDocComparator {
- private float[] distances;
-
-
- public DistanceScoreDocLookupComparator(IndexReader reader, String fieldname, int x, int y) throws IOException {
- System.out.println("fieldName2="+fieldname);
- final TermEnum enumerator = reader.terms(new Term(fieldname, ""));
-
- System.out.println("maxDoc="+reader.maxDoc());
- distances = new float[reader.maxDoc()];
- if (distances.length > 0) {
- TermDocs termDocs = reader.termDocs();
- try {
- if (enumerator.term() == null) {
- throw new RuntimeException("no terms in field " + fieldname);
- }
- int i = 0,j = 0;
- do {
- System.out.println("in do-while :" + i ++);
- Term term = enumerator.term();
- if (term.field() != fieldname)
- break;
-
-
-
- termDocs.seek(enumerator);
- while (termDocs.next()) {
- System.out.println(" in while :" + j ++);
- System.out.println(" in while ,Term :" + term.toString());
-
- String[] xy = term.text().split(",");
- int deltax = Integer.parseInt(xy[0]) - x;
- int deltay = Integer.parseInt(xy[1]) - y;
-
- distances[termDocs.doc()] = (float) Math.sqrt(deltax * deltax + deltay * deltay);
- }
- }
- while (enumerator.next());
- } finally {
- termDocs.close();
- }
- }
- }
-
-
- public int compare(ScoreDoc i, ScoreDoc j) {
- if (distances[i.doc] < distances[j.doc])
- return -1;
- if (distances[i.doc] > distances[j.doc])
- return 1;
- return 0;
- }
-
-
- public Comparable sortValue(ScoreDoc i) {
- return new Float(distances[i.doc]);
- }
-
-
- public int sortType() {
- return SortField.FLOAT;
- }
- }
-
- public String toString() {
- return "Distance from (" + x + "," + y + ")";
- }
- }
这是分别实现了上面两个接口的两个类, 里面带有详细注释, 可以看出自定义排序并不是很难的. 该实现是否正确, 我们来看看测试代码能否通过吧.
-
- package com.nikee.lucene.test;
-
- import java.io.IOException;
-
- import junit.framework.TestCase;
-
- import org.apache.lucene.analysis.WhitespaceAnalyzer;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.index.Term;
- import org.apache.lucene.search.FieldDoc;
- import org.apache.lucene.search.Hits;
- import org.apache.lucene.search.IndexSearcher;
- import org.apache.lucene.search.Query;
- import org.apache.lucene.search.ScoreDoc;
- import org.apache.lucene.search.Sort;
- import org.apache.lucene.search.SortField;
- import org.apache.lucene.search.TermQuery;
- import org.apache.lucene.search.TopFieldDocs;
- import org.apache.lucene.store.RAMDirectory;
-
- import com.nikee.lucene.DistanceComparatorSource;
-
- public class DistanceComparatorSourceTest extends TestCase {
- private RAMDirectory directory;
-
- private IndexSearcher searcher;
- private Query query;
-
-
- protected void setUp() throws Exception {
- directory = new RAMDirectory();
- IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
-
- addPoint(writer, "El Charro", "restaurant", 1, 2);
- addPoint(writer, "Cafe Poca Cosa", "restaurant", 5, 9);
- addPoint(writer, "Los Betos", "restaurant", 9, 6);
- addPoint(writer, "Nico‘s Taco Shop", "restaurant", 3, 8);
-
- writer.close();
- searcher = new IndexSearcher(directory);
- query = new TermQuery(new Term("type", "restaurant"));
- }
-
- private void addPoint(IndexWriter writer, String name, String type, int x, int y) throws IOException {
- Document doc = new Document();
- doc.add(new Field("name", name, Field.Store.YES, Field.Index.TOKENIZED));
- doc.add(new Field("type", type, Field.Store.YES, Field.Index.TOKENIZED));
- doc.add(new Field("location", x + "," + y, Field.Store.YES, Field.Index.UN_TOKENIZED));
- writer.addDocument(doc);
- }
-
- public void testNearestRestaurantToHome() throws Exception {
-
- Sort sort = new Sort(new SortField("location", new DistanceComparatorSource(0, 0)));
- Hits hits = searcher.search(query, sort);
-
-
- assertEquals("closest", "El Charro", hits.doc(0).get("name"));
- assertEquals("furthest", "Los Betos", hits.doc(3).get("name"));
- }
-
- public void testNeareastRestaurantToWork() throws Exception {
- Sort sort = new Sort(new SortField("location", new DistanceComparatorSource(10, 10)));
-
-
- TopFieldDocs docs = searcher.search(query, null, 3, sort);
-
- assertEquals(4, docs.totalHits);
- assertEquals(3, docs.scoreDocs.length);
-
-
- FieldDoc fieldDoc = (FieldDoc) docs.scoreDocs[0];
-
- assertEquals("(10,10) -> (9,6) = sqrt(17)", new Float(Math.sqrt(17)), fieldDoc.fields[0]);
- Document document = searcher.doc(fieldDoc.doc);
- assertEquals("Los Betos", document.get("name"));
- dumpDocs(sort, docs);
- }
-
-
- private void dumpDocs(Sort sort, TopFieldDocs docs) throws IOException {
- System.out.println("Sorted by: " + sort);
- ScoreDoc[] scoreDocs = docs.scoreDocs;
- for (int i = 0; i < scoreDocs.length; i++) {
- FieldDoc fieldDoc = (FieldDoc) scoreDocs[i];
- Float distance = (Float) fieldDoc.fields[0];
- Document doc = searcher.doc(fieldDoc.doc);
- System.out.println(" " + doc.get("name") + " @ (" + doc.get("location") + ") -> " + distance);
- }
- }
- }
-
- package com.nikee.lucene.test;
-
- import java.io.IOException;
-
- import junit.framework.TestCase;
-
- import org.apache.lucene.analysis.WhitespaceAnalyzer;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.index.Term;
- import org.apache.lucene.search.FieldDoc;
- import org.apache.lucene.search.Hits;
- import org.apache.lucene.search.IndexSearcher;
- import org.apache.lucene.search.Query;
- import org.apache.lucene.search.ScoreDoc;
- import org.apache.lucene.search.Sort;
- import org.apache.lucene.search.SortField;
- import org.apache.lucene.search.TermQuery;
- import org.apache.lucene.search.TopFieldDocs;
- import org.apache.lucene.store.RAMDirectory;
-
- import com.nikee.lucene.DistanceComparatorSource;
-
- public class DistanceComparatorSourceTest extends TestCase {
- private RAMDirectory directory;
-
- private IndexSearcher searcher;
- private Query query;
-
-
- protected void setUp() throws Exception {
- directory = new RAMDirectory();
- IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
-
- addPoint(writer, "El Charro", "restaurant", 1, 2);
- addPoint(writer, "Cafe Poca Cosa", "restaurant", 5, 9);
- addPoint(writer, "Los Betos", "restaurant", 9, 6);
- addPoint(writer, "Nico‘s Taco Shop", "restaurant", 3, 8);
-
- writer.close();
- searcher = new IndexSearcher(directory);
- query = new TermQuery(new Term("type", "restaurant"));
- }
-
- private void addPoint(IndexWriter writer, String name, String type, int x, int y) throws IOException {
- Document doc = new Document();
- doc.add(new Field("name", name, Field.Store.YES, Field.Index.TOKENIZED));
- doc.add(new Field("type", type, Field.Store.YES, Field.Index.TOKENIZED));
- doc.add(new Field("location", x + "," + y, Field.Store.YES, Field.Index.UN_TOKENIZED));
- writer.addDocument(doc);
- }
-
- public void testNearestRestaurantToHome() throws Exception {
-
- Sort sort = new Sort(new SortField("location", new DistanceComparatorSource(0, 0)));
- Hits hits = searcher.search(query, sort);
-
-
- assertEquals("closest", "El Charro", hits.doc(0).get("name"));
- assertEquals("furthest", "Los Betos", hits.doc(3).get("name"));
- }
-
- public void testNeareastRestaurantToWork() throws Exception {
- Sort sort = new Sort(new SortField("location", new DistanceComparatorSource(10, 10)));
-
-
- TopFieldDocs docs = searcher.search(query, null, 3, sort);
-
- assertEquals(4, docs.totalHits);
- assertEquals(3, docs.scoreDocs.length);
-
-
- FieldDoc fieldDoc = (FieldDoc) docs.scoreDocs[0];
-
- assertEquals("(10,10) -> (9,6) = sqrt(17)", new Float(Math.sqrt(17)), fieldDoc.fields[0]);
- Document document = searcher.doc(fieldDoc.doc);
- assertEquals("Los Betos", document.get("name"));
- dumpDocs(sort, docs);
- }
-
-
- private void dumpDocs(Sort sort, TopFieldDocs docs) throws IOException {
- System.out.println("Sorted by: " + sort);
- ScoreDoc[] scoreDocs = docs.scoreDocs;
- for (int i = 0; i < scoreDocs.length; i++) {
- FieldDoc fieldDoc = (FieldDoc) scoreDocs[i];
- Float distance = (Float) fieldDoc.fields[0];
- Document doc = searcher.doc(fieldDoc.doc);
- System.out.println(" " + doc.get("name") + " @ (" + doc.get("location") + ") -> " + distance);
- }
- }
- }
转载 http://zhxmyself.iteye.com/blog/478638
Lucene 中自定义排序的实现,布布扣,bubuko.com
Lucene 中自定义排序的实现
标签:Lucene blog http java get 使用
原文地址:http://www.cnblogs.com/chenying99/p/3814019.html