码迷,mamicode.com
首页 > Web开发 > 详细

Lucene3.5自定义评分以及根据域进行自定义评分设定

时间:2015-03-29 15:11:29      阅读:173      评论:0      收藏:0      [点我收藏+]

标签:

一、首先来综述一下Lucene自定义评分的步骤:
1、创建一个评分域
// Step 1: value-source query over the "score" field, read as Type.INT.
FieldScoreQuery fd = new FieldScoreQuery("score", Type.INT);
2、根据评分域和原有的query创建自定义的query对象
// Step 2: combine the original query q with the score-field query fd in the custom query.
MyCustomScoreQuery query = new MyCustomScoreQuery(q, fd);
@SuppressWarnings("serial")
	private class MyCustomScoreQuery extends CustomScoreQuery {

		/**
		 * Pairs an ordinary query with a value-source query over the score field.
		 *
		 * @param subQuery    the query whose matches are re-scored
		 * @param valSrcQuery the query supplying the per-document score-field value
		 */
		public MyCustomScoreQuery(Query subQuery, ValueSourceQuery valSrcQuery) {
			super(subQuery, valSrcQuery);
		}

		/**
		 * Supplies the scoring strategy used for the given reader.
		 * <p>
		 * By default the final score is the original score multiplied by the
		 * score obtained from the score field. To score differently, create a
		 * class extending CustomScoreProvider, override its customScore method,
		 * and return an instance of it from here.
		 *
		 * @param reader the reader the provider will score documents of
		 * @return a MyCustomScoreProvider bound to {@code reader}
		 */
		@Override
		protected CustomScoreProvider getCustomScoreProvider(IndexReader reader)
				throws IOException {
			CustomScoreProvider provider = new MyCustomScoreProvider(reader);
			return provider;
		}
	}



3、创建一个类继承于CustomScoreProvider,覆盖customScore方法
private class MyCustomScoreProvider extends CustomScoreProvider {

		public MyCustomScoreProvider(IndexReader reader) {
			super(reader);
		}

		/**
		 * Divides the original score by the score-field value, so documents
		 * with a smaller score-field value rank higher.
		 *
		 * @param doc           id of the document being scored
		 * @param subQueryScore the default score of the document
		 * @param valSrcScore   the score derived from the score field
		 * @return {@code subQueryScore / valSrcScore}, or {@code subQueryScore}
		 *         unchanged when {@code valSrcScore} is zero
		 */
		@Override
		public float customScore(int doc, float subQueryScore, float valSrcScore)
				throws IOException {
			// A score-field value of 0 would otherwise produce an Infinity/NaN
			// score; treat it like a divisor of 1 instead.
			if (valSrcScore == 0f) {
				return subQueryScore;
			}
			return subQueryScore / valSrcScore;
		}

	}

二、根据域进行自定义评分设定
1、根据文件后缀名进行自定义评分
private class FilenameScoreQuery extends CustomScoreQuery {

		/**
		 * Wraps a query so its matches are re-scored by FilenameScoreProvider.
		 *
		 * @param subQuery the query whose matches are re-scored
		 */
		public FilenameScoreQuery(Query subQuery) {
			super(subQuery);
		}

		/**
		 * Returns a FilenameScoreProvider for the given reader in place of the
		 * inherited provider.
		 *
		 * @param reader the reader the provider will score documents of
		 */
		@Override
		protected CustomScoreProvider getCustomScoreProvider(IndexReader reader)
				throws IOException {
			CustomScoreProvider provider = new FilenameScoreProvider(reader);
			return provider;
		}
	}
	
	private class FilenameScoreProvider extends CustomScoreProvider {
		/** Factor by which scores are multiplied (.txt/.ini) or divided (everything else). */
		private static final float FACTOR = 1.5f;

		/** "filename" value per document, from the field cache; stays null if loading failed. */
		String [] filenames = null;

		public FilenameScoreProvider(IndexReader reader) {
			super(reader);
			try {
				// While the reader is open, field values are available from the
				// field cache; this fetches the "filename" value of every document.
				filenames = FieldCache.DEFAULT.getStrings(reader, "filename");
			} catch (IOException e) {
				e.printStackTrace();
			}
		}

		/**
		 * Boosts documents whose filename ends with ".txt" or ".ini" and
		 * demotes all other documents.
		 *
		 * @param doc           id of the document being scored
		 * @param subQueryScore the default score of the document
		 * @param valSrcScore   the value-source score
		 * @return the adjusted score; the inherited default score when no
		 *         filename is available for {@code doc}
		 */
		@Override
		public float customScore(int doc, float subQueryScore, float valSrcScore)
				throws IOException {
			// The cache is null if loading failed in the constructor, and an
			// entry is null for a document without a "filename" term; in both
			// cases fall back to the default score instead of throwing an NPE.
			String filename = (filenames == null) ? null : filenames[doc];
			if (filename == null) {
				return super.customScore(doc, subQueryScore, valSrcScore);
			}
			if (filename.endsWith(".txt") || filename.endsWith(".ini")) {
				return subQueryScore * FACTOR;
			}
			return subQueryScore / FACTOR;
		}
	}
2、根据日期进行自定义评分
private class DateScoreProvider extends CustomScoreProvider {
		/** "date" value per document, from the field cache; stays null if loading failed. */
		long[] dates = null;

		public DateScoreProvider(IndexReader reader) {
			super(reader);
			try {
				dates = FieldCache.DEFAULT.getLongs(reader, "date");
			} catch (IOException e) {
				e.printStackTrace();
			}
		}

		/**
		 * Skeleton for date-based scoring: detects documents whose "date" lies
		 * within the last year. The boost itself is still to be filled in, so
		 * the inherited default score is always returned.
		 *
		 * @param doc           id of the document being scored
		 * @param subQueryScore the default score of the document
		 * @param valSrcScore   the value-source score
		 */
		@Override
		public float customScore(int doc, float subQueryScore, float valSrcScore)
				throws IOException {
			// dates is null if the cache could not be loaded in the constructor.
			if (dates != null) {
				long date = dates[doc];
				long today = new Date().getTime();
				// One year in milliseconds. The long literal is required: the
				// product exceeds Integer.MAX_VALUE. (The previous expression
				// omitted the factor 24 and therefore covered only 365 hours.)
				long year = 1000L * 60 * 60 * 24 * 365;
				if (today - date <= year) {
					// TODO: add the boost for recent documents here
				}
			}

			return super.customScore(doc, subQueryScore, valSrcScore);
		}

	}

Lucene实现自定义评分的关键思想:
调用 IndexSearcher.search 时要传入一个 CustomScoreQuery,并覆盖它的 getCustomScoreProvider 方法,使其返回一个 CustomScoreProvider 对象;再用(匿名)内部类的方式写一个 CustomScoreProvider,覆盖 customScore 方法。这个方法有3个参数:第一个参数代表文档id,第二个参数代表原来的评分,最后一个代表我们设置的评分域的评分。然后我们就可以定义自己的一套评分算法,为我们的搜索制定评分了。

完整代码如下:
1、工具类:
package com.dhb.util;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Random;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;

/**
 * Builds the demo index: one Lucene document per file found in
 * D:/luceneData/example, written to the index at D:/luceneData/files/.
 */
public class FileIndexUtils {
	/** Index directory shared by all callers; stays null if it could not be opened. */
	private static Directory directory = null;
	static {
		try {
			directory = FSDirectory.open(new File("D:/luceneData/files/"));
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * @return the index directory, or null if opening it failed
	 */
	public static Directory getDirectory() {
		return directory;
	}

	/**
	 * Indexes every regular file in the example directory.
	 *
	 * @param hasNew when true, all existing documents are deleted first so the
	 *               index is rebuilt from scratch
	 */
	public static void index(boolean hasNew) {
		IndexWriter writer = null;
		try {
			IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_35,
					new StandardAnalyzer(Version.LUCENE_35));
			writer = new IndexWriter(directory, iwc);
			// Rebuild the index from scratch when requested.
			if (hasNew) {
				writer.deleteAll();
			}

			File sourceDir = new File("D:/luceneData/example");
			// listFiles() returns null when the path is not a readable directory.
			File[] files = sourceDir.listFiles();
			if (files == null) {
				System.err.println("Cannot list files in " + sourceDir.getAbsolutePath());
				return;
			}

			Random rand = new Random();
			int index = 0;

			for (File file : files) {
				// Sub-directories cannot be opened with FileReader; skip them
				// rather than aborting the whole indexing run.
				if (!file.isFile()) {
					continue;
				}
				int score = rand.nextInt(600);  // random value used to test custom scoring

				Document doc = new Document();
				// sequential id, used to test custom filters
				doc.add(new Field("id", String.valueOf(index++), Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));

				doc.add(new Field("content", new FileReader(file)));
				doc.add(new Field("filename", file.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED));
				doc.add(new Field("path",file.getAbsolutePath(),Field.Store.YES,Field.Index.NOT_ANALYZED));
				doc.add(new NumericField("date", Field.Store.YES, true).setLongValue(file.lastModified()));
				// Clamp so that files larger than Integer.MAX_VALUE bytes do not wrap around.
				int size = (int) Math.min(file.length(), Integer.MAX_VALUE);
				doc.add(new NumericField("size", Field.Store.YES, true).setIntValue(size));

				doc.add(new NumericField("score", Field.Store.YES, true).setIntValue(score));
				writer.addDocument(doc);
			}
		} catch (IOException e) {
			// Also covers CorruptIndexException and LockObtainFailedException,
			// which are IOException subclasses and were handled identically.
			e.printStackTrace();
		} finally {
			if (writer != null) {
				try {
					writer.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}
}
注意:运行下面的搜索代码之前,需要先自行生成索引(例如调用 FileIndexUtils.index(true))。生成索引的调用代码放在了别处,这里没有贴出。
2、自定义类
package com.dhb.util;

import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.function.CustomScoreProvider;
import org.apache.lucene.search.function.CustomScoreQuery;
import org.apache.lucene.search.function.FieldScoreQuery;
import org.apache.lucene.search.function.FieldScoreQuery.Type;
import org.apache.lucene.search.function.ValueSourceQuery;

public class MyScoreQuery {
	public void searchByScoreQuery() {
		try {
			IndexSearcher searcher = new IndexSearcher(IndexReader.open(FileIndexUtils.getDirectory()));
		    Query q = new TermQuery(new Term("content", "java"));
		    //1、创建一个评分域
		    FieldScoreQuery fd = new FieldScoreQuery("score", Type.INT);
		    //2、根据评分域和原有的query创建自定义的query对象
		    MyCustomScoreQuery query = new MyCustomScoreQuery(q, fd);
		    
		    TopDocs tds = null;
		    tds = searcher.search(query, 100);
		    
		    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
			for (ScoreDoc sd : tds.scoreDocs) {
				Document d = searcher.doc(sd.doc);
				System.out.println(sd.doc + ":(" + sd.score + ")["
						+ d.get("filename") + "【" + d.get("path") + "】---"
						+ d.get("size") + "----" + sdf.format(Long.valueOf(d.get("date")))+"自定义评分:"+d.get("score"));
			}
		    
		    searcher.close();
		} catch (CorruptIndexException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
	
	public void searchByFileScoreQuery() {
		try {
			IndexSearcher searcher = new IndexSearcher(IndexReader.open(FileIndexUtils.getDirectory()));
		    Query q = new TermQuery(new Term("content", "java"));
		    //1、创建一个评分域
		    //FieldScoreQuery fd = new FieldScoreQuery("score", Type.INT);
		    FilenameScoreQuery query = new FilenameScoreQuery(q);
		    
		    //2、根据评分域和原有的query创建自定义的query对象
		    //MyCustomScoreQuery query = new MyCustomScoreQuery(q, fd);
		    
		    TopDocs tds = null;
		    tds = searcher.search(query, 100);
		    
		    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
			for (ScoreDoc sd : tds.scoreDocs) {
				Document d = searcher.doc(sd.doc);
				System.out.println(sd.doc + ":(" + sd.score + ")["
						+ d.get("filename") + "【" + d.get("path") + "】---"
						+ d.get("size") + "----" + sdf.format(Long.valueOf(d.get("date")))+"自定义评分:"+d.get("score"));
			}
		    
		    searcher.close();
		} catch (CorruptIndexException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
	
	@SuppressWarnings("serial")
	private class MyCustomScoreQuery extends CustomScoreQuery {

		public MyCustomScoreQuery(Query subQuery, ValueSourceQuery valSrcQuery) {
			super(subQuery, valSrcQuery);
			
		}
		
		@Override
		protected CustomScoreProvider getCustomScoreProvider(IndexReader reader)
				throws IOException {
			//默认情况实现的评分是通过原有的评分*传入进来的评分域所获取的评分来确定最终评分的
			//为了根据不同的需求进行评分,需要自己进行评分的设定
			/**
			 * 自定评分的步骤
			 * 创建一个类继承于CustomScoreProvider
			 * 覆盖customScore方法
			 */
			//return super.getCustomScoreProvider(reader);
			return new MyCustomScoreProvider(reader);
		}
	}
	
	private class MyCustomScoreProvider extends CustomScoreProvider {

		public MyCustomScoreProvider(IndexReader reader) {
			super(reader);
		}
        /**
         * subQueryScore表示默认文档的打分
         * valSrcScore表示评分域的打分
         */
		@Override
		public float customScore(int doc, float subQueryScore, float valSrcScore)
				throws IOException {
			//return super.customScore(doc, subQueryScore, valSrcScore);
			return subQueryScore/valSrcScore;
		}
		
	}
	
	@SuppressWarnings("serial")
	private class FilenameScoreQuery extends CustomScoreQuery {

		public FilenameScoreQuery(Query subQuery) {
			super(subQuery);
		}

		@Override
		protected CustomScoreProvider getCustomScoreProvider(IndexReader reader)
				throws IOException {
			//return super.getCustomScoreProvider(reader);
			return new FilenameScoreProvider(reader);
		}
	}
	
	private class FilenameScoreProvider extends CustomScoreProvider {
		String [] filenames = null;
		public FilenameScoreProvider(IndexReader reader) {
			super(reader);
			try {
				filenames = FieldCache.DEFAULT.getStrings(reader, "filename");
			} catch (IOException e) {
				e.printStackTrace();
			}
		}
		@Override
		public float customScore(int doc, float subQueryScore, float valSrcScore)
				throws IOException {
			//如何根据doc获取相应的field的值
			/**
			 * 在reader没有关闭之前,所有的数据会存储在一个缓存域中,可以通过缓存获取很多有用的信息
			 * filenames = FieldCache.DEFAULT.getStrings(reader, "filename");可以获取所有的filename域的信息
			 */
			String filename = filenames[doc];
			if(filename.endsWith(".txt")||filename.endsWith(".ini")) {
				return subQueryScore*1.5f;
			}
			//return super.customScore(doc, subQueryScore, valSrcScore);
			return subQueryScore/1.5f;
		}
	}
	@SuppressWarnings("unused")
	private class DateScoreProvider extends CustomScoreProvider {
		long[] dates = null;
		public DateScoreProvider(IndexReader reader) {
			super(reader);
			try {
				dates = FieldCache.DEFAULT.getLongs(reader, "date");
			} catch (IOException e) {
				e.printStackTrace();
			}
				
		}
		
		@Override
		public float customScore(int doc, float subQueryScore, float valSrcScore)
				throws IOException {
			long date = dates[doc];
			long today = new Date().getTime();
			long year = 1000*60*60*365;
			if(today - date <= year) {
				//为其加分
			}
			
			return super.customScore(doc, subQueryScore, valSrcScore);
		}
		
	}
}
3、测试类
package com.dhb.test;

import org.junit.Test;

import com.dhb.util.MyScoreQuery;

/**
 * Manual smoke tests: each one runs a custom-score search and leaves the
 * printed hits to be inspected by hand.
 */
public class TestCustomScore {

	/** Runs the search that scores by the "score" field. */
	@Test
	public void test01() {
		new MyScoreQuery().searchByScoreQuery();
	}

	/** Runs the search that scores by file-name extension. */
	@Test
	public void test02() {
		new MyScoreQuery().searchByFileScoreQuery();
	}
}








Lucene3.5自定义评分以及根据域进行自定义评分设定

标签:

原文地址:http://blog.csdn.net/victor_cindy1/article/details/44726461

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!