标签:blog java 使用 io 文件 数据 ar cti
这里汇总了 Mahout 中几种计算用户相似度的方法,可供参考。下面给出可以实际运行的代码。
数据文件 intro.csv 的内容如下(每行依次为:用户ID,物品ID,偏好值)。直接复制并保存为 data/intro.csv 即可(代码中按该相对路径读取):
1,101,5.0
1,102,3.0
1,103,2.5
2,101,2.0
2,102,2.5
2,103,5.0
2,104,2.0
3,101,2.5
3,104,4.0
3,105,4.5
3,107,5.0
4,101,5.0
4,103,3.0
4,104,4.5
4,106,4.0
5,101,4.0
5,102,3.0
5,103,2.0
5,104,4.0
5,105,3.5
5,106,4.0
代码如下,均带有注释;运行结果此处不再贴出。
package mahout;

import java.io.File;

import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.common.Weighting;
import org.apache.mahout.cf.taste.eval.DataModelBuilder;
import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
import org.apache.mahout.cf.taste.eval.RecommenderEvaluator;
import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
import org.apache.mahout.cf.taste.impl.eval.AverageAbsoluteDifferenceRecommenderEvaluator;
import org.apache.mahout.cf.taste.impl.model.GenericBooleanPrefDataModel;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.CachingUserSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.EuclideanDistanceSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.LogLikelihoodSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.SpearmanCorrelationSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.TanimotoCoefficientSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;
import org.apache.mahout.common.RandomUtils;

/**
 * Demo that evaluates a user-based recommender built on one of several
 * user-similarity measures and prints the resulting evaluation score.
 *
 * <p>The active similarity is {@link LogLikelihoodSimilarity}; the other
 * measures are kept as commented-out alternatives inside
 * {@code buildRecommender} so they can be swapped in for comparison.
 *
 * @author Administrator
 */
public class TestRecommenderEvaluator2 {

    /** Relative path of the preference file read by {@link #main}. */
    private static final String DATA_FILE = "data/intro.csv";

    /** Number of nearest users that form a user's neighborhood. */
    private static final int NEIGHBORHOOD_SIZE = 2;

    /**
     * Fraction of the data used for training (the remaining 10% is used for
     * testing). Original author's note: "Mahout in Action" uses 0.7, but with
     * this data set that produced a NaN score.
     */
    private static final double TRAINING_PERCENTAGE = 0.9;

    /** Evaluation percentage passed to the evaluator (see Mahout's evaluate() Javadoc). */
    private static final double EVALUATION_PERCENTAGE = 1.0;

    /**
     * Loads the preference data, evaluates the recommender and prints the score.
     *
     * @param args unused
     * @throws Exception if the data file cannot be read or the evaluation fails
     */
    public static void main(String[] args) throws Exception {
        // Force the same random values on every run so the results are repeatable.
        RandomUtils.useTestSeed();

        // Load the data. Alternative for data without preference values:
        // DataModel dataModel = new GenericBooleanPrefDataModel(
        //         GenericBooleanPrefDataModel.toDataMap(new FileDataModel(new File("data/ua.base"))));
        DataModel dataModel = new FileDataModel(new File(DATA_FILE));

        // Evaluator based on the average absolute difference.
        RecommenderEvaluator evaluator = new AverageAbsoluteDifferenceRecommenderEvaluator();
        // Alternative evaluator based on the root-mean-square error:
        // RecommenderEvaluator evaluator = new RMSRecommenderEvaluator();

        // Builder the evaluator uses to create the recommender under test.
        RecommenderBuilder builder = new RecommenderBuilder() {
            @Override
            public Recommender buildRecommender(DataModel model) throws TasteException {
                // User similarity: several interchangeable measures.

                // Pearson correlation, unweighted (original note: same as cosine similarity).
                // UserSimilarity similarity = new PearsonCorrelationSimilarity(model);

                // Pearson correlation with weighting.
                // UserSimilarity similarity = new PearsonCorrelationSimilarity(model, Weighting.WEIGHTED);

                // Similarity defined by Euclidean distance.
                // UserSimilarity similarity = new EuclideanDistanceSimilarity(model);

                // Spearman correlation.
                // UserSimilarity similarity = new SpearmanCorrelationSimilarity(model);

                // Spearman correlation wrapped in a cache.
                // UserSimilarity similarity =
                //         new CachingUserSimilarity(new SpearmanCorrelationSimilarity(model), model);

                // Tanimoto coefficient (ignores preference values).
                // UserSimilarity similarity = new TanimotoCoefficientSimilarity(model);

                // Log-likelihood similarity (the measure actually used).
                UserSimilarity similarity = new LogLikelihoodSimilarity(model);

                // User neighborhood made of the nearest NEIGHBORHOOD_SIZE users.
                UserNeighborhood neighborhood =
                        new NearestNUserNeighborhood(NEIGHBORHOOD_SIZE, similarity, model);

                // The recommender to evaluate.
                return new GenericUserBasedRecommender(model, neighborhood, similarity);
            }
        };

        // Alternative data-model builder for boolean (no preference value) data:
        // DataModelBuilder modelBuilder = new DataModelBuilder() {
        //     public DataModel buildDataModel(FastByIDMap<PreferenceArray> arg0) {
        //         return new GenericBooleanPrefDataModel(GenericBooleanPrefDataModel.toDataMap(arg0));
        //     }
        // };

        // Evaluate with no custom DataModelBuilder (null) and print the score.
        double score = evaluator.evaluate(
                builder, null, dataModel, TRAINING_PERCENTAGE, EVALUATION_PERCENTAGE);
        System.out.println(score);
    }
}
mahout推荐12-相似度方法汇总,布布扣,bubuko.com
标签:blog java 使用 io 文件 数据 ar cti
原文地址:http://www.cnblogs.com/jsunday/p/3891943.html