码迷,mamicode.com
首页 > 编程语言 > 详细

个性化推荐算法:GRM,CF,NBI的实现

时间:2016-05-30 15:08:07      阅读:261      评论:0      收藏:0      [点我收藏+]

标签:

All three algorithms GRM, CF, and NBI can provide each user an ordered queue of all its uncollected movies. For an arbitrary user ui, if the edgeui?ojis in the probe set according to the training set, oj is an uncollected movie for ui, we measure the position ofoj in the ordered queue. For example, if there are 1500 uncollected movies forui, andoj is the 30th from the top, we say the position ofoj is the top 30/1500, denoted byrij=0.02. --Tao Zhou



The global ranking method (GRM) sorts all the objects in the descending order of degree and recommends those with highest degrees. 

/***************************************************************************
 *  对电影按照度进行排序,将排序的结果存放在rankResult_GRM.data
 ***************************************************************************/
#include<stdio.h>
#include<stdlib.h>

#define MOVIE_SIZE 	1682
#define USER_SIZE 	943	

typedef struct MovieInfo{
	int number;			//电影的编号
	int degree;			//电影的度
	//int rank;			//电影的排名
}MovieInfo;

MovieInfo movieInfo[MOVIE_SIZE + 1];
int matrix[MOVIE_SIZE + 1][USER_SIZE + 1];	//自动初始化,在main中不自动初始化,会报错
int degree[MOVIE_SIZE + 1];			//用来存放每部电影的degree,degree[0]不存放
						//degree[i]用来存放编号是i的电影的度

int main(){
	FILE* fMatrix;				//获得然后计算电影的度 <--matrix.data
						//数组的下标是电影的代号
	FILE* fRankOfGRM;			//将电影的排名写到文件中
	
	int i, j;
	int max_index;
	if( NULL == (fMatrix = fopen("matrix.data", "r"))){
		printf("open file(marix.data) error!\n");
		exit(0);
	}
	if( NULL == (fRankOfGRM = fopen("rankResult_GRM.data", "w"))){
		printf("open file(rankResult_GRM.data) error!\n");
		exit(0);
	}
	//将matrixData中的数据读入数组中
	for( i = 1; i <= MOVIE_SIZE; i++ ){
		//printf("i = %d\n", i);
		for( j = 1; j <= USER_SIZE; j++ ){
			if( 1 != fscanf(fMatrix, "%d ", &matrix[i][j])){
				printf("fscanf error\n");
				exit(0);
			}
		}
	}
	/*
	for( i = 0; i < MOVIE_SIZE; i++ ){
		for( j = 0; j < USER_SIZE; j++ ){
			printf("%d", matrix[i][j]);
		}
		printf("\n");
	}
	*/
	
	//计算电影的度
	for( i = 1; i <= MOVIE_SIZE; i++ ){
		for( j = 1; j <= USER_SIZE; j++ ){
			degree[i] = degree[i] + matrix[i][j];
		}
		movieInfo[i].number = i;		//得到每一个电影的ID,保留编号
		movieInfo[i].degree = degree[i];	//和电影的度
	}
	/*
	printf("show the degree of movie\n");
	for( i = 1; i <= MOVIE_SIZE; i++ ){
		printf("%4d", degree[i]);
		if( i % 10 == 0 ){
			printf("    row%d\n", i/10);
		}
	}
	*/
	//对电影按照度的大小进行排序
	for( i = 1; i < MOVIE_SIZE; i++ ){
		max_index = i;
		for( j = i + 1; j <= MOVIE_SIZE; j++ ){
			if( movieInfo[j].degree > movieInfo[max_index].degree ){
				max_index = j;
			}
		}
		if( max_index != i ){
			movieInfo[0].degree = movieInfo[i].degree;
			movieInfo[0].number = movieInfo[i].number;
			movieInfo[i].degree = movieInfo[max_index].degree;
			movieInfo[i].number = movieInfo[max_index].number;
			movieInfo[max_index].degree = movieInfo[0].degree;
			movieInfo[max_index].number = movieInfo[0].number;
		}
	}
	//将排序的结果输出
	/*
	printf("\nsrot\n\n");
	for( i = 1; i <= MOVIE_SIZE; i++ ){
		printf("%5d", movieInfo[i].degree);
		if( i % 10 == 0 ){
			printf("     row%d\n", i/10);
		}
	}
	*/
	//将排序的结果写入文件,               名次(by degree)    电影的ID,           度
	for( i = 1; i <= MOVIE_SIZE; i++ ){
		fprintf(fRankOfGRM, "%d\t%d\t%d\t\n", i, movieInfo[i].number, movieInfo[i].degree);
	}
	fclose(fMatrix);
	fclose(fRankOfGRM);
	return 0;
}</span><span style="font-size:18px;font-family: Times-Roman;">
</span>

/*
 * 对ProbeSet中的每一条记录,
 * 	用户: 得到该用户的没有选择过的电影的个数
 * 	电影: 得到该电影的排名
 * */
#include<stdio.h>
#include<stdlib.h>

//size of porbe
#define SIZE_PORBE 	8252		//预测集的大小
#define SIZE_USER 	943		//数据集中用户的大小
#define SIZE_MOVIE 	1682		//数据集中电影的大小

typedef struct ItemPorbe{		//读入probe Set
	int user;
	int movie;
}ItemPorbe;

typedef struct ItemMovie{
	int movieId;
	int degree;
}ItemMovie;

ItemPorbe itemPorbe[SIZE_PORBE];
int unSelcMovNumOfUser[SIZE_PORBE+1];	//0号元素不使用,第i号元素表示第i个用户
					//值是用户没有选过的电影的个数
ItemMovie movieRank[SIZE_MOVIE+1];	//0号原始不使用,下标表示排名
					//值是电影的代号

int findMovieRank(int numberOfMovie);
double ratio[SIZE_PORBE];

int main(){
	FILE* fReadPorbeSet;
	int skiprate;
	int verifyPorbeSize = 0;
	int i;
	FILE* fUnselectedMoiveOfUser;
	int skipindex;
	FILE* fMoveRank;
	int skipindex_2;
	int skipdegree;

	int user;
	int movie;
	int temp_unselectForUser;
	int temp_movieRank;

	double average_ratio = 0.0;
	double sum = 0.0;
	double temp_ratio = 0.0;

	if( NULL == (fReadPorbeSet = fopen("porbeSet.data", "r"))){					//读入预测集中的数据
		printf("open file(porbeSet.data) error!\n");
		exit(1);
	}
	if( NULL == (fUnselectedMoiveOfUser = fopen("unselectedMovieNumberForUser.data", "r"))){	//得到每个用户没有评价过的电影的个数	
		printf("open file(unselected.data) error!\n");
		exit(0);
	}
	if( NULL == (fMoveRank = fopen("rankResult_GRM.data", "r"))){					//得到电影的排名(按照电影的度)
		printf("open file(rankResult_GRM.data) error!\n");
		exit(0);
	}

	/*
	 * 读取probeSet中的数据
	 * */
	for( i = 0; i < SIZE_PORBE; i++ ){
		verifyPorbeSize++;
		if( 3 != fscanf(fReadPorbeSet, "%d %d %d", &itemPorbe[i].user, &itemPorbe[i].movie, &skiprate)){
			printf("fscanf 1 error\n");
			exit(0);
		}
	}
	/*
	printf("%d\n", verifyPorbeSize);
	for( i = 0; i < SIZE_PORBE; i++ )
		printf("%d\t%d\t\n", itemPorbe[i].user, itemPorbe[i].movie);

	*/


	/*
	 * 读取unselectedMovieNumberOfUser.data中的数据.得到每一个用户的没有选择的电影的个数
	 * */
	for( i = 1; i <= SIZE_USER; i++ ){
		if( 2 != fscanf(fUnselectedMoiveOfUser, "%d %d", &skipindex, &unSelcMovNumOfUser[i])){
			printf("fscanf 2 error\n");
			exit(0);
		}
	}
	/*
	printf("unselected Movie size of user:\n");
	for( i = 1; i <= SIZE_USER; i++ ){
		printf("%5d ", unSelcMovNumOfUser[i]);
		if( i % 10 == 0 )
			printf("\n");
	}
	*/


	/*
	 * 读取电影的Id和度,排行隐藏在下标中
	 */
	for( i = 1; i <= SIZE_MOVIE; i++ ){
		if( 3 != fscanf(fMoveRank, "%d %d %d", &skipindex_2, &movieRank[i].movieId, &movieRank[i].degree)){
			printf("fscanf 3 error!");
			exit(0);
		}
	}
	/*
	printf("rank of movie!\n");
	for( i = 1; i <= SIZE_MOVIE; i++ ){
		printf("%4d\t%4d\n", movieRank[i].movieId, movieRank[i].degree);
	}
	*/

	/*
	 * 计算训练集中用户选择的电影在由GRM给出的电影的排行中的名次除以改用户没有评价过的电影的总数
	 * */
	for( i = 0; i < SIZE_PORBE; i++ ){
		user = itemPorbe[i].user;
		movie = itemPorbe[i].movie;
		temp_unselectForUser = unSelcMovNumOfUser[user];
		temp_movieRank = findMovieRank(movie);
		
		//printf("\nitem of probe user and unselected movie %d %d\n", user, movie);
		//printf("the rank of movie %d \n", temp_movieRank);
		//printf("the number of unselected movie %d \n", temp_unselectForUser);
		
		ratio[i] = (double)temp_movieRank / (double)temp_unselectForUser;
		
		//temp_ratio = (double)temp_movieRank / (double)temp_unselectForUser;
		//printf("the raoit is %f\n", temp_ratio);
	}
	/*
	int temp_count = 0;
	for( i = 0; i < SIZE_PORBE; i++ ){
		printf("%12.8f", ratio[i]);
		if( (i+1)%10 == 0 )
			printf("------------%d\n", (i+10)/10);
		if( ratio[i] >= 1 )
			temp_count++;

	}
	*/
	for( i = 0; i < SIZE_PORBE; i++ ){
		sum += ratio[i];
	}
	average_ratio = sum / (double)SIZE_PORBE;
	printf("\n\nThe Rssult of GRM is %f\n", average_ratio);
	//printf("%d", temp_count);

	//printf("\n%d", findMovieRank(1));
	
	return 0;
}

//通过参数numberOfMovie,即电影的编号来查找电影的排名
//        该方法中解决了排名的并列问题
int findMovieRank(int numberOfMovie){
	int i,j;
	int degree;
	for( i = 1; i <= SIZE_MOVIE; i++ ){
		if( numberOfMovie == movieRank[i].movieId ){
			degree = movieRank[i].degree;
			for( j = i; j > 0 && movieRank[j].degree == degree; j-- );
				return j+1;
		}
	}
}
/*
	for( i = 0; i < 10; i++ ){
		if( 1005 == a[i].name ){
			degree = a[i].degree;
			for( j = i; j > 0 && a[j].degree == degree; j-- );
				printf("%d\n", j+1);
		}
	}	
*/


Thus far, the widest applied personal recommendation algorithm is collaborative filtering (CF) , based on a similarity measure between users.

/*
 * 计算CF算法中的相似矩阵
 * */

#include<stdio.h>
#include<stdlib.h>

#define SIZE_MOVIE 1682
#define SIZE_USER  943

int adjacentMateix[SIZE_MOVIE][SIZE_USER];
int degreeOfUser[SIZE_USER + 1];

double similarity[SIZE_USER + 1][SIZE_USER + 1];	

void calculateSimilarity();
void showSimilarity();
void calculateDegreeOfUSer();
void readAdjacentMatrix();
void writeSimilarityToFile();

int main(){
	readAdjacentMatrix();
	calculateDegreeOfUSer();
	calculateSimilarity();
	//showSimilarity();	
	writeSimilarityToFile();
	return 0;
}

/*
 * 读入adjacentMatrix
 * */
void readAdjacentMatrix(){
	FILE* fReadAdjacent;
	int i, j;
	if( NULL == (fReadAdjacent = fopen("matrix.data", "r"))){
		printf("open file(matrix.data) error!\n");
		exit(0);
	}
	//将文件中的adjacentMatrix读入到数组中
	for( i = 0; i < SIZE_MOVIE; i++ ){
		for( j = 0; j < SIZE_USER; j++ ){
			if( 1 != fscanf(fReadAdjacent, "%d ", &adjacentMateix[i][j])){
				printf("fscanf 1 error\n");
				exit(0);
			}
		}
	}
	/*
	for( i = 0; i < SIZE_MOVIE; i++ ){
		for( j = 0; j < SIZE_USER; j++ ){
			printf("%d", adjacentMateix[i][j]);
		}
		printf("\n");
	}*/
	fclose(fReadAdjacent);
}

//计算每个用户的度. 没有使用0号元素,这样可以使得下标表示用户的编号.
void calculateDegreeOfUSer(){
	int i, j;
	for( i = 0; i < SIZE_USER; i++ ){
		for( j = 0; j < SIZE_MOVIE; j++ ){
			degreeOfUser[i+1] += adjacentMateix[j][i];
		}
	}
	//test
	/*
	for( i = 1; i <= SIZE_USER; i++ ){
		printf("%6d", degreeOfUser[i]);
		if( i % 10 == 0 )
			printf("   row:%d\n", i/10);
	}
	*/
}

/*
 * 计算S矩阵:值是每两个用户的相似度
 * */
void calculateSimilarity(){
	int i;
	int j;
	int l;
	int molecule = 0;
	int denominator = 0;
	for( i = 1; i <= SIZE_USER; i++ ){
		for( j = 1; j <= SIZE_USER; j++ ){
			molecule = 0;
			denominator = 0;
			for( l = 0; l < SIZE_MOVIE; l++ ){
				molecule = molecule + adjacentMateix[l][i-1] * adjacentMateix[l][j-1];
			}
			denominator = (degreeOfUser[i] < degreeOfUser[j]) ? degreeOfUser[i] : degreeOfUser[j];
			//用户的不为0!natural
			//printf("\n%d %d %d\n", degreeOfUser[i], degreeOfUser[j], denominator);
			similarity[i][j] = (double)molecule/denominator;
		}
	}
}

/*
 * 将S矩阵的结果输入文件中
 * */
void writeSimilarityToFile(){
	FILE* fOut;
	if( NULL == (fOut = fopen("similarityMatrix.data", "w"))){
		printf("open file(similarityMatrix.data) error");
		exit(0);
	}
	int i;
	int j;
	for( i = 1; i <= SIZE_USER; i++ ){
		for( j = 1; j <= SIZE_USER; j++ ){
			fprintf(fOut, "%10.6f ", similarity[i][j]);
		}
		fprintf(fOut, "\n");
	}
	fclose(fOut);
}

/*
 * 打印S矩阵
 * */
void showSimilarity(){
	int i;
	int j;
	for( i = 1; i <= SIZE_USER; i++ ){
		for( j = 1; j <= SIZE_USER; j++ ){
			printf("%10.6f", similarity[i][j]);
		}
		printf("---------%d\n", i);
	}
}</span><span style="font-size:18px;font-family: Times-Roman;">
</span>

#include<stdio.h>
#include<stdlib.h>

#define SIZE_USER 	943
#define SIZE_MOVIE 	1682
#define SIZE_PROBE_SET 	8252

typedef struct ProbeItem{
	int user;
	int movie;
}ProbeItem; 

typedef struct UnselectedMovieListOFUser{
	int user;		//用户的ID
	int unselectedNumber;	//用户没有选择过的电影的个数
	int list[SIZE_MOVIE+1];	//用户没有选择过的电影的列表:电影的ID列表
}UnselectedMovieListOFUser;

typedef struct MovieIdAndScore{
	int movieId;
	double score;
}MovieIdAndScore;

ProbeItem probeItem[SIZE_PROBE_SET];
UnselectedMovieListOFUser unselectedMovieListOfuser[SIZE_USER+1];	//用户没有选择的电影的list
double similarityMatrix[SIZE_USER + 1][SIZE_USER + 1];			//用户之间的相似矩阵
int unselectedMovieNumberOfUser[SIZE_USER + 1];				//用户没有选择的电影的number
int adjacentMatrix[SIZE_MOVIE+1][SIZE_USER+1];				//邻接矩阵
MovieIdAndScore movieIdAndScore[SIZE_MOVIE + 1][SIZE_USER + 1];		//用来存放每一个用户没有选择过的电影和电影的预测分数

double ratio[SIZE_PROBE_SET + 1];

void readSimilarityMatrix();
void readAdjacentMatrix();
void readUnSelectedMovieNumberOfUser();	/*
					 *  这个信息表明的是每个用户没有选择的电影的个数,
					 *  现在已经不足够,我们需要知道的是每个用户具体
					 *  没有选择过的电影是哪些。
					 */
void readUnselectedMoviteItemOfUser();
void calculateScore_Matrix();
void printMovieIdAndScoreMatrix();
double calculateScore(int user, int movie);
void sortUnselectedMovieOfUser();
void readProbeSet();
void CF();
int rankOfMovie(int user, int movie);

int main(){
	readAdjacentMatrix();
	readSimilarityMatrix();
	readUnSelectedMovieNumberOfUser();

	readUnselectedMoviteItemOfUser();
	calculateScore_Matrix();
	sortUnselectedMovieOfUser();
	//printMovieIdAndScoreMatrix();
	readProbeSet();
	CF();
	return 0;
}

/*
 * 读出用户之间的相似矩阵s[i][j]表明的是用户i和用户j的相似度
 */
void readSimilarityMatrix(){
	FILE* fIn;
	int i;
	int j;
	if( NULL == (fIn = fopen("similarityMatrix.data", "r"))){
		printf("open file(similaityMatrix.data) error\n");
		exit(0);
	}
	for( i = 1; i <= SIZE_USER; i++ ){
		for( j = 1; j <= SIZE_USER; j++ ){
			if( 1 != fscanf(fIn, "%lf ", &similarityMatrix[i][j])){
				printf("fscanf error: %d %d\n", i,j);
				exit(0);
			}
		}
	}
	printf("read over\n");
	/*
	for( i = 1; i <= SIZE_USER; i++ ){
		for( j = 1; j <= SIZE_USER; j++ ){
			printf("%10.6f", similarityMatrix[i][j]);
		}
		printf("--------------%d\n", i);
	}
	*/
	fclose(fIn);
}

/*
 * 读入用户的没有选择过的电影的个数
 * */
void readUnSelectedMovieNumberOfUser(){
	FILE* fIn;
	int i;
	int skipIndex;
	if( NULL == (fIn = fopen("unselectedMovieNumberForUser.data", "r"))){
		printf("open file(unselected) error!");
		exit(0);
	}
	for( i = 1; i <= SIZE_USER; i++ ){
		if( 2 != fscanf(fIn, "%d %d", &skipIndex, &unselectedMovieNumberOfUser[i])){
			printf("fscanf error:%d", i);
			exit(0);
		}
	}
	/*
	for( i = 1; i <= SIZE_USER; i++ ){
		printf("%6d", unselectedMovieNumberOfUser[i]);
		if( i % 10 == 0 )
			printf("   row:%d\n", i/10);
	}
	printf("\n");
	*/
	fclose(fIn);
}

/*
 * 读入每个用户的没有选择过的电影是哪些,
 * */
void readUnselectedMoviteItemOfUser(){
	/*
	FILE* fIn;
	if( NULL == (fIn = fopen("matrix.data", "r"))){
		printf("open file(matrix.data) error!\n");
		exit(0);
	}
	int i,j;
	for( i = 1; i <= SIZE_MOVIE; i++ ){
		for( j = 1; j <= SIZE_USER; j++ ){
			if( 1 != fscanf(fIn, "%d ", &adjacentMatrix[i][j])){
				printf("fscanf error:%d %d", i, j);
				exit(0);
			}
		}
	}
	*/
	int i,j;
	//readAdjacentMatrix();
	for( i = 1; i <= SIZE_USER; i++ ){
		unselectedMovieListOfuser[i].user = i;
		for( j = 1; j <= SIZE_MOVIE; j++ ){
			if( adjacentMatrix[j][i] == 0 ){			//没有选过的电影
				unselectedMovieListOfuser[i].unselectedNumber += 1;
				unselectedMovieListOfuser[i].list[j] = j;	//记录了没有选择的电影的代号
										//用户i已经选择的电影在list
										//中的值为0
			}
			//printf("---------%d\n", unselectedMovieListOfuser[2].list[j]);
		}
		//printf("%d\n", unselectedMovieListOfuser[i].unselectedNumber);
	}


	//fclose(fIn);
	//得到了每一个用户ui的没有选择过的电影列表的数据
	/*	
	 *
	 * 测试用户i的list
	 *
	
	for( i = 0; i < SIZE_MOVIE; i++ ){
		printf("%d\t%d\n", unselectedMovieListOfuser[2].list[i], adjacentMatrix[i][2]);
	}
	 * */
}


/*
 * 计算每一个用户每一个没有选择的电影的score
 * */
void calculateScore_Matrix(){
	int i;
	int j;
	for( i = 1; i <= SIZE_USER; i++ ){
		for( j = 1; j <= SIZE_MOVIE; j++ ){
			movieIdAndScore[j][i].movieId = unselectedMovieListOfuser[i].list[j];
			if( movieIdAndScore[j][i].movieId != 0 ){	//没有选过的电影,才要计算分数,
									//选择过的电影的分数是0
				movieIdAndScore[j][i].score = calculateScore(i, j);
			}	
		}
	}
}

void printMovieIdAndScoreMatrix(){
	int i;
	int j;
	int count = 0;
	for( i = 1; i <= SIZE_USER; i++ ){
		printf(" user ----- %d \n", i);
		count = 0;
		for( j = 1; j <= SIZE_MOVIE; j++ ){
			printf("%10.6f", movieIdAndScore[j][i].score);
			count++;
		}
		printf("%d  \n\n", count);
	}
}

double calculateScore(int user, int movie){
	//printf("   caluclateScore %d %d\n", user, movie);
	double molecular = 0.0;
	double denominator = 0.0;
	int i;
	for( i = 1; i <= SIZE_USER; i++ ){
		if( user != i ){
			molecular = (double)molecular + similarityMatrix[i][user] * adjacentMatrix[movie][i];
		}
	}
	for( i = 1; i <= SIZE_USER; i++ ){
		if( user != i ){
			denominator = denominator + similarityMatrix[i][user];
		}
	}
	return molecular/denominator;
}

/*
 * 读adjacentMatrix
 * */
void readAdjacentMatrix(){
	FILE* fIn;
	if( NULL == (fIn = fopen("matrix.data", "r"))){
		printf("open file(matrix.data) error!\n");
		exit(0);
	}
	int i,j;
	for( i = 1; i <= SIZE_MOVIE; i++ ){
		for( j = 1; j <= SIZE_USER; j++ ){
			if( 1 != fscanf(fIn, "%d ", &adjacentMatrix[i][j])){
				printf("fscanf error:%d %d", i, j);
				exit(0);
			}
		}
	}
	fclose(fIn);
}

/*
 * 对每个用户的没有选择过的电影按照分数进行排序
 * */
void sortUnselectedMovieOfUser(){
	int i;
	int j;
	int k;
	int max_index;
	for( i = 1; i <= SIZE_USER; i++ ){
		for( j = 1; j < SIZE_MOVIE; j++ ){
			max_index = j;
			for( k = j + 1; k <= SIZE_MOVIE; k++ ){
				if( movieIdAndScore[k][i].score > movieIdAndScore[max_index][i].score )
					max_index = k;
			}
			if( j != max_index ){
				movieIdAndScore[0][i].score = movieIdAndScore[j][i].score; 
				movieIdAndScore[0][i].movieId = movieIdAndScore[j][i].movieId; 
				movieIdAndScore[j][i].score = movieIdAndScore[max_index][i].score;
				movieIdAndScore[j][i].movieId = movieIdAndScore[max_index][i].movieId;
				movieIdAndScore[max_index][i].score = movieIdAndScore[0][i].score;
				movieIdAndScore[max_index][i].movieId = movieIdAndScore[0][i].movieId;
			}
		}
	}
}
/*
void SelectSrot(int a[], int n){
	int i,j,min_index,temp;
	for( i = 0; i < n - 1; i++ ){
		min_index = i;
		for( j = i+1; j < n; j++ ){
			if( a[j] < a[min_index] )
				min_index = j;
		}
		if( min_index != i ){
			temp = a[i];
			a[i] = a[min_index];
			a[min_index] = temp;
		}
	}
}
*/

void readProbeSet(){
	FILE* fIn;
	int i;
	int skipDegree;
	if( NULL == (fIn = fopen("porbeSet.data", "r"))){
		printf("open file(probeSet.data) error!\n");
		exit(0);
	}
	for( i = 0; i < SIZE_PROBE_SET; i++ ){
		if( 3 != fscanf(fIn, "%d %d %d", &probeItem[i].user, &probeItem[i].movie, &skipDegree)){
			printf("fscanf error %d\n", i);
			exit(0);
		}
	}
	/*
	for( i = 0; i < SIZE_PROBE_SET; i++ ){
		printf("%10d%10d\n", probeItem[i].user, probeItem[i].movie);
	}
	*/
}

void CF(){
	int i;
	int user;
	int movie;
	double total = 0.0;
	for( i = 0; i < SIZE_PROBE_SET; i++ ){
		user = probeItem[i].user;
		movie = probeItem[i].movie;
		ratio[i] = (double) rankOfMovie(user, movie) / unselectedMovieNumberOfUser[user];
		total = total + ratio[i];
	}
	printf("%10.6f\n", total/SIZE_PROBE_SET);
}

/*
 * 返回用户user的个人电影排行榜中的movie的电影排行
 * */
int rankOfMovie(int user, int movie){
	int i;
	for( i = 1; i <= SIZE_MOVIE; i++ ){
		if( movieIdAndScore[i][user].movieId == movie ){
			return i;
		}
	}
}

NBI

/*
 * 计算NBI中的W矩阵
 * 	W矩阵是针对object即电影的。
 * */
#include<stdio.h>
#include<stdlib.h>

#define USER_SIZE 	943
#define MOVIE_SIZE 	1682
#define PROBESET_SIZE	8252

typedef struct UnselectedMovieOfUser{
	int user;			//用户的ID
	int count_unselected;		//没有选择过的电影的数量
	int list[MOVIE_SIZE + 1];	//没有选择过的电影的列表,即电影的ID
}UnselectedMovieOfUser;

typedef struct ScoreOfUnselectedMovie{
	int movieId;			//电影的Id
	double score;			//电影的评分
}ScoreOfUnselectedMovie;

typedef struct Item{			//用于存放ProbeSet中的数据
	int user;
	int movie;
}Item;

ScoreOfUnselectedMovie scoreOfUnselectedMovie[MOVIE_SIZE + 1][USER_SIZE + 1];

UnselectedMovieOfUser unselectMovieOfUser[USER_SIZE + 1];
Item item[PROBESET_SIZE];

int adjacentMatrix[MOVIE_SIZE + 1][USER_SIZE + 1];	//用来存放邻接矩阵
double wMatrix[MOVIE_SIZE + 1][MOVIE_SIZE + 1];		//用来存放W矩阵w[i,j]: how likely j to choose i
int degreeOfUser[USER_SIZE + 1];			//用户的度
int degreeOfMoive[MOVIE_SIZE + 1];			//电影的度
double ratio[PROBESET_SIZE];


void readAdjacentMatrix();
void calculateDegreeOFUser();
void calculateDegreeOfMovie();
void calculateWMatrix();
double calculateW_molecular(int f,int s);
void statisticsUnselectedMovieOfUser();
void calculateScoreForUnselectedMovieOFUser();
double calculateScore(int user, int movie);
void sort();
void printSortResult();
void readProbeSet();
void NBI();
int getRank(int user, int movie);


int main(){
	readAdjacentMatrix();
	calculateDegreeOfMovie();
	calculateDegreeOFUser();
	calculateWMatrix();
	statisticsUnselectedMovieOfUser();
	calculateScoreForUnselectedMovieOFUser();
	sort();
	//printSortResult();
	readProbeSet();
	NBI();
	return 0;
}
/*
 * 读入邻接矩阵
 * */
void readAdjacentMatrix(){
	FILE* fin;
	if( NULL == (fin = fopen("matrix.data", "r"))){
		printf("open file(matrix.data) error!");
		exit(0);
	}
	int i;
	int j;
	for( i = 1; i <= MOVIE_SIZE; i++ ){
		for( j = 1; j <= USER_SIZE; j++ ){
			if( 1 != fscanf(fin, "%d ", &adjacentMatrix[i][j])){
				printf("fscanf error::%d\t%d", i,j);
				exit(0);
			}
		}
	}
	//test
	/*
	for( i = 1; i <= MOVIE_SIZE; i++ ){
		printf("%d  %d\n",i,  adjacentMatrix[i][1]);
	}
	printf("aa\n");
	for( i = 1; i <= MOVIE_SIZE; i++ ){
		for( j = 1; j <= USER_SIZE; j++ ){
			printf("%d ", adjacentMatrix[i][j]);
		}
		printf("\n");
	}
	*/
}
/*
 * 计算用户的度
 * */
void calculateDegreeOFUser(){
	int i;
	int j;
	for( i = 1; i <= USER_SIZE; i++ ){
		for( j = 1; j <= MOVIE_SIZE; j++ ){
			degreeOfUser[i] = degreeOfUser[i] + adjacentMatrix[j][i];
		}
	}
	//test
	/*
	printf("\ndegree of User\n");
	for( i = 1; i <= USER_SIZE; i++ ){
		printf("%5d", degreeOfUser[i]);
		if( i % 10 == 0 )
			printf("------%d\n", i/10);
	}
	*/
}

/*
 * 计算电影的度
 * */
void calculateDegreeOfMovie(){
	int i;
	int j;
	for( i = 1; i <= MOVIE_SIZE; i++ ){
		for( j = 1; j <= USER_SIZE; j++ ){
			degreeOfMoive[i] = degreeOfMoive[i] + adjacentMatrix[i][j];
		}
	}
	//test
	/*
	printf("\ndegree of Movie\n");
	for( i = 1; i <= MOVIE_SIZE; i++ ){
		printf("%5d", degreeOfMoive[i]);
		if( i % 10 == 0 )
			printf("---------%d\n", i/10);
	}
	*/
}
/*
 * 计算 W 矩阵
 * */
void calculateWMatrix(){
	int i;
	int j;
	for( i = 1; i <= MOVIE_SIZE; i++ ){
		for( j = 1; j <= MOVIE_SIZE; j++ ){
			wMatrix[i][j] = 0.0;
		}
	}
	for( i = 1; i <= MOVIE_SIZE; i++ ){
		//printf("%d ", i);
		for( j = 1; j <= MOVIE_SIZE; j++ ){
			if( degreeOfMoive[j] == 0 ){
				continue;
			}
			wMatrix[i][j] = calculateW_molecular(i,j)/degreeOfMoive[j];
		}
	}
	//test
	/*
	printf("\n------------------W Matrix----------------\n");
	for( i = 1; i <= MOVIE_SIZE; i++ ){
		for( j = 1; j <= MOVIE_SIZE; j++ ){
			printf("%10.6f", wMatrix[i][j]);
		}
		printf("---------%d\n", i);
	}
	*/
}
double calculateW_molecular(int f, int s){
	int i;
	int j;
	double sum = 0.0;
	for( i = 1; i <= USER_SIZE; i++ ){
		//printf("###%d %d %d\n", adjacentMatrix[f][i], adjacentMatrix[s][i], degreeOfUser[i]);
		sum = sum + ( (adjacentMatrix[f][i]*adjacentMatrix[s][i])/(double)degreeOfUser[i] );
	}
	//test
	return sum;
}

/*
 * 统计用户的没有选择过的电影的列表
 * 	初始化结构体数组: unselectMovieOfUser
 * 		typedef struct UnselectedMovieOfUser{
			int user;			//用户的ID
			int count_unselected;		//没有选择过的电影的数量
			int list[MOVIE_SIZE + 1];	//没有选择过的电影的列表,即电影的ID
		}UnselectedMovieOfUser;
 * */
void statisticsUnselectedMovieOfUser(){
	int i;
	int j;
	for( i = 1; i <= USER_SIZE; i++ ){
		unselectMovieOfUser[i].user = i;
		for( j = 1; j <= MOVIE_SIZE; j++ ){
			if( 0 == adjacentMatrix[j][i] ){			//用户没有选过的电影, 计算其f'(oj)的值
				unselectMovieOfUser[i].count_unselected += 1;	//用户i的没有选择的电影数+1
				unselectMovieOfUser[i].list[j] = j;		//用户i的list数组中的j = j,表示第j部电影的度不为0
			}
		}
	}

	//test
	/*
	printf("below should be equal\n");
	for( i = 1; i <= USER_SIZE; i++ ){
		printf("%5d", unselectMovieOfUser[i].count_unselected);
		if( i % 10 == 0 )
			printf(" --------row%d\n", i / 10);
	}
	printf("\n");
	for( i = 1; i <= USER_SIZE; i++ ){
		printf("%5d", MOVIE_SIZE - degreeOfUser[i]);
		if( i % 10 == 0 ){
			printf(" --------row%d\n", i / 10);
		}
	}
	*/
	
}
/*
 * 对用户没有选择过的电影计算分数
 * 	初始化结构体数组: scoreOfUnselectedMovie
 * 			typedef struct ScoreOfUnselectedMovie{
				int movieId;			//电影的Id
				double score;			//电影的评分
			}ScoreOfUnselectedMovie;
 * */
void calculateScoreForUnselectedMovieOFUser(){
	int i;
	int j;
	for( i = 1; i <= USER_SIZE; i++ ){
		for( j = 1; j <= MOVIE_SIZE; j++ ){
			scoreOfUnselectedMovie[j][i].movieId = unselectMovieOfUser[i].list[j];
			if( scoreOfUnselectedMovie[j][i].movieId != 0 ){	//没有选过的电影
				scoreOfUnselectedMovie[j][i].score = calculateScore(i, j);
			}else{							//
				scoreOfUnselectedMovie[j][i].movieId = j;
			}
		}
	}

	/*
	printf("\n###############\n");
	for( i = 1; i <= MOVIE_SIZE; i++ ){
		for( j = 1; j <= USER_SIZE; j++ ){
			printf("%d ", adjacentMatrix[i][j]);
		}
		printf("\n");
	}
	
	for( i = 1; i <= USER_SIZE; i++ ){
		for( j = 1; j <= MOVIE_SIZE; j++ ){
			printf("%10.6f", scoreOfUnselectedMovie[j][i].score);
		}
		printf("-------------------%d\n\n", i);
	}
	*/
}
/*
 * 上一个函数的子函数
 * */
double calculateScore(int user, int movie){
	int i;
	double score = 0.0;
	for( i = 1; i <= MOVIE_SIZE; i++ ){
		score = score + wMatrix[movie][i] * adjacentMatrix[i][user];
	}
	return score;
}
/*
 * 对用户没有选择过的电影按照得分进行排名
 * */
void sort(){
	int i;
	int j;
	int k;
	int max_index = 0;
	for( i = 1; i <= USER_SIZE; i++ ){
		for( j = 1; j < MOVIE_SIZE; j++ ){
			max_index = j;
			for( k = j + 1; k <= MOVIE_SIZE; k++ ){
				if( scoreOfUnselectedMovie[k][i].score > scoreOfUnselectedMovie[max_index][i].score ){
					max_index = k;
				}
			}
			if( max_index != j ){
				scoreOfUnselectedMovie[0][i].movieId = scoreOfUnselectedMovie[j][i].movieId;
				scoreOfUnselectedMovie[0][i].score = scoreOfUnselectedMovie[j][i].score;
				scoreOfUnselectedMovie[j][i].movieId = scoreOfUnselectedMovie[max_index][i].movieId;
				scoreOfUnselectedMovie[j][i].score = scoreOfUnselectedMovie[max_index][i].score;
				scoreOfUnselectedMovie[max_index][i].movieId = scoreOfUnselectedMovie[0][i].movieId;
				scoreOfUnselectedMovie[max_index][i].score = scoreOfUnselectedMovie[0][i].score;
			}
		}
	}
}
/*
 * 测试函数:输出一下排序的结果
 * */
void printSortResult(){
	int i;
	int j;
	for( i = 1; i <= USER_SIZE; i++ ){
		for( j = 1; j <= MOVIE_SIZE; j++ ){
			printf("%d\t%10.6f\n",scoreOfUnselectedMovie[j][i].movieId, scoreOfUnselectedMovie[j][i].score);
		}
		printf("----------------uer %d\n", i);
	}
}


/*
 * 读取peobeSet中的记录
 * */
void readProbeSet(){
	FILE* fin;
	if( NULL == (fin = fopen("porbeSet.data", "r"))){
		printf("open file(porbeSet.data) error!\n");
		exit(0);
	}
	int i;
	int skiprate;
	for( i = 0; i < PROBESET_SIZE; i++ ){
		if( 3 != fscanf(fin, "%d %d %d", &item[i].user, &item[i].movie, &skiprate)){
			printf("fscanf error: %d\n", i);
			exit(0);
		}
	}
	fclose(fin);
	//test
	/*
	for( i = 0; i < PROBESET_SIZE; i++ ){
		printf("%d\t%d\t--------%d\n", item[i].user, item[i].movie, i+1);
	}
	*/
}

void NBI(){
	int i;
	int user;
	int movie;
	double final_result = 0.0;
	/*
	printf("aaaaaaaaa\n");
	for( i = 1; i <= USER_SIZE; i++ ){
		printf("%5d", unselectMovieOfUser[i].count_unselected);
		if( i % 10 == 0 )
			printf(" --------row%d\n", i / 10);
	}
	*/
	printf("\n");
	printf("\n\n");
	for( i = 0; i < PROBESET_SIZE; i++ ){
		user = item[i].user;
		movie = item[i].movie;
		//printf("%d\t%d\t", user, movie);
		ratio[i] = (double)getRank(user, movie)/(MOVIE_SIZE - degreeOfUser[user]);
		//printf("%d\t%d\t%d\t\n", getRank(user, movie), unselectMovieOfUser[user].count_unselected,MOVIE_SIZE - degreeOfUser[user]);
	}
	/*
	printf("ratio array\n");
	for( i = 0; i < PROBESET_SIZE; i++ ){
		printf("%10.6f", ratio[i]);
	}
	*/
	for( i = 0; i < PROBESET_SIZE; i++ ){
		final_result = final_result + ratio[i];
	} 
	printf("\n");
	printf("%10.6f", final_result);
	printf("%10.6f\n", (double)final_result/PROBESET_SIZE);
}

/*
 * 得到指定的用户(参数user)的指定的电影(参数movie)的排名
 * */
int getRank(int user, int movie){
	int i;
	int j;
	double score;
	for( i = 1; i <= MOVIE_SIZE; i++ ){
		if( scoreOfUnselectedMovie[i][user].movieId == movie ){
			score = scoreOfUnselectedMovie[i][user].score;
			for( j = i; j > 0 && scoreOfUnselectedMovie[j][user].score == score; j-- );
				return j+1;
		}
	}
}
/*
int findMovieRank(int numberOfMovie){
	int i,j;
	int degree;
	for( i = 1; i <= SIZE_MOVIE; i++ ){
		if( numberOfMovie == movieRank[i].movieId ){
			degree = movieRank[i].degree;
			for( j = i; j > 0 && movieRank[j].degree == degree; j-- );
				return j+1;
		}
	}
}
*/



个性化推荐算法:GRM,CF,NBI的实现

标签:

原文地址:http://blog.csdn.net/robin_xu_shuai/article/details/51533940

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!