如果想通过矩阵分解的方法实现基于模型的协同过滤,ALS是一个不错的选择,其英文全称是Alternating Least Squares,翻译过来是交替最小二乘法。假设用户为a,物品为b,评分矩阵为R(m, n),可近似分解为用户矩阵U(k, m)和物品矩阵I(k, n)的乘积,即 $R \approx U^{T} I$,其中m为用户数,n为物品数,k为隐因子的个数(m, n, k即各矩阵的维度)。前方小段数学公式低能预警:
def _process_data(self, X):
    """Index the raw ratings and build user-keyed and item-keyed lookups.

    The input is consumed in a single pass, so any iterable works
    (including a generator), not only a re-iterable sequence.

    Side effects: sets ``self.user_ids``, ``self.user_ids_dict``,
    ``self.item_ids``, ``self.item_ids_dict`` and ``self.shape``.

    Arguments:
        X {iterable} -- Rows that unpack to (user_id, item_id, rating).
            If the same (user_id, item_id) pair occurs more than once,
            the last rating wins.

    Raises:
        ValueError / TypeError -- If a row does not unpack into exactly
            three values.

    Returns:
        tuple -- ``(ratings, ratings_T)`` where ``ratings[user_id][item_id]``
            and ``ratings_T[item_id][user_id]`` both hold the rating.
            Both are nested ``defaultdict`` objects whose inner default
            is ``0``.
    """
    ratings = defaultdict(lambda: defaultdict(int))
    ratings_T = defaultdict(lambda: defaultdict(int))
    for user_id, item_id, rating in X:
        ratings[user_id][item_id] = rating
        ratings_T[item_id][user_id] = rating

    # Derive the id tuples from the lookup keys. On Python 3.7+ this is
    # first-seen order, which is reproducible between runs (a set of str
    # ids is not). It also makes len(ids) == len(lookup) true by
    # construction, so no separate length check is needed.
    self.user_ids = tuple(ratings)
    self.item_ids = tuple(ratings_T)

    # Map each id to its position in the corresponding tuple.
    self.user_ids_dict = {
        user_id: idx for idx, user_id in enumerate(self.user_ids)
    }
    self.item_ids_dict = {
        item_id: idx for idx, item_id in enumerate(self.item_ids)
    }

    # (number of distinct users, number of distinct items)
    self.shape = (len(self.user_ids), len(self.item_ids))
    return ratings, ratings_T
def fit(self, X, k, max_iter=10):
    """Fit the model by alternately updating the item and user matrices.

    Even-numbered iterations (0, 2, ...) recompute ``self.item_matrix``
    from the current user matrix; odd-numbered iterations recompute
    ``self.user_matrix`` from the current item matrix. The RMSE is
    printed after every iteration and the last value is stored in
    ``self.rmse``.

    Arguments:
        X {iterable} -- Rows of (user_id, item_id, rating), forwarded to
            ``self._process_data``.
        k {int} -- Number of rows of the user and item matrices.

    Keyword Arguments:
        max_iter {int} -- Total number of alternating updates; must be
            at least 1. (default: {10})

    Raises:
        ValueError -- If ``max_iter`` is smaller than 1. Previously this
            surfaced as an UnboundLocalError after the loop was skipped.
        AssertionError -- If ``k`` is not smaller than ``min(m, n)``.
            Raised explicitly so the check still runs under ``python -O``.
    """
    if max_iter < 1:
        raise ValueError("Parameter max_iter must be at least 1")

    ratings, ratings_T = self._process_data(X)
    # Items each user has already rated.
    self.user_items = {
        user_id: set(rated) for user_id, rated in ratings.items()
    }

    m, n = self.shape
    error_msg = "Parameter k must be less than the rank of original matrix"
    if not k < min(m, n):
        raise AssertionError(error_msg)

    self.user_matrix = self._gen_random_matrix(k, m)

    for i in range(max_iter):
        if i % 2:
            # Odd step: hold the items fixed and update the users.
            # NOTE(review): presumably (I * I^T)^-1 * I -- the Matrix API
            # (mat_mul / transpose / inverse) is defined elsewhere.
            items = self.item_matrix
            self.user_matrix = self._items_mul_ratings(
                items.mat_mul(items.transpose).inverse.mat_mul(items),
                ratings,
            )
        else:
            # Even step: hold the users fixed and update the items.
            users = self.user_matrix
            self.item_matrix = self._users_mul_ratings(
                users.mat_mul(users.transpose).inverse.mat_mul(users),
                ratings_T,
            )
        rmse = self._get_rmse(ratings)
        print("Iterations: %d, RMSE: %.6f" % (i + 1, rmse))

    # RMSE after the final iteration.
    self.rmse = rmse
@run_time
def main():
    """Train ALS on the movie ratings and print sample recommendations.

    Fits a model with k=3 for 5 iterations, then prints the top 2
    predicted items for each of the user ids 1 to 4.
    """
    print("Testing the accuracy of ALS...")
    # Load data
    X = load_movie_ratings()
    # Train model
    model = ALS()
    model.fit(X, k=3, max_iter=5)
    print()

    print("Showing the predictions of users...")
    # Predictions
    user_ids = range(1, 5)
    predictions = model.predict(user_ids, n_items=2)
    for user_id, prediction in zip(user_ids, predictions):
        _prediction = [
            format_prediction(item_id, score)
            for item_id, score in prediction
        ]
        print("User id:%d recommendation: %s" % (user_id, _prediction))