ie=UTF8&refRID=0H4H2NSSR8F34R76E2TP
網頁上的元素:
java
數據演示樣例(rating.csv,每條記錄爲 userid,bookid,pref):
1,565,3 1,807,2 1,201,1 1,557,9 1,987,10 1,59,5 1,305,6 1,153,3 1,139,7 1,875,5 1,722,10 2,977,4 2,806,3 2,654,8 2,21,8 2,662,5 2,437,6 2,576,3 2,141,8 2,311,4 2,101,3 2,540,9 2,87,3 2,65,8 2,501,6 2,710,5 2,331,9 2,542,4 2,757,9 2,590,7
數據演示樣例(user.csv,每條記錄爲 userid,gender,age):
1,M,40 2,M,27 3,M,41 4,F,43 5,F,16 6,M,36 7,F,36 8,F,46 9,M,50 10,M,21 11,F,11 12,M,42 13,F,40 14,F,28 15,M,25 16,M,68 17,M,53 18,F,69 19,F,48 20,F,56 21,F,36
針對上面的數據,我將用7種算法組合進行測試。有關Mahout算法組合的詳解,請參考文章:從源碼剖析Mahout推薦引擎
7種算法組合
網絡
單機算法: 在單機內存中計算,支持多種推薦算法,部署執行簡單,但處理的數據量有限
分佈式算法: 基於Hadoop集羣執行,只支持有限的幾種推薦算法,部署執行複雜,但支持海量數據
開發環境
架構
源碼
package org.conan.mymahout.recommendation.book; import java.io.IOException; import org.apache.mahout.cf.taste.common.TasteException; import org.apache.mahout.cf.taste.eval.RecommenderBuilder; import org.apache.mahout.cf.taste.model.DataModel; import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood; import org.apache.mahout.cf.taste.similarity.ItemSimilarity; import org.apache.mahout.cf.taste.similarity.UserSimilarity; public class BookEvaluator { final static int NEIGHBORHOOD_NUM = 2; final static int RECOMMENDER_NUM = 3; public static void main(String[] args) throws TasteException, IOException { String file = "datafile/book/rating.csv"; DataModel dataModel = RecommendFactory.buildDataModel(file); userEuclidean(dataModel); userLoglikelihood(dataModel); userEuclideanNoPref(dataModel); itemEuclidean(dataModel); itemLoglikelihood(dataModel); itemEuclideanNoPref(dataModel); slopeOne(dataModel); } public static RecommenderBuilder userEuclidean(DataModel dataModel) throws TasteException, IOException { System.out.println("userEuclidean"); UserSimilarity userSimilarity = RecommendFactory.userSimilarity(RecommendFactory.SIMILARITY.EUCLIDEAN, dataModel); UserNeighborhood userNeighborhood = RecommendFactory.userNeighborhood(RecommendFactory.NEIGHBORHOOD.NEAREST, userSimilarity, dataModel, NEIGHBORHOOD_NUM); RecommenderBuilder recommenderBuilder = RecommendFactory.userRecommender(userSimilarity, userNeighborhood, true); RecommendFactory.evaluate(RecommendFactory.EVALUATOR.AVERAGE_ABSOLUTE_DIFFERENCE, recommenderBuilder, null, dataModel, 0.7); RecommendFactory.statsEvaluator(recommenderBuilder, null, dataModel, 2); return recommenderBuilder; } public static RecommenderBuilder userLoglikelihood(DataModel dataModel) throws TasteException, IOException { System.out.println("userLoglikelihood"); UserSimilarity userSimilarity = RecommendFactory.userSimilarity(RecommendFactory.SIMILARITY.LOGLIKELIHOOD, dataModel); UserNeighborhood userNeighborhood = 
RecommendFactory.userNeighborhood(RecommendFactory.NEIGHBORHOOD.NEAREST, userSimilarity, dataModel, NEIGHBORHOOD_NUM); RecommenderBuilder recommenderBuilder = RecommendFactory.userRecommender(userSimilarity, userNeighborhood, true); RecommendFactory.evaluate(RecommendFactory.EVALUATOR.AVERAGE_ABSOLUTE_DIFFERENCE, recommenderBuilder, null, dataModel, 0.7); RecommendFactory.statsEvaluator(recommenderBuilder, null, dataModel, 2); return recommenderBuilder; } public static RecommenderBuilder userEuclideanNoPref(DataModel dataModel) throws TasteException, IOException { System.out.println("userEuclideanNoPref"); UserSimilarity userSimilarity = RecommendFactory.userSimilarity(RecommendFactory.SIMILARITY.EUCLIDEAN, dataModel); UserNeighborhood userNeighborhood = RecommendFactory.userNeighborhood(RecommendFactory.NEIGHBORHOOD.NEAREST, userSimilarity, dataModel, NEIGHBORHOOD_NUM); RecommenderBuilder recommenderBuilder = RecommendFactory.userRecommender(userSimilarity, userNeighborhood, false); RecommendFactory.evaluate(RecommendFactory.EVALUATOR.AVERAGE_ABSOLUTE_DIFFERENCE, recommenderBuilder, null, dataModel, 0.7); RecommendFactory.statsEvaluator(recommenderBuilder, null, dataModel, 2); return recommenderBuilder; } public static RecommenderBuilder itemEuclidean(DataModel dataModel) throws TasteException, IOException { System.out.println("itemEuclidean"); ItemSimilarity itemSimilarity = RecommendFactory.itemSimilarity(RecommendFactory.SIMILARITY.EUCLIDEAN, dataModel); RecommenderBuilder recommenderBuilder = RecommendFactory.itemRecommender(itemSimilarity, true); RecommendFactory.evaluate(RecommendFactory.EVALUATOR.AVERAGE_ABSOLUTE_DIFFERENCE, recommenderBuilder, null, dataModel, 0.7); RecommendFactory.statsEvaluator(recommenderBuilder, null, dataModel, 2); return recommenderBuilder; } public static RecommenderBuilder itemLoglikelihood(DataModel dataModel) throws TasteException, IOException { System.out.println("itemLoglikelihood"); ItemSimilarity itemSimilarity = 
RecommendFactory.itemSimilarity(RecommendFactory.SIMILARITY.LOGLIKELIHOOD, dataModel); RecommenderBuilder recommenderBuilder = RecommendFactory.itemRecommender(itemSimilarity, true); RecommendFactory.evaluate(RecommendFactory.EVALUATOR.AVERAGE_ABSOLUTE_DIFFERENCE, recommenderBuilder, null, dataModel, 0.7); RecommendFactory.statsEvaluator(recommenderBuilder, null, dataModel, 2); return recommenderBuilder; } public static RecommenderBuilder itemEuclideanNoPref(DataModel dataModel) throws TasteException, IOException { System.out.println("itemEuclideanNoPref"); ItemSimilarity itemSimilarity = RecommendFactory.itemSimilarity(RecommendFactory.SIMILARITY.EUCLIDEAN, dataModel); RecommenderBuilder recommenderBuilder = RecommendFactory.itemRecommender(itemSimilarity, false); RecommendFactory.evaluate(RecommendFactory.EVALUATOR.AVERAGE_ABSOLUTE_DIFFERENCE, recommenderBuilder, null, dataModel, 0.7); RecommendFactory.statsEvaluator(recommenderBuilder, null, dataModel, 2); return recommenderBuilder; } public static RecommenderBuilder slopeOne(DataModel dataModel) throws TasteException, IOException { System.out.println("slopeOne"); RecommenderBuilder recommenderBuilder = RecommendFactory.slopeOneRecommender(); RecommendFactory.evaluate(RecommendFactory.EVALUATOR.AVERAGE_ABSOLUTE_DIFFERENCE, recommenderBuilder, null, dataModel, 0.7); RecommendFactory.statsEvaluator(recommenderBuilder, null, dataModel, 2); return recommenderBuilder; } }
控制檯輸出:
userEuclidean AVERAGE_ABSOLUTE_DIFFERENCE Evaluater Score:0.33333325386047363 Recommender IR Evaluator: [Precision:0.3010752688172043,Recall:0.08542713567839195] userLoglikelihood AVERAGE_ABSOLUTE_DIFFERENCE Evaluater Score:2.5245869159698486 Recommender IR Evaluator: [Precision:0.11764705882352945,Recall:0.017587939698492466] userEuclideanNoPref AVERAGE_ABSOLUTE_DIFFERENCE Evaluater Score:4.288461538461536 Recommender IR Evaluator: [Precision:0.09045226130653267,Recall:0.09296482412060306] itemEuclidean AVERAGE_ABSOLUTE_DIFFERENCE Evaluater Score:1.408880928305655 Recommender IR Evaluator: [Precision:0.0,Recall:0.0] itemLoglikelihood AVERAGE_ABSOLUTE_DIFFERENCE Evaluater Score:2.448554412835434 Recommender IR Evaluator: [Precision:0.0,Recall:0.0] itemEuclideanNoPref AVERAGE_ABSOLUTE_DIFFERENCE Evaluater Score:2.5665197873957957 Recommender IR Evaluator: [Precision:0.6005025125628134,Recall:0.6055276381909548] slopeOne AVERAGE_ABSOLUTE_DIFFERENCE Evaluater Score:2.6893078179405814 Recommender IR Evaluator: [Precision:0.0,Recall:0.0]
可視化「評估推薦器」輸出:
推薦的結果的平均距離,源碼:
package org.conan.mymahout.recommendation.book; import java.io.IOException; import java.util.List; import org.apache.mahout.cf.taste.common.TasteException; import org.apache.mahout.cf.taste.eval.RecommenderBuilder; import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator; import org.apache.mahout.cf.taste.model.DataModel; import org.apache.mahout.cf.taste.recommender.RecommendedItem; public class BookResult { final static int NEIGHBORHOOD_NUM = 2; final static int RECOMMENDER_NUM = 3; public static void main(String[] args) throws TasteException, IOException { String file = "datafile/book/rating.csv"; DataModel dataModel = RecommendFactory.buildDataModel(file); RecommenderBuilder rb1 = BookEvaluator.userEuclidean(dataModel); RecommenderBuilder rb2 = BookEvaluator.itemEuclidean(dataModel); RecommenderBuilder rb3 = BookEvaluator.userEuclideanNoPref(dataModel); RecommenderBuilder rb4 = BookEvaluator.itemEuclideanNoPref(dataModel); LongPrimitiveIterator iter = dataModel.getUserIDs(); while (iter.hasNext()) { long uid = iter.nextLong(); System.out.print("userEuclidean =>"); result(uid, rb1, dataModel); System.out.print("itemEuclidean =>"); result(uid, rb2, dataModel); System.out.print("userEuclideanNoPref =>"); result(uid, rb3, dataModel); System.out.print("itemEuclideanNoPref =>"); result(uid, rb4, dataModel); } } public static void result(long uid, RecommenderBuilder recommenderBuilder, DataModel dataModel) throws TasteException { List list = recommenderBuilder.buildRecommender(dataModel).recommend(uid, RECOMMENDER_NUM); RecommendFactory.showItems(uid, list, false); } }
控制檯輸出(僅截取部分結果):
... userEuclidean =>uid:63, itemEuclidean =>uid:63,(984,9.000000)(690,9.000000)(943,8.875000) userEuclideanNoPref =>uid:63,(4,1.000000)(723,1.000000)(300,1.000000) itemEuclideanNoPref =>uid:63,(867,3.791667)(947,3.083333)(28,2.750000) userEuclidean =>uid:64, itemEuclidean =>uid:64,(368,8.615385)(714,8.200000)(290,8.142858) userEuclideanNoPref =>uid:64,(860,1.000000)(490,1.000000)(64,1.000000) itemEuclideanNoPref =>uid:64,(409,3.950000)(715,3.830627)(901,3.444048) userEuclidean =>uid:65,(939,7.000000) itemEuclidean =>uid:65,(550,9.000000)(334,9.000000)(469,9.000000) userEuclideanNoPref =>uid:65,(939,2.000000)(185,1.000000)(736,1.000000) itemEuclideanNoPref =>uid:65,(666,4.166667)(96,3.093931)(345,2.958333) userEuclidean =>uid:66, itemEuclidean =>uid:66,(971,9.900000)(656,9.600000)(918,9.577709) userEuclideanNoPref =>uid:66,(6,1.000000)(492,1.000000)(676,1.000000) itemEuclideanNoPref =>uid:66,(185,3.650000)(533,3.617307)(172,3.500000) userEuclidean =>uid:67, itemEuclidean =>uid:67,(663,9.700000)(987,9.625000)(486,9.600000) userEuclideanNoPref =>uid:67,(732,1.000000)(828,1.000000)(113,1.000000) itemEuclideanNoPref =>uid:67,(724,3.000000)(279,2.950000)(890,2.750000) ...
咱們查看uid=65的用戶推薦信息:
查看user.csv數據集
> user[65,] userid gender age 65 65 M 14
用戶65,男性,14歲。
以itemEuclideanNoPref的算法的推薦結果。查看bookid=666的圖書評分狀況
> rating[which(rating$bookid==666),] userid bookid pref 646 44 666 10 1327 89 666 7 2470 165 666 3 2697 179 666 7
發現有4個用戶對666的圖書評分。查看這4個用戶的屬性數據
> user[c(44,89,165,179),] userid gender age 44 44 F 76 89 89 M 40 165 165 F 59 179 179 F 68
這4個用戶,3女1男。
咱們假設男性和男性有相同的圖書興趣,女性和女性有相同的圖書偏好。
因爲用戶65是男性,因此咱們接下來排除女性的評分者,僅保留男性評分者的評分記錄。
源碼
package org.conan.mymahout.recommendation.book; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.util.HashSet; import java.util.List; import java.util.Set; import org.apache.mahout.cf.taste.common.TasteException; import org.apache.mahout.cf.taste.eval.RecommenderBuilder; import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator; import org.apache.mahout.cf.taste.model.DataModel; import org.apache.mahout.cf.taste.recommender.IDRescorer; import org.apache.mahout.cf.taste.recommender.RecommendedItem; public class BookFilterGenderResult { final static int NEIGHBORHOOD_NUM = 2; final static int RECOMMENDER_NUM = 3; public static void main(String[] args) throws TasteException, IOException { String file = "datafile/book/rating.csv"; DataModel dataModel = RecommendFactory.buildDataModel(file); RecommenderBuilder rb1 = BookEvaluator.userEuclidean(dataModel); RecommenderBuilder rb2 = BookEvaluator.itemEuclidean(dataModel); RecommenderBuilder rb3 = BookEvaluator.userEuclideanNoPref(dataModel); RecommenderBuilder rb4 = BookEvaluator.itemEuclideanNoPref(dataModel); long uid = 65; System.out.print("userEuclidean =>"); filterGender(uid, rb1, dataModel); System.out.print("itemEuclidean =>"); filterGender(uid, rb2, dataModel); System.out.print("userEuclideanNoPref =>"); filterGender(uid, rb3, dataModel); System.out.print("itemEuclideanNoPref =>"); filterGender(uid, rb4, dataModel); } /** * 對用戶性別進行過濾 */ public static void filterGender(long uid, RecommenderBuilder recommenderBuilder, DataModel dataModel) throws TasteException, IOException { Set userids = getMale("datafile/book/user.csv"); //計算男性用戶打分過的圖書 Set bookids = new HashSet(); for (long uids : userids) { LongPrimitiveIterator iter = dataModel.getItemIDsFromUser(uids).iterator(); while (iter.hasNext()) { long bookid = iter.next(); bookids.add(bookid); } } IDRescorer rescorer = new FilterRescorer(bookids); List list = 
recommenderBuilder.buildRecommender(dataModel).recommend(uid, RECOMMENDER_NUM, rescorer); RecommendFactory.showItems(uid, list, false); } /** * 得到男性用戶ID */ public static Set getMale(String file) throws IOException { BufferedReader br = new BufferedReader(new FileReader(new File(file))); Set userids = new HashSet(); String s = null; while ((s = br.readLine()) != null) { String[] cols = s.split(","); if (cols[1].equals("M")) {// 推斷男性用戶 userids.add(Long.parseLong(cols[0])); } } br.close(); return userids; } } /** * 對結果重計算 */ class FilterRescorer implements IDRescorer { final private Set userids; public FilterRescorer(Set userids) { this.userids = userids; } @Override public double rescore(long id, double originalScore) { return isFiltered(id) ? Double.NaN : originalScore; } @Override public boolean isFiltered(long id) { return userids.contains(id); } }
控制檯輸出:
userEuclidean =>uid:65, itemEuclidean =>uid:65,(784,8.090909)(276,8.000000)(476,7.666667) userEuclideanNoPref =>uid:65, itemEuclideanNoPref =>uid:65,(887,2.250000)(356,2.166667)(430,1.866667)
咱們發現,由於僅保留男性的評分記錄,數據量變得比較少,基於用戶的協同過濾算法已經沒有輸出結果了。
基於物品的協同過濾算法,結果集也有所變化。
對於itemEuclideanNoPref算法。輸出排名第一條爲ID爲887的圖書。
我再進一步向下追蹤:查詢哪些用戶對圖書887進行了打分。
> rating[which(rating$bookid==887),] userid bookid pref 1280 85 887 2 1743 119 887 8 2757 184 887 4 2791 186 887 5
有4個用戶對圖書887評分,再分別查看這4個用戶的屬性
> user[c(85,119,184,186),] userid gender age 85 85 F 31 119 119 F 49 184 184 M 27 186 186 M 35
當中2男,2女。由於咱們的算法,已經排除了女性的評分,咱們可以判斷圖書887的推薦應該來自於2個男性的評分者的推薦。
分別計算用戶65,與用戶184和用戶186的評分的圖書交集。
rat65<-rating[which(rating$userid==65),] rat184<-rating[which(rating$userid==184),] rat186<-rating[which(rating$userid==186),] > intersect(rat65$bookid ,rat184$bookid) integer(0) > intersect(rat65$bookid ,rat186$bookid) [1] 65 375
最後發現,用戶65與用戶186都給圖書65和圖書375打過分。咱們再打印出用戶186的評分記錄。
> rat186 userid bookid pref 2790 186 65 7 2791 186 887 5 2792 186 529 3 2793 186 375 6 2794 186 566 7 2795 186 169 4 2796 186 907 1 2797 186 821 2 2798 186 720 5 2799 186 642 5 2800 186 137 3 2801 186 744 1 2802 186 896 2 2803 186 156 6 2804 186 392 3 2805 186 386 3 2806 186 901 7 2807 186 69 6 2808 186 845 6 2809 186 998 3
用戶186還給圖書887打過分,因此給用戶65推薦圖書887是合理的。