推薦算法(基於用戶和基於物品)

推薦算法

https://yq.aliyun.com/articles/539247

基於用戶的協同過濾算法

首先用一個詞就能很好的解釋什麼叫作基於用戶的協同過濾算法:【臭味相投】。雖然是貶義詞,但也說明了,具備相似特徵的人羣,他們喜歡的東西不少也是同樣的。所以,在推薦系統中,假設要爲A用戶推薦物品,能夠經過尋找他的「鄰居」——與A具備類似興趣的用戶,把那些用戶喜歡的、而A用戶卻未曾聽說的東西推薦給A。

 

基於物品的協同過濾算法

假設某天你購買了機器學習書籍,那麼淘寶會給你推薦python書籍。由於機器通過判斷得出這二者類似度很高,你既然會喜歡機器學習,那麼理應喜歡python。

基於物品的協同過濾算法就是給用戶推薦那些和他們以前喜歡的物品類似的物品。

不過, ItemCF算法並不利用物品的內容屬性計算物品之間的類似度,它主要經過分析用戶的行爲記錄計算物品之間的類似度。該算法認爲,物品A和物品B具備很大的類似度是由於喜歡物品A的用戶大都也喜歡物品B。

 

https://www.jianshu.com/p/e56665c54df8

 基於鄰域的協同過濾算法主要有兩種,一種是基於物品的,一種是基於用戶的。所謂基於物品,就是用戶喜歡了X商品,咱們給他推薦與X商品類似的商品。所謂基於用戶,就是用戶A和用戶B類似,用戶A買了X、Y,用戶B買了X、Y、Z,咱們就給用戶A推薦商品Z。

 

使用基於物品的協同過濾,須要維護一個物品類似度矩陣;使用基於用戶的協同過濾,須要維護一個用戶類似度矩陣。能夠設想,若是物品之間的類似度常常變化,那麼物品類似度的矩陣則須要常常更新。若是物品常常增長,那麼物品類似度的矩陣也會增加的很是快。新聞網站就同時具備這兩個特色,因此基於物品的協同過濾並不適用於新聞的推薦。

類似性計算方法:

https://blog.csdn.net/zz_dd_yy/article/details/51924661

根據皮爾遜相關係數的值參考如下標準,能夠大概評估出二者的類似程度:

  • 0.8-1.0 極強相關
  • 0.6-0.8 強相關
  • 0.4-0.6 中等程度相關
  • 0.2-0.4 弱相關
  • 0.0-0.2 極弱相關或無相關

 

DEMO

https://github.com/fanqingsong/EBooksRecommander

包括兩部分:

一、 抓取用戶讀書數據

二、 進行推薦

 

推薦代碼:

# -*- coding: utf-8 -*-
from __future__ import division
from math import sqrt
from dataloader import loadJsonObjectToDict
import pickle
import os
import json
import io

# Weight of the Tanimoto score relative to the Euclidean score in
# sim_combine: (euclid + tanimoto * PENALTY_RATIO) / (PENALTY_RATIO + 1).
PENALTY_RATIO = 9

def sim_tanimoto(prefs, personA, personB):
    """Return the Tanimoto (Jaccard) similarity of two entries' key sets.

    Only the presence of a key matters; the rating values are ignored.

    Args:
        prefs: mapping of entry -> {key: rating}.
        personA: key of the first entry in ``prefs``.
        personB: key of the second entry in ``prefs``.

    Returns:
        ``len(A & B) / len(A | B)``, or 0 when both entries are empty.

    Raises:
        KeyError: if either entry is missing from ``prefs``.
    """
    # Trace output kept from the original; the call form is valid on both
    # Python 2 and Python 3.
    print("enter sim_tanimoto")

    keys_a = set(prefs[personA])
    keys_b = set(prefs[personB])
    # Use a real set union rather than dict(a, **b): the ** form raises
    # TypeError on Python 3 as soon as a key is not a str.
    union = keys_a | keys_b
    if not union:
        # Both entries are empty: nothing to compare, avoid dividing by zero.
        return 0
    return len(keys_a & keys_b) / len(union)

def sim_euclid(prefs, personA, personB):
    """Return a Euclidean-distance based similarity between two entries.

    The distance is computed over the keys both entries have rated and is
    mapped to a score with ``1 / (1 + distance)``, so identical ratings
    give 1 and larger distances give values closer to 0.

    Args:
        prefs: mapping of entry -> {key: numeric rating}.
        personA: key of the first entry in ``prefs``.
        personB: key of the second entry in ``prefs``.

    Returns:
        The similarity score, or 0 when the entries share no keys.

    Raises:
        KeyError: if either entry is missing from ``prefs``.
    """
    # Trace output kept from the original; the call form is valid on both
    # Python 2 and Python 3.
    print("enter sim_euclid")

    ratings_a = prefs[personA]
    ratings_b = prefs[personB]
    shared = [item for item in ratings_a if item in ratings_b]
    # Zero shared items -> not similar at all.
    if not shared:
        return 0

    sum_of_squares = sum(
        (ratings_a[item] - ratings_b[item]) ** 2 for item in shared
    )
    return 1 / (1 + sqrt(sum_of_squares))

def sim_pearson(prefs, personA, personB):
    """Return the Pearson correlation of two entries over their shared keys.

    Args:
        prefs: mapping of entry -> {key: numeric rating}.
        personA: key of the first entry in ``prefs``.
        personB: key of the second entry in ``prefs``.

    Returns:
        The correlation coefficient, or 0 when the entries share no keys or
        when either entry has no variance over the shared keys.

    Raises:
        KeyError: if either entry is missing from ``prefs``.
    """
    # Trace output kept from the original; the call form is valid on both
    # Python 2 and Python 3.
    print("enter sim_pearson")

    ratings_a = prefs[personA]
    ratings_b = prefs[personB]
    shared = [item for item in ratings_a if item in ratings_b]
    n = len(shared)
    if n == 0:
        return 0

    # Plain sums of the shared ratings.
    sum_a = sum(ratings_a[item] for item in shared)
    sum_b = sum(ratings_b[item] for item in shared)
    # Sums of squares.
    sum_sq_a = sum(ratings_a[item] ** 2 for item in shared)
    sum_sq_b = sum(ratings_b[item] ** 2 for item in shared)
    # Sum of pairwise products.
    sum_products = sum(ratings_a[item] * ratings_b[item] for item in shared)

    num = sum_products - (sum_a * sum_b / n)
    spread_a = sum_sq_a - sum_a ** 2 / n
    spread_b = sum_sq_b - sum_b ** 2 / n
    # Float rounding can push a (mathematically non-negative) spread just
    # below zero; taking sqrt of the product would then raise ValueError or,
    # if both are negative, yield a meaningless value. No spread means no
    # correlation can be defined, so report 0.
    if spread_a <= 0 or spread_b <= 0:
        return 0
    den = sqrt(spread_a * spread_b)
    if den == 0:
        # The product of two tiny positive spreads may underflow to zero.
        return 0
    return num / den

def sim_combine(prefs, personA, personB):
    """Return a weighted blend of the Euclidean and Tanimoto similarities.

    The result is ``(euclid + tanimoto * PENALTY_RATIO) / (PENALTY_RATIO + 1)``,
    i.e. the Tanimoto score carries ``PENALTY_RATIO`` times the weight of the
    Euclidean score.

    Args:
        prefs: mapping of entry -> {key: numeric rating}.
        personA: key of the first entry in ``prefs``.
        personB: key of the second entry in ``prefs``.

    Returns:
        The blended similarity score.
    """
    # Trace output kept from the original; the call form is valid on both
    # Python 2 and Python 3.
    print("enter sim_combine")

    euclid = sim_euclid(prefs, personA, personB)
    tanimoto = sim_tanimoto(prefs, personA, personB)
    return (euclid + tanimoto * PENALTY_RATIO) / (PENALTY_RATIO + 1)

def topMatches(prefs, person, n=5, similarity = sim_pearson):
    """Return the ``n`` entries of ``prefs`` most similar to ``person``.

    Args:
        prefs: mapping of entry -> {key: numeric rating}.
        person: key of the entry to compare every other entry against.
        n: maximum number of matches to return.
        similarity: callable ``(prefs, a, b) -> score`` used for comparison.

    Returns:
        A list of ``(score, other)`` tuples in descending order; ties on the
        score are ordered by ``other``, also descending.
    """
    # Trace output kept from the original; the call form is valid on both
    # Python 2 and Python 3.
    print("enter topMatches")

    scores = [
        (similarity(prefs, person, other), other)
        for other in prefs
        if other != person
    ]
    # Keys of prefs are unique, so no two tuples compare equal and a single
    # descending sort yields the same order as sort() followed by reverse().
    return sorted(scores, reverse=True)[0:n]

def getRecommandations(prefs, person,similarity = sim_pearson):
    """Rank items for ``person`` using similarity-weighted ratings of others.

    Every other entry with a positive similarity contributes its ratings,
    weighted by that similarity, for items that ``person`` has not rated
    (or has rated 0). Each item's total is normalised by the sum of the
    similarities that contributed to it.

    Args:
        prefs: mapping of user -> {item: numeric rating}.
        person: key of the user to recommend for.
        similarity: callable ``(prefs, a, b) -> score``.

    Returns:
        A list of ``(predicted_rating, item)`` tuples in descending order.
    """
    # Trace output kept from the original; the call form is valid on both
    # Python 2 and Python 3.
    print("enter getRecommandations")

    totals = {}
    simSums = {}

    for other in prefs:
        if other == person:
            continue
        sim = similarity(prefs, person, other)
        # Ignore users that are unrelated or negatively correlated.
        if sim <= 0:
            continue

        for item, rating in prefs[other].items():
            # Only score items the target user has not rated yet.
            if item in prefs[person] and prefs[person][item] != 0:
                continue
            totals[item] = totals.get(item, 0) + rating * sim
            simSums[item] = simSums.get(item, 0) + sim

    # simSums[item] is a sum of strictly positive values, so never zero.
    rankings = [(total / simSums[item], item) for item, total in totals.items()]
    return sorted(rankings, reverse=True)

def transformPrefs(prefs):
    """Swap the two levels of a nested rating dict.

    Args:
        prefs: mapping of person -> {item: rating}.

    Returns:
        A new mapping of item -> {person: rating}. The input is not modified.
    """
    # Trace output kept from the original; the call form is valid on both
    # Python 2 and Python 3.
    print("enter transformPrefs")

    result = {}
    for person, ratings in prefs.items():
        for item, rating in ratings.items():
            result.setdefault(item, {})[person] = rating
    return result

def calculationSimilarItem(prefs, simFunction, dumpedfilePath, n=10):
    """Build, or load from cache, the top-``n`` similar items for every item.

    If ``dumpedfilePath`` exists its pickled content is returned as is;
    ``prefs``, ``simFunction`` and ``n`` are NOT checked against the cache,
    so delete the file whenever any of them changes.

    Args:
        prefs: mapping of user -> {item: numeric rating}.
        simFunction: callable ``(prefs, a, b) -> score`` used between items.
        dumpedfilePath: path of the pickle cache to read or create.
        n: number of similar items to keep per item.

    Returns:
        Mapping of item -> list of ``(score, other_item)`` tuples as
        produced by ``topMatches``.
    """
    # Trace output kept from the original; the call form is valid on both
    # Python 2 and Python 3.
    print("enter calculationSimilarItem")

    if os.path.exists(dumpedfilePath):
        print('find preprocessed data, loading directly...')
        # SECURITY: pickle.load can execute arbitrary code. Only point this
        # at cache files written by this script, never at untrusted input.
        with io.open(dumpedfilePath, 'rb') as f:
            return pickle.load(f)

    # Invert to item -> {user: rating} so items can be compared directly.
    itemPrefs = transformPrefs(prefs)

    result = {}
    for item in itemPrefs:
        result[item] = topMatches(itemPrefs, item, n=n, similarity=simFunction)

    with io.open(dumpedfilePath, 'wb') as f:
        pickle.dump(result, f)

    return result

def getRecommandedItems(itemMatch, userRating):
    """Rank unrated items using a precomputed item-similarity table.

    For each item the user rated, every positively similar item the user has
    not rated receives ``similarity * rating``; the total per candidate is
    normalised by the sum of the similarities that contributed to it.

    Args:
        itemMatch: mapping of item -> list of ``(similarity, other_item)``.
        userRating: mapping of item -> numeric rating given by the user.

    Returns:
        A list of ``(predicted_rating, item)`` tuples in descending order.
    """
    # Trace output kept from the original; the call form is valid on both
    # Python 2 and Python 3.
    print("enter getRecommandedItems")

    scores = {}
    totalSim = {}

    for item, rating in userRating.items():
        # The user may have rated an item that is absent from the similarity
        # table; such a rating carries no information here, so skip it
        # instead of failing with KeyError.
        for similarity, itemSim in itemMatch.get(item, ()):
            if itemSim in userRating or similarity <= 0:
                continue
            scores[itemSim] = scores.get(itemSim, 0) + similarity * rating
            totalSim[itemSim] = totalSim.get(itemSim, 0) + similarity

    # totalSim[item] is a sum of strictly positive values, so never zero.
    rankings = [(score / totalSim[item], item) for item, score in scores.items()]
    return sorted(rankings, reverse=True)

def readUserPrefs(userRatingPath):
    """Read a whitespace-separated ``<item> <rating>`` file into a dict.

    The first token of each line is the item and the second its rating; any
    further tokens are ignored. Blank lines are skipped.

    Args:
        userRatingPath: path of the UTF-8 encoded ratings file.

    Returns:
        Mapping of item -> float rating; empty when the file does not exist.

    Raises:
        IndexError: if a non-blank line has fewer than two tokens.
        ValueError: if the second token is not a valid float.
    """
    # Trace output kept from the original; the call form is valid on both
    # Python 2 and Python 3.
    print("enter readUserPrefs")

    userRating = {}
    if not os.path.exists(userRatingPath):
        return userRating

    # The with-block guarantees the handle is closed, even on a parse error.
    with io.open(userRatingPath, 'r', encoding="utf-8") as f:
        for line in f:
            txtSeg = line.split()
            if not txtSeg:
                # Blank line, e.g. a trailing newline at the end of the file.
                continue
            userRating[txtSeg[0]] = float(txtSeg[1])

    return userRating

#TestCode
def ItemBasedReco():
    """Print the top 15 item-based recommendations for each similarity.

    Loads the scraped ratings and the user's own ratings, then for each
    similarity function builds (or loads from its pickle cache) the
    item-similarity table and prints ``score:item`` lines.
    """
    # Load scrapy data into {User -> Book -> Note} dict.
    loadedData = loadJsonObjectToDict("../data/YSData.json")

    # Read the ratings of the user we recommend for.
    userRatingPath = "./UserPrefs.txt"
    userRating = readUserPrefs(userRatingPath)

    # (banner, similarity function, cache-file tag) for every run, in the
    # order they are reported.
    runs = (
        ("------------------ Item Based: Sim Euclid --------------------",
         sim_euclid, "Euclid"),
        ("------------------ Item Based: Sim Tanimoto --------------------",
         sim_tanimoto, "Tanimoto"),
        ("------------------ Item Based: Sim Pearson --------------------",
         sim_pearson, "Pearson"),
        ("------------------ Item Based: Sim Tanimoto * 10 + Sim Euclid --------------------",
         sim_combine, "Combine"),
    )

    for banner, simFunction, cacheTag in runs:
        print(banner)
        # Top-10 most similar books for each book, using this similarity.
        itemMatch = calculationSimilarItem(
            loadedData, simFunction, "../data/CalculatedItemSim" + cacheTag + ".pkl")
        recommendations = getRecommandedItems(itemMatch, userRating)
        for score, title in recommendations[0:15]:
            print(str(score) + ":" + title)

def UserBasedReco():
    """Print the top 15 user-based recommendations for each similarity.

    The user's own ratings are inserted into the loaded data under the key
    ``'Me'`` and recommendations are computed for that entry.
    """
    # Load scrapy data into {User -> Book -> Note} dict.
    loadedData = loadJsonObjectToDict("../data/YSData.json")
    # Read the ratings of the user we recommend for.
    userRatingPath = "./UserPrefs.txt"
    userRating = readUserPrefs(userRatingPath)
    loadedData['Me'] = userRating

    # (similarity function, banner) for every run, in reporting order.
    runs = (
        (sim_euclid,
         "------------------ User Based: Sim Euclid --------------------"),
        (sim_pearson,
         "------------------ User Based: Sim Pearson --------------------"),
        (sim_tanimoto,
         "------------------ User Based: Sim Tanimoto --------------------"),
        (sim_combine,
         "------------------ User Based: Sim Tanimoto * 10 + Sim Euclid --------------------"),
    )

    for simFunction, banner in runs:
        # Compute before printing the banner so the trace output of the
        # similarity functions keeps appearing ahead of it, as before.
        recommendations = getRecommandations(loadedData, 'Me', simFunction)
        print(banner)
        for score, title in recommendations[0:15]:
            print(str(score) + ":" + title)


# Script entry point: run the user-based demo first, then the item-based one.
if __name__ == '__main__':
    for run_demo in (UserBasedReco, ItemBasedReco):
        run_demo()
相關文章
相關標籤/搜索