關聯推薦算法

時間 2019-11-24

標籤關聯推薦算法简体版

原文原文鏈接

基於關聯規則的推薦（Association Rule-based Recommendation）是以關聯規則爲基礎，把已購商品作爲規則頭，規則體爲推薦對象。關聯規則挖掘能夠發現不同商品在銷售過程中的相關性，在零售業中已經獲得了成功的應用。關聯規則就是在一個交易數據庫中統計購買了商品集X的交易中有多大比例的交易同時購買了商品集Y，其直觀的意義就是用戶在購買某些商品的時候有多大傾向去購買另一些商品。例如，購買牛奶的同時，很多人會同時購買麪包。

算法的第一步——關聯規則的發現——最爲關鍵且最耗時，是算法的瓶頸，但可以離線進行。其次，商品名稱的同義性問題也是關聯規則的一個難點。

以apriori算法爲例，其挖掘步驟：

1.依據支持度找出全部頻繁項集（頻度）

2.依據置信度產生關聯規則（強度）

主要代碼如下：

package apriori;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import com.jolly.bi.aglorithm.conf.AprioriUnit;

/**
 * String-based helpers for Apriori-style association-rule mining.
 *
 * <p>An itemset is encoded as one string in which every item is followed by the separator
 * {@code ";"}, for example {@code "a;b;c;"}. The join step in
 * {@link #getCandidateCollection(HashSet)} only ever appends an item that sorts after the current
 * last item (by {@link String#compareTo(String)}), so every generated itemset lists its items in
 * ascending order and each combination has exactly one encoding.
 *
 * <p>Nothing in this class counts occurrences: callers supply the counts to
 * {@link #getRelationRules(String, Map)} themselves.
 */
public class AprioriUtils {

    private static final String ITEM_SPLIT = ";"; // separator written after every item

    /** Largest itemset produced by {@link #getFC(String)}: 1-item seeds plus four join rounds. */
    private static final int MAX_ITEMSET_SIZE = 5;

    /**
     * Splits one transaction into its distinct 1-item sets.
     *
     * @param trans items separated by {@code ";"}; may be {@code null}
     * @return each non-empty item followed by the separator; empty when there are no items
     */
    private static HashSet<String> getItem1FC(String trans) {
        HashSet<String> singles = new HashSet<String>();
        if (trans == null) {
            return singles;
        }
        for (String item : trans.split(ITEM_SPLIT)) {
            // An empty token (from "", ";a" or "a;;b") would be stored as a bare ";", which splits
            // back into zero items and used to crash the join step with an
            // ArrayIndexOutOfBoundsException.
            if (item.length() > 0) {
                singles.add(item + ITEM_SPLIT);
            }
        }
        return singles;
    }

    /**
     * Enumerates every itemset of one to five distinct items contained in a single transaction.
     *
     * <p>No support filtering happens here; the result is simply all ordered combinations, meant
     * to be counted by the caller.
     *
     * @param trans items separated by {@code ";"}; {@code null} or empty yields an empty set
     * @return all encoded itemsets of the transaction, sizes 1 through 5
     */
    public static HashSet<String> getFC(String trans) {
        HashSet<String> allItemsets = new HashSet<String>();

        HashSet<String> currentLevel = getItem1FC(trans);
        allItemsets.addAll(currentLevel); // seed with the 1-item sets

        // Grow level by level until nothing new can be joined or the size cap is reached.
        for (int size = 2; size <= MAX_ITEMSET_SIZE && !currentLevel.isEmpty(); size++) {
            currentLevel = getCandidateCollection(currentLevel);
            allItemsets.addAll(currentLevel);
        }
        return allItemsets;
    }

    /**
     * Apriori join step: combines k-item sets that share their first k-1 items into (k+1)-item
     * candidates.
     *
     * <p>Two itemsets are joined only when the last item of the first sorts strictly before the
     * last item of the second, so each candidate is produced once and stays in ascending order.
     * No subset-based pruning is performed; the earlier code built every k-item subset of each
     * candidate but never looked at it, so that loop was removed without changing the result.
     * Itemsets that contain no items, and pairs of itemsets with different item counts, are
     * skipped.
     *
     * @param itemkFcSet encoded itemsets that all have the same number of items; not modified
     * @return the joined candidates, each encoded with a trailing separator; never {@code null}
     */
    public static HashSet<String> getCandidateCollection(HashSet<String> itemkFcSet) {
        HashSet<String> candidateCollection = new HashSet<String>();
        if (itemkFcSet == null || itemkFcSet.isEmpty()) {
            return candidateCollection;
        }

        // Split every itemset once instead of once per pair.
        List<String[]> itemsets = new ArrayList<String[]>(itemkFcSet.size());
        for (String encoded : itemkFcSet) {
            itemsets.add(encoded.split(ITEM_SPLIT));
        }

        for (String[] left : itemsets) {
            if (left.length == 0) {
                continue; // a bare separator carries no items
            }
            int last = left.length - 1;
            String leftEncoded = null; // built lazily, only when this itemset joins something
            for (String[] right : itemsets) {
                if (right.length != left.length) {
                    continue; // only itemsets of the same level can be joined
                }
                if (!samePrefix(left, right, last) || left[last].compareTo(right[last]) >= 0) {
                    continue;
                }
                if (leftEncoded == null) {
                    leftEncoded = joinItems(left);
                }
                candidateCollection.add(leftEncoded + right[last] + ITEM_SPLIT);
            }
        }
        return candidateCollection;
    }

    /** Returns whether the first {@code count} entries of both arrays are equal. */
    private static boolean samePrefix(String[] a, String[] b, int count) {
        for (int i = 0; i < count; i++) {
            if (!a[i].equals(b[i])) {
                return false;
            }
        }
        return true;
    }

    /** Encodes items as an itemset string: every item followed by the separator. */
    private static String joinItems(String[] items) {
        StringBuilder sb = new StringBuilder();
        for (String item : items) {
            sb.append(item).append(ITEM_SPLIT);
        }
        return sb.toString();
    }

    /**
     * Appends every non-empty subset of {@code sourceSet} to {@code result}.
     *
     * <p>Subsets keep the element order of {@code sourceSet}. They are emitted element by
     * element: first the singleton of the new element, then a copy of each subset produced so far
     * with the new element appended.
     *
     * @param sourceSet elements to combine
     * @param result    receives the subsets; expected to be empty on entry
     */
    private static void buildSubSet(List<String> sourceSet, List<List<String>> result) {
        int base = result.size();
        for (String element : sourceSet) {
            int produced = result.size(); // subsets that do not contain this element yet

            List<String> single = new ArrayList<String>();
            single.add(element);
            result.add(single);

            // Copy before extending so the subsets without this element are kept as well.
            for (int i = base; i < produced; i++) {
                List<String> extended = new ArrayList<String>(result.get(i));
                extended.add(element);
                result.add(extended);
            }
        }
    }

    /**
     * Derives the association rules of one itemset whose confidence reaches
     * {@code AprioriUnit._CONFIDENCE}.
     *
     * <p>For every non-empty proper subset A of the itemset, with B the remaining items, the
     * confidence of "A implies B" is {@code count(itemset) / count(A)}. Rules are keyed as
     * {@code A + AprioriUnit._CON + B}, both sides in itemset encoding.
     *
     * <p>If {@code key}, or the antecedent of a rule, has no positive count in the map, there is
     * nothing to compute and the rule is skipped. This previously surfaced as a
     * {@link NullPointerException} or, for a zero count, as an infinite confidence.
     *
     * @param key                   encoded itemset, e.g. {@code "a;b;"}
     * @param frequentCollectionMap occurrence count for each encoded itemset
     * @return rule to confidence; empty when the itemset has fewer than two items or no count
     */
    public static Map<String, Double> getRelationRules(
            String key, Map<String, Integer> frequentCollectionMap) {
        Map<String, Double> relationRules = new HashMap<String, Double>();
        if (key == null || frequentCollectionMap == null) {
            return relationRules;
        }
        Integer keyCount = frequentCollectionMap.get(key);
        if (keyCount == null) {
            return relationRules;
        }
        String[] keyItems = key.split(ITEM_SPLIT);
        if (keyItems.length <= 1) {
            return relationRules; // a rule needs at least one item on each side
        }

        double countAll = keyCount.doubleValue();
        List<String> source = new ArrayList<String>();
        Collections.addAll(source, keyItems);
        List<List<String>> subsets = new ArrayList<List<String>>();
        buildSubSet(source, subsets); // all non-empty subsets of the itemset

        for (List<String> antecedent : subsets) {
            if (antecedent.size() >= source.size()) {
                continue; // proper subsets only
            }

            StringBuilder reason = new StringBuilder(); // rule antecedent
            for (String item : antecedent) {
                reason.append(item).append(ITEM_SPLIT);
            }
            StringBuilder consequent = new StringBuilder(); // items not in the antecedent
            for (String item : source) {
                if (!antecedent.contains(item)) {
                    consequent.append(item).append(ITEM_SPLIT);
                }
            }

            String reasonStr = reason.toString();
            Integer reasonCount = frequentCollectionMap.get(reasonStr);
            if (reasonCount == null || reasonCount.intValue() <= 0) {
                continue;
            }

            double itemConfidence = countAll / reasonCount.doubleValue();
            if (itemConfidence >= AprioriUnit._CONFIDENCE) {
                relationRules.put(reasonStr + AprioriUnit._CON + consequent, itemConfidence);
            }
        }
        return relationRules;
    }

}

應用場景舉例：

(1)數據輸入爲訂單下的全部商品id

125403,185733,196095,117965,201975,212841,181789
149693,210991,13992,64312,54796,194527

(2)計算全部頻繁項集，依據支持度過濾

(3)根據公式，獲得符合置信度條件的全部推薦

104138,196705,0.1875

104138,196705,0.1800

結論是：在購買了商品104138的訂單中，同時購買商品196705的最多

相關標籤/搜索

推薦算法