# Custom helper, tested and working. The sample data and the approach come from:
# https://github.com/joaolcorreia/RFM-analysis
import datetime as dt
import warnings

import pandas as pd

# Quantile levels whose values are used as score boundaries.
_QUARTILES = (0.25, 0.5, 0.75)

# Scores run from 1 (best quartile) to 4 (worst); 1 and 2 form the "good" half.
_GOOD_SCORE_MAX = 2

# Segment label keyed by (recency is good, frequency is good, monetary is good).
_SEGMENT_LABELS = {
    (True, True, True): '高價值客戶',
    (False, True, True): '高重點保護客戶',
    (True, False, True): '重點發展客戶',
    (False, False, True): '重點挽留客戶',
    (True, True, False): '通常價值客戶',
    (False, True, False): '通常保持客戶',
    (True, False, False): '通常發展客戶',
    (False, False, False): '潛在客戶',
}


def _quartile_score(value, cuts, low_is_best):
    """Return the 1-4 score of *value* against the three quartile *cuts*.

    With ``low_is_best`` the lowest quartile scores 1 (used for recency);
    otherwise the highest quartile scores 1 (frequency and monetary value).
    """
    first, second, third = cuts
    if value <= first:
        rank = 1
    elif value <= second:
        rank = 2
    elif value <= third:
        rank = 3
    else:
        rank = 4
    return rank if low_is_best else 5 - rank


def _score_column(values, low_is_best):
    """Score every entry of the Series *values* by its own quartiles."""
    cuts = tuple(values.quantile(list(_QUARTILES)).tolist())
    return values.apply(_quartile_score, args=(cuts, low_is_best))


def _segment_label(r_score, f_score, m_score):
    """Map one customer's three quartile scores to a segment label."""
    # A dimension counts as "good" when its score is in the better half
    # (1 or 2), because the quartile scores use 1 for the best quartile.
    key = (
        bool(r_score <= _GOOD_SCORE_MAX),
        bool(f_score <= _GOOD_SCORE_MAX),
        bool(m_score <= _GOOD_SCORE_MAX),
    )
    return _SEGMENT_LABELS[key]


def RFM(data_, ri_qi, user_id, money, *time_):
    """Build an RFM (recency / frequency / monetary) table with one row per user.

    Example::

        RFM(orders, 'order_date', 'customer', 'grand_total', 2018, 1, 2)

    Args:
        data_: pandas DataFrame of orders, one row per order. It is not
            modified by this function.
        ri_qi: name of the order-date column in ``data_``.
        user_id: name of the user-id column in ``data_``.
        money: name of the order-amount column in ``data_``.
        *time_: optional reference date given as ``year, month, day``.
            When omitted, the current local date and time is used.

    Returns:
        A DataFrame indexed by user id with the columns ``recency`` (whole
        days between the user's latest order and the reference date),
        ``frequency`` (number of order rows), ``monetary_value`` (sum of
        ``money``), ``R_Quartile`` / ``F_Quartile`` / ``M_Quartile`` (scores
        from 1 to 4, where 1 is the best quartile), ``RFMClass`` (the three
        scores concatenated as a string) and ``type_`` (segment label).

        The result is also copied to the system clipboard when one is
        available; if copying fails, a ``RuntimeWarning`` is issued and the
        result is still returned.

    Raises:
        ValueError: if ``time_`` is given but does not hold exactly three
            values.
    """
    # Reference point in time; defaults to "now" when no date is passed.
    if not time_:
        now = dt.datetime.now()
    elif len(time_) == 3:
        now = dt.datetime(*time_)
    else:
        raise ValueError(
            'time_ must be empty or exactly (year, month, day), '
            'got {} value(s)'.format(len(time_))
        )

    # Convert dates on a separate Series so the caller's frame is untouched.
    order_dates = pd.to_datetime(data_[ri_qi])
    customers = data_[user_id]

    last_purchase = order_dates.groupby(customers).max()
    rfm = pd.DataFrame({
        # Recency: days since the user's most recent order.
        'recency': (pd.Timestamp(now) - last_purchase).dt.days.astype(int),
        # Frequency: number of order rows per user (needs no id column).
        'frequency': customers.groupby(customers).size(),
        # Monetary value: total amount spent per user.
        'monetary_value': data_[money].groupby(customers).sum(),
    })

    # A smaller recency is better; larger frequency and spend are better.
    rfm['R_Quartile'] = _score_column(rfm['recency'], True)
    rfm['F_Quartile'] = _score_column(rfm['frequency'], False)
    rfm['M_Quartile'] = _score_column(rfm['monetary_value'], False)

    rfm['RFMClass'] = (
        rfm['R_Quartile'].map(str)
        + rfm['F_Quartile'].map(str)
        + rfm['M_Quartile'].map(str)
    )

    rfm['type_'] = [
        _segment_label(r, f, m)
        for r, f, m in zip(
            rfm['R_Quartile'], rfm['F_Quartile'], rfm['M_Quartile']
        )
    ]

    # Copying to the clipboard is a convenience only: headless machines have
    # no clipboard backend, and that must not discard the computed table.
    try:
        rfm.to_clipboard()
    except (RuntimeError, OSError) as exc:
        warnings.warn(
            'RFM result was not copied to the clipboard: {}'.format(exc),
            RuntimeWarning,
            stacklevel=2,
        )
    return rfm
調用函數處理後: