Reinforcement learning (RL) is a field of machine learning concerned with how an agent should act in an environment so as to maximize the expected cumulative reward. It is inspired by behaviorist theories in psychology: an organism, stimulated by the rewards or punishments its environment hands out, gradually forms expectations about those stimuli and develops the habitual behavior that yields the greatest benefit. Because the idea is so general, it is studied in many other fields as well, such as game theory, control theory, operations research, information theory, simulation-based optimization, multi-agent systems, swarm intelligence, statistics, and genetic algorithms. In the operations research and control theory literature, reinforcement learning is called "approximate dynamic programming" (ADP). The problem is also studied in optimal control theory, although most work there concerns the existence and characterization of optimal solutions rather than learning or approximation. In economics and game theory, reinforcement learning is used to explain how equilibrium can emerge under bounded rationality.

In machine learning, the environment is typically formalized as a Markov decision process (MDP), so many reinforcement learning algorithms use dynamic programming techniques. The main difference between the classical techniques and reinforcement learning algorithms is that the latter do not require knowledge of the MDP and target large MDPs for which exact methods become infeasible.

Reinforcement learning also differs from standard supervised learning in that correct input/output pairs are never presented and sub-optimal actions are not explicitly corrected. It focuses instead on online performance, which requires balancing exploration (of uncharted territory) against exploitation (of current knowledge). This exploration-exploitation trade-off has been studied most thoroughly in the multi-armed bandit problem and in finite MDPs.
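To make the exploration-exploitation trade-off concrete, here is a minimal epsilon-greedy sketch for a Bernoulli multi-armed bandit. The arm probabilities, the value of epsilon, and the helper name epsilon_greedy_bandit are illustrative assumptions of mine, not part of the original material.

import numpy as np

def epsilon_greedy_bandit(reward_probs, epsilon=0.1, steps=1000):
    # With probability epsilon explore a random arm, otherwise exploit the
    # arm with the highest estimated mean reward.
    n_arms = len(reward_probs)
    counts = np.zeros(n_arms)
    values = np.zeros(n_arms)  # running estimate of each arm's mean reward
    total_reward = 0.0
    for _ in range(steps):
        if np.random.rand() < epsilon:
            arm = np.random.randint(n_arms)   # explore
        else:
            arm = int(np.argmax(values))      # exploit
        reward = float(np.random.rand() < reward_probs[arm])
        counts[arm] += 1
        values[arm] += (reward - values[arm]) / counts[arm]  # incremental mean
        total_reward += reward
    return values, total_reward

# Toy run: three arms with assumed success probabilities 0.2, 0.5 and 0.8
estimates, total = epsilon_greedy_bandit([0.2, 0.5, 0.8])
print(estimates, total)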
A learning automaton is an adaptive decision-making unit operating in a random environment; it learns the optimal action through repeated interaction with that environment. Actions are chosen according to a probability distribution, and the system updates this distribution based on how the environment responds to the chosen action. In the field of reinforcement learning, learning automata are characterized in terms of a Markov decision process. Policy iterators manipulate the policy π directly, which distinguishes them from other reinforcement learning algorithms; another example of a policy iterator is the evolutionary algorithm.
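This "choose by probability, then shift the probability" loop is exactly the pattern the agent further down in this post follows for its two actions 'Yes' and 'No'. Below is a minimal sketch of a textbook-style linear reward-penalty update for a two-action automaton; the learning rates a and b, the toy environment, and the helper name update_probability are my own illustrative assumptions, and the agent later in the post uses its own variant of these updates.

import numpy as np

def update_probability(p, action, rewarded, a=0.05, b=0.01):
    # p is the probability of choosing action 1 (action 2 has probability 1 - p).
    # On reward, move probability mass towards the chosen action (rate a);
    # on penalty, move it away from the chosen action (rate b).
    if rewarded:
        return p + a * (1 - p) if action == 1 else (1 - a) * p
    return (1 - b) * p if action == 1 else p + b * (1 - p)

p = 0.5
for _ in range(50):
    action = 1 if np.random.rand() < p else 2
    rewarded = (action == 1)  # toy environment that always rewards action 1
    p = update_probability(p, action, rewarded)
print(p)  # drifts towards 1: the automaton learns to prefer action 1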
Link prediction in networks asks how, given the known nodes and the observed network structure, one can estimate the likelihood that a link will form between two nodes that are not yet connected. This covers both the prediction of unknown links (links that exist but have not yet been observed) and the prediction of future links. The problem is of real value both in theory and in applications.
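The similarity indices implemented below all build on the fact that, for an adjacency matrix A, the entry (A·A)[x, y] counts the common neighbours of nodes x and y. A minimal sketch on a toy 4-node graph (the graph itself is just an assumption for illustration):

import numpy as np

# Toy undirected graph with edges 0-1, 0-2, 1-2 and 2-3
A = np.array([[0, 1, 1, 0],
              [1, 0, 1, 0],
              [1, 1, 0, 1],
              [0, 0, 1, 0]], dtype=float)

common_neighbours = np.dot(A, A)
print(common_neighbours[0, 1])  # 1.0 -> nodes 0 and 1 share one common neighbour (node 2)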
import numpy as np
import time
from random import choice
import pandas as pd
import os
'''
def Cn(MatrixAdjacency):
    Matrix_similarity = np.dot(MatrixAdjacency, MatrixAdjacency)
    return Matrix_similarity
'''
def Jaccavrd(MatrixAdjacency_Train):
    # Jaccard index: common neighbours divided by the size of the union of the two neighbourhoods
    Matrix_similarity = np.dot(MatrixAdjacency_Train, MatrixAdjacency_Train)
    deg_row = sum(MatrixAdjacency_Train)
    deg_row.shape = (deg_row.shape[0], 1)
    deg_row_T = deg_row.T
    tempdeg = deg_row + deg_row_T
    temp = tempdeg - Matrix_similarity
    Matrix_similarity = Matrix_similarity / temp
    return Matrix_similarity
def Salton_Cal(MatrixAdjacency_Train):
    # Salton (cosine) index: common neighbours divided by sqrt(k_x * k_y)
    similarity = np.dot(MatrixAdjacency_Train, MatrixAdjacency_Train)
    deg_row = sum(MatrixAdjacency_Train)
    deg_row.shape = (deg_row.shape[0], 1)
    deg_row_T = deg_row.T
    tempdeg = np.dot(deg_row, deg_row_T)
    temp = np.sqrt(tempdeg)
    np.seterr(divide='ignore', invalid='ignore')
    Matrix_similarity = np.nan_to_num(similarity / temp)
    return Matrix_similarity
def Katz_Cal(MatrixAdjacency):
    # Katz index: (I - alpha*A)^-1 - I, i.e. all paths weighted by alpha^length
    Parameter = 0.01  # value of alpha
    Matrix_EYE = np.eye(MatrixAdjacency.shape[0])
    Temp = Matrix_EYE - MatrixAdjacency * Parameter
    Matrix_similarity = np.linalg.inv(Temp)
    Matrix_similarity = Matrix_similarity - Matrix_EYE
    return Matrix_similarity
'''
def LP_Cal(MatrixAdjacency):
    Matrix_similarity = np.dot(MatrixAdjacency, MatrixAdjacency)
    Parameter = 0.05
    Matrix_LP = np.dot(np.dot(MatrixAdjacency, MatrixAdjacency), MatrixAdjacency) * Parameter
    Matrix_similarity = np.dot(Matrix_similarity, Matrix_LP)
    return Matrix_similarity
'''
def RA(MatrixAdjacency_Train):
    # Resource Allocation index: sum over common neighbours z of 1/k_z
    RA_Train = sum(MatrixAdjacency_Train)
    RA_Train.shape = (RA_Train.shape[0], 1)
    MatrixAdjacency_Train_Log = MatrixAdjacency_Train / RA_Train
    MatrixAdjacency_Train_Log = np.nan_to_num(MatrixAdjacency_Train_Log)
    Matrix_similarity = np.dot(MatrixAdjacency_Train, MatrixAdjacency_Train_Log)
    return Matrix_similarity
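Before moving on to the agent's environment, here is a quick usage sketch of the four indices defined above. The toy adjacency matrix is my own example, not part of the dataset used later in this post.

A_toy = np.array([[0, 1, 1, 0],
                  [1, 0, 1, 1],
                  [1, 1, 0, 0],
                  [0, 1, 0, 0]], dtype=float)

print(Jaccavrd(A_toy)[0, 3])    # Jaccard similarity of nodes 0 and 3
print(Salton_Cal(A_toy)[0, 3])  # Salton (cosine) similarity
print(Katz_Cal(A_toy)[0, 3])    # Katz similarity
print(RA(A_toy)[0, 3])          # Resource Allocation similarity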
def RandomEnviromentForActive(MatrixAdjacency, i, j):
    # Randomly pick one of the four similarity indices and return the similarity of node pair (i, j)
    Index = np.random.randint(1, 5)
    print(Index)
    global IndexName
    if Index == 1:
        IndexName = 'Similarity index: Jaccard Index'
        print(IndexName)
        similarity_matrix = Jaccavrd(MatrixAdjacency)
        similarity = similarity_matrix[i, j]
    elif Index == 2:
        IndexName = 'Similarity index: Salton Index'
        print(IndexName)
        similarity_matrix = Salton_Cal(MatrixAdjacency)
        similarity = similarity_matrix[i, j]
    elif Index == 3:
        IndexName = 'Similarity index: Katz Index'
        print(IndexName)
        similarity_matrix = Katz_Cal(MatrixAdjacency)
        similarity = similarity_matrix[i, j]
    else:  # Index == 4
        IndexName = 'Similarity index: RA Index'
        print(IndexName)
        similarity_matrix = RA(MatrixAdjacency)
        similarity = similarity_matrix[i, j]
    return similarity
def RandomEnviromentForNonActive():
    # Randomly pick a decision-tree learner; ID3_Cal, Cart_Cal and C4_Cal are assumed
    # to be defined elsewhere (they are not shown in this post), and this function is
    # not called by the driver code below.
    Action = np.random.randint(1, 4)
    if Action == 1:
        ActionName = 'ID3'
        similarity_matrix = ID3_Cal(MatrixAdjacency)
        # similarity = similarity_matrix[i, j]
    elif Action == 2:
        ActionName = 'CART'
        similarity_matrix = Cart_Cal(MatrixAdjacency)
        # similarity = similarity_matrix[i, j]
    elif Action == 3:
        ActionName = 'C4.5'
        similarity_matrix = C4_Cal(MatrixAdjacency)
        # similarity = similarity_matrix[i, j]
    return similarity_matrix
def ContructionAgent(filepath, n1, n2):
    # Read a 50x50 adjacency matrix from a whitespace-separated text file
    f = open(filepath)
    lines = f.readlines()
    A = np.zeros((50, 50), dtype=float)
    A_row = 0
    for line in lines:
        values = line.strip('\n').split(' ')
        A[A_row, :] = values[0:50]
        A_row += 1
    # Initialise the learning rates and the action probabilities p1 and p2
    a = 0.05
    b = 0.01
    p1 = 0.5
    p2 = 0.5
    Action = 1  # action 1 stands for 'Yes', action 2 stands for 'No'
    for i in range(1):
        # global Action
        # similarity threshold
        if p1 >= p2:
            Action = 1
        else:
            Action = 2
        print('Selected action: ' + str(Action))
        threshhold_value = 0.3
        similarity = RandomEnviromentForActive(A, n1, n2)
        # p1 is the probability of choosing action 1 ('Yes'); p2 of choosing action 2 ('No')
        # the previous action was 'Yes' and it was rewarded
        if (similarity > threshhold_value) and (Action == 1):
            p1 = p1 + a * (1 - p1)
            p2 = 1 - p1
            # p2 = (1 - a) * p2
        # the previous action was 'No' and it was rewarded
        elif (similarity < threshhold_value) and (Action == 2):
            p2 = (1 - a) * p2
            p1 = 1 - p2
            # p1 = (1 - a) * p1
        # the previous action was 'Yes' and it was penalised
        elif (similarity < threshhold_value) and (Action == 1):
            p2 = 1 - b * p2
            p1 = 1 - p2
            # p2 = 1 - b * p2
        # the previous action was 'No' and it was penalised
        elif (similarity > threshhold_value) and (Action == 2):
            p1 = b + (1 - b) * (1 - p1)
            p2 = 1 - p1
            # p1 = 1 - b * p1
    if p1 >= p2:
        print('Action chosen for the next step: Yes')
    else:
        print('Action chosen for the next step: No')
    return p1, p2
# Driver: walk the data directory and run the agent for every node pair of each matrix
path = r'../Data/itcmatrixs/36000/'
result = np.zeros((50, 50))
for i in os.walk(path):
    # print(i)
    # print(type(i))
    for m in range(50):
        for n in range(50):
            r = None
            for j in range(26):
                datapath = path + i[2][j]
                p1, p2 = ContructionAgent(datapath, m, n)
                r = int(p1 >= p2)
            result[m, n] = r
np.save('result.npy', result)  # persist the 50x50 decision matrix
If you need the source code and the dataset, please send a message to 280815640@qq.com. Thank you for your interest.