自2007年發佈以來,scikit-learn已經成爲Python重要的機器學習庫了。scikit-learn簡稱sklearn,支持包括分類、迴歸、降維和聚類四大機器學習算法。還包含了特徵提取、數據處理和模型評估三大模塊。
sklearn是SciPy的擴展,建立在NumPy和matplotlib庫的基礎上。利用這幾大模塊的優點,能夠大大提升機器學習的效率。
sklearn擁有着完善的文檔,上手容易,具備着豐富的API,在學術界頗受歡迎。sklearn已經封裝了大量的機器學習算法,包括LIBSVM和LIBLINEAR。同時sklearn內置了大量數據集,節省了獲取和整理數據集的時間。
鏈路預測是通過歷史連接信息預測將來可能產生的連接,即通過當前網絡中的連邊信息預測未來可能產生的連邊信息。
from sklearn.model_selection import train_test_split # 分割數據模塊
from sklearn.neighbors import KNeighborsClassifier # K最近鄰(kNN,k-NearestNeighbor)分類算法
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import preprocessing
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from math import isnan
def Jaccavrd(MatrixAdjacency_Train):
    """Compute the pairwise Jaccard similarity index of a network.

    For every node pair (i, j) the score is
    ``P[i, j] / (d[i] + d[j] - P[i, j])`` where ``P = A @ A`` and ``d`` holds
    the column sums of ``A``.  For a symmetric 0/1 adjacency matrix ``P[i, j]``
    is the number of common neighbours and ``d`` is the node degree.

    Args:
        MatrixAdjacency_Train: Square adjacency matrix (any array-like that
            converts to a 2-D float array).

    Returns:
        A float ndarray with the same shape as the input.  Pairs whose
        denominator is zero (e.g. two isolated nodes) score 0.0 instead of
        NaN, so no RuntimeWarning is emitted.
    """
    adjacency = np.asarray(MatrixAdjacency_Train, dtype=float)
    shared = np.dot(adjacency, adjacency)

    # Column sums as an (n, 1) column vector; adding its transpose
    # broadcasts to the (n, n) table of d[i] + d[j].
    degree = adjacency.sum(axis=0).reshape(-1, 1)
    union_size = degree + degree.T - shared

    # Divide only where the denominator is non-zero; the rest stays 0.0.
    similarity = np.zeros_like(shared)
    np.divide(shared, union_size, out=similarity, where=union_size != 0)
    return similarity
def Salton_Cal(MatrixAdjacency_Train):
    """Compute the pairwise Salton (cosine) similarity index of a network.

    For every node pair (i, j) the score is
    ``P[i, j] / sqrt(d[i] * d[j])`` where ``P = A @ A`` and ``d`` holds the
    column sums of ``A``.

    Args:
        MatrixAdjacency_Train: Square adjacency matrix (any array-like that
            converts to a 2-D float array).

    Returns:
        A float ndarray with the same shape as the input.  Undefined entries
        (NaN from 0/0) are replaced by 0 via ``np.nan_to_num``.
    """
    adjacency = np.asarray(MatrixAdjacency_Train, dtype=float)
    shared = np.dot(adjacency, adjacency)

    # Column sums as an (n, 1) column vector; the outer product with its
    # transpose gives the (n, n) table of d[i] * d[j].
    degree = adjacency.sum(axis=0).reshape(-1, 1)
    degree_product = np.dot(degree, degree.T)

    # Silence divide/invalid warnings only for this computation; the
    # process-wide NumPy error settings are left untouched.
    with np.errstate(divide='ignore', invalid='ignore'):
        Matrix_similarity = np.nan_to_num(shared / np.sqrt(degree_product))
    return Matrix_similarity
def file2matrix(filepath, size=50):
    """Load a square numeric matrix from a whitespace-separated text file.

    Each non-blank line of the file supplies one matrix row; only the first
    ``size`` values of a line and the first ``size`` rows of the file are
    used.  Rows missing from a short file stay zero.

    Args:
        filepath: Path of the text file to read.
        size: Number of rows and columns of the matrix (default 50).

    Returns:
        A ``(size, size)`` float ndarray.

    Raises:
        ValueError: If a row has fewer than ``size`` values or a value is
            not a valid number.
    """
    matrix = np.zeros((size, size), dtype=float)
    row_index = 0
    # The context manager guarantees the file is closed, even on error.
    with open(filepath) as handle:
        for line in handle:
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines, e.g. a trailing newline
            if row_index >= size:
                break  # extra rows are ignored
            if len(fields) < size:
                raise ValueError(
                    "row %d of %s has %d values, expected at least %d"
                    % (row_index + 1, filepath, len(fields), size)
                )
            # Write exactly one row; later rows must not be overwritten.
            matrix[row_index, :] = [float(value) for value in fields[:size]]
            row_index += 1
    return matrix
# Load three network snapshot files and compute the Jaccard and Salton
# similarity matrices for each of them.
filepath = '3600/s0001.txt'
MatrixAdjacency = file2matrix(filepath)
similarity_matrix_Jaccavrd = Jaccavrd(MatrixAdjacency)
similarity_matrix_Salton = Salton_Cal(MatrixAdjacency)

filepath2 = '3600/s0002.txt'
MatrixAdjacency2 = file2matrix(filepath2)
similarity_matrix_Jaccavrd2 = Jaccavrd(MatrixAdjacency2)
similarity_matrix_Salton2 = Salton_Cal(MatrixAdjacency2)

filepath3 = '3600/s0003.txt'
MatrixAdjacency3 = file2matrix(filepath3)
similarity_matrix_Jaccavrd3 = Jaccavrd(MatrixAdjacency3)
similarity_matrix_Salton3 = Salton_Cal(MatrixAdjacency3)
def _upper_triangle(matrix, nan_as_zero=True):
    """Return the entries strictly above the main diagonal as a flat list.

    Entries are listed in row-major order (all pairs ``i < j``), so each
    unordered node pair appears exactly once.  When ``nan_as_zero`` is true,
    NaN entries are replaced by 0.
    """
    values = matrix[np.triu_indices_from(matrix, k=1)]
    if nan_as_zero:
        values = np.where(np.isnan(values), 0.0, values)
    return values.tolist()


# Similarity scores per node pair, one list per snapshot and index.
Jaccard_List = _upper_triangle(similarity_matrix_Jaccavrd)
Salton_List = _upper_triangle(similarity_matrix_Salton)
Jaccard_List2 = _upper_triangle(similarity_matrix_Jaccavrd2)
Salton_List2 = _upper_triangle(similarity_matrix_Salton2)
Jaccard_List3 = _upper_triangle(similarity_matrix_Jaccavrd3)
Salton_List3 = _upper_triangle(similarity_matrix_Salton3)

# Adjacency entries for the same node pairs, copied through unchanged.
Adjacency = _upper_triangle(MatrixAdjacency, nan_as_zero=False)
Adjacency2 = _upper_triangle(MatrixAdjacency2, nan_as_zero=False)
Adjacency3 = _upper_triangle(MatrixAdjacency3, nan_as_zero=False)
# One row per node pair: column 0 = Jaccard score, column 1 = Salton score,
# column 2 = adjacency entry.  The row count follows the list length instead
# of a hard-coded 1225 (= 50 * 49 / 2); column_stack raises ValueError if the
# three lists of a snapshot differ in length.
data = np.column_stack((Jaccard_List, Salton_List, Adjacency)).astype(float)
data2 = np.column_stack((Jaccard_List2, Salton_List2, Adjacency2)).astype(float)
data3 = np.column_stack((Jaccard_List3, Salton_List3, Adjacency3)).astype(float)

# Split every table into the two similarity features (X) and the label (y).
data_train_X = data[:, 0:2]
data_train_y = data[:, 2]
data_test_X = data2[:, 0:2]
data_test_y = data2[:, 2]
data_target_X = data3[:, 0:2]
data_target_y = data3[:, 2]
# k-nearest-neighbours: fit on the first table's features and labels, then
# print the predictions for the second table next to its actual labels.
knn = KNeighborsClassifier()
knn.fit(data_train_X, data_train_y)
knn_predicted_links = knn.predict(data_test_X)
print(knn_predicted_links)
print(data_test_y)

# Support-vector classifier.
# NOTE(review): the labels here are shifted by one table relative to the
# features (train features with test labels, test features with target
# labels).  Presumably this is deliberate, i.e. features of one snapshot
# predict the links of the next one -- confirm with the author.
clf = SVC()
clf.fit(data_train_X, data_test_y)
svc_accuracy = clf.score(data_test_X, data_target_y)
print(svc_accuracy)
如需詳細本項目信息,可發送郵件至18770918982@gmail.com