發現機器學習根本停不下來。
今天用 RNN 算法來爽爽僵屍網絡宿主預測。
首先咱們下載好數據,然後打開咱們可愛的熊貓(pandas):
from subprocess import check_output

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Hard-coded location of the input CSV.
csv_path = 'F:\\machine_learning\\network.csv'
df = pd.read_csv(csv_path)

# Peek at the first rows to confirm the load worked.
df.head(2)
print(df.head())
下面繼續處理數據:
按每天的數據包總量來分組:
# Parse the date column so it can be grouped on and its .dt accessors used.
df['date'] = pd.to_datetime(df['date'])

# Collapse to one row per (date, local IP) pair, summing the other columns.
group_keys = ['date', 'l_ipn']
df = df.groupby(group_keys, as_index=False).sum()

# Derive calendar features from the parsed date.
dates = df['date'].dt
df['yday'] = dates.dayofyear
df['wday'] = dates.dayofweek
對每一個 IP 進行分類:
# One sub-frame per local IP id (0 through 9), selected on the 'l_ipn' column.
ip0, ip1, ip2, ip3, ip4, ip5, ip6, ip7, ip8, ip9 = (
    df[df['l_ipn'] == ip_id] for ip_id in range(10)
)

# Largest value of the 'f' column for each local IP.
max0, max1, max2, max3, max4, max5, max6, max7, max8, max9 = (
    np.max(ip_frame['f'])
    for ip_frame in (ip0, ip1, ip2, ip3, ip4, ip5, ip6, ip7, ip8, ip9)
)

# Peek at the first rows of the IP 0 subset.
ip0.head(2)
然後咱們輸出一下 ip0 的前幾行:
很棒,咱們已經成功按 IP 分類了。
然後咱們按每年的數據包總量來對每一個 IP 進行圖形化計數。
首先舉個例子:
# Bin the 'f' values of IP 0 into 10 equal-width buckets;
# `division` holds the 11 bucket edges, `count` the per-bucket tallies.
ip0_flows = ip0['f']
count, division = np.histogram(ip0_flows, bins=10)
division
它會輸出這些:
array([ 68., 810., 1552., 2294., 3036., 3778., 4520., 5262., 6004., 6746., 7488.])
然後咱們開始對每一個 IP 下手:
# Draw a 5x2 grid of bar charts, one per local IP, showing the histogram
# (10 bins) of that IP's 'f' column.
f, axarray = plt.subplots(5, 2, figsize=(15, 20))

ip_frames = (ip0, ip1, ip2, ip3, ip4, ip5, ip6, ip7, ip8, ip9)

# axarray.flat walks the grid row by row, so IP k lands on
# axarray[k // 2, k % 2] -- the same layout as the hand-written version.
# Pairing each frame with its own axes also fixes the original bug where
# the "Local IP 4 Flow" title was set on axarray[2, 1] instead of
# axarray[2, 0], leaving the IP 4 panel untitled.
for ip_index, (ip_frame, ax) in enumerate(zip(ip_frames, axarray.flat)):
    count, division = np.histogram(ip_frame['f'], bins=10)
    # Use the left edge of each bin as its x label (drop the final right edge).
    sns.barplot(x=division[:-1], y=count, ax=ax)
    ax.set_title("Local IP {} Flow".format(ip_index))

# The original bound `g` to the return value of the first barplot call,
# which is the Axes it drew on; keep the name defined for later code.
g = axarray[0, 0]