I've recently been studying neural networks with my teacher, and to get a deeper feel for this black box I decided to implement one myself in C/C++. After a good while of tinkering today, I finally have a working BP neural network; later on I'll also implement CNNs and the like, and I'll post those here to share with everyone too~
Since I've been fairly busy lately, I'm just posting the code here for now; I'll write up the underlying theory, along with a few observations of my own, when I have time~
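Before the code, here is a quick sketch of the math it implements (my summary, with notation matching the variable names below: V is the input-to-hidden weight matrix, W the hidden-to-output one, η the learning rate lr, and σ the sigmoid). For one input row x with a bias 1 appended and a one-hot target y, each stochastic iteration computes:

$$
\begin{aligned}
L_1 &= \sigma(xV), \qquad L_2 = \sigma(L_1 W)\\
\delta_2 &= (y - L_2)\odot L_2\odot(1-L_2)\\
\delta_1 &= (\delta_2 W^{\top})\odot L_1\odot(1-L_1)\\
W &\leftarrow W + \eta\,L_1^{\top}\delta_2, \qquad V \leftarrow V + \eta\,x^{\top}\delta_1
\end{aligned}
$$

The ⊙ products are element-wise; L2(1 − L2) is the sigmoid derivative written in terms of the sigmoid output, which is exactly what dsigmoid below computes.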
main.cpp
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <vector>
#include "BPUtils.h"
using namespace std;

// The data sets are globals shared with BPUtils.h (declared extern there).
vector<vector<double>> dataTest;    // test features
vector<double> dataTestY;           // test labels
vector<vector<double>> trainDataX;  // training features
vector<double> trainDataY;          // training labels

int main() {
    // Seed the RNG before the constructor draws the initial weights.
    srand((unsigned)time(0));

    // Load the data sets from train.txt / test.txt.
    createTrainSet();
    createTestSet();

    // Min-max normalization of the features.
    guiYiHua(dataTest);
    guiYiHua(trainDataX);

    // 2 input units, 44 hidden units, 2 output units.
    NeuralNetwork nn(2, 44, 2);
    nn.train(trainDataX, trainDataY);

    return 0;
}
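The program expects train.txt and test.txt next to the executable, with tab-separated rows of two features followed by an integer class label in the third column (that is what the i != 2 check in the loaders assumes). If you just want to watch the network learn something, a throwaway generator like the following works; gen_data.cpp is my name for it and the values are entirely hypothetical, not the data set I trained on:

// gen_data.cpp -- writes toy train.txt / test.txt in the 3-column
// tab-separated format the loaders expect (feature, feature, label).
#include <cstdio>
#include <cstdlib>
#include <ctime>

static void writeSet(const char* path, int rows) {
    FILE* f = fopen(path, "w");
    if (!f) return;
    for (int i = 0; i < rows; i++) {
        // Class 0 clusters around (2, 2), class 1 around (8, 8).
        int label = rand() % 2;
        double cx = label ? 8.0 : 2.0;
        double x1 = cx + (double)rand() / RAND_MAX - 0.5;
        double x2 = cx + (double)rand() / RAND_MAX - 0.5;
        fprintf(f, "%f\t%f\t%d\n", x1, x2, label);
    }
    fclose(f);
}

int main() {
    srand((unsigned)time(0));
    writeSet("train.txt", 200);
    writeSet("test.txt", 50);
    return 0;
}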
BPUtils.h
#ifndef BP_UTILS
#define BP_UTILS

#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

#define random(x) (rand() % (x))

using namespace std;

// Globals: the test set and the training set (defined in main.cpp).
extern vector<vector<double>> dataTest;
extern vector<double> dataTestY;
extern vector<vector<double>> trainDataX;
extern vector<double> trainDataY;

// Split a string on a delimiter.
vector<string> split(const string& str, const string& delim) {
    vector<string> res;
    if ("" == str) return res;
    // strtok needs writable char buffers, so copy both strings first.
    char* strs = new char[str.length() + 1];
    strcpy(strs, str.c_str());
    char* d = new char[delim.length() + 1];
    strcpy(d, delim.c_str());
    char* p = strtok(strs, d);
    while (p) {
        res.push_back(string(p));  // store each token in the result
        p = strtok(NULL, d);
    }
    delete[] strs;  // the original leaked these two buffers
    delete[] d;
    return res;
}

double getMax(const vector<vector<double>>& dataSet) {
    double MYMAX = -999;
    for (int i = 0; i < (int)dataSet.size(); i++)
        for (int j = 0; j < (int)dataSet[i].size(); j++)
            if (MYMAX < dataSet[i][j]) MYMAX = dataSet[i][j];
    return MYMAX;
}

double getMin(const vector<vector<double>>& dataSet) {
    double MYMIN = 999;
    for (int i = 0; i < (int)dataSet.size(); i++)
        for (int j = 0; j < (int)dataSet[i].size(); j++)
            if (MYMIN > dataSet[i][j]) MYMIN = dataSet[i][j];
    return MYMIN;
}

// Min-max normalization: x = (x - x.min) / (x.max - x.min).
void guiYiHua(vector<vector<double>>& dataSet) {
    double MYMAX = getMax(dataSet);
    double MYMIN = getMin(dataSet);
    for (int i = 0; i < (int)dataSet.size(); i++)
        for (int j = 0; j < (int)dataSet[i].size(); j++)
            dataSet[i][j] = (dataSet[i][j] - MYMIN) / (MYMAX - MYMIN);
}

// Load the training set: tab-separated rows; column 2 (0-based) is the label.
void createTrainSet() {
    fstream f("train.txt");
    string line;  // holds each line read
    while (getline(f, line)) {
        vector<string> res = split(line, "\t");
        vector<double> vec1;
        for (int i = 0; i < (int)res.size(); i++) {
            if (i != 2) vec1.push_back(atof(res[i].c_str()));  // feature columns
            else trainDataY.push_back(atof(res[i].c_str()));   // label column
        }
        trainDataX.push_back(vec1);
    }
}

// Load the test set (same format as the training set).
void createTestSet() {
    fstream f("test.txt");
    string line;
    while (getline(f, line)) {
        vector<string> res = split(line, "\t");
        vector<double> vec1;
        for (int i = 0; i < (int)res.size(); i++) {
            if (i != 2) vec1.push_back(atof(res[i].c_str()));
            else dataTestY.push_back(atof(res[i].c_str()));
        }
        dataTest.push_back(vec1);
    }
}

// Sigmoid activation.
double sigmoid(double x) { return 1 / (1 + exp(-x)); }

// Sigmoid derivative, written in terms of the sigmoid *output*:
// if y = sigmoid(x) then dy/dx = y * (1 - y).
double dsigmoid(double y) { return y * (1 - y); }

class NeuralNetwork {
public:
    int inputLayers;   // number of input units
    int hidenLayers;   // number of hidden units
    int outputLayers;  // number of output units
    // Input-to-hidden weights: (inputLayers + 1) rows (the +1 is the bias)
    // by hidenLayers columns.
    vector<vector<double>> VArr;
    // Hidden-to-output weights: hidenLayers rows by outputLayers columns.
    vector<vector<double>> WArr;

private:
    // Matrix product: m3 (m x p) = m1 (m x n) * m2 (n x p), all row-major.
    void dot(const double* m1, const double* m2, double* m3, int m, int n, int p) {
        for (int i = 0; i < m; ++i) {
            for (int j = 0; j < p; ++j) {
                *(m3 + i * p + j) = 0;
                for (int k = 0; k < n; ++k)
                    *(m3 + i * p + j) += *(m1 + i * n + k) * *(m2 + k * p + j);
            }
        }
    }

    // Copy a 2-D vector into a row-major array with n columns.
    void vectorToArr1(const vector<vector<double>>& vec, double* arr, int n) {
        for (int i = 0; i < (int)vec.size(); i++)
            for (int j = 0; j < (int)vec[i].size(); j++)
                *(arr + i * n + j) = vec[i][j];
    }

    // Copy a 1-D vector into an array.
    void vectorToArr2(const vector<double>& vec, double* arr) {
        for (int i = 0; i < (int)vec.size(); i++) *(arr + i) = vec[i];
    }

    // Copy the first m array entries into a vector.
    void arrToVector1(const double* arr, vector<double>& vec, int m) {
        for (int i = 0; i < m; i++) vec.push_back(*(arr + i));
    }

    // Matrix transpose: m2 (n2 x n1) = m1 (n1 x n2) transposed.
    void ZhuanZhi(const double* m1, double* m2, int n1, int n2) {
        for (int i = 0; i < n1; i++)
            for (int j = 0; j < n2; j++)
                *(m2 + j * n1 + i) = *(m1 + i * n2 + j);
    }

    // Prediction used when measuring accuracy.
    // Takes one row of the test set; ArrL2 receives the output-layer outputs.
    // E.g. with 10 classes it emits 10 numbers, roughly the probabilities of
    // the sample belonging to each class; the class with the largest value is
    // taken as the final prediction.
    void predict(vector<double> test, double* ArrL2) {
        test.push_back(1);  // append the bias input
        // These stack arrays rely on the compiler's VLA extension (fine in g++).
        double testArr[1][inputLayers + 1];
        vectorToArr2(test, &testArr[0][0]);
        double dotL1[1][hidenLayers];
        double VArr_temp[inputLayers + 1][hidenLayers];
        vectorToArr1(VArr, &VArr_temp[0][0], hidenLayers);
        // testArr (1 x inputLayers+1) dot VArr (inputLayers+1 x hidenLayers)
        dot(&testArr[0][0], &VArr_temp[0][0], &dotL1[0][0], 1, inputLayers + 1, hidenLayers);
        // Hidden-layer output.
        double ArrL1[1][hidenLayers];
        for (int i = 0; i < hidenLayers; i++) ArrL1[0][i] = sigmoid(dotL1[0][i]);
        double dotL2[1][outputLayers];
        double WArr_temp[hidenLayers][outputLayers];
        vectorToArr1(WArr, &WArr_temp[0][0], outputLayers);
        // ArrL1 (1 x hidenLayers) dot WArr (hidenLayers x outputLayers)
        dot(&ArrL1[0][0], &WArr_temp[0][0], &dotL2[0][0], 1, hidenLayers, outputLayers);
        // Output-layer output.
        for (int i = 0; i < outputLayers; i++) *(ArrL2 + i) = sigmoid(dotL2[0][i]);
    }

    // Index of the largest element (i.e. the most probable class).
    int getMaxIndex(const vector<double>& vec) {
        int index = -1;
        double MYMAX = -999;
        for (int i = 0; i < (int)vec.size(); i++) {
            if (MYMAX < vec[i]) {
                MYMAX = vec[i];
                index = i;
            }
        }
        return index;
    }

public:
    // Constructor: takes the numbers of input, hidden and output units and
    // builds the weight matrices with random values in [-1, 1].
    NeuralNetwork(int _inputLayers, int _hidenLayers, int _outputLayers) {
        this->inputLayers = _inputLayers;
        hidenLayers = _hidenLayers;
        outputLayers = _outputLayers;
        // Build the V weight matrix.
        for (int i = 0; i < inputLayers + 1; i++) {
            vector<double> vec;
            for (int j = 0; j < hidenLayers; j++)
                vec.push_back((double)rand() / RAND_MAX * 2 - 1);
            VArr.push_back(vec);
        }
        // Build the W weight matrix.
        for (int i = 0; i < hidenLayers; i++) {
            vector<double> vec;
            for (int j = 0; j < outputLayers; j++)
                vec.push_back((double)rand() / RAND_MAX * 2 - 1);
            WArr.push_back(vec);
        }
    }

    // Training: takes the training set, the expected y values, the learning
    // rate and the number of iterations. The input here is assumed to have
    // two feature columns.
    void train(vector<vector<double>> dataX, vector<double> dataY,
               double lr = 0.03, int epochs = 1000000) {
        double arrL1[1][hidenLayers];
        double VArr_temp[inputLayers + 1][hidenLayers];
        double hangx_temp[1][inputLayers + 1];
        double hangxT[inputLayers + 1][1];
        double arrL2[1][outputLayers];
        double WArr_temp[hidenLayers][outputLayers];
        double arrL2_delta[1][outputLayers];
        double arrL1_delta[1][hidenLayers];
        double E;
        double dao;
        double dotTemp[hidenLayers][outputLayers];
        double WArr_tempT[outputLayers][hidenLayers];
        double arrL1T[hidenLayers][1];
        double dotTempp[inputLayers + 1][hidenLayers];

        srand((int)time(0));

        // Append the bias to the data set: e.g. a 4x2 input becomes a 4x3
        // matrix by adding a final all-ones column.
        for (int i = 0; i < (int)dataX.size(); i++) dataX[i].push_back(1);

        // Weight-update loop.
        for (int n = 0; n < epochs; n++) {
            // Pick one random sample per iteration (stochastic updates).
            int iii = random((int)dataX.size());
            vector<double> hangx = dataX[iii];

            // Refresh the array copies of the weights. (Bug fix: the original
            // copied VArr only once, before the loop, so the V updates never
            // reached the forward pass.)
            vectorToArr1(VArr, &VArr_temp[0][0], hidenLayers);
            vectorToArr1(WArr, &WArr_temp[0][0], outputLayers);

            // Hidden-layer output: multiply the input row by V, then feed the
            // result through the sigmoid.
            // hangx (1 x inputLayers+1) dot VArr (inputLayers+1 x hidenLayers)
            vectorToArr2(hangx, &hangx_temp[0][0]);
            dot(&hangx_temp[0][0], &VArr_temp[0][0], &arrL1[0][0], 1, inputLayers + 1, hidenLayers);
            for (int k1 = 0; k1 < hidenLayers; k1++) arrL1[0][k1] = sigmoid(arrL1[0][k1]);

            // Output-layer net input.
            // arrL1 (1 x hidenLayers) dot WArr_temp (hidenLayers x outputLayers)
            dot(&arrL1[0][0], &WArr_temp[0][0], &arrL2[0][0], 1, hidenLayers, outputLayers);

            // Output delta: delta2 = (target - output) * dsigmoid(output).
            for (int k1 = 0; k1 < outputLayers; k1++) {
                arrL2[0][k1] = sigmoid(arrL2[0][k1]);
                // Bug fix: compare each unit against its one-hot target instead
                // of the raw class label (the original used
                // E = dataY[iii] - arrL2[0][k1] for every unit, which makes the
                // argmax evaluation below meaningless).
                double target = (k1 == (int)dataY[iii]) ? 1.0 : 0.0;
                E = target - arrL2[0][k1];     // error at this output unit
                dao = dsigmoid(arrL2[0][k1]);  // derivative of the L2 output
                arrL2_delta[0][k1] = E * dao;
            }

            // Hidden delta: delta1 = (delta2 dot W^T) * dsigmoid(L1 output).
            ZhuanZhi(&WArr_temp[0][0], &WArr_tempT[0][0], hidenLayers, outputLayers);
            // arrL2_delta (1 x outputLayers) dot WArr_tempT (outputLayers x hidenLayers)
            dot(&arrL2_delta[0][0], &WArr_tempT[0][0], &arrL1_delta[0][0], 1, outputLayers, hidenLayers);
            for (int k1 = 0; k1 < hidenLayers; k1++)
                arrL1_delta[0][k1] *= dsigmoid(arrL1[0][k1]);

            // Update W with the deltas: W += lr * (L1^T dot delta2).
            ZhuanZhi(&arrL1[0][0], &arrL1T[0][0], 1, hidenLayers);
            dot(&arrL1T[0][0], &arrL2_delta[0][0], &dotTemp[0][0], hidenLayers, 1, outputLayers);
            for (int k1 = 0; k1 < hidenLayers; k1++)
                for (int k2 = 0; k2 < outputLayers; k2++)
                    WArr[k1][k2] += lr * dotTemp[k1][k2];

            // Update V: V += lr * (x^T dot delta1).
            ZhuanZhi(&hangx_temp[0][0], &hangxT[0][0], 1, inputLayers + 1);
            dot(&hangxT[0][0], &arrL1_delta[0][0], &dotTempp[0][0], inputLayers + 1, 1, hidenLayers);
            for (int k1 = 0; k1 < inputLayers + 1; k1++)
                for (int k2 = 0; k2 < hidenLayers; k2++)
                    VArr[k1][k2] += lr * dotTempp[k1][k2];

            // Every 10000 iterations, measure accuracy on the test set.
            if (n % 10000 == 0) {
                double resultArr[1][outputLayers];  // one prediction result
                int num = 0;                        // correctly classified samples
                for (int k1 = 0; k1 < (int)dataTest.size(); k1++) {
                    vector<double> result;
                    // Predict row k1 of the test set; output lands in resultArr.
                    predict(dataTest[k1], &resultArr[0][0]);
                    arrToVector1(&resultArr[0][0], result, outputLayers);
                    // The index with the largest value is the predicted class.
                    int index = getMaxIndex(result);
                    if (index == dataTestY[k1]) num++;
                }
                double accuracy = (double)num / dataTestY.size();
                cout << "epoch: " << n << ", accuracy: " << accuracy << endl;
            }
        }
    }
};

#endif
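Not part of the original post, but here is a small check I used to convince myself the free helpers behave as described. Build this file by itself (not together with main.cpp), since it defines the four globals that BPUtils.h declares extern:

// check_utils.cpp -- quick sanity checks for the helpers in BPUtils.h.
#include <cassert>
#include <cmath>
#include <iostream>
#include <vector>
#include "BPUtils.h"
using namespace std;

// BPUtils.h references these globals, so define them here.
vector<vector<double>> dataTest;
vector<double> dataTestY;
vector<vector<double>> trainDataX;
vector<double> trainDataY;

int main() {
    // split: a tab-separated line yields one token per column.
    vector<string> toks = split("1.0\t2.0\t3", "\t");
    assert(toks.size() == 3 && toks[2] == "3");

    // guiYiHua: after normalization the global min maps to 0, the max to 1.
    vector<vector<double>> m = {{2, 4}, {6, 10}};
    guiYiHua(m);
    assert(fabs(m[0][0] - 0.0) < 1e-12);  // old min 2 -> 0
    assert(fabs(m[1][1] - 1.0) < 1e-12);  // old max 10 -> 1

    // dsigmoid takes the sigmoid *output*: check against a numerical derivative.
    double x = 0.7, h = 1e-6;
    double numeric = (sigmoid(x + h) - sigmoid(x - h)) / (2 * h);
    assert(fabs(dsigmoid(sigmoid(x)) - numeric) < 1e-6);

    cout << "all checks passed" << endl;
    return 0;
}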