SVM分析——svm.h

時間 2019-12-05

標籤 svm 分析 svm.h 简体版

原文原文鏈接

 # ifndef _LIBSVM_H /* include guard: skip the body if this header was already included */
#define _LIBSVM_H
#define LIBSVM_VERSION 320 /* version number exposed to the preprocessor */
#ifdef __cplusplus
extern "C" {// supports mixed C and C++ programming
#endif
extern int libsvm_version; /* declaration only; the variable is defined elsewhere */
/* Stores one feature of a feature vector: its dimension and its value. */
struct svm_node {
    int index;    /* dimension of the feature */
    double value; /* numeric value of the feature */
};
/*
 * Stores the data taking part in one training run.
 *
 * The members are named l / y / x, which are the names libsvm's
 * implementation uses, and the node pointer is spelled with the `struct`
 * keyword so the declaration is valid C as well as C++.
 */
struct svm_problem {
    int l;               /* total number of samples */
    double *y;           /* points to the class label of each sample */
    struct svm_node **x; /* points to an array whose elements are pointers to nodes */
};
/* Enumeration of the SVM types that can be selected. */
enum {
    C_SVC,
    NU_SVC,
    ONE_CLASS,
    EPSILON_SVR,
    NU_SVR
};

/* Enumeration of the kernel function types that can be selected. */
enum {
    LINEAR,
    POLY,
    RBF,
    SIGMOID,
    PRECOMPUTED
};
/*
 * Settings that select the SVM type, the kernel and the training options.
 *
 * The declaration is terminated with ';' (without it the next struct
 * declaration does not parse), and the label array uses libsvm's spelling
 * `weight_label`.
 */
struct svm_parameter {
    int svm_type;      /* SVM type */
    int kernel_type;   /* kernel function type */
    int degree;        /* exponent of the kernel function */
    double gamma;
    double coef0;

    /* training parameters */
    double cache_size; /* memory needed for training */
    double eps;
    double C;          /* penalty factor */
    int nr_weight;     /* number of weights, i.e. entries in weight_label and weight */
    int *weight_label; /* class labels the weights apply to */
    double *weight;    /* weights */
    double nu;
    double p;
    int shrinking;     /* whether to use shrinking */
    int probability;   /* whether to do probability estimates */
};
//svm_model
/*
 * A trained model.
 *
 * Compared with the earlier listing: `label` uses the spelling found in
 * svm_get_labels(), and the `sv_coef` member that libsvm declares between
 * SV and rho is restored so the layout matches the library. The "alpha"
 * remark belongs to sv_coef, not to rho.
 */
struct svm_model
{
    struct svm_parameter param; /* training parameters */
    int nr_class;               /* number of classes */
    int l;                      /* number of support vectors */
    /*
     * Pointers to the support vectors. If the model was read from a file,
     * the vector contents are kept separately; if it came directly from
     * training, the contents stay in the original training set. When
     * prediction is needed after training, the training set's memory must
     * therefore not be freed.
     */
    struct svm_node **SV;
    double **sv_coef;           /* coefficients (alpha) of the support vectors in the decision functions */
    double *rho;                /* constant terms of the decision functions */
    double *probA;
    double *probB;
    int *sv_indices;

    /* used for classification */
    int *label;
    int *nSV;
    int free_sv;
};
/*
 * SVM training function.
 * Takes the training problem and the parameters and returns a pointer to a model.
 * The first parameter is a `struct svm_problem` (no `svm_self` type exists),
 * and both struct types carry the `struct` keyword so the prototype is valid C.
 */
struct svm_model *svm_train(const struct svm_problem *prob, const struct svm_parameter *param);
/* Cross-validation function. */
void svm_cross_validation(const struct svm_problem *prob,
                          const struct svm_parameter *param,
                          int nr_fold,
                          double *target);

/* Saves the trained data. */
int svm_save_model(const char *model_file_name,
                   const struct svm_model *model);

/* Reads a trained model into memory. */
struct svm_model *svm_load_model(const char *model_file_name);

/* Gets the SVM type. */
int svm_get_svm_type(const struct svm_model *model);

/* Gets the number of classes of the data. */
int svm_get_nr_class(const struct svm_model *model);

/* Gets the class labels of the data. */
void svm_get_labels(const struct svm_model *model, int *label);

void svm_get_sv_indices(const struct svm_model *model, int *sv_indices);

int svm_get_nr_sv(const struct svm_model *model);

double svm_get_svr_probability(const struct svm_model *model);

/* Predicts the class of the data with a trained model and stores values into the array. */
double svm_predict_values(const struct svm_model *model,
                          const struct svm_node *x,
                          double *dec_values);

/* Predicts for one sample. */
double svm_predict(const struct svm_model *model,
                   const struct svm_node *x);

double svm_predict_probability(const struct svm_model *model,
                               const struct svm_node *x,
                               double *prob_estimates);

/* Clears the trained model and releases its resources. */
void svm_free_model_content(struct svm_model *model_ptr);

void svm_free_and_destroy_model(struct svm_model **model_ptr_ptr);

void svm_destroy_param(struct svm_parameter *param);

const char *svm_check_parameter(const struct svm_problem *prob,
                                const struct svm_parameter *param);

int svm_check_probability_model(const struct svm_model *model);

void svm_set_print_string_function(void (*print_func)(const char *));
#ifdef __cplusplus
}
#endif
#endif /* _LIBSVM_H */

html

svm.h中主要是定義了4個結構體，分別是svm_node、svm_problem、svm_parameter、svm_model，然後就是19個函數的聲明。函數的聲明我就先不講了，等到在svm.cpp中碰到後再細說。下面來看一看這幾個結構體：

 
 
 
 
 

 

 

  
  
  
  
  
  

  
 
  /* One feature entry: an index paired with a value. */
  struct svm_node {
      int index;
      double value;
  };

c++

 
 
 
 
 

 

 

  
  
  
  
  
  

  
 
  /* The samples taking part in training. */
  struct svm_problem {
      int l;               /* total number of samples */
      double *y;           /* array holding the class each sample belongs to */
      struct svm_node **x; /* array whose elements are pointers */
  };

web

 struct svm_parameter
{
 int svm_type;//定義svm分類類型
 int kernel_type;//定義核函數類型
 int degree;//核函數的指數
 double gamma;
 double coef0;
 /* training parameter*/
 double cache_size;//訓練所需的內存
 double eps;
 double C;//懲罰因子
 int nr_weight;//
 int *weight_lable;//權重的數目
 double* weight;//權重
 double nu;
 double p;
 int shrinking;//是否壓縮
 int probability;//是否作機率估計
}

面試

 struct svm_model
{
 struct svm_parameter param;//訓練參數
 int nr_class;//類別數
 int l;//支持向量數
 struct svm_node **SV;             /*保存支持向量的指針，至於支持向量的內容，若是是從文件中讀取，內容會
                                       額外保留；若是是直接訓練得來，則保留在原來的訓練集中。若是訓練完成後須要預報，原來的
                                       訓練集內存不能夠釋放。*/
 double *rho;//判別函數的alpha
 double *probA;
 double *probB;
 int *sv_indices;
 //classification use
 int *lable;
 int *nSV;
 int free_sv;
};

編程

關於extern "C"的解析

在閱讀svm.h時發現了如下一段代碼：

 
 
 
 
 

 

 

  
  
  
  
  
  

  
 
  #ifndef _LIBSVM_H   
 
  #define _LIBSVM_H   
 
  #define LIBSVM_VERSION 317   
 
     
 
  #ifdef __cplusplus   
 
  extern "C" {   
 
  #endif

顯然，前兩行代碼（#ifndef _LIBSVM_H 與 #define _LIBSVM_H）是爲了防止頭文件被重複引用，第3行定義了版本號；那麼後面的 #ifdef __cplusplus、extern "C" {、#endif 這幾行是什麼意思呢？

某企業曾經給出如下的一道面試題：爲何標準頭文件都有類似如下的結構？

 
   
   
   
   

   

   

    
    
    
    
    
    

    
 
    #ifndef __INCvxWorksh   
 
    #define __INCvxWorksh    
 
    #ifdef __cplusplus   
 
    extern "C" {   
 
    #endif    
 
    /*...*/    
 
    #ifdef __cplusplus   
 
    }   
 
    #endif    
 
    #endif /* __INCvxWorksh */

分析
顯然，頭文件中的編譯宏「#ifndef __INCvxWorksh、#define __INCvxWorksh、#endif」的作用是防止該頭文件被重複引用。

那麼

 
   
   
   
   

   

   

    
    
    
    
    
    

    
 
    #ifdef __cplusplus   
 
    extern "C" {   
 
    #endif    
 
    #ifdef __cplusplus   
 
    }   
 
    #endif

的作用又是什麼呢？

extern "C" 包含雙重含義，從字面上即可得到：首先，被它修飾的目標是「extern」的；其次，被它修飾的目標是「C」的。讓我們來詳細解讀這兩重含義。

被extern "C"限定的函數或變量是extern類型的；extern是C/C++語言中表明函數和全局變量作用範圍（可見性）的關鍵字，該關鍵字告訴編譯器，其聲明的函數和變量可以在本模塊或其它模塊中使用。記住，下列語句：extern int a;僅僅是一個變量的聲明，其並不是在定義變量a，並未爲a分配內存空間。變量a在所有模塊中作爲一種全局變量只能被定義一次，否則會出現鏈接錯誤。通常，在模塊的頭文件中對本模塊提供給其它模塊引用的函數和全局變量以關鍵字extern聲明。例如，如果模塊B欲引用模塊A中定義的全局變量和函數，只需包含模塊A的頭文件即可。這樣，模塊B中調用模塊A中的函數時，在編譯階段，模塊B雖然找不到該函數的定義，但是並不會報錯；它會在鏈接階段從模塊A編譯生成的目標代碼中找到此函數。

與extern對應的關鍵字是static，被它修飾的全局變量和函數只能在本模塊中使用。因此，一個函數或變量只可能被本模塊使用時，其不可能被extern "C"修飾。

被extern "C"修飾的變量和函數是按照C語言方式編譯和鏈接的；

未加extern 「C」聲明時的編譯方式

首先看看C++中對類似C的函數是怎樣編譯的。作爲一種面向對象的語言，C++支持函數重載，而過程式語言C則不支持。函數被C++編譯後在符號庫中的名字與C語言的不同。例如，假設某個函數的原型爲：

[cpp] view plain copy

void foo( int x, int y );

該函數被C編譯器編譯後在符號庫中的名字爲_foo，而C++編譯器則會產生像_foo_int_int之類的名字（不同的編譯器可能生成的名字不同，但是都採用了相同的機制，生成的新名字稱爲「mangled name」）。_foo_int_int這樣的名字包含了函數名、函數參數數量及類型信息，C++就是靠這種機制來實現函數重載的。例如，在C++中，函數void foo( int x, int y )與void foo( int x, float y )編譯生成的符號是不相同的，後者爲_foo_int_float。同樣地，C++中的變量除支持局部變量外，還支持類成員變量和全局變量。用戶所編寫程序的類成員變量可能與全局變量同名，我們以"."來區分。而本質上，編譯器在進行編譯時，與函數的處理類似，也爲類中的變量取了一個獨一無二的名字，這個名字與用戶程序中同名的全局變量名字不同。

未加extern "C"聲明時的鏈接方式

假設在C++中，模塊A的頭文件如下：

 
 
 
 
 

 

 

  
  
  
  
  
  

  
 
  // Header file of module A: moduleA.h
  #ifndef MODULE_A_H
  #define MODULE_A_H

  int foo(int x, int y);

  #endif // MODULE_A_H

在模塊B中引用該函數（由於未加extern "C"，鏈接器實際上會在模塊A生成的目標文件中尋找_foo_int_int這樣經過名字修飾的符號）：

 
 
 
 
 

 

 

  
  
  
  
  
  

  
 
  // 模塊B實現文件　moduleB.cpp   
 
  #include "moduleA.h"   
 
  foo(2,3);

加extern "C"聲明後的編譯和鏈接方式

加extern "C"聲明後，模塊A的頭文件變爲：

 
 
 
 
 

 

 

  
  
  
  
  
  

  
 
  // Header file of module A: moduleA.h
  #ifndef MODULE_A_H
  #define MODULE_A_H

  // foo is declared with C language linkage.
  extern "C" int foo(int x, int y);

  #endif // MODULE_A_H

在模塊B的實現文件中仍然調用foo( 2,3 )，其結果是：

（1）模塊A編譯生成foo的目標代碼時，沒有對其名字進行特殊處理，採用了C語言的方式；

（2）鏈接器在爲模塊B的目標代碼尋找foo(2,3)調用時，尋找的是未經修改的符號名_foo。

所以，可以用一句話概括extern "C"這個聲明的真實目的：實現C++與C及其它語言的混合編程。
　　
extern "C"的慣用法

（1）在C++中引用C語言中的函數和變量，在包含C語言頭文件（假設爲cExample.h）時，需進行下列處理：

 
 
 
 
 

 

 

  
  
  
  
  
  

  
 
  extern "C"   
 
  {   
 
  #include "cExample.h"   
 
  }

而在C語言的頭文件中，對其外部函數只能指定爲extern類型，C語言中不支持extern "C"聲明，在.c文件中包含了extern "C"時會出現編譯語法錯誤。筆者編寫的C++引用C函數例子工程中包含的三個文件的源代碼如下：

 
 
 
 
 

 

 

  
  
  
  
  
  

  
 
  /* C header file: cExample.h */
  #ifndef C_EXAMPLE_H
  #define C_EXAMPLE_H

  extern int add(int x, int y);

  #endif /* C_EXAMPLE_H */
 
  /* c語言實現文件：cExample.c */   
 
  #include "cExample.h"   
 
  /* Returns the sum of x and y. */
  int add( int x, int y )
  {
    const int sum = x + y;
    return sum;
  }
 
  // c++實現文件，調用add：cppFile.cpp   
 
  extern "C"    
 
  {   
 
  #include "cExample.h"   
 
  }   
 
  int main(int argc, char* argv[])   
 
  {   
 
    add(2,3);    
 
    return 0;   
 
  }

如果C++調用一個C語言編寫的.DLL，當包括.DLL的頭文件或聲明接口函數時，應加extern "C" {　}。

（2）在C中引用C++語言中的函數和變量時，C++的頭文件需添加extern "C"，但是在C語言中不能直接引用聲明了extern "C"的該頭文件，應該僅在C文件中將C++中定義的extern "C"函數聲明爲extern類型。
筆者編寫的C引用C++函數例子工程中包含的三個文件的源代碼如下：

 
 
 
 
 

 

 

  
  
  
  
  
  

  
 
  // C++ header file: cppExample.h
  #ifndef CPP_EXAMPLE_H
  #define CPP_EXAMPLE_H

  // add is declared with C language linkage.
  extern "C" int add(int x, int y);

  #endif // CPP_EXAMPLE_H
 
  //C++實現文件 cppExample.cpp   
 
  #include "cppExample.h"   
 
  // Returns the sum of x and y.
  int add( int x, int y )
  {
     const int sum = x + y;
     return sum;
  }
 
  /* C implementation file: cFile.c */

  /* Including the C++ header here would fail to compile: #include "cppExample.h" */

  /* Declare the function defined in the C++ file as a plain extern instead. */
  extern int add( int x, int y );

  /* Calls add(2, 3), discards the result and returns 0. */
  int main( int argc, char* argv[] )
  {
    add( 2, 3 );
    return 0;
  }