SVO稀疏圖像對齊代碼分析

　　SVO使用稀疏直接法計算兩幀之間的初始相機位姿，即便用兩幀之間稀疏的4*4 patch的光度偏差爲損失函數，使用G-N優化算法得到兩幀之間的位姿變換，因爲沒有特徵匹配過程效率較高。相比本身實現的稀疏直接法的代碼，老是難以達到理想的效率，所以分析SVO代碼以下：算法

　　首先是入口函數：緩存

//輸入的是參考幀和當前幀，即連續的兩幀
size_t SparseAlign::run(FramePtr ref_frame, FramePtr cur_frame) { reset(); if(ref_frame->fts_.empty()) { SVO_WARN_STREAM("SparseAlign: no features to track!"); return 0; } ref_frame_ = ref_frame; cur_frame_ = cur_frame; 　//ref_patch_cache_：參考幀patch的緩存，即每行一個特徵patch的16個像素灰度值；
  ref_patch_cache_ = cv::Mat(ref_frame_->fts_.size(), patch_area_, CV_32F);           // create n x 16 matrix 　//雅克比矩陣，每個特徵patch的每一個像素對應一個6*1的雅克比；
  jacobian_cache_.resize(Eigen::NoChange, ref_patch_cache_.rows*patch_area_); visible_fts_.resize(ref_patch_cache_.rows, false); // TODO: should it be reset at each level? // n x 1 　//幀間位姿初始化；
  SE3 T_cur_from_ref(cur_frame_->T_f_w_ * ref_frame_->T_f_w_.inverse()); 　//金字塔迭代，從最高層（即分辨率最低）開始迭代，到最低層（原始圖像）；
  for(level_=max_level_; level_>=min_level_; --level_) { 　　//每次迭代，雅克比都置0；
 jacobian_cache_.setZero(); 　　//這個參數用於控制參考幀patch的緩存ref_patch_cache_是否被初始化；
    have_ref_patch_cache_ = false; if(verbose_)printf("\nPYRAMID LEVEL %i\n---------------\n", level_); 　　//迭代優化函數；
 optimize(T_cur_from_ref); } //優化完成以後左乘得到世界座標系原點到當前幀的變換；
  cur_frame_->T_f_w_ = T_cur_from_ref * ref_frame_->T_f_w_; return n_meas_/patch_area_; //返回的是平均特徵數量； }

　　迭代優化函數：ide

void  SparseAlign::optimize(SE3& model) { // Save the old model to rollback in case of unsuccessful update
 SE3 old_model(model); // perform iterative estimation
  for (iter_ = 0; iter_<n_iter_; ++iter_) { rho_ = 0; //G-N優化過程當中的H矩陣，和g矩陣（J*b）
 H_.setZero(); Jres_.setZero(); // compute initial error
    n_meas_ = 0; //根據兩幀之間的初始位姿變換，計算兩幀之間patch的殘差並返回；
    double new_chi2 = computeResiduals(model, true, false); // solve the linear system
    if(!solve()) { //求解出現問題就退出迭代，具體就是出現nan；
      stop_ = true; } // 判斷cost是否增加了，正常來講光度偏差是降低的，如上升則說明上一次迭代是最優的，並把上輪迭代結果輸出
    if((iter_ > 0 && new_chi2 > chi2_) || stop_) { model = old_model; // rollback
      break; } // 若是光度偏差降低了，則更新迭代結果
 SE3 new_model; update(model, new_model); //變量交換，準備下次迭代；
    old_model = model; model = new_model; chi2_ = new_chi2; //沒實際意義
 finishIteration(); // stop when converged, i.e. update step too small
    if(norm_max(x_)<=eps_) break; } }

　　殘差計算函數：函數

double SparseAlign::computeResiduals( const SE3& T_cur_from_ref, bool linearize_system, bool compute_weight_scale) { //當前迭代金字塔層的圖像
  const cv::Mat& cur_img = cur_frame_->img_pyr_.at(level_); //可忽略
  if(linearize_system && display_) resimg_ = cv::Mat(cur_img.size(), CV_32F, cv::Scalar(0)); //預計算參考幀特徵patch的緩存，即將ref_patch_cache_開闢的存儲空間填上相應的值，見下面的分析 //能夠暫時認爲ref_patch_cache_中已經有值了； 
  if(have_ref_patch_cache_ == false) precomputeReferencePatches(); // compute the weights on the first iteration 可忽略
  std::vector<float> errors; if(compute_weight_scale) errors.reserve(visible_fts_.size()); //下面這段是臨時變量
  const int stride = cur_img.cols;              //相似與OpenCV Mat中的步；
  const int border = patch_halfsize_+1;         //patch的邊界；
  const float scale = 1.0f/(1<<level_);         //對應金字塔層，1<<level_表示2的level_次方；

  const Vector3d ref_pos(ref_frame_->pos());    //參考幀在世界座標系中的位置，即T(R|t)中的t；
  float chi2 = 0.0;                             //光度偏差cost
 size_t feature_counter = 0; // is used to compute the index of the cached jacobian 特徵索引；
  std::vector<bool>::iterator visiblity_it = visible_fts_.begin(); for(auto it=ref_frame_->fts_.begin(); it!=ref_frame_->fts_.end(); ++it, ++feature_counter, ++visiblity_it) { // check if feature is within image； visiblity_it參數在預計算過程當中初始化；
    if(!*visiblity_it) continue; // compute pixel location in cur img 計算該特徵經過位姿變換和相機投影過程後，在當前幀中的像素座標；
    const double depth = ((*it)->point->pos_ - ref_pos).norm();     //經過特徵的世界座標和參考幀的世界座標計算深度值；
    const Vector3d xyz_ref((*it)->f*depth);                         //f爲特徵球面歸一化以後的相機座標系下的座標；
    const Vector3d xyz_cur(T_cur_from_ref * xyz_ref);               //參考幀特徵經過初始化的或上次迭代估計的值計算在當前幀下的特徵座標；
    const Vector2f uv_cur_pyr(cur_frame_->cam_->world2cam(xyz_cur).cast<float>() * scale);  //經過相機投影變換得到圖像像素座標，注意金字塔scale；
    const float u_cur = uv_cur_pyr[0];                              //像素座標浮點型和整型，用於雙線性插值；
    const float v_cur = uv_cur_pyr[1]; const int u_cur_i = floorf(u_cur); const int v_cur_i = floorf(v_cur); // check if projection is within the image
    if(u_cur_i < 0 || v_cur_i < 0 || u_cur_i-border < 0 || v_cur_i-border < 0 || u_cur_i+border >= cur_img.cols || v_cur_i+border >= cur_img.rows) continue; if(u_cur_i > 10000 || v_cur_i > 10000)      continue;   // fix segment fault bug // compute bilateral interpolation weights for the current image //經過雙線性插值計算像素光度值；
    const float subpix_u_cur = u_cur-u_cur_i;                           //子像素值；
    const float subpix_v_cur = v_cur-v_cur_i; //雙線性插值參數，tl：topleft，tr：topright，bl：bottomleft，br：bottomright
    const float w_cur_tl = (1.0-subpix_u_cur) * (1.0-subpix_v_cur); const float w_cur_tr = subpix_u_cur * (1.0-subpix_v_cur); const float w_cur_bl = (1.0-subpix_u_cur) * subpix_v_cur; const float w_cur_br = subpix_u_cur * subpix_v_cur; //指向參考幀特徵patch的指針：頭指針+特徵數*每一個特徵patch的像素個數；
    float* ref_patch_cache_ptr = reinterpret_cast<float*>(ref_patch_cache_.data) + patch_area_*feature_counter; size_t pixel_counter = 0; // is used to compute the index of the cached jacobian
    for(int y=0; y<patch_size_; ++y) { //指向當前幀像素值的指針，4*4patch的左上角開始；
      uint8_t* cur_img_ptr = (uint8_t*) cur_img.data + (v_cur_i+y-patch_halfsize_)*stride + (u_cur_i-patch_halfsize_); //注意各個指針遞增；
      for(int x=0; x<patch_size_; ++x, ++pixel_counter, ++cur_img_ptr, ++ref_patch_cache_ptr) { // compute residual 根據雙線性插值計算當前pixel的像素值；
        const float intensity_cur = w_cur_tl*cur_img_ptr[0] + w_cur_tr*cur_img_ptr[1] + w_cur_bl*cur_img_ptr[stride] + w_cur_br*cur_img_ptr[stride+1]; //計算殘差：當前幀-參考幀
        const float res = intensity_cur - (*ref_patch_cache_ptr); // used to compute scale for robust cost 可忽略；
        if(compute_weight_scale) errors.push_back(fabsf(res)); // robustification 可忽略
        float weight = 1.0; if(use_weights_) { //weight = weight_function_->value(res/scale_);
 } //差值平方累加和
        chi2 += res*res*weight; n_meas_++; //求解雅克比過程
        if(linearize_system) { // compute Jacobian, weighted Hessian and weighted "steepest descend images" (times error) //取出當前特徵對應的雅克比矩陣，由於使用的是逆向組合算法，因此jacobian_cache_預先計算好了，存儲了全部特徵的雅克比
          const Vector6d J(jacobian_cache_.col(feature_counter*patch_area_ + pixel_counter)); H_.noalias() += J*J.transpose()*weight; Jres_.noalias() -= J*res*weight; if(display_) resimg_.at<float>((int) v_cur+y-patch_halfsize_, (int) u_cur+x-patch_halfsize_) = res/255.0; } } } } 

  //返回的是cost的平均值；
  return chi2/n_meas_; }

　　預計算參考幀的相關信息和上面的殘差計算過程差很少：優化

void SparseAlign::precomputeReferencePatches() { //臨時變量
  const int border = patch_halfsize_+1;                       //邊界；
  const cv::Mat& ref_img = ref_frame_->img_pyr_.at(level_);   //參考幀圖像；
  const int stride = ref_img.cols;                            //步長；
  const float scale = 1.0f/(1<<level_);                       //金字塔層尺度；

  const Vector3d ref_pos = ref_frame_->pos();                 //參考幀位置；
  const double focal_length = ref_frame_->cam_->errorMultiplier2();//焦距
 size_t feature_counter = 0; std::vector<bool>::iterator visiblity_it = visible_fts_.begin(); for(auto it=ref_frame_->fts_.begin(), ite=ref_frame_->fts_.end(); it!=ite; ++it, ++feature_counter, ++visiblity_it) { //在當前金字塔層下的像素座標值； // check if reference with patch size is within image
    const float u_ref = (*it)->px[0]*scale; const float v_ref = (*it)->px[1]*scale; const int u_ref_i = floorf(u_ref); const int v_ref_i = floorf(v_ref); if((*it)->point == NULL || u_ref_i-border < 0 || v_ref_i-border < 0 || u_ref_i+border >= ref_img.cols || v_ref_i+border >= ref_img.rows) continue; //該特徵是否可視在這裏改變狀態，初始化時爲false；
    *visiblity_it = true; // cannot just take the 3d points coordinate because of the reprojection errors in the reference image!!! //經過特徵點世界座標和參考幀位置計算深度值
    const double depth(((*it)->point->pos_ - ref_pos).norm()); const Vector3d xyz_ref((*it)->f*depth); // evaluate projection jacobian //獲取2*6的投影雅克比矩陣，該雅克比沒有乘以焦距，以下所示
    Matrix<double,2,6> frame_jac; Frame::jacobian_xyz2uv(xyz_ref, frame_jac); // inline static void jacobian_xyz2uv( // const Eigen::Vector3d& xyz_in_f, // Eigen::Matrix<double, 2, 6>& J) // { // const double x = xyz_in_f[0]; // const double y = xyz_in_f[1]; // const double z_inv = 1. / xyz_in_f[2]; // const double z_inv_2 = z_inv*z_inv; // J(0, 0) = -z_inv; // -1/z // J(0, 1) = 0.0; // 0 // J(0, 2) = x*z_inv_2; // x/z^2 // J(0, 3) = y*J(0, 2); // x*y/z^2 // J(0, 4) = -(1.0 + x*J(0, 2)); // -(1.0 + x^2/z^2) // J(0, 5) = y*z_inv; // y/z // J(1, 0) = 0.0; // 0 // J(1, 1) = -z_inv; // -1/z // J(1, 2) = y*z_inv_2; // y/z^2 // J(1, 3) = 1.0 + y*J(1, 2); // 1.0 + y^2/z^2 // J(1, 4) = -J(0, 3); // -x*y/z^2 // J(1, 5) = -x*z_inv; // x/z // } //雙線性插值參數，和computeresidual函數中同樣 // compute bilateral interpolation weights for reference image
    const float subpix_u_ref = u_ref-u_ref_i; const float subpix_v_ref = v_ref-v_ref_i; const float w_ref_tl = (1.0-subpix_u_ref) * (1.0-subpix_v_ref); const float w_ref_tr = subpix_u_ref * (1.0-subpix_v_ref); const float w_ref_bl = (1.0-subpix_u_ref) * subpix_v_ref; const float w_ref_br = subpix_u_ref * subpix_v_ref; //cache_ptr：指向ref_patch_cache_的指針，前面僅開闢了內存空間，這裏經過指針填值；
    size_t pixel_counter = 0; float* cache_ptr = reinterpret_cast<float*>(ref_patch_cache_.data) + patch_area_*feature_counter; for(int y=0; y<patch_size_; ++y) { //指向參考幀像素的指針；
      uint8_t* ref_img_ptr = (uint8_t*) ref_img.data + (v_ref_i+y-patch_halfsize_)*stride + (u_ref_i-patch_halfsize_); for(int x=0; x<patch_size_; ++x, ++ref_img_ptr, ++cache_ptr, ++pixel_counter) { // precompute interpolated reference patch color //經過雙線性插值，給ref_patch_cache_填值；
        *cache_ptr = w_ref_tl*ref_img_ptr[0] + w_ref_tr*ref_img_ptr[1] + w_ref_bl*ref_img_ptr[stride] + w_ref_br*ref_img_ptr[stride+1]; // we use the inverse compositional: thereby we can take the gradient always at the same position // get gradient of warped image (~gradient at warped position) //計算像素梯度值，0.5*(u[1]-u[-1]), 0.5*(v[1]-v[-1])，其中的每一個像素值都使用雙線性插值得到
        float dx = 0.5f * ((w_ref_tl*ref_img_ptr[1] + w_ref_tr*ref_img_ptr[2] + w_ref_bl*ref_img_ptr[stride+1] + w_ref_br*ref_img_ptr[stride+2]) -(w_ref_tl*ref_img_ptr[-1] + w_ref_tr*ref_img_ptr[0] + w_ref_bl*ref_img_ptr[stride-1] + w_ref_br*ref_img_ptr[stride])); float dy = 0.5f * ((w_ref_tl*ref_img_ptr[stride] + w_ref_tr*ref_img_ptr[1+stride] + w_ref_bl*ref_img_ptr[stride*2] + w_ref_br*ref_img_ptr[stride*2+1]) -(w_ref_tl*ref_img_ptr[-stride] + w_ref_tr*ref_img_ptr[1-stride] + w_ref_bl*ref_img_ptr[0] + w_ref_br*ref_img_ptr[1])); // cache the jacobian //計算像素雅克比，即像素梯度*投影雅克比；
        jacobian_cache_.col(feature_counter*patch_area_ + pixel_counter) = (dx*frame_jac.row(0) + dy*frame_jac.row(1))*(focal_length / (1<<level_)); } } } //該參數置真，說明ref_patch_cache_已經填值；
  have_ref_patch_cache_ = true; }

　　solve()函數和update()函數爲基本的矩陣運算函數，reset()函數爲參數重置函數，沒什麼可說的。startIteration()和finishIteration()函數能夠忽略。ui

　　總結來看，普遍使用指針，應該比直接取值速度更快，逆向組合算法，預先計算雅克比矩陣能夠節省計算量，幾處雙線性插值方法對提升精度有幫助。相比SVO使用的G-N優化方法，LSD-SLAM中使用的是L-M算法，且明顯增長了patch的模塊計算部分。lua