Matlab圖像識別/檢索系列(8)—開源工具介紹之vlfeat

時間 2020-11-11

標籤 html node less dom ide 函數工具性能開發工具測試欄目 MATLAB 简体版

原文原文鏈接

作為方便、快捷、功能強大的開發工具，Matlab有大量的第三方資源。在圖像處理、計算機視覺方面，幾年前的Matlab功能不如現在強大，因此在MathWorks網站的File Exchange上可以看到大量個人上傳的代碼，但絕大多數綜合性和運算性能不強。vlfeat的出現改變了這一現狀，可以移步官網下載vlfeat，最好下載編譯過的bin文件，否則只下載源碼的話需要自己編譯。不過官方的編譯不太完整，某些函數如果要在64位的Windows10或Windows7下使用，需要自己編譯，也可單獨對某個函數使用mex命令編譯，我使用Visual Studio 2010和Visual Studio 2015編譯過。
vlfeat的功能很多，包含了多種特徵提取（SIFT、DSIFT、Quick shift、PHOW、HOG、MSER、SLIC、Fisher、LBP）、局部特徵匹配（UBC match）、分類（SVM、GMM）、聚類（IKM、HKM、AIB，Agglomerative Information Bottleneck）、檢索（Random kd-tree）、分割、計算評價指標（true positives and false positives）、作圖（Precision-recall curve、ROC curve）、生成分佈函數（Second derivative of the Gaussian density function、Derivative of the Gaussian density function、Derivative of the sigmoid function、Standard Gaussian density function、Sigmoid function）、特徵編碼（VLAD）等功能，可參看幫助文檔或演示程序。
注意，在運行演示程序前，要先運行toolbox目錄下的vl_setup.m和vl_root.m，以添加必要的路徑。
這裏介紹一下Caltech101的分類演示程序phow_caltech101.m。

function phow_caltech101()
    % PHOW_CALTECH101 Image classification in the Caltech-101 dataset
    %   This program demonstrates how to use VLFeat to construct an image
    %   classifier on the Caltech-101 data. The classifier uses PHOW
    %   features (dense SIFT), spatial histograms of visual words, and a
    %   Chi2 SVM. To speedup computation it uses VLFeat fast dense SIFT,
    %   kd-trees, and homogeneous kernel map. The program also
    %   demonstrates VLFeat PEGASOS SVM solver, although for this small
    %   dataset other solvers such as LIBLINEAR can be more efficient.
    %
    %   Pipeline (each stage caches its output under conf.dataDir and is
    %   skipped on later runs unless conf.clobber is true):
    %     1. locate (and if necessary download) the image collection;
    %     2. pick conf.numTrain + conf.numTest images per class;
    %     3. learn a visual-word vocabulary with k-means;
    %     4. compute one spatial histogram per image;
    %     5. train one linear SVM per class on the kernel-mapped histograms;
    %     6. score all images, build the confusion matrix and plot/save it.
    %   
    % Author: Andrea Vedaldi
    % Copyright (C) 2011-2013 Andrea Vedaldi
    % All rights reserved.  

    conf.calDir = 'data/caltech-101' ;
    conf.dataDir = 'data/' ;
    conf.autoDownloadData = true ;
    conf.numTrain = 15 ;
    conf.numTest = 15 ;
    conf.numClasses = 102 ;
    conf.numWords = 600 ;
    conf.numSpatialX = [2 4] ;
    conf.numSpatialY = [2 4] ;
    conf.quantizer = 'kdtree' ;
    conf.svm.C = 10 ;
    conf.svm.solver = 'liblinear' ;
    conf.svm.biasMultiplier = 1 ;
    conf.phowOpts = {'Step', 3} ;
    conf.clobber = false ;
    conf.tinyProblem = true ;
    conf.prefix = 'baseline' ;
    conf.randSeed = 1 ;

    % To run faster, the tiny problem uses only 5 classes, a single 2x2
    % spatial grid and a 300-word vocabulary.
    if conf.tinyProblem
        conf.prefix = 'tiny' ;
        conf.numClasses = 5 ;
        conf.numSpatialX = 2 ;
        conf.numSpatialY = 2 ;
        conf.numWords = 300 ;
        conf.phowOpts = {'Verbose', 2, 'Sizes', 7, 'Step', 5} ;
    end

    % Paths of the cached vocabulary, histograms, model, results and features.
    conf.vocabPath = fullfile(conf.dataDir, [conf.prefix '-vocab.mat']) ;
    conf.histPath = fullfile(conf.dataDir, [conf.prefix '-hists.mat']) ;
    conf.modelPath = fullfile(conf.dataDir, [conf.prefix '-model.mat']) ;
    conf.resultPath = fullfile(conf.dataDir, [conf.prefix '-result']) ;
    conf.featPath = fullfile(conf.dataDir, [conf.prefix '-feat.mat']) ;

    % Seed the random number generators so that runs are repeatable.
    randn('state',conf.randSeed) ;
    vl_twister('state',conf.randSeed) ;

    % ---------------------------------------------------------------------
    %                                                 Download Caltech-101
    % ---------------------------------------------------------------------
    % The data is downloaded on the first run. If the download is slow, the
    % archive can be fetched manually from the URL below and unpacked into
    % conf.calDir.
    % NOTE(review): this URL may no longer be served by Caltech -- confirm.
    if ~exist(conf.calDir, 'dir') || ...
         (~exist(fullfile(conf.calDir, 'airplanes'),'dir') && ...
            ~exist(fullfile(conf.calDir, '101_ObjectCategories', 'airplanes'),'dir'))
        if ~conf.autoDownloadData
            error(...
                ['Caltech-101 data not found. ' ...
                 'Set conf.autoDownloadData=true to download the required data.']) ;
        end
        vl_xmkdir(conf.calDir) ;
        calUrl = ['http://www.vision.caltech.edu/Image_Datasets/' ...
            'Caltech101/101_ObjectCategories.tar.gz'] ;
        fprintf('Downloading Caltech-101 data to ''%s''. This will take a while.', conf.calDir) ;
        untar(calUrl, conf.calDir) ;
    end

    % The archive unpacks into a '101_ObjectCategories' sub-folder; point
    % conf.calDir at whichever folder actually holds the class directories.
    if ~exist(fullfile(conf.calDir, 'airplanes'),'dir')
        conf.calDir = fullfile(conf.calDir, '101_ObjectCategories') ;
    end

    % ---------------------------------------------------------------------
    %                                                           Setup data
    % ---------------------------------------------------------------------
    % Each sub-folder is one class; the folder name is the class name.
    % Entries 1 and 2 of the listing are skipped (assumed to be '.' and '..').
    classes = dir(conf.calDir) ;
    classes = classes([classes.isdir]) ;
    classes = {classes(3:conf.numClasses+2).name} ;

    images = {} ;
    imageClass = {} ;
    % Walk every class folder, collecting relative image paths and labels.
    for ci = 1:length(classes)
        ims = dir(fullfile(conf.calDir, classes{ci}, '*.jpg'))' ;
        ims = vl_colsubset(ims, conf.numTrain + conf.numTest) ;
        ims = cellfun(@(x)fullfile(classes{ci},x),{ims.name},'UniformOutput',false) ;
        images = {images{:}, ims{:}} ;
        imageClass{end+1} = ci * ones(1,length(ims)) ;
    end
    % Training set: the first conf.numTrain images of every group of
    % (numTrain + numTest) consecutive images.
    % NOTE(review): this lines up with class boundaries only if every class
    % contributes exactly numTrain + numTest images -- confirm for your data.
    selTrain = find(mod(0:length(images)-1, conf.numTrain+conf.numTest) < conf.numTrain) ;
    % Test set: everything that is not in the training set.
    selTest = setdiff(1:length(images), selTrain) ;
    imageClass = cat(2, imageClass{:}) ;

    model.classes = classes ;
    model.phowOpts = conf.phowOpts ;
    model.numSpatialX = conf.numSpatialX ;
    model.numSpatialY = conf.numSpatialY ;
    model.quantizer = conf.quantizer ;
    model.vocab = [] ;
    model.w = [] ;
    model.b = [] ;
    model.classify = @classify ;

    % ---------------------------------------------------------------------
    %                                                     Train vocabulary
    % ---------------------------------------------------------------------
    if ~exist(conf.vocabPath, 'file') || conf.clobber
        % Extract PHOW descriptors from (at most) 30 training images.
        selTrainFeats = vl_colsubset(selTrain, 30) ;
        descrs = {} ;
        for ii = 1:length(selTrainFeats)
            im = imread(fullfile(conf.calDir, images{selTrainFeats(ii)})) ;
            % Bring the image to the standard type and size.
            im = standarizeImage(im) ;
            % Only the descriptors are needed here; the frames are dropped.
            [drop, descrs{ii}] = vl_phow(im, model.phowOpts{:}) ;
        end

        % Keep at most 10e4 descriptors for clustering.
        descrs = vl_colsubset(cat(2, descrs{:}), 10e4) ;
        descrs = single(descrs) ;
        save(conf.featPath, 'descrs') ;

        % Cluster the descriptors; the cluster centres form the vocabulary.
        vocab = vl_kmeans(descrs, conf.numWords, 'verbose', 'algorithm', 'elkan', 'MaxNumIterations', 50) ;
        save(conf.vocabPath, 'vocab') ;
    else
        load(conf.vocabPath) ;
    end

    model.vocab = vocab ;

    if strcmp(model.quantizer, 'kdtree')
        % Build a kd-tree index over the vocabulary for fast quantization.
        model.kdtree = vl_kdtreebuild(vocab) ;
    end

    % ---------------------------------------------------------------------
    %                                           Compute spatial histograms
    % ---------------------------------------------------------------------
    if ~exist(conf.histPath, 'file') || conf.clobber
        hists = {} ;
        parfor ii = 1:length(images)
            fprintf('Processing %s (%.2f %%)\n', images{ii}, 100 * ii / length(images)) ;
            im = imread(fullfile(conf.calDir, images{ii})) ;
            hists{ii} = getImageDescriptor(model, im);
        end
        % One column per image.
        hists = cat(2, hists{:}) ;
        save(conf.histPath, 'hists') ;
    else
        load(conf.histPath) ;
    end

    % ---------------------------------------------------------------------
    %                                                  Compute feature map
    % ---------------------------------------------------------------------
    % Apply the homogeneous kernel map (chi2 kernel) to the histograms.
    psix = vl_homkermap(hists, 1, 'kchi2', 'gamma', .5) ;

    % ---------------------------------------------------------------------
    %                                                            Train SVM
    % ---------------------------------------------------------------------
    if ~exist(conf.modelPath, 'file') || conf.clobber
        switch conf.svm.solver
            case {'sgd', 'sdca'}
                % One binary (class vs. rest) SVM per class using VLFeat.
                lambda = 1 / (conf.svm.C *  length(selTrain)) ;
                w = [] ;
                parfor ci = 1:length(classes)
                    perm = randperm(length(selTrain)) ;
                    fprintf('Training model for class %s\n', classes{ci}) ;
                    % Labels are +1 for the current class and -1 otherwise.
                    y = 2 * (imageClass(selTrain) == ci) - 1 ;
                    [w(:,ci) b(ci) info] = vl_svmtrain(psix(:, selTrain(perm)), y(perm), lambda, ...
                        'Solver', conf.svm.solver,'MaxNumIterations', 50/lambda, ...
                        'BiasMultiplier', conf.svm.biasMultiplier, 'Epsilon', 1e-3);
                end

            case 'liblinear'
                % NOTE(review): 'train' is expected to be the LIBLINEAR MATLAB
                % interface and must be on the path -- it is not part of VLFeat.
                svm = train(imageClass(selTrain)',sparse(double(psix(:,selTrain))),  ...
                                        sprintf(' -s 3 -B %f -c %f', conf.svm.biasMultiplier, conf.svm.C),'col') ;
                % The last column of svm.w holds the bias terms.
                w = svm.w(:,1:end-1)' ;
                b =  svm.w(:,end)' ;

            otherwise
                % Fail early instead of hitting an undefined w/b below.
                error('phow_caltech101:unknownSolver', ...
                      'Unknown SVM solver ''%s''.', conf.svm.solver) ;
        end
        model.b = conf.svm.biasMultiplier * b ;
        model.w = w ;
        save(conf.modelPath, 'model') ;
    else
        load(conf.modelPath) ;
    end

    % ---------------------------------------------------------------------
    %                                                Test SVM and evaluate
    % ---------------------------------------------------------------------
    % Score every image against every class; the estimate is the arg-max.
    scores = model.w' * psix + model.b' * ones(1,size(psix,2)) ;
    [drop, imageEstClass] = max(scores, [], 1) ;

    % Confusion matrix over the test images (rows: true class, columns:
    % estimated class).
    idx = sub2ind([length(classes), length(classes)], ...
                                imageClass(selTest), imageEstClass(selTest)) ;
    confus = zeros(length(classes)) ;
    confus = vl_binsum(confus, ones(size(idx)), idx) ;

    % Plots
    figure(1) ; clf;
    subplot(1,2,1) ;
    imagesc(scores(:,[selTrain selTest])) ; title('Scores') ;
    set(gca, 'ytick', 1:length(classes), 'yticklabel', classes) ;
    subplot(1,2,2) ;
    imagesc(confus) ;
    title(sprintf('Confusion matrix (%.2f %% accuracy)', ...
                                100 * mean(diag(confus)/conf.numTest) )) ;
    print('-depsc2', [conf.resultPath '.ps']) ;
    save([conf.resultPath '.mat'], 'confus', 'conf') ;

    % -------------------------------------------------------------------------
    function im = standarizeImage(im)
    % STANDARIZEIMAGE  Convert an image to single precision and cap its height.
    %   IM = STANDARIZEIMAGE(IM) converts IM with IM2SINGLE and, when the
    %   result has more than 480 rows, resizes it to exactly 480 rows. The
    %   NaN column count lets IMRESIZE choose the width.
    maxRows = 480 ;
    im = im2single(im) ;
    if size(im,1) > maxRows
        im = imresize(im, [maxRows NaN]) ;
    end
    % -------------------------------------------------------------------------
    function hist = getImageDescriptor(model, im)
    % GETIMAGEDESCRIPTOR  Spatial histogram of visual words for one image.
    %   HIST = GETIMAGEDESCRIPTOR(MODEL, IM) standardizes IM, extracts PHOW
    %   frames and descriptors using MODEL.phowOpts, assigns every
    %   descriptor to a word of MODEL.vocab according to MODEL.quantizer
    %   ('vq' or 'kdtree'), and accumulates one histogram per spatial grid
    %   MODEL.numSpatialY(i) x MODEL.numSpatialX(i). Each per-grid histogram
    %   is normalized to sum to one; the histograms are then stacked into a
    %   single column vector which is normalized again.
    %
    %   An unknown MODEL.quantizer raises an error.
    im = standarizeImage(im) ;
    width = size(im,2) ;
    height = size(im,1) ;
    numWords = size(model.vocab, 2) ;
    [frames, descrs] = vl_phow(im, model.phowOpts{:}) ;

    % Quantize the descriptors: binsa(k) is the word index of descriptor k.
    switch model.quantizer
        case 'vq'
            % Word with the smallest distance to each descriptor.
            [drop, binsa] = min(vl_alldist(model.vocab, single(descrs)), [], 1) ;
        case 'kdtree'
            binsa = double(vl_kdtreequery(model.kdtree, model.vocab,single(descrs), 'MaxComparisons', 50)) ;
        otherwise
            % Fail here rather than with an undefined 'binsa' further down.
            error('getImageDescriptor:unknownQuantizer', ...
                  'Unknown quantizer ''%s''.', model.quantizer) ;
    end

    numLevels = length(model.numSpatialX) ;
    hists = cell(1, numLevels) ;
    for i = 1:numLevels
        % Spatial cell of each frame along x (frames row 1) and y (row 2).
        binsx = vl_binsearch(linspace(1,width,model.numSpatialX(i)+1), frames(1,:)) ;
        binsy = vl_binsearch(linspace(1,height,model.numSpatialY(i)+1), frames(2,:)) ;

        % Combine (y cell, x cell, word) into a single linear bin index.
        bins = sub2ind([model.numSpatialY(i), model.numSpatialX(i), numWords],  binsy,binsx,binsa) ;
        hist = zeros(model.numSpatialY(i) * model.numSpatialX(i) * numWords, 1) ;
        hist = vl_binsum(hist, ones(size(bins)), bins) ;
        hists{i} = single(hist / sum(hist)) ;
    end
    hist = cat(1,hists{:}) ;
    hist = hist / sum(hist) ;
    % -------------------------------------------------------------------------
    function [className, score] = classify(model, im)
    % CLASSIFY  Predict the class of a single image with a trained model.
    %   [CLASSNAME, SCORE] = CLASSIFY(MODEL, IM) computes the image
    %   descriptor of IM, applies the same homogeneous kernel map used for
    %   training, evaluates the linear scores MODEL.w' * x + MODEL.b' and
    %   returns the name of the highest-scoring class with its score.
    descriptor = getImageDescriptor(model, im) ;
    mapped = vl_homkermap(descriptor, 1, 'kchi2', 'gamma', .5) ;
    classScores = model.w' * mapped + model.b' ;
    [score, bestIdx] = max(classScores) ;
    className = model.classes{bestIdx} ;

vlfeat的Matlab版本函數可在API函數頁面查看，簡單列舉如下：

vl_compile Compile VLFeat MEX files
vl_demo Run VLFeat demos
vl_harris Harris corner strength
vl_help VLFeat toolbox builtin help
vl_noprefix Create a prefix-less version of VLFeat commands
vl_root Obtain VLFeat root path
vl_setup Add VLFeat Toolbox to the path

AIB
vl_aib Agglomerative Information Bottleneck
vl_aibcut Cut VL_AIB tree
vl_aibcuthist Compute a histogram by using an AIB compressed alphabet
vl_aibcutpush Quantize based on VL_AIB cut
vl_aibhist Compute histogram over VL_AIB tree

FISHER
vl_fisher Fisher vector feature encoding

GEOMETRY
vl_hat Hat operator
vl_ihat Inverse vl_hat operator
vl_irodr Inverse Rodrigues' formula
vl_rodr Rodrigues' formula

GMM
vl_gmm Learn a Gaussian Mixture Model using EM

IMOP
vl_dwaffine Derivative of an affine warp
vl_imarray Flattens image array
vl_imarraysc Scale and flattens image array
vl_imdisttf Image distance transform
vl_imdown Downsample an image by two
vl_imgrad Image gradient
vl_imintegral Compute integral image
vl_impattern Generate an image from a stock pattern
vl_imreadbw Reads an image as gray-scale
vl_imreadgray Reads an image as gray-scale
vl_imsc Scale image
vl_imsmooth Smooth image
vl_imup Upsample an image by two
vl_imwbackward Image backward warping
vl_imwhiten Whiten an image
vl_rgb2xyz Convert RGB color space to XYZ
vl_tps Compute the thin-plate spline basis
vl_tpsu Compute the U matrix of a thin-plate spline transformation
vl_waffine Apply affine transformation to points
vl_witps Inverse thin-plate spline warping
vl_wtps Thin-plate spline warping
vl_xyz2lab Convert XYZ color space to LAB
vl_xyz2luv Convert XYZ color space to LUV
vl_xyz2rgb Convert XYZ to RGB

KMEANS
vl_hikmeans Hierarchical integer K-means
vl_hikmeanshist Compute histogram of quantized data
vl_hikmeanspush Push data down an integer K-means tree
vl_ikmeans Integer K-means
vl_ikmeanshist Compute histogram of quantized data
vl_ikmeanspush Project data on integer K-means partitions
vl_kmeans Cluster data using k-means

MISC
vl_alldist2 Pairwise distances
vl_alphanum Sort strings using the Alphanum algorithm
vl_argparse Parse list of parameter-value pairs
vl_binsearch Maps data to bins
vl_binsum Binned summation
vl_colsubset Select a given number of columns
vl_cummax Cumulative maximum
vl_getpid Get MATLAB process ID
vl_grad Compute the gradient of an image
vl_histmarg Marginal of histogram
vl_hog Compute HOG features
vl_homkermap Homogeneous kernel map
vl_ihashfind Find labels in an integer hash table
vl_ihashsum Accumulate integer labels into a hash table
vl_inthist Calculate Integral Histogram
vl_isoctave Determines whether Octave is running
vl_kdtreebuild Build randomized kd-tree
vl_kdtreequery Query KD-tree
vl_lbp Local Binary Patterns
vl_lbpfliplr Flip LBP features left-right
vl_localmax Find local maximizers
vl_matlabversion Return MATLAB version as an integer
vl_numder Numerical derivative
vl_numder2 Numerical second derivative
vl_override Override structure subset
vl_pegasos [deprecated]
vl_sampleinthist Sample integral histogram
vl_simdctrl Toggle VLFeat SIMD optimizations
vl_svmdataset Construct advanced SVM dataset structure
vl_svmpegasos [deprecated]
vl_svmtrain Train a Support Vector Machine
vl_threads Control VLFeat computational threads
vl_twister Random number generator
vl_version Obtain VLFeat version information
vl_whistc Weighted histogram
vl_xmkdir Create a directory recursively.

MSER
vl_erfill Fill extremal region
vl_ertr Transpose extremal regions frames
vl_mser Maximally Stable Extremal Regions

PLOTOP
vl_cf Creates a copy of a figure
vl_click Click a point
vl_clickpoint Select a point by clicking
vl_clicksegment Select a segment by clicking
vl_det Compute DET curve
vl_figaspect Set figure aspect ratio
vl_linespec2prop Convert PLOT style line specs to line properties
vl_plotbox Plot boxes
vl_plotframe Plot a geometric frame
vl_plotgrid Plot a 2-D grid
vl_plotpoint Plot 2 or 3 dimensional points
vl_plotstyle Get a plot style
vl_pr Precision-recall curve.
vl_printsize Set the printing size of a figure
vl_roc ROC curve.
vl_tightsubplot Tiles axes without wasting space
vl_tpfp Compute true positives and false positives

QUICKSHIFT
vl_flatmap Flatten a tree, assigning the label of the root to each node
vl_imseg Color an image based on the segmentation
vl_quickseg Produce a quickshift segmentation of a grayscale or color image
vl_quickshift Quick shift image segmentation
vl_quickvis Create an edge image from a Quickshift segmentation.

SIFT
vl_covdet Covariant feature detectors and descriptors
vl_dsift Dense SIFT
vl_frame2oell Convert a geometric frame to an oriented ellipse
vl_liop Local Intensity Order Pattern descriptor
vl_phow Extract PHOW features
vl_plotsiftdescriptor Plot SIFT descriptor
vl_plotss Plot scale space
vl_sift Scale-Invariant Feature Transform
vl_siftdescriptor Raw SIFT descriptor
vl_ubcmatch Match SIFT features
vl_ubcread Read Lowe's SIFT implementation data files

SLIC
vl_slic SLIC superpixels

SPECIAL
vl_ddgaussian Second derivative of the Gaussian density function
vl_dgaussian Derivative of the Gaussian density function
vl_dsigmoid Derivative of the sigmoid function
vl_gaussian Standard Gaussian density function
vl_rcos RCOS function
vl_sigmoid Sigmoid function

VLAD
vl_vlad VLAD feature encoding