jquery源碼學習（二）：Sizzle引擎-詞法解析

時間 2019-11-20

標籤 jquery 源碼學習 sizzle 引擎詞法解析欄目 JQuery 简体版

原文原文鏈接

tokenize源碼備註：

Sizzle.tokenize//2131行

rcomma = new RegExp( "^" + whitespace + "*," + whitespace + "*" ) //601行

whitespace = "[\\x20\\t\\r\\n\\f]"  //574行

\t:水平製表符 \f:換頁符 \r:回車符 \n:換行符 \x20:空格符

rcombinators = new RegExp( "^" + whitespace + "*([>+~]|" + whitespace + ")" + whitespace + "*" ),

rcombinators = /^[\x20\t\r\n\f]*([>+~]|[\x20\t\r\n\f])[\x20\t\r\n\f]*/ ，即：零個或多個WS + 捕獲組（>、+、~ 或 WS）+ 零個或多個WS

rtrim = new RegExp( "^" + whitespace + "+|((?:^|[^\\\\])(?:\\\\.)*)" + whitespace + "+$", "g" ),

rtrim = /^[\x20\t\r\n\f]+|((?:^|[^\\])(?:\\.)*)[\x20\t\r\n\f]+$/g

注： (?:exp)匹配exp，不捕獲匹配的文本，也不給此分組分配組號

\x?? 表達的意思

這是利用2位16進製表示ascii碼錶中的字符。

而\uxxxx是利用4位十六進制表示Unicode字符。

選擇器解析基本思想

解析結果：

咱們能夠看到：選擇器被分解成了數組裏的多個token對象。token對象的格式以下：

Token：{  
   value:'匹配到的字符串', 
   type:'對應的Token類型', 
   matches:'正則匹配到的一個結構'(捕獲組)
}

若是選擇器的格式是以逗號分割的多個選擇器，則返回一個二維數組，這個數組的每一項都是一組token對象。

tokenize函數思想：

// Abridged walk-through of Sizzle's tokenize(): it consumes `soFar` from the
// left, emitting one token per combinator or simple selector.
// This is illustrative pseudo-code, not runnable JavaScript: the comma branch
// is written as prose steps, and `soFar`, `tokens`, `groups` and `preFilters`
// are declared but never initialized in this excerpt.
function tokenize( selector, parseOnly ){
    var matched, match, tokens, type,
        soFar, groups, preFilters,  // soFar: the part of the selector string still to be parsed; it shrinks as parsing proceeds
        cached = tokenCache[ selector + " " ]; // look up an already-tokenized result in the cache

    while(soFar){
        if(匹配到逗號)// (prose condition: a comma was matched) a comma starts a new selector group
            {
                // prose step: create a new array to hold the next group of tokens
                新建一個數組，用來放置一組新的token
                // prose step: strip the matched part from soFar
                去掉soFar中匹配的部分
            }
        // NOTE(review): as shown, this branch neither assigns `matched` nor
        // shortens `soFar` — presumably elided from the excerpt; confirm
        // against the Sizzle source.
        if((match = rcombinators.exec( soFar )))  // match a combinator: + > ~
            {
                tokens.push({
                    value: matched,
                    // Cast descendant combinators to space
                    type: match[0].replace( rtrim, " " ) // author's note: rtrim seems to do little here, since match[0] can only be one of + > ~ or a space
                });
            }
        for(type in Expr.filter)// Expr.filter holds the types TAG, CLASS, ATTR, CHILD, PSEUDO; try each in turn
            {
                    // A type matches when its regex matches and its preFilter
                    // (if one exists) returns a truthy, possibly rewritten, match.
                    if ( (match = matchExpr[ type ].exec( soFar )) && (!preFilters[ type ] ||
                    (match = preFilters[ type ]( match ))) ) {
                    matched = match.shift();
                    // append to the token sequence
                    tokens.push({
                        value: matched,
                        type: type,
                        matches: match
                    });
                    // drop the part just consumed from the remaining unparsed string
                    soFar = soFar.slice( matched.length );
                }
            }
        if ( !matched ) { // nothing matched, so the selector cannot be parsed any further
                break;
        }

        // NOTE(review): as written this return sits inside the while body, so
        // the loop would exit after a single pass — presumably it belongs
        // after the loop; confirm against the Sizzle source.
        // Parse-only: return soFar's length (> 0 means parsing failed).
        // Otherwise: raise a Sizzle error if soFar is non-empty, else cache
        // the parsed result for the next call.
        return parseOnly ? // see the note above
            soFar.length :
            soFar ?
                Sizzle.error( selector ) :
                // Cache the tokens
                tokenCache( selector, groups ).slice( 0 );
    }

}

緩存tokenCache

tokenCache在tokenize函數的開頭與結尾被調用到：

cached = tokenCache[ selector + " " ]; //從緩存中讀取已經解析好的選擇器。
tokenCache( selector, groups ).slice( 0 ); //將選擇器名做爲鍵，解析的結果做爲值壓入緩存

第540行有tokenCache的建立過程,能夠發現，除了tokenCache，還有其餘的緩存也是經過一個名爲createCache的函數建立的

classCache = createCache(),
tokenCache = createCache(),
compilerCache = createCache(),

在854行咱們能夠看到createCache

/**
 * Build a size-limited key/value cache.
 *
 * The returned function is both the setter (call it with a key and a value)
 * and the storage object (entries live as properties on the function itself).
 * Once more than Expr.cacheLength keys have been recorded, the oldest one is
 * dropped.
 *
 * @returns {function(string, *): *} setter that stores `value` under
 *     `key + " "` and returns `value`
 */
function createCache() {
    // Insertion order of stored property names, oldest first.
    var order = [];

    function store( key, value ) {
        // Use (key + " ") to avoid collision with native prototype properties (see Issue #157)
        var total = order.push( key + " " );

        // Only keep the most recent entries
        if ( total > Expr.cacheLength ) {
            delete store[ order.shift() ];
        }

        store[ key + " " ] = value;
        return value;
    }

    return store;
}

這是一個建立函數的函數，它建立的函數cache既能做爲函數調用，又能做爲對象來儲存緩存，由於js中函數也是一個對象，一個函數被建立之初除了得到一個prototype屬性指向一個空對象以外，它本身也能夠做爲對象而掛載屬性。當cache被賦值給tokenCache時，形成了一個閉包，tokenCache能夠經過閉包訪問keys,並向其中push鍵名。key+' '用來避免屬性名與原生屬性名衝突，return (cache[ key + " " ] = value)最後會返回value。這相當於cache[ key + " " ] = value;return value;

createCache函數很是值得學習。

/**
 *
 *
 *matchExpr 過濾正則
    ATTR: /^\[[\x20\t\r\n\f]*((?:\\.|[\w-]|[^\x00-\xa0])+)[\x20\t\r\n\f]*(?:([*^$|!~]?=)[\x20\t\r\n\f]*(?:(['"])((?:\\.|[^\\])*?)\3|((?:\\.|[\w#-]|[^\x00-\xa0])+)|)|)[\x20\t\r\n\f]*\]/
    CHILD: /^:(only|first|last|nth|nth-last)-(child|of-type)(?:\([\x20\t\r\n\f]*(even|odd|(([+-]|)(\d*)n|)[\x20\t\r\n\f]*(?:([+-]|)[\x20\t\r\n\f]*(\d+)|))[\x20\t\r\n\f]*\)|)/i
    CLASS: /^\.((?:\\.|[\w-]|[^\x00-\xa0])+)/
    ID: /^#((?:\\.|[\w-]|[^\x00-\xa0])+)/
    PSEUDO: /^:((?:\\.|[\w-]|[^\x00-\xa0])+)(?:\(((['"])((?:\\.|[^\\])*?)\3|((?:\\.|[^\\()[\]]|\[[\x20\t\r\n\f]*((?:\\.|[\w-]|[^\x00-\xa0])+)[\x20\t\r\n\f]*(?:([*^$|!~]?=)[\x20\t\r\n\f]*(?:(['"])((?:\\.|[^\\])*?)\8|((?:\\.|[\w#-]|[^\x00-\xa0])+)|)|)[\x20\t\r\n\f]*\])*)|.*)\)|)/
    TAG: /^((?:\\.|[\w*-]|[^\x00-\xa0])+)/
    bool: /^(?:checked|selected|async|autofocus|autoplay|controls|defer|disabled|hidden|ismap|loop|multiple|open|readonly|required|scoped)$/i
    needsContext: /^[\x20\t\r\n\f]*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\([\x20\t\r\n\f]*((?:-\d)?\d*)[\x20\t\r\n\f]*\)|)(?=[^-]|$)/i
 *
 */

最後，咱們能夠寫一個簡略版解析引擎:

github地址：https://github.com/kangkang1234/JqueryStudy/blob/master/tokenize/tokenize.js

// Regular expressions used by the simplified tokenizer.
//
// Every pattern is anchored at the start of the remaining selector text.
// The simple-selector patterns (tag, class, child, id) match ONLY the selector
// itself and leave surrounding whitespace alone, so that whitespace is seen by
// `separator` and reported as a descendant combinator ("div p"). Because of
// this, a selector with trailing whitespace ends in a separator token; trim
// the selector before tokenizing if that is not wanted.
var regexp = {
    // A comma (with optional surrounding whitespace) separates selector groups.
    comma:/^\s*,\s*/,
    // A combinator: + > ~ with optional surrounding whitespace, or bare whitespace.
    // Both alternatives are anchored so a miss does not scan the whole string.
    separator:/^(?:\s*[+>~]\s*|\s+)/,
    // Runs of whitespace (global), kept for callers that collapse whitespace.
    rtrim:/\s+/g,
    // Element name, e.g. "div"; group 1 is the name.
    tag:/^([a-zA-Z]+)/,
    // Class selector, e.g. ".item"; group 1 includes the leading dot.
    class:/^(\.[a-zA-Z]+)/,
    // :first-child, :last-child or :nth-child(N); group 2 is N, a positive
    // integer without leading zeros (multi-digit values such as 10 are accepted).
    child:/^:((?:first|last)-child|nth-child\(([1-9]\d*)\))/,
    // Placeholders: attribute and pseudo selectors are not implemented yet,
    // so these patterns are not meant to match real selectors.
    attr:/^asdadawd$/,
    pseudo:/^asdasda$/,
    // Id selector, e.g. "#main"; group 1 includes the leading hash and the full name.
    id:/^(#[a-zA-Z]+)/
};

// Simple-selector types tried, in this order, when neither a comma nor a combinator matches.
var filter = ['tag','class','child','attr','pseudo','id'];

/**
 * Create a bounded cache for tokenized selectors.
 *
 * The returned function doubles as the storage object: calling it stores
 * `tokens` as a property named `selector + ' '` on the function itself (the
 * trailing space keeps keys from colliding with built-in function properties
 * such as `name` or `length`). When a new key would push the number of
 * entries past the limit, the oldest entry is removed.
 *
 * @param {number} [maxEntries=100] maximum number of entries to keep
 * @returns {function(string, *): *} setter that stores and returns `tokens`
 */
function createCache(maxEntries) {
    var limit = maxEntries > 0 ? maxEntries : 100;
    // Stored property names in insertion order, oldest first.
    var keys = [];
    function cache(selector,tokens) {
        var key = selector+' ';
        // Record each key once, so overwriting an entry cannot cause a live
        // entry to be evicted through a stale duplicate.
        if(!Object.prototype.hasOwnProperty.call(cache,key)){
            keys.push(key);
            if(keys.length>limit){
                delete cache[keys.shift()];
            }
        }

        return cache[key] = tokens;
    }

    return cache;
}

// Shared cache instance used by tokenize().
var cache = createCache();

/**
 * Split a CSS selector string into groups of tokens.
 *
 * The selector is consumed from the left. At each step the tokenizer tries,
 * in order: a comma (starts a new group), a combinator (+ > ~ or whitespace),
 * then each simple-selector type listed in `filter`. Parsing stops at the
 * first position where nothing matches.
 *
 * Token shapes:
 *   combinator:      { value, tag }          tag is "+", ">", "~" or " "
 *   simple selector: { value, tag, matchs }  tag is the first capture group,
 *                                            matchs is the array of capture groups
 *
 * @param {string} selector selector text to tokenize
 * @param {boolean} [parseOnly] when truthy, only report how much text is left unparsed
 * @returns {number|boolean|Array<Array<Object>>}
 *     parseOnly: the number of characters left unparsed (0 means fully parsed);
 *     otherwise `false` when the selector cannot be fully parsed, or an array
 *     of token groups (one group per comma-separated selector). The array is a
 *     shallow copy of the cached result, so callers may add or remove groups
 *     without affecting the cache.
 */
function tokenize(selector,parseOnly){
    var cached = cache[selector+' '];

    if(cached){
        // Only fully parsed selectors are cached, so nothing is left unparsed.
        return parseOnly ? 0 : cached.slice(0);
    }

    var rest = selector; // text still to be parsed
    var group = [];      // tokens of the selector group being built
    var groups = [group]; // all groups, one per comma-separated selector
    var match,value,found,i;

    while(rest){
        found = false;
        // Part 1: a comma starts a new selector group.
        if((match = regexp.comma.exec(rest))&&match.index===0){
            group = [];
            groups.push(group);
            rest = rest.slice(match[0].length);
            found = true;
        }
        // Part 2: combinators + ~ > and whitespace.
        else if((match = regexp.separator.exec(rest))&&match.index===0){
            value = match[0];
            // Normalize so the same combinator always gets the same tag;
            // pure whitespace is the descendant combinator " ".
            group.push({value:value,tag:value.trim()||' '});
            rest = rest.slice(value.length);
            found = true;
        }
        // Part 3: simple selectors, tried in `filter` order; first match wins.
        else {
            for(i=0;i<filter.length&&!found;i++){
                match = regexp[filter[i]].exec(rest);
                if(match&&match.index===0){
                    value = match.shift(); // full match; `match` now holds only the capture groups
                    group.push({value:value,tag:match[0],matchs:match});
                    rest = rest.slice(value.length);
                    found = true;
                }
            }
        }

        // Nothing recognized at this position: stop and report the remainder.
        if(!found){
            break;
        }
    }

    if(parseOnly){
        return rest.length;
    }
    if(rest.length){
        return false;
    }
    return cache(selector,groups).slice(0);
}