Sizzle 源碼分析(三)：tokenize函數與Sizzle函數

時間 2019-12-17

標籤 sizzle 源碼分析 tokenize 函數简体版

原文鏈接

前言

已經寫了兩篇文章了，個人思路就是按照整個函數的運行順序進行分析，那麼這次就輪到 tokenize 和 Sizzle 的主方法了。我還是會按照原來的方式，先把這兩個函數用到的一些內部屬性或者方法放在最前面，然後再分析這兩個函數。一些已經出現過的函數或者變量（包括之前的正則），我就不重複列舉了，不清楚的同學可以去我之前的文章查。Sizzle 源碼雖然看了很長時間了，但是進度真的好慢，好多東西需要打斷點一點點地看，有一些兼容問題也不是很清楚，要換瀏覽器去測試。OK，牢騷發到這裏，開始擼源碼。

用到的全局變量與方法

這裏的全局指的是 IIFE 中的作用域。

變量

tokenCache = createCache() tokenize 預編譯之後的緩存，下一次如果遇到相同的選擇器，直接返回結果
nonnativeSelectorCache = createCache() 原生的 querySelectorAll 無法返回結果的選擇器字符串的緩存，下次如果再出現就直接走 Sizzle 的方法，不再走原生方法
Expr.preFilter 由於 ATTR、PSEUDO、CHILD 這幾個正則的捕獲組太多，需要用它對捕獲結果進行二次加工。
Expr.cacheLength 常量，數字，控制緩存的數量，值是 50。

方法

createCache 這個方法就是建立緩存

/**
 * Create a size-limited key/value cache.
 *
 * The returned function doubles as the storage object: calling
 * `cache(key, value)` stores `value` on the function itself under the
 * property `key + ' '` and returns `value`. Reads are plain property
 * lookups, e.g. `cache[key + ' ']`.
 *
 * Once more than `Expr.cacheLength` keys have been recorded, the oldest
 * recorded key is evicted. Every call records its key, including repeated ones.
 *
 * @returns {function(string, *): *} The cache function/storage object.
 */
function createCache() {
    var keys = [];

    function cache(key, value) {
        // Record the suffixed key, i.e. the exact property name written below,
        // so that eviction deletes the entry that was actually stored.
        // Array.prototype.push returns the new length of the array.
        if (keys.push(key + ' ') > Expr.cacheLength) {
            // Array.prototype.shift returns the removed (oldest) key.
            delete cache[keys.shift()];
        }
        // The trailing space keeps user keys from colliding with the
        // function's own properties such as "length" or "name".
        return (cache[key + ' '] = value);
    }

    return cache;
}
複製代碼

push

// Keep a reference to the native Array#push: `push` itself may be replaced
// by a shim below, and the shim still needs the native method.
pushNative = arr.push;
push = arr.push;
slice = arr.slice;

// Feature test: can push.apply( array, NodeList ) be trusted?
try {
    push.apply(
        (arr = slice.call(preferredDoc.childNodes)),
        preferredDoc.childNodes
    );
    // If push.apply silently appended nothing, this index does not exist and
    // reading `.nodeType` from undefined throws, landing in the catch block.
    arr[preferredDoc.childNodes.length].nodeType;
} catch (e) {
    // Replace `push` with an object exposing only `apply(target, els)`,
    // which is the only way the rest of the code calls it.
    push = {
        // A non-empty `arr` means slice.call() worked on the node list.
        apply: arr.length ?
            // Convert the list to a real array first, then push natively.
            function (target, els) {
                pushNative.apply(target, slice.call(els));
            } :
            // Otherwise append element by element.
            function (target, els) {
                var j = target.length,
                    i = 0;
                // Copy until a falsy entry is read; this does not rely on els.length.
                while ((target[j++] = els[i++])) {}
                // The loop wrote one trailing falsy entry; drop it again.
                target.length = j - 1;
            }
    };
}
複製代碼

Expr.preFilter

Expr = {
    // ...
    /**
     * Post-processors for raw regex matches, keyed by token type.
     * Each function receives the match array produced by the matching
     * matchExpr[type] pattern and returns a normalized array, or null to
     * reject the match.
     */
    preFilter: {
        /**
         * match[1]: attribute name
         * match[2]: operator
         * match[3..5]: attribute value, captured by whichever quoting
         *              alternative (quoted or unquoted) matched
         * Returns [whole match, name, operator, value].
         */
        'ATTR': function(match) {
            // Unescape the attribute name
            match[1] = match[1].replace(runescape, funescape);
            // Whichever group captured the value, move it into match[3]
            match[3] = (match[3] || match[4] || match[5] || '').replace(runescape, funescape);
            // "~=" matches whitespace-separated words, so pad the value with spaces
            if (match[2] === '~=') {
                match[3] = " " + match[3] + " ";
            }
            return match.slice(0, 4);
        },
        /**
         * match[1]: type (only|first|last|nth|nth-last)
         * match[2]: what (child|of-type)
         * match[3]: the whole argument inside the parentheses
         * match[4]: xn-component of an xn+y argument (e.g. "2n")
         * match[5]: sign of the xn-component
         * match[6]: x of the xn-component
         * match[7]: sign of the y-component
         * match[8]: y of the y-component
         * For nth-* types, match[4] and match[5] are overwritten with the
         * numeric x and y respectively.
         */
        'CHILD': function(match) {
            match[1] = match[1].toLowerCase();
            if (match[1].slice(0, 3) === 'nth') {
                // nth-* requires an argument
                if (!match[3]) {
                    Sizzle.error(match[0]);
                }
                // x: taken from the xn-component; "even"/"odd" count as 2n (x = 2); otherwise 0
                match[4] = +(match[4] ?
                    match[5] + (match[6] || 1) :
                    2 * (match[3] === 'even' || match[3] === 'odd'));
                // y: signed offset; "odd" counts as +1; otherwise 0
                match[5] = +((match[7] + match[8]) || match[3] === 'odd');
            // every other type must not carry an argument
            } else if (match[3]) {
                Sizzle.error(match[0]);
            }

            return match;
        },
        /**
         * match[1]: pseudo name
         * match[2]: the whole argument inside the parentheses
         * match[3]: quoted argument
         * match[4]: single-quoted value
         * match[5]: double-quoted value
         * match[6]: unquoted value
         * Returns [whole match, name, argument], or null when the text is
         * really a CHILD expression.
         */
        'PSEUDO': function(match) {
            var excess,
                // match[2] set while match[6] is empty, e.g. ":not(:nth-child(2))"
                unquoted = !match[6] && match[2];

            // Reject anything the CHILD pattern also matches
            if (matchExpr['CHILD'].test(match[0])) {
                return null;
            }

            // Quoted argument: use the text between the quotes as-is
            if (match[3]) {
                match[2] = match[4] || match[5] || '';

            // The argument itself looks like a pseudo: strip excess characters.
            // NOTE(review): upstream Sizzle names this regex `rpseudo`; confirm
            // that `repseudo` is the name actually defined in this code base.
            } else if (unquoted && repseudo.test(unquoted) &&
                // Recursive tokenize call; with parseOnly it returns the number
                // of characters it could not consume
                (excess = tokenize(unquoted, true)) &&
                // Move to the next closing parenthesis; excess becomes a
                // negative index relative to the end of the string
                (excess = unquoted.indexOf(")", unquoted.length - excess) - unquoted.length)) {
                match[0] = match[0].slice(0, excess);
                match[2] = unquoted.slice(0, excess);
            }

            // Keep only what the pseudo filter needs: whole match, name, argument
            return match.slice(0, 3);
        }
    }
    // ...
}
複製代碼

testContext 檢測一個節點做爲Sizzle上下文的有效性

/**
 * Check whether a node is usable as a Sizzle context.
 *
 * @param {*} context Candidate node.
 * @returns {*} `context` itself when it exposes `getElementsByTagName`;
 *   the falsy input unchanged when `context` is falsy; `false` otherwise.
 */
function testContext( context ) {
	// A falsy input is handed back untouched
	if ( !context ) {
		return context;
	}

	// Usable only if the tag lookup method is present
	return typeof context.getElementsByTagName === "undefined" ? false : context;
}
複製代碼

tokenize

預編譯

Sizzle 在 1.7 以後加入了預編譯的思想，其實大家都這麼說，我就也跟着這麼說了。我理解的預編譯其實就是將所輸入的東西，通過某種規則進行轉換，轉換成另外一種格式。這也有另外一種說法，也就是 AST，我想這個詞大家應該更清楚一點，不知道 AST 是什麼的同學可以去看一下這篇文章。

Sizzle 中的預編譯就是 tokenize 函數，它將各類選擇器按照規則轉換成了由 token 對象組成的數組，舉個簡單的例子：

var selector = 'a#link > p, .expando';
// tokenize(selector) yields one array of tokens per comma-separated group.
// Filter tokens carry `matches` (the capture groups left after the whole
// match is shifted off); combinator tokens only carry `value` and `type`.
var tSelector = [
    // group 1: "a#link > p"
    [
        {
            type: 'TAG',
            value: 'a',
            matches: ['a']
        },
        {
            type: 'ID',
            value: '#link',
            matches: ['link']
        },
        {
            // `value` is the text actually consumed, surrounding whitespace
            // included; `type` is the trimmed combinator.
            type: '>',
            value: ' > '
        },
        {
            type: 'TAG',
            value: 'p',
            matches: ['p']
        }
    ],
    // group 2: ".expando"
    [
        {
            type: 'CLASS',
            value: '.expando',
            matches: ['expando']
        }
    ]
];
複製代碼

函數源碼

/**
 * Split a selector string into groups of tokens.
 *
 * @param {string} selector The selector text to tokenize.
 * @param {boolean} [parseOnly] When truthy, nothing is cached or thrown;
 *   the function only reports how many trailing characters could not be
 *   tokenized.
 * @returns {Array|number} With parseOnly: the length of the unconsumed
 *   remainder (0 on a cache hit). Otherwise a shallow copy of the cached
 *   groups array, one token array per comma-separated group; if any text is
 *   left over, the result of Sizzle.error(selector).
 */
tokenize = Sizzle.tokenize = function(selector, parseOnly) {
    var matched, match, tokens, type,
        soFar, groups, preFilters,
        cached = tokenCache[selector + ' '];

    // Cache hit: only fully tokenized selectors are ever cached, so the
    // parseOnly answer is "0 characters left"; otherwise hand out a shallow copy.
    if (cached) {
        return parseOnly ? 0 : cached.slice(0);
    }

    soFar = selector;
    groups = [];
    preFilters = Expr.preFilter;

    while (soFar) {
        // Start a new group on the first pass or after a comma
        if (!matched || (match = rcomma.exec(soFar))) {
            if (match) {
                // Drop the comma; if nothing follows it, keep soFar as-is so
                // the trailing comma ends up reported as unconsumed text
                soFar = soFar.slice(match[0].length) || soFar;
            }
            groups.push((tokens = []));
        }

        matched = false;

        // Combinators
        if ((match = rcombinators.exec(soFar))) {
            matched = match.shift();
            tokens.push({
                value: matched,
                // After shift(), match[0] is the captured combinator
                type: match[0].replace(rtrim, " ")
            });
            // Drop the consumed text from the front
            soFar = soFar.slice(matched.length);
        }

        // Filters: TAG, CLASS, ID, ATTR, CHILD, PSEUDO
        for (type in Expr.filter) {
            // Run the pattern against the remaining text; when a pre-filter
            // exists for this type, it may rewrite or reject (null) the match
            if ((match = matchExpr[type].exec(soFar)) &&
                (!preFilters[type] || (match = preFilters[type](match)))) {
                matched = match.shift();
                tokens.push({
                    value: matched,
                    type: type,
                    matches: match
                });
                soFar = soFar.slice(matched.length);
            }
        }

        // Nothing matched in this pass: the rest cannot be tokenized
        if (!matched) {
            break;
        }
    }

    // parseOnly: report the length of the unconsumed remainder.
    // Otherwise leftover text means an invalid selector; a clean parse is
    // cached and a shallow copy is returned.
    return parseOnly ?
        soFar.length :
        soFar ?
            Sizzle.error(selector) :
            tokenCache(selector, groups).slice(0);
};
複製代碼

這裏會出現一種情況，比如選擇字符串是 ':not(:nth-child(2))' 這樣的，進入了 tokenize，這個時候匹配到 PSEUDO，那麼就要走 preFilter['PSEUDO'] 方法。這個字符串被匹配的時候是 $2 有值而 $6 沒值的情況，所以會再次走 tokenize 函數（parseOnly = true），形成遞歸。

Sizzle函數

Sizzle 函數就是我們調用選擇器時走的第一個函數，前兩篇文章說的方法都是在引入 Sizzle 時運行的。該方法會判斷字符串是不是簡單的 class、tag、id 選擇器，比如 p、#id、.container，或者是否可以直接使用 querySelectorAll；如果都不可以的話，就進入 select 方法。

函數源碼

/**
 * Main entry point: find the elements matching `selector`.
 *
 * Order of attempts when no `seed` is given and the document is HTML:
 *   1. simple "#id", "tag" and ".class" selectors via the native
 *      getElementById / getElementsByTagName / getElementsByClassName;
 *   2. the native querySelectorAll;
 *   3. the generic select() fallback (also used in every other case).
 *
 * @param {string} selector Selector text.
 * @param {Object} [context] Element, document or document fragment to search
 *   in; falls back to `document`.
 * @param {Array} [results] Collection that matches are appended to.
 * @param {Array} [seed] Passed through to select().
 * @returns {Array} `results` (a new array when none was passed).
 */
function Sizzle(selector, context, results, seed) {
    var m, i, elem, nid, match, groups, newSelector,
        newContext = context && context.ownerDocument,
        // Without a context, behave as for a document node (nodeType 9)
        nodeType = context ? context.nodeType : 9;

    results = results || [];

    // Bail out early on a non-string or empty selector, or on a context that
    // is neither an element (1), a document (9) nor a document fragment (11)
    if (typeof selector !== 'string' || !selector ||
        nodeType !== 1 && nodeType !== 9 && nodeType !== 11) {
        return results;
    }

    if (!seed) {
        setDocument(context);
        context = context || document;

        if (documentIsHtml) {
            // Shortcuts for simple "#id", "tag" and ".class" selectors
            // (not applicable to document fragments)
            if (nodeType !== 11 && (match = rquickExpr.exec(selector))) {
                // ID selector
                if ((m = match[1])) {
                    // Document context
                    if (nodeType === 9) {
                        if ((elem = context.getElementById(m))) {
                            // Accept the element only if its id really equals m
                            if (elem.id === m) {
                                results.push(elem);
                                return results;
                            }
                        } else {
                            return results;
                        }

                    // Element context: elements have no getElementById, so
                    // look the id up on the owner document and make sure the
                    // hit lies inside the context
                    } else {
                        if (newContext && (elem = newContext.getElementById(m)) &&
                            contains(context, elem) &&
                            elem.id === m) {
                            results.push(elem);
                            return results;
                        }
                    }

                // Tag selector
                } else if (match[2]) {
                    push.apply(results, context.getElementsByTagName(selector));
                    return results;

                // Class selector
                } else if ((m = match[3]) && support.getElementsByClassName &&
                    context.getElementsByClassName) {
                    push.apply(results, context.getElementsByClassName(m));
                    return results;
                }
            }

            // Use querySelectorAll when it is supported, the selector is not
            // already known to fail natively, it does not hit a known-buggy
            // pattern, and the context is not an <object> element
            if (support.qsa &&
                !nonnativeSelectorCache[selector + ' '] &&
                (!rbuggyQSA || !rbuggyQSA.test(selector)) &&
                (nodeType !== 1 || context.nodeName.toLowerCase() !== 'object')) {
                newSelector = selector;
                newContext = context;

                // element.querySelectorAll evaluates the selector against the
                // whole document and merely filters the hits to descendants,
                // so selectors with descendant/child/sibling combinators can
                // return unexpected results. Work around this by prefixing
                // every group of the selector with an id (or :scope) that
                // pins it to the context element.
                if (nodeType === 1 &&
                    (rdescend.test(selector) || rcombinators.test(selector))) {
                    // Sibling selectors must be run from the parent node
                    newContext = rsibling.test(selector) && testContext(context.parentNode) ||
                        context;

                    // The id hack is only needed when the context changed or
                    // the browser lacks :scope support. Otherwise nid stays
                    // undefined and ':scope' is used as the prefix below.
                    if (newContext !== context || !support.scope) {
                        // Reuse the existing id (escaped), or set a temporary one
                        if ((nid = context.getAttribute("id"))) {
                            nid = nid.replace(rcssescape, fcssescape);
                        } else {
                            context.setAttribute('id', (nid = expando));
                        }
                    }

                    // Prefix every group of the selector list
                    groups = tokenize(selector);
                    i = groups.length;
                    while (i--) {
                        groups[i] = (nid ? '#' + nid : ':scope') + ' ' +
                            toSelector(groups[i]);
                    }
                    newSelector = groups.join(',');
                }

                try {
                    push.apply(results,
                        newContext.querySelectorAll(newSelector)
                    );
                    return results;
                } catch (e) {
                    // Remember selectors the native method could not handle
                    nonnativeSelectorCache(selector, true);
                } finally {
                    // Remove the temporary id again
                    if (nid === expando) {
                        context.removeAttribute('id');
                    }
                }
            }
        }
    }

    // Everything else goes through the generic select() path
    return select(selector.replace(rtrim, '$1'), context, results, seed);
}
複製代碼