underscore源碼解讀

感謝支持ayqy我的訂閱號，每週義務推送1篇（only unique one）原創精品博文，話題包括但不限於前端、Node、Android、數學（WebGL）、語文（課外書讀後感）、英語（文檔翻譯）
如果覺得弱水三千，一瓢太少，可以去 http://blog.ayqy.net 看個痛快

寫在前面

源碼之前，了無祕密。

不知道從哪裏聽說的，但好像有一定道理。上次讀的是Step，100來行代碼實現的異步流程控制方案，讀完結論是API設計很巧妙（只提供了一個API），異步流程控制方面一般，所以沒偷學到什麼實用招式，只拿到一些「玄乎」的東西：

The general idea is to not impose structure on the programmer, but rather help them write good callback based functions.

從Node異步流程控制一路跑下去，又發現了寫co的tj，發現此人聲望很高，就多瞭解了一點八卦：

haha wow I’m impressed that you found out I started with design! When I was a few years into design I was playing around with Flash which led me to scripting. Later when I was doing design work for a local company in Victoria I decided that if I was going to do side work that I would like to be able to do every aspect so I started coding. As far as the “how” – nothing special really, I don’t read books, never went to school, I just read other people’s code and always wonder how things work.

（引自TJ Holowaychuk's answer to How did TJ Holowaychuk learn to program? – Quora）

那麼至少可以確定讀源碼療法是有效的，不管偷學到的是招式還是心法，撿到籃子裏的都是菜

一.爲何去讀underscore？

源碼短（1500行的樣子）
聽說有助於學習函數式編程（不能說完全沒有幫助，但離FP還差得很遠）
有用（my.js還缺一套集合操作支持）

每天早上花1個小時讀50-100行，一個多月就讀完了，成本不算高

然後對_的依賴程度一般，很多時候能立即想到可用的_.xxx()，但很少有非用不可的場景（除了昨天用的_.groupBy()，確實比較方便）

二.亮點

1.callback包裝

很多_.xxx()都用到了callback包裝，如下：

 // Internal function that returns an efficient (for current engines) version
  // of the passed-in callback, to be repeatedly applied in other Underscore
  // functions.
// 針對ctx的優化，相似於currying
  var optimizeCb = function(func, context, argCount) {
// 沒有ctx就直接返回func
    if (context === void 0) return func;
// 不傳第3個參數就默認是3
    switch (argCount == null ? 3 : argCount) {
// 肯定單參
      case 1: return function(value) {
        return func.call(context, value);
      };
// 肯定2參，value、other
      case 2: return function(value, other) {
        return func.call(context, value, other);
      };
// 默認3參，item、index、arr
      case 3: return function(value, index, collection) {
        return func.call(context, value, index, collection);
      };
// 肯定4參，收集器、item、index、arr
      case 4: return function(accumulator, value, index, collection) {
        return func.call(context, accumulator, value, index, collection);
      };
    }
// >4參，用apply
    return function() {
      return func.apply(context, arguments);
    };
  };

  // A mostly-internal function to generate callbacks that can be applied
  // to each element in a collection, returning the desired result — either
  // identity, an arbitrary callback, a property matcher, or a property accessor.
// 頗有用的回調生成方法，不少公開方法都是在cb的基礎上實現的
  var cb = function(value, context, argCount) {
// 第1個參數爲空，就返回一個管子方法，x => x
    if (value == null) return _.identity;
// 第一個參數是函數，就返回currying過的callback
    if (_.isFunction(value)) return optimizeCb(value, context, argCount);
// 第一個參數是對象，就返回一個屬性檢測器 (value, attrs) => value是否是attrs的超集（示例屬性上有一份attrs，鍵值一摸同樣）
    if (_.isObject(value)) return _.matcher(value);
// 默認返回取值方法，把value做爲key 返回obj => obj[key]
    return _.property(value);
  };
  _.iteratee = function(value, context) {
// 返回一個callback，見cb中的四種狀況，能夠做用於集合中的每一個元素
    return cb(value, context, Infinity);
  };

把傳入的值包裝成合適的callback，集合操做中省去了不少麻煩

2.用函數代替簡單值

 // Return all the elements that pass a truth test.
  // Aliased as `select`.
// 過濾器，圈個子集,保留漏勺下面的
  _.filter = _.select = function(obj, predicate, context) {
    var results = [];
    // 篩選規則，轉換爲callback(item, index, arr)
    predicate = cb(predicate, context);
// 遍歷，篩選
    _.each(obj, function(value, index, list) {
      // 篩選true就丟到結果集
      if (predicate(value, index, list)) results.push(value);
    });
    return results;
  };

  // Return all the elements for which a truth test fails.
// 與過濾器相反，保留漏勺上面的
  _.reject = function(obj, predicate, context) {
    return _.filter(obj, _.negate(cb(predicate)), context);
  };

我們發現_.reject()的實現非常簡單，看樣子是對篩選規則predicate取反，再做一遍filter()，負責對函數取反的_.negate()也沒什麼神祕的：

 // Returns a negated version of the passed-in predicate.
// 取反，再包一層，對判斷函數的返回值取反
  _.negate = function(predicate) {
    return function() {
      return !predicate.apply(this, arguments);
    };
  };

之因此能簡單地對predicate取反就實現了相反功能的reject()，正是由於函數式編程的一個小技巧：

儘量使用函數代替簡單值

把篩選條件抽出去做爲函數，而不是傳入一系列基本值，內部if...else篩選，帶來了極大的靈活性

3.函數組合的威力

簡單組合2個函數，就能實現相對複雜的功能了：

 // Convenience version of a common use case of `map`: fetching a property.
// 從對象集合中取出指定屬性值，造成新數組
// 相似於查表，取出某一列
  _.pluck = function(obj, key) {
    // 作映射y=prop(key)
    return _.map(obj, _.property(key));
  };

  // Convenience version of a common use case of `filter`: selecting only objects
  // containing specific `key:value` pairs.
// 從集合中篩選出含有指定鍵值對集合的元素
  _.where = function(obj, attrs) {
    // 先取出attrs的實例屬性，再對obj進行超集檢測留下包含這些屬性的元素
    return _.filter(obj, _.matcher(attrs));
  };

還有更巧妙的，先定義強大的_.partial()：

 // Partially apply a function by creating a version that has had some of its
  // arguments pre-filled, without changing its dynamic `this` context. _ acts
  // as a placeholder, allowing any combination of arguments to be pre-filled.
// 相似於currying，但提供了佔位符
// 經過佔位符能夠跳着綁，比用bind實現的通常currying更強大
  _.partial = function(func) {
    // func後面的其它參數都是要綁定給func的
    var boundArgs = slice.call(arguments, 1);
    // currying結果
    var bound = function() {
      var position = 0, length = boundArgs.length;
      var args = Array(length);
      for (var i = 0; i < length; i++) {
        // 若是要綁定的參數爲_（表示一個佔位符，固然，也是underscore），就把新傳入的參數填進去
        //! 例如_.partial((a, b, c, d) => console.log(a, b, c, d), 1, _, _, 4)(2, 3);
        // 不然不變，就用以前currying內定的參數值
        args[i] = boundArgs[i] === _ ? arguments[position++] : boundArgs[i];
      }
      // 若是新傳入的參數有剩餘（填完空還多餘幾個），就都放在參數列表最後
      while (position < arguments.length) args.push(arguments[position++]);
      // bind執行
      return executeBound(func, bound, this, this, args);
    };
    return bound;
  };

然後可以玩各種雜技，比如通過_.delay()實現nextTick：

 // Defers a function, scheduling it to run after the current call stack has
  // cleared.
// nextTick，延遲1毫秒執行
// 實現很巧妙，經過_.partial給_.delay作currying，把func空出來，只綁定wait=1
// 此時_.defer(func)就等價於_.delay(func, 1)
  _.defer = _.partial(_.delay, _, 1);

比如通過_.before()實現once：

 // Returns a function that will be executed at most one time, no matter how
  // often you call it. Useful for lazy initialization.
// 只執行1次
// _.before()的一種狀況，對_.before作個currying
  _.once = _.partial(_.before, 2);

_.partial()就像狂野炎術士、縮小射線工程師同樣，創造了無限可能

4.OOP支持

_支持鏈式調用，他們自稱是OOP方式

// OOP

// —————

// If Underscore is called as a function, it returns a wrapped object that

// can be used OO-style. This wrapper holds altered versions of all the

// underscore functions. Wrapped objects may be chained.

那麼怎樣讓掛在_上的n個靜態方法支持鏈式調用呢？

首先，弄個對象出來：

 // Create a safe reference to the Underscore object for use below.
// 用來支持鏈式調用，這樣下面全部方法都做爲靜態方法存在
  var _ = function(obj) {
    // 鏈沒斷就直接返回
    if (obj instanceof _) return obj;
    // 鏈斷了就從新包一個續上
    if (!(this instanceof _)) return new _(obj);
    // 持有被包裹的對象
    this._wrapped = obj;
  };

而後想辦法把靜態方法交給這些對象：

 // Add your own custom functions to the Underscore object.
// 擴展_
// 把靜態方法全粘到原型對象上
  _.mixin = function(obj) {
    // 遍歷obj身上的全部方法名
    _.each(_.functions(obj), function(name) {
      // 當前方法
      var func = _[name] = obj[name];
      // 粘到_的原型對象上去
      _.prototype[name] = function() {
        // 準備參數，把被包裹的對象做爲第一個參數
        var args = [this._wrapped];
        // 把調用時的參數列表接上去
        push.apply(args, arguments);
        // 用準備好的參數，以_爲ctx執行當前方法
        // result()用來處理需不須要支持鏈式調用
        return result(this, func.apply(_, args));
      };
    });
  };

最後，把全部靜態方法粘到_的原型對象上：

 // Add all of the Underscore functions to the wrapper object.
//! 能支持OOP的緣由
// 把本身的靜態方法全粘到原型對象上
  _.mixin(_);

5.正則性能優化小技巧

 // Functions for escaping and unescaping strings to/from HTML interpolation.
  // 轉義器
  // 根據傳入字典作轉義/去轉義
  var createEscaper = function(map) {
    // 查字典
    var escaper = function(match) {
      return map[match];
    };
    // Regexes for identifying a key that needs to be escaped
    // 根據待轉義項拼接生成匹配規則
    var source = '(?:' + _.keys(map).join('|') + ')';
    // 匹配正則，單次
    var testRegexp = RegExp(source);
    // 替換正則，屢次
    var replaceRegexp = RegExp(source, 'g');
    return function(string) {
      // 傳入字符串檢查，undefined/null轉空串
      string = string == null ? '' : '' + string;
//! 性能優化
//! 先用匹配正則檢查，存在須要轉義的才上替換正則（匹配，查字典，換掉）
      return testRegexp.test(string) ? string.replace(replaceRegexp, escaper) : string;
    };
  };
  // 轉義html
  _.escape = createEscaper(escapeMap);
  // 去轉義
  _.unescape = createEscaper(unescapeMap);

小技巧在這裏：

// 匹配正則，單次
var testRegexp = RegExp(source);
// 替換正則，屢次
var replaceRegexp = RegExp(source, 'g');
//...
//! 性能優化
//! 先用匹配正則檢查，存在須要轉義的才上替換正則（匹配，查字典，換掉）
return testRegexp.test(string) ? string.replace(replaceRegexp, escaper) : string;

三.注意事項

通過源碼發現了一些比較難受的地方

1.uniqueId

代碼本身會說話

 // Generate a unique integer id (unique within the entire client session).
  // Useful for temporary DOM ids.
  // 私有計數器
  var idCounter = 0;
// 生成客戶端惟一id
//!!! 若是沒有prefix的話，直接就是1, 2, 3...很容易衝突
// 多用做臨時DOM id
  _.uniqueId = function(prefix) {
    // 先自增，從1開始
    var id = ++idCounter + '';
    // 傳了前綴的話拼上，不然裸1, 2, 3...
    return prefix ? prefix + id : id;
  };

Backbone的cid用的就是這個東西，實現很是簡單，或者說弱，並非想象中強大的惟一id

使用時須要注意，想保證惟一，就只用_.uniqueId()來生成id，不要把幾套生成id的方案一塊兒用，裸1, 2, 3...太容易衝突了

2.unique

集合無序的話，去重方法性能不怎麼樣

 // Produce a duplicate-free version of the array. If the array has already
  // been sorted, you have the option of using a faster algorithm.
  // Aliased as `unique`.
// 去重
// 若是數組有序，傳入isSorted真值一次過
// 無序的話，實現方式是循環包含性檢測，性能比字典法差不少
  _.uniq = _.unique = function(array, isSorted, iteratee, context) {
    // isSorted不是布爾值的話，作3參支持處理
    // 把3個參數(array, iteratee, context)映射到4個參數對應位置上，isSorted爲false
    if (!_.isBoolean(isSorted)) {
      context = iteratee;
      iteratee = isSorted;
      isSorted = false;
    }
    // 若是傳了權值計算方法，包裝成callback(item, index, arr)
    if (iteratee != null) iteratee = cb(iteratee, context);
    // 結果集和臨時變量
    var result = [];
    var seen = [];
    // 遍歷
    for (var i = 0, length = getLength(array); i < length; i++) {
      // 當前值、計算權值（沒傳權值計算方法的話，權值就是當前值）
      var value = array[i],
          computed = iteratee ? iteratee(value, i, array) : value;
      // 有序就直接seen記錄上一個值，一次過
      if (isSorted) {
        // i === 0或者上一個元素的權值不等於當前元素的權值，添進結果集
        if (!i || seen !== computed) result.push(value);
        // 更新狀態
        seen = computed;
      } else if (iteratee) {
      // 無序，但傳了權值計算方法的話
        // 若是seen集合裏沒有當前元素的權值，值添進結果集，權值添進seen集
        if (!_.contains(seen, computed)) {
          seen.push(computed);
          result.push(value);
        }
      } else if (!_.contains(result, value)) {
      // 無序 且 沒傳權值計算方法 且結果集中不含當前值，添進去
        result.push(value);
      }
    }
    return result;
  };

由於是循環包含性檢測，而_.contains(arr, value)查找性能顯然不如字典法的key in dir

3.before

 // Returns a function that will only be executed up to (but not including) the Nth call.
// 只執行幾回
//! 只執行times-1次，爲何不包括第times次？搞得_.once()看着都難受
  _.before = function(times, func) {
    // 緩存返回值
    var memo;
    return function() {
      // 前times-1次調用
      if (--times > 0) {
        memo = func.apply(this, arguments);
      }
      // 以後的調用忽略掉，直接返回最後一次執行結果
      if (times <= 1) func = null;
      return memo;
    };
  };

因此_.once()長這樣子：

_.once = _.partial(_.before, 2);

4.isFunction

 // Optimize `isFunction` if appropriate. Work around some typeof bugs in old v8,
  // IE 11 (#1621), and in Safari 8 (#1929).
// 函數判斷，兼容老版本v八、IE11和Safari8
  // 瀏覽器hack
  // 若是typeof檢測正則表達式不爲'function' 且 typeof檢測Int8Array不爲'object'
  if (typeof /./ != 'function' && typeof Int8Array != 'object') {
    // 重寫函數判斷，typeof檢測返回'function'
//! || false是爲了解決IE8 & 11下的一個詭異問題（有時typeof dom元素結果是'function'，|| false居然能解決），見：
//! https://github.com/jashkenas/underscore/issues/1621
    _.isFunction = function(obj) {
      return typeof obj == 'function' || false;
    };
  }

沒看明白|| false有什麼用，跑去提了個issue，而後知道了這個歷史問題

5.sortBy

 // Sort the object's values by a criterion produced by an iteratee.
// 按iteratee給定的衡量標準對集合元素排序
  _.sortBy = function(obj, iteratee, context) {
    // 轉換爲callback(item, index, arr)
    iteratee = cb(iteratee, context);
// 1.fx = (v, i, w)，作映射，計算每一個元素的權值，並記錄索引
// 2.原生sort方法排序，按權值升序排列，權值相等時保持原順序
// 3.取出結果表的value列
    return _.pluck(_.map(obj, function(value, index, list) {
      return {
        value: value,
        index: index,
        criteria: iteratee(value, index, list)
      };
    }).sort(function(left, right) {
      var a = left.criteria;
      var b = right.criteria;
      if (a !== b) {
// 認爲undefined很大，升序的話，最終全部undefined都排在後面
        if (a > b || a === void 0) return 1;
        if (a < b || b === void 0) return -1;
      }
      return left.index - right.index;
    }), 'value');
  };

須要注意2個問題：

默認升序認爲undefined很大，最終排在最後面
undefined可能會導致排序失敗

例如：

// 默認升序
_.sortBy([,,1,,2]);
// [1, 2, undefined, undefined, undefined]
_.sortBy([,,1,,2], v => v)
// [1, 2, undefined, undefined, undefined]
_.sortBy([,,1,,2], v => v * 1)
// [undefined, undefined, 1, undefined, 2]

原因很簡單，undefined * 1 的結果是NaN（注意NaN === NaN爲false，只能用isNaN()檢測），而NaN既不大於x也不小於x，所以：

// NaN不知足這2道檢測
if (a > b || a === void 0) return 1;
if (a < b || b === void 0) return -1;
// 一路跑到
return left.index - right.index;

所以保持原順序，排序失敗。所以使用_.sortBy(obj, fn)要注意undefined的隱患

四.源碼分析

Git地址：https://github.com/ayqy/underscore-1.8.3

P.S.源碼1500行，讀完手動註釋版本2200行，足夠詳細

參考資料

http://underscorejs.org/
#1621：IE8&11下一個非常奇怪的問題

聯繫ayqy

若是在文章中發現了什麼問題，請查看原文並留下評論，ayqy看到就會回覆的（不建議直接回復公衆號，看不到的啦）

特別要緊的問題，能夠直接微信聯繫ayqywx

（這周居然多了5個關注，滿滿的小幸福。感謝支持，頑張りますね）

本文分享自微信公衆號 - 前端向後（backward-fe）。
若有侵權，請聯繫 support@oschina.cn 刪除。
本文參與「OSC源創計劃」，歡迎正在閱讀的你也加入，一塊兒分享。