在.NET開發中,List<T>是咱們經常用到的類型。前段時間看到其他部門小夥伴討論「兩個List(10W個元素)集合求交集,list1.Where(p=>list2.Contains(p)).ToList()」,性能不好,有人說是ToList的問題,我當時第一直覺是關鍵在Contains方法,這個問題後面再來細說討論。還有某大神說過,沒反編譯過框架源碼的碼農不是合格碼農:)。下面咱們就藉助Reflector來讀一讀.NET4.0中List<T>的源碼,在讀到一些方法實現的時候,會更清楚,oh,原來是這樣,解開之前的疑惑,寫出更有效率的代碼。
// Default initial capacity. The decompiled members that need this value use the
// literal 4 rather than this name — presumably because the compiler inlined the constant.
private const int _defaultCapacity = 4;

// Zero-length array assigned by the static constructor; lists created with the
// parameterless constructor use it as their backing store.
private static readonly T[] _emptyArray;

// Backing array that actually stores the elements; its Length is the list's capacity.
private T[] _items;

// Number of elements currently stored in _items.
private int _size;

// NOTE(review): presumably the lock object handed out for synchronization;
// no use of it is visible in this excerpt — confirm.
[NonSerialized]
private object _syncRoot;

// Incremented by mutating members (e.g. Add, InsertRange); the enumerator compares it
// with its own snapshot to detect modification during enumeration.
private int _version;
根據閱讀整個類的代碼,簡單解釋下:
_defaultCapacity:表示默認的初始容量,即內部容器數組的Length。但用ctrl+f搜索不到該字段的使用,有點奇怪(代碼中都是寫死的4),於是查看IL後,發現const字段在IL中是以static literal聲明的(例如 static literal string str="123")。咱們都知道const必須在編譯期初始化值。const字段的本質是static,那麼它同樣具有Type Dictionary,但咱們無法通過像下面這樣的代碼來驗證,因爲它在編譯時必須初始化,值已經確定了。
/// <summary>
/// Demo type: a static field of a generic class is stored separately for every
/// closed type, so Test&lt;int&gt;.Address and Test&lt;string&gt;.Address are distinct.
/// </summary>
class Test<T>
{
    // Intentionally disabled: the const counterpart that the experiment cannot use.
    // public const string _defaultCapacity = "123";

    /// <summary>Full name of the type argument T, captured once per closed type.</summary>
    public static string Address;

    static Test()
    {
        Address = typeof(T).ToString();
    }
}

/// <summary>Console driver that prints whether two closed types share the static field.</summary>
class MyList
{
    static void Main()
    {
        // Intentionally disabled const comparison (see the disabled member in Test<T>).
        // Console.WriteLine("const :{0}", object.ReferenceEquals(Test<int>._defaultCapacity, Test<string>._defaultCapacity));

        // Prints false: each closed type holds its own string instance.
        Console.WriteLine("static :{0}", object.ReferenceEquals(Test<int>.Address, Test<string>.Address));

        // Console.WriteLine(Test<string>._defaultCapacity);
    }
}
可是反編譯的代碼中沒有對_defaultCapacity的使用,代碼中是寫死4的,難道在編譯時候對所有使用的地方都替換成了4?爲何要定義爲const而不用static,大神您怎麼看?
_emptyArray:默認爲一個空數組,在靜態構造函數中初始化。爲什麼不這樣寫:public static readonly T[] _emptyArray=new T[0];效果是一樣的。大神您怎麼看?
_items:這個真正存儲數據的內部數組。
_size:表示List中存儲元素的個數。
_syncRoot:用於Thread Safe的。
_version:表示一個版本,當Add元素或者Remove元素等時候,會自增。咱們在foreach list過程中如果list改變了,那麼會拋出異常(好像是集合已修改,不能枚舉),就是根據它來判斷的。
/// <summary>Creates the zero-length array used by empty lists of this element type.</summary>
static List()
{
    // A static field of a generic type exists once per closed type, so every T gets its own new T[0].
    List<T>._emptyArray = new T[0];
}

/// <summary>Initializes an empty list whose backing store is the shared empty array.</summary>
[TargetedPatchingOptOut("Performance critical to inline across NGen image boundaries")]
public List()
{
    // All empty List<T> instances of the same T start on the same array, so no allocation happens here.
    this._items = List<T>._emptyArray;
}

/// <summary>Initializes an empty list whose backing array has the requested length.</summary>
/// <param name="capacity">Length of the internal array; negative values are reported through ThrowHelper.</param>
[TargetedPatchingOptOut("Performance critical to inline across NGen image boundaries")]
public List(int capacity)
{
    if (capacity < 0)
    {
        ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.capacity, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum);
    }

    // The capacity is exactly the Length of the internal array.
    this._items = new T[capacity];
}

/// <summary>Initializes a list with the elements of <paramref name="collection"/>.</summary>
/// <param name="collection">Source sequence; null is reported through ThrowHelper.</param>
public List(IEnumerable<T> collection)
{
    if (collection == null)
    {
        ThrowHelper.ThrowArgumentNullException(ExceptionArgument.collection);
    }

    ICollection<T> sized = collection as ICollection<T>;
    if (sized == null)
    {
        // Unknown length: start from the default capacity and let Add grow the array.
        // _size is not assigned again below because Add increments it per element.
        this._size = 0;
        // The literal equals _defaultCapacity; the decompiled code shows the number rather than the name.
        this._items = new T[4];
        using (IEnumerator<T> cursor = collection.GetEnumerator())
        {
            while (cursor.MoveNext())
            {
                this.Add(cursor.Current);
            }
        }
        return;
    }

    // Known length: allocate exactly once and let the source copy itself in bulk.
    int count = sized.Count;
    this._items = new T[count];
    sized.CopyTo(this._items, 0);
    this._size = count;
}
/// <summary>Appends <paramref name="item"/> to the end of the list, growing the backing array when it is full.</summary>
/// <param name="item">Element to append.</param>
public void Add(T item)
{
    int slot = this._size;
    if (slot == this._items.Length)
    {
        // Array is full: make room for at least one more element.
        this.EnsureCapacity(slot + 1);
    }

    this._items[slot] = item;
    this._size = slot + 1;
    // Lets active enumerators notice that the list changed.
    this._version++;
}
當元素個數和內部數組(_items)Length相等時,那麼就要確保_items的Length至少爲this._size+1。順便提一下,可以看到Add方法不是thread safe的,其實內部另有一個同步包裝,見下面的Synchronized方法:
/// <summary>Wraps <paramref name="list"/> in a SynchronizedList and returns the wrapper as an IList.</summary>
/// <param name="list">List to wrap.</param>
/// <returns>A new SynchronizedList constructed over <paramref name="list"/>.</returns>
internal static IList<T> Synchronized(List<T> list)
{
    IList<T> wrapper = new SynchronizedList<T>(list);
    return wrapper;
}
/// <summary>Grows the backing array so that it can hold at least <paramref name="min"/> elements.</summary>
/// <param name="min">Minimum required capacity.</param>
private void EnsureCapacity(int min)
{
    int current = this._items.Length;
    if (current >= min)
    {
        return;
    }

    // Growth policy: start at 4, then double. Arrays are fixed-length, so the
    // Capacity setter performs the actual reallocation.
    int target;
    if (current == 0)
    {
        target = 4;
    }
    else
    {
        target = current * 2;
    }

    if (target < min)
    {
        target = min;
    }

    this.Capacity = target;
}

/// <summary>
/// Gets the length of the backing array, or replaces the backing array with one of the given length.
/// </summary>
/// <remarks>
/// Setting a different value allocates a new array and copies the existing elements,
/// so passing a good estimate to the List(int) constructor avoids repeated growth.
/// </remarks>
public int Capacity
{
    [TargetedPatchingOptOut("Performance critical to inline across NGen image boundaries")]
    get
    {
        return this._items.Length;
    }
    set
    {
        if (value < this._size)
        {
            // The new capacity cannot be smaller than the number of stored elements.
            ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.value, ExceptionResource.ArgumentOutOfRange_SmallCapacity);
        }

        if (value == this._items.Length)
        {
            return;
        }

        if (value <= 0)
        {
            // Capacity 0: fall back to the shared empty array.
            this._items = List<T>._emptyArray;
            return;
        }

        // Allocate the replacement array and move the live elements across.
        T[] resized = new T[value];
        if (this._size > 0)
        {
            Array.Copy(this._items, 0, resized, 0, this._size);
        }

        this._items = resized;
    }
}
下面來看看AddRange:
/// <summary>Appends all elements of <paramref name="collection"/> to the end of the list.</summary>
/// <param name="collection">Elements to append; null is reported through ThrowHelper.</param>
public void AddRange(IEnumerable<T> collection)
{
    this.InsertRange(this._size, collection);
}

/// <summary>Inserts all elements of <paramref name="collection"/> starting at <paramref name="index"/>.</summary>
/// <param name="index">Zero-based insertion position; must be between 0 and the current element count inclusive.</param>
/// <param name="collection">Elements to insert; null is reported through ThrowHelper.</param>
public void InsertRange(int index, IEnumerable<T> collection)
{
    if (collection == null)
    {
        ThrowHelper.ThrowArgumentNullException(ExceptionArgument.collection);
    }

    // Reject negative indexes as well as indexes past the end. Checking only
    // "index > _size" would let a negative index through to Array.Copy, or be
    // accepted silently when the collection is empty.
    if (index < 0 || index > this._size)
    {
        ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.index, ExceptionResource.ArgumentOutOfRange_Index);
    }

    ICollection<T> is2 = collection as ICollection<T>;
    if (is2 != null)
    {
        // Known length: grow once and move elements with bulk Array.Copy calls.
        int count = is2.Count;
        if (count > 0)
        {
            this.EnsureCapacity(this._size + count);

            if (index < this._size)
            {
                // Open a gap of "count" slots by shifting the tail to the right.
                Array.Copy(this._items, index, this._items, index + count, this._size - index);
            }

            if (this == is2)
            {
                // Inserting the list into itself: the source was split by the shift above,
                // so copy the part before the gap, then the part that moved behind it.
                Array.Copy(this._items, 0, this._items, index, index);
                Array.Copy(this._items, index + count, this._items, index * 2, this._size - index);
            }
            else
            {
                // The source copies into a temporary array, which is then copied into the gap.
                // NOTE(review): presumably the temporary keeps a foreign CopyTo implementation
                // from receiving the internal buffer — confirm before removing it.
                T[] array = new T[count];
                is2.CopyTo(array, 0);
                array.CopyTo(this._items, index);
            }

            this._size += count;
        }
    }
    else
    {
        // Unknown length: insert one element at a time.
        using (IEnumerator<T> enumerator = collection.GetEnumerator())
        {
            while (enumerator.MoveNext())
            {
                this.Insert(index++, enumerator.Current);
            }
        }
    }

    this._version++;
}
/// <summary>Copies the stored elements into a new array of exactly that length.</summary>
/// <returns>A newly allocated array holding the list's elements in order.</returns>
public T[] ToArray()
{
    T[] snapshot = new T[this._size];
    Array.Copy(this._items, 0, snapshot, 0, this._size);
    return snapshot;
}

// The two methods below belong to the nested
// "public struct Enumerator : IEnumerator<T>, IDisposable, IEnumerator".
// The common path lives in MoveNext and the rare path in MoveNextRare.
// NOTE(review): presumably split so that the common path stays small — confirm.

/// <summary>Advances to the next element while the list is unmodified and elements remain.</summary>
/// <returns>true when Current was set to a new element; otherwise the result of MoveNextRare.</returns>
public bool MoveNext()
{
    List<T> owner = this.list;
    if ((this.version != owner._version) || (this.index >= owner._size))
    {
        return this.MoveNextRare();
    }

    this.current = owner._items[this.index];
    this.index++;
    return true;
}

/// <summary>Handles the end of enumeration and the "list was modified" case.</summary>
/// <returns>false, after marking the enumerator as finished.</returns>
private bool MoveNextRare()
{
    if (this.version != this.list._version)
    {
        // The list changed since this enumerator was created.
        ThrowHelper.ThrowInvalidOperationException(ExceptionResource.InvalidOperation_EnumFailedVersion);
    }

    this.current = default(T);
    this.index = this.list._size + 1;
    return false;
}
/// <summary>Determines whether the list contains <paramref name="item"/> using a sequential scan.</summary>
/// <param name="item">Value to look for; may be null.</param>
/// <returns>true if an equal element is found; otherwise false.</returns>
public bool Contains(T item)
{
    int position = 0;

    if (item != null)
    {
        // EqualityComparer<T>.Default decides how two T values are compared.
        EqualityComparer<T> comparer = EqualityComparer<T>.Default;
        while (position < this._size)
        {
            if (comparer.Equals(this._items[position], item))
            {
                return true;
            }
            position++;
        }
        return false;
    }

    // Searching for null: look for the first null slot without using the comparer.
    while (position < this._size)
    {
        if (this._items[position] == null)
        {
            return true;
        }
        position++;
    }
    return false;
}
其他方法不解釋了,有興趣自己去看看啦。
測試代碼:
/// <summary>Builds a list containing the integers 0 .. length-1.</summary>
/// <param name="length">Number of elements to generate; also used as the initial capacity.</param>
static List<int> GetData(int length)
{
    // Pre-sized so that filling the list never triggers a reallocation.
    List<int> data = new List<int>(length);
    int next = 0;
    while (next < length)
    {
        data.Add(next);
        next++;
    }
    return data;
}

/// <summary>Times repeated AddRange calls against the equivalent element-by-element Add loop.</summary>
static void Main()
{
    int itemLength = 10;
    int iteration = 1000000;
    List<int> batch = GetData(itemLength);

    // Both targets are pre-sized to the final element count, so neither run has to grow its array.
    int totalCapacity = itemLength * iteration;
    List<int> viaAddRange = new List<int>(totalCapacity);
    List<int> viaAdd = new List<int>(totalCapacity);

    CodeTimer.Initialize();

    CodeTimer.Time("AddRange方法測試", iteration, () =>
    {
        viaAddRange.AddRange(batch);
    });

    CodeTimer.Time("Add方法測試", iteration, () =>
    {
        for (int k = 0; k < batch.Count; k++)
        {
            viaAdd.Add(batch[k]);
        }
    });

    Console.ReadKey();
}
通過故意執行多次AddRange,讓其內部不斷地創建臨時數組,可以看到下面的結果,消耗的時間竟然比Add多,並且Gen 0有33次垃圾回收。AddRange中創建臨時數組,到底算不算疏忽,寫FCL的工程師技術水平應該不容置疑吧,難道是故意的,大神您怎麼看?:)
測試結果如下:
還是來看下Contains中如何比較兩個元素是否相等,其中:
/// <summary>
/// Abstract base for equality comparers of T. Implements both the generic and the
/// non-generic comparer interfaces and caches one default comparer per T in a static field.
/// </summary>
public abstract class EqualityComparer<T> : IEqualityComparer, IEqualityComparer<T>
{
    // Lazily created default comparer; as a static field of a generic class it is stored per T.
    private static EqualityComparer<T> defaultComparer;

    protected EqualityComparer() { }

    /// <summary>
    /// Chooses the comparer implementation for T. The checks run in this order:
    /// byte, T assignable to IEquatable&lt;T&gt;, Nullable&lt;U&gt; whose U is assignable to
    /// IEquatable&lt;U&gt;, enum whose underlying type is int, and finally the object-based fallback.
    /// </summary>
    [SecuritySafeCritical]
    private static EqualityComparer<T> CreateComparer()
    {
        RuntimeType c = (RuntimeType) typeof(T);
        if (c == typeof(byte))
        {
            return (EqualityComparer<T>) new ByteEqualityComparer();
        }
        if (typeof(IEquatable<T>).IsAssignableFrom(c))
        {
            // NOTE(review): the <int> type argument looks like a placeholder that the runtime
            // helper replaces with c — confirm against the helper's contract.
            return (EqualityComparer<T>) RuntimeTypeHandle.CreateInstanceForAnotherGenericParameter((RuntimeType) typeof(GenericEqualityComparer<int>), c);
        }
        if (c.IsGenericType && (c.GetGenericTypeDefinition() == typeof(Nullable<>)))
        {
            // T is Nullable<U>; type2 is U.
            RuntimeType type2 = (RuntimeType) c.GetGenericArguments()[0];
            if (typeof(IEquatable<>).MakeGenericType(new Type[] { type2 }).IsAssignableFrom(type2))
            {
                return (EqualityComparer<T>) RuntimeTypeHandle.CreateInstanceForAnotherGenericParameter((RuntimeType) typeof(NullableEqualityComparer<int>), type2);
            }
        }
        if (c.IsEnum && (Enum.GetUnderlyingType(c) == typeof(int)))
        {
            return (EqualityComparer<T>) RuntimeTypeHandle.CreateInstanceForAnotherGenericParameter((RuntimeType) typeof(EnumEqualityComparer<int>), c);
        }
        // No specialised comparer matched.
        return new ObjectEqualityComparer<T>();
    }

    /// <summary>Determines whether two values of T are equal.</summary>
    public abstract bool Equals(T x, T y);

    /// <summary>Returns a hash code for <paramref name="obj"/>.</summary>
    public abstract int GetHashCode(T obj);

    /// <summary>
    /// Scans forward from <paramref name="startIndex"/> over <paramref name="count"/> elements and
    /// returns the index of the first element for which Equals(array[i], value) is true, or -1.
    /// </summary>
    internal virtual int IndexOf(T[] array, T value, int startIndex, int count)
    {
        // num is the exclusive upper bound of the scan.
        int num = startIndex + count;
        for (int i = startIndex; i < num; i++)
        {
            if (this.Equals(array[i], value))
            {
                return i;
            }
        }
        return -1;
    }

    /// <summary>
    /// Scans backward from <paramref name="startIndex"/> over <paramref name="count"/> elements and
    /// returns the index of the first element for which Equals(array[i], value) is true, or -1.
    /// </summary>
    internal virtual int LastIndexOf(T[] array, T value, int startIndex, int count)
    {
        // num is the inclusive lower bound of the scan.
        int num = (startIndex - count) + 1;
        for (int i = startIndex; i >= num; i--)
        {
            if (this.Equals(array[i], value))
            {
                return i;
            }
        }
        return -1;
    }

    /// <summary>
    /// Non-generic comparison: identical references are equal; two non-null values that are both T
    /// are compared with the typed Equals; two non-null values that are not both T are reported
    /// through ThrowHelper.ThrowArgumentException; every other case is false.
    /// </summary>
    bool IEqualityComparer.Equals(object x, object y)
    {
        if (x == y)
        {
            return true;
        }
        if ((x != null) && (y != null))
        {
            if ((x is T) && (y is T))
            {
                return this.Equals((T) x, (T) y);
            }
            ThrowHelper.ThrowArgumentException(ExceptionResource.Argument_InvalidArgumentForComparison);
        }
        return
false;
    }

    /// <summary>
    /// Non-generic hash code: 0 for null, the typed GetHashCode for a T, and
    /// ThrowHelper.ThrowArgumentException for any other non-null object.
    /// </summary>
    int IEqualityComparer.GetHashCode(object obj)
    {
        if (obj != null)
        {
            if (obj is T)
            {
                return this.GetHashCode((T) obj);
            }
            ThrowHelper.ThrowArgumentException(ExceptionResource.Argument_InvalidArgumentForComparison);
        }
        return 0;
    }

    /// <summary>
    /// Gets the cached default comparer for T, creating and caching it on first use.
    /// No synchronization is visible around the lazy initialization.
    /// </summary>
    public static EqualityComparer<T> Default
    {
        [SecuritySafeCritical, TargetedPatchingOptOut("Performance critical to inline across NGen image boundaries")]
        get
        {
            EqualityComparer<T> defaultComparer = EqualityComparer<T>.defaultComparer;
            if (defaultComparer == null)
            {
                defaultComparer = EqualityComparer<T>.CreateComparer();
                EqualityComparer<T>.defaultComparer = defaultComparer;
            }
            return defaultComparer;
        }
    }
}
泛型類中,靜態字段private static EqualityComparer<T> defaultComparer;會爲每一個T類型都緩存一份該數據,是這樣去初始化的:
/// <summary>
/// Gets the default comparer for T, creating it through CreateComparer on first access
/// and storing it in the static defaultComparer field for later calls.
/// </summary>
public static EqualityComparer<T> Default
{
    [SecuritySafeCritical, TargetedPatchingOptOut("Performance critical to inline across NGen image boundaries")]
    get
    {
        EqualityComparer<T> cached = EqualityComparer<T>.defaultComparer;
        if (cached != null)
        {
            return cached;
        }

        // First access for this T: build the comparer and remember it.
        cached = EqualityComparer<T>.CreateComparer();
        EqualityComparer<T>.defaultComparer = cached;
        return cached;
    }
}
其實發現不少FCL中代碼都是這樣的模式,可學習使用在平時工作項目中。Type Dictionary真是一勞永逸的哦,貌似某大神說是必備技能啊,有興趣的可以看我之前寫的幾篇文章。
通過閱讀分析FCL源碼,可以更清楚地知道實現細節,更高效地使用,可學習MS大神們的代碼和設計,命名規範等等,總之,好處多多,其他好處等着你來補充:)。當咱們看懂代碼意思後,能否思考爲什麼要這樣設計,這樣設計的好處是什麼,這將是更高一層次的武功了。
如有不正之處,還請斧正,謝謝大家。期待着大家的討論~~