文中提到的JDK7 HashMap參見博客 http://www.javashuo.com/article/p-qcwimtzs-nv.html
紅黑樹、TreeMap分析詳見 http://www.javashuo.com/article/p-weyeglqm-nv.html
//同jdk7 static final int DEFAULT_INITIAL_CAPACITY = 1 << 4; // aka 16 static final int MAXIMUM_CAPACITY = 1 << 30; static final float DEFAULT_LOAD_FACTOR = 0.75f; //樹化閾值,也就是說鏈表長度超過8纔會進行樹化 static final int TREEIFY_THRESHOLD = 8; //鏈表化閾值,也就是說紅黑樹的節點個數少於6纔會退化成鏈表 static final int UNTREEIFY_THRESHOLD = 6; //最小樹化容量,也就是說鏈表長度超過64纔會樹化 static final int MIN_TREEIFY_CAPACITY = 64; //仍是熟悉的味道,Node數組,數組加鏈表的存儲結構 transient Node<K,V>[] table;
爲什麼樹化閾值(8)和鏈表化閾值(6)不相等?簡單來說,假設樹化閾值和鏈表化閾值相等,統一爲一個閾值,超過則樹化,低於則鏈表化,假設就規定爲8,就會出現這樣的問題:如果一個鏈表長度從7到8了,那麼就樹化,可是過一會兒又從8到7了,又需要變回鏈表。而不管是鏈表轉化成樹還是樹轉化成鏈表,都是非常費時的,這就大大降低了HashMap的效率;此外在樹化、鏈表化的過程中有大量的垃圾對象產生,從而加快觸發GC
等下揭曉
/**
 * Basic bin node: one key/value mapping plus the link to the next node
 * in the same bucket's singly linked list.
 */
static class Node<K,V> implements Map.Entry<K,V> {
    /** Hash of the key, fixed when the node is created. */
    final int hash;
    /** The mapping's key; never reassigned. */
    final K key;
    /** The mapping's current value. */
    V value;
    /** Next node in the same bin, or null at the end of the list. */
    Node<K,V> next;

    /**
     * Creates a node holding the given mapping and linking to {@code next}.
     *
     * @param hash  hash stored for the key
     * @param key   the key
     * @param value the value
     * @param next  the following node in the bin, may be null
     */
    Node(int hash, K key, V value, Node<K,V> next) {
        this.next = next;
        this.value = value;
        this.key = key;
        this.hash = hash;
    }
}
等同於JDK7的Entry節點換了個名字,還是熟悉的鏈表
static final class TreeNode<K,V> extends LinkedHashMap.Entry<K,V> { TreeNode<K,V> parent; // red-black tree links TreeNode<K,V> left; TreeNode<K,V> right; TreeNode<K,V> prev; // needed to unlink next upon deletion boolean red; TreeNode(int hash, K key, V val, Node<K,V> next) { super(hash, key, val, next); } }
boolean red
,紅黑樹它來了
HashMap向外提供的功能就是時間複雜度爲O(1)的查詢,可是基於數組鏈表的衝突解決方式,以及HashMap通過位運算計算index的方式,如果hashCode的實現不能實現很好的分散效果,比如自己的類中重寫了hashCode方法,可能導致某一個鏈表過長,從而使得HashMap的查詢速度退化到O(n),這是沒有辦法接受的,所以需要選擇一種支持快速查找的結構--有序的二叉樹
爲什麼是紅黑樹
這一點在關於TreeMap的分析中已經講清楚了:如果選擇普通的二叉搜索樹,在一定的情況下,二叉搜索樹會退化成鏈表,而AVL樹的實現複雜,插入刪除效率不及紅黑樹,所以選擇綜合性能不錯的紅黑樹。
/**
 * JDK 8 constructor: validates the arguments and records the load factor.
 * No table is allocated here; the rounded-up capacity is stored in
 * {@code threshold}.
 *
 * @param initialCapacity requested capacity; values above MAXIMUM_CAPACITY are clamped
 * @param loadFactor      load factor; must be positive and not NaN
 * @throws IllegalArgumentException if initialCapacity is negative or
 *         loadFactor is non-positive or NaN
 */
public HashMap(int initialCapacity, float loadFactor) {
    if (initialCapacity < 0)
        throw new IllegalArgumentException("Illegal initial capacity: " +
                                           initialCapacity);
    if (initialCapacity > MAXIMUM_CAPACITY)
        initialCapacity = MAXIMUM_CAPACITY;
    if (loadFactor <= 0 || Float.isNaN(loadFactor))
        throw new IllegalArgumentException("Illegal load factor: " +
                                           loadFactor);
    this.loadFactor = loadFactor;
    // tableSizeFor returns the smallest power of two that is greater than
    // OR EQUAL TO initialCapacity (e.g. 8 stays 8, 9 becomes 16).
    this.threshold = tableSizeFor(initialCapacity);
}
/**
 * JDK 7 version of the same constructor, shown for comparison.
 *
 * @param initialCapacity requested capacity; values above MAXIMUM_CAPACITY are clamped
 * @param loadFactor      load factor; must be positive and not NaN
 * @throws IllegalArgumentException if initialCapacity is negative or
 *         loadFactor is non-positive or NaN
 */
public HashMap(int initialCapacity, float loadFactor) {
    // Range checks on the arguments.
    if (initialCapacity < 0)
        throw new IllegalArgumentException("Illegal initial capacity: " +
                                           initialCapacity);
    if (initialCapacity > MAXIMUM_CAPACITY)
        initialCapacity = MAXIMUM_CAPACITY;
    if (loadFactor <= 0 || Float.isNaN(loadFactor))
        throw new IllegalArgumentException("Illegal load factor: " +
                                           loadFactor);
    // Record the load factor; the raw requested capacity is stored in
    // threshold without being rounded to a power of two here.
    this.loadFactor = loadFactor;
    threshold = initialCapacity;
    // Empty hook left for subclasses; does nothing in HashMap itself.
    init();
}
區別:
JDK7中使用Integer的highestOneBit()、bitCount()方法計算二次冪,JDK8中自己實現了tableSizeFor方法
public V put(K key, V value) { return putVal(hash(key), key, value, false, true); }
新增兩個參數:
@param onlyIfAbsent if true, don't change existing value 對應第四個參數-false 如果爲true,插入已經存在的key時,不修改value @param evict if false, the table is in creation mode. 對應第五個參數-true 這個參數只是原樣傳給afterNodeInsertion(evict),而該方法在HashMap中是空方法(留給LinkedHashMap等子類使用),所以對HashMap自己的put沒有影響
final V putVal(int hash, K key, V value, boolean onlyIfAbsent, boolean evict) { Node<K,V>[] tab; Node<K,V> p; int n, i; //初始化 if ((tab = table) == null || (n = tab.length) == 0) n = (tab = resize()).length; //(n - 1) & hash //JDK8中沒有了indexFor方法,可是仍是採用一樣的邏輯計算index //爲null直接插入 if ((p = tab[i = (n - 1) & hash]) == null) tab[i] = newNode(hash, key, value, null); else { //發生哈希衝突 Node<K,V> e; K k; //若是與第一個node的key的hash值相同,而且key相同 if (p.hash == hash && ((k = p.key) == key || (key != null && key.equals(k)))) e = p; //若是已是樹結構了,調用紅黑樹的方式插入結點 //紅黑樹的插入等下再聊 else if (p instanceof TreeNode) e = ((TreeNode<K,V>)p).putTreeVal(this, tab, hash, key, value); else { //區別於JDK7中的頭插法,採用了尾插法,爲何採用尾插法呢? for (int binCount = 0; ; ++binCount) { if ((e = p.next) == null) { p.next = newNode(hash, key, value, null); //若是當前的鏈表長度超過了樹化閾值則樹化,-1是由於第一個結點沒計數 if (binCount >= TREEIFY_THRESHOLD - 1) // -1 for 1st treeifyBin(tab, hash); break; } if (e.hash == hash && ((k = e.key) == key || (key != null && key.equals(k)))) break; p = e; } } if (e != null) { // existing mapping for key V oldValue = e.value; //根據傳入的參數onlyIfAbSent決定是否修改已經存在的key對應的value值 if (!onlyIfAbsent || oldValue == null) e.value = value; afterNodeAccess(e); return oldValue; } } ++modCount; //若是size超過閾值,則擴容 if (++size > threshold) resize(); //hashMap中爲空方法 afterNodeInsertion(evict); return null; }
從上面的代碼能夠看出數組鏈表的邏輯基本相似,可是JDK8中的實現中新結點的插入採用了尾插法
頭插法的問題明天再補!
/**
 * Spreads the key's hashCode by XOR-ing its upper 16 bits into the lower
 * 16 bits, so the high bits also influence the bucket index.
 *
 * @param key the key, may be null
 * @return 0 for a null key, otherwise the spread hash
 */
static final int hash(Object key) {
    if (key == null) {
        return 0;
    }
    int code = key.hashCode();
    int upperHalf = code >>> 16;
    return code ^ upperHalf;
}
相較於JDK7的屢次擾動,JDK8的擾動次數減小了可是利用了高16位和低16位的數據來進行擾動
final Node<K,V>[] resize() { Node<K,V>[] oldTab = table; int oldCap = (oldTab == null) ? 0 : oldTab.length; int oldThr = threshold; int newCap, newThr = 0; if (oldCap > 0) { if (oldCap >= MAXIMUM_CAPACITY) { threshold = Integer.MAX_VALUE; return oldTab; } //newCap=oldCap << 1擴容爲原來的兩倍 else if ((newCap = oldCap << 1) < MAXIMUM_CAPACITY && oldCap >= DEFAULT_INITIAL_CAPACITY) newThr = oldThr << 1; // double threshold } //oldCap==0 else if (oldThr > 0) // initial capacity was placed in threshold //若是構造函數中計算出來的threshold被賦值給newCap了 newCap = oldThr; else { // zero initial threshold signifies using defaults //若是調用了默認的構造函數,cap和threshold就會不同 newCap = DEFAULT_INITIAL_CAPACITY; newThr = (int)(DEFAULT_LOAD_FACTOR * DEFAULT_INITIAL_CAPACITY); } if (newThr == 0) { float ft = (float)newCap * loadFactor; newThr = (newCap < MAXIMUM_CAPACITY && ft < (float)MAXIMUM_CAPACITY ? (int)ft : Integer.MAX_VALUE); } threshold = newThr; @SuppressWarnings({"rawtypes","unchecked"}) Node<K,V>[] newTab = (Node<K,V>[])new Node[newCap]; table = newTab; if (oldTab != null) { //拷貝數組 for (int j = 0; j < oldCap; ++j) { Node<K,V> e; if ((e = oldTab[j]) != null) { oldTab[j] = null; //若是鏈表只有這一個節點 if (e.next == null) newTab[e.hash & (newCap - 1)] = e; //若是是紅黑樹 else if (e instanceof TreeNode) ((TreeNode<K,V>)e).split(this, newTab, j, oldCap); else { Node<K,V> loHead = null, loTail = null; Node<K,V> hiHead = null, hiTail = null; Node<K,V> next; do { next = e.next; if ((e.hash & oldCap) == 0) { if (loTail == null) loHead = e; //尾插法 else loTail.next = e; loTail = e; } else { if (hiTail == null) hiHead = e; else hiTail.next = e; hiTail = e; } } while ((e = next) != null); //這裏就能夠直接將兩條鏈的頭部拷貝到新的node數組的相應位置便可 if (loTail != null) { loTail.next = null; newTab[j] = loHead; } if (hiTail != null) { hiTail.next = null; newTab[j + oldCap] = hiHead; } } } } } return newTab; }
拋開紅黑樹來看,這裏利用了一個特性
假設hashcode= 0010 1111 初始容量爲8 index=hashcode&(length-1)=0010 1111 & 0000 0111 = 0000 0111 =7 此外還有一個hashcode2 = 0000 0111 按照相同的index計算方法,二者發生了衝突,此時如果發生擴容 新的容量爲16,length-1 = 16-1 = 15 = 0000 1111 此時二者再去運算結果分別爲: index1 = 0010 1111 & 0000 1111 = 1111 = 15 index2 = 0000 0111 & 0000 1111 = 0111 = 7
通過上面的舉例可以看出,容量左移一位之後,hash中對應舊容量的那一位(也就是代碼中的 e.hash & oldCap)是否爲1,決定了舊鏈分裂成兩條新鏈,而這兩條新鏈在新數組中的index的差值就是最高位的1表示的大小(1000=8),也就是舊的容量
其中初始化也會調用到resize方法,分別走兩個分支
else if (oldThr > 0) // initial capacity was placed in threshold //若是構造函數中計算出來的threshold被賦值給newCap了 newCap = oldThr; else { // zero initial threshold signifies using defaults //若是調用了默認的構造函數,cap和threshold就會不同 newCap = DEFAULT_INITIAL_CAPACITY; newThr = (int)(DEFAULT_LOAD_FACTOR * DEFAULT_INITIAL_CAPACITY); }
與JDK7中的實現不大相同,第一個分支中初始的capacity直接取自構造函數暫存在threshold裏的值(newCap = oldThr),通過簡單的實驗驗證一下
public static void main(String[] args) throws NoSuchFieldException { HashMap<Integer, Integer> map = new HashMap<>(8); Class<? extends HashMap> mapClass = map.getClass(); //threshold Field threshold = mapClass.getDeclaredField("threshold"); threshold.setAccessible(true); try { Integer num = (Integer)threshold.get(map); System.out.println(num); } catch (IllegalAccessException e) { e.printStackTrace(); } //capacity try { map.put(1,1); Method capacity = map.getClass().getDeclaredMethod("capacity"); capacity.setAccessible(true); Integer c = (Integer)capacity.invoke(map); System.out.println(c); } catch (NoSuchMethodException e) { e.printStackTrace(); } catch (IllegalAccessException e) { e.printStackTrace(); } catch (InvocationTargetException e) { e.printStackTrace(); } }
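兩個輸出都是8。要注意threshold是在第一次put之前讀取的,此時它暫存的是構造函數算出的初始容量;第一次put觸發resize之後,capacity爲8,而threshold會被重新計算爲8*0.75=6。而初始化時如果不傳入容量,第一次put之後會發現capacity爲16,threshold爲12=16*0.75,這與JDK7還是略有不同的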
final void treeifyBin(Node<K,V>[] tab, int hash) { int n, index; Node<K,V> e; if (tab == null || (n = tab.length) < MIN_TREEIFY_CAPACITY) //若是length<64,不進行樹化,進行擴容,擴容一樣可能致使鏈的分裂從而縮短鏈的長度 resize(); else if ((e = tab[index = (n - 1) & hash]) != null) { TreeNode<K,V> hd = null, tl = null; //把Node鏈表轉換成TreeNode鏈表 do { //replacementTreeNode把Node轉成TreeNode,new一個新的出來賦值便可 TreeNode<K,V> p = replacementTreeNode(e, null); if (tl == null) hd = p; else { p.prev = tl; //你可能比較差別,TreeNode結構裏面沒有聲明next變量,可是你順着TreeNode的繼承結構會發現它實際繼承了Node,天然就會有next成員變量 tl.next = p; } tl = p; } while ((e = e.next) != null); if ((tab[index] = hd) != null) hd.treeify(tab); } }
/**
 * Builds the TreeNode that takes the place of a plain Node during
 * treeification.
 *
 * @param p    node whose hash, key and value are copied
 * @param next node to link as the new node's successor, may be null
 * @return a freshly created TreeNode
 */
TreeNode<K,V> replacementTreeNode(Node<K,V> p, Node<K,V> next) {
    TreeNode<K,V> replacement = new TreeNode<K,V>(p.hash, p.key, p.value, next);
    return replacement;
}
final void treeify(Node<K,V>[] tab) { TreeNode<K,V> root = null; for (TreeNode<K,V> x = this, next; x != null; x = next) { next = (TreeNode<K,V>)x.next; x.left = x.right = null; //root結點爲null,root->x,而且將x染黑 if (root == null) { x.parent = null; x.red = false; root = x; } else { K k = x.key; int h = x.hash; Class<?> kc = null; for (TreeNode<K,V> p = root;;) { int dir, ph; K pk = p.key; //利用hash排序 if ((ph = p.hash) > h) dir = -1; else if (ph < h) dir = 1; //是否利用本身定義的排序規則進行排序,這裏就不細究了 else if ((kc == null && (kc = comparableClassFor(k)) == null) || (dir = compareComparables(kc, k, pk)) == 0) dir = tieBreakOrder(k, pk); TreeNode<K,V> xp = p; //if dir<=0 p=p.left else p=p.right //二分搜索隱藏在這裏 //if p!=null 說明還沒找到 if ((p = (dir <= 0) ? p.left : p.right) == null) { x.parent = xp; if (dir <= 0) xp.left = x; else xp.right = x; //插入平衡,與TreeMap中的紅黑樹實現基本一致 root = balanceInsertion(root, x); break; } } } } moveRootToFront(tab, root); }
static <K,V> TreeNode<K,V> balanceInsertion(TreeNode<K,V> root, TreeNode<K,V> x) { x.red = true; for (TreeNode<K,V> xp, xpp, xppl, xppr;;) { //第一個結點,直接染黑便可 if ((xp = x.parent) == null) { x.red = false; return x; } else if (!xp.red || (xpp = xp.parent) == null) //root return root; //x的父親爲祖父的左孩子 if (xp == (xppl = xpp.left)) { //叔叔結點爲紅,父親叔叔染黑,祖父染紅,祖父成爲x if ((xppr = xpp.right) != null && xppr.red) { xppr.red = false; xp.red = false; xpp.red = true; x = xpp; } //叔叔結點爲Nil或者黑色 else { //x爲父親的右孩子,以父親爲中心左旋 if (x == xp.right) { root = rotateLeft(root, x = xp); xpp = (xp = x.parent) == null ? null : xp.parent; } //x爲左孩子,父親染黑,祖父染紅,以祖父爲中心右旋 if (xp != null) { xp.red = false; if (xpp != null) { xpp.red = true; root = rotateRight(root, xpp); } } } } //對稱操做 else { if (xppl != null && xppl.red) { xppl.red = false; xp.red = false; xpp.red = true; x = xpp; } else { if (x == xp.left) { root = rotateRight(root, x = xp); xpp = (xp = x.parent) == null ? null : xp.parent; } if (xp != null) { xp.red = false; if (xpp != null) { xpp.red = true; root = rotateLeft(root, xpp); } } } } } }
不貼代碼了,同樣的操做,先定位再插入,最後平衡紅黑樹
這裏貼一段HashMap中的官方的註解便可
Because TreeNodes are about twice the size of regular nodes, we use them only when bins contain enough nodes to warrant use (see TREEIFY_THRESHOLD). And when they become too small (due to removal or resizing) they are converted back to plain bins. In usages with well-distributed user hashCodes, tree bins are rarely used. Ideally, under random hashCodes, the frequency of nodes in bins follows a Poisson distribution.The first values are: 0: 0.60653066 1: 0.30326533 2: 0.07581633 3: 0.01263606 4: 0.00157952 5: 0.00015795 6: 0.00001316 7: 0.00000094 8: 0.00000006
簡單翻譯一下就是,TreeNode的大小大約爲普通Node的2倍,比較佔內存,所以只有當桶中的節點足夠多時才會使用;如果使用well-distributed
也就是分佈合理的hashCode方法,很難用到紅黑樹,因爲如果完全分佈合理,只會觸發擴容。
因此JDK的意思就是能不用紅黑樹就不用
under random hashCodes, the frequency of nodes in bins follows a Poisson distribution.
若是在足夠random的hashcode下,每一個鏈表的大小服從泊松分佈,能夠看到當鏈表長度爲8時,可能性已經很小了,設置成8的意思就是說在足夠random的hashcode方法下,儘量的不使用紅黑樹,那麼設置成8就足夠了
你可能有問題?既然JDK要極力避免使用紅黑樹,爲何還要做爲一種實現添加進來呢?
上面的前提是足夠隨機的hashCode計算,架不住有些同志的類自己重寫了hashCode方法,那麼就有可能導致分佈不均勻,導致鏈表過長,如果不樹化,就枉爲HashMap查詢時間複雜度O(1)的名號了!!