hashmap源碼分析( 基於java8)

時間 2019-12-04

標籤 hashmap 源碼分析基於 java8 java 欄目 Java 简体版

原文原文鏈接

hashmap源碼分析

簡介

hashmap的get和put操作的平均時間複雜度是常量，前提是哈希函數能將元素均勻地分佈到各個桶中。初始容量(capacity)的值不能設置過高，加載因子(loadfactor)不能設置得過低，否則會影響迭代的性能（迭代所需的時間與容量加上元素數量成正比）。
一個hashmap的實例有兩個參數會影響它的性能：初始容量、加載因子。初始容量是hashmap在創建時桶的數量。加載因子用來確定何時進行擴容（size > 容量*加載因子）。擴容的時候會重新構建內部的數據結構（rehash），使桶的數量變爲原來的兩倍。

默認的加載因子（0.75）在時間和空間開銷上提供了很好的權衡。加載因子大一點的話會減少空間開銷，但是會增加查找的時間開銷（體現在get和put等操作上）。

hashmap允許key和value爲null，是線程不安全的。如果需要線程安全，可以使用Collections.synchronizedMap()包裝，
或者使用ConcurrentHashMap（ConcurrentMap接口的實現），這個map是線程安全的。

hashmap數據結構

hashmap是一個散列表，存儲的內容是key-value。就像我們用的字典一樣，通過字母（key）查找單詞(value)。hashmap的get和put平均時間複雜度是O(1)；在java8中，哈希衝突嚴重時最壞情況爲O(logN)。

在java8以前hashmap採用的是桶+鏈表的數據結構。但是如果數據很大，鏈表的查找時間複雜度是O(n)，顯然這違背了hashmap的初衷，所以當一個桶中鏈表的元素超過8個（且桶數組的容量不小於64）時，java8會把鏈表轉換爲紅黑樹。

[數組鏈表散列（hash）](https://blog.csdn.net/u013565...)

hashmap的數據結構實現

桶，鏈表的實現

桶的實現:

// The bucket array: each slot holds the first node of one bucket, or null when that bucket is empty.
transient Node<K,V>[] table;

鏈表的實現:

/**
 * A single key/value mapping stored in a bucket. Nodes that share a bucket
 * are chained through {@link #next} as a singly linked list.
 *
 * <p>None of the accessors below perform any synchronization.
 */
static class Node<K,V> implements Map.Entry<K,V> {

    /** Hash value recorded for this node when it was created; never reassigned. */
    final int hash;
    /** The key of this mapping; never reassigned. */
    final K key;
    /** The current value of this mapping; replaced by {@link #setValue}. */
    V value;
    /** The next node in the same bucket, or {@code null} at the end of the chain. */
    Node<K,V> next;

    /** Stores the four arguments in the corresponding fields as-is. */
    Node(int hash, K key, V value, Node<K,V> next) {
        this.hash = hash;
        this.key = key;
        this.value = value;
        this.next = next;
    }

    public final K getKey() {
        return key;
    }

    public final V getValue() {
        return value;
    }

    /** Renders the mapping as {@code key=value}. */
    public final String toString() {
        return key + "=" + value;
    }

    /** XOR of the null-safe hash codes of the key and the value. */
    public final int hashCode() {
        int keyHash = Objects.hashCode(key);
        int valueHash = Objects.hashCode(value);
        return keyHash ^ valueHash;
    }

    /**
     * Replaces the value of this mapping.
     *
     * @return the value that was stored before the call
     */
    public final V setValue(V newValue) {
        V previous = value;
        value = newValue;
        return previous;
    }

    /**
     * Two entries are equal when both their keys and their values are equal
     * (null-safe); any object that is not a {@code Map.Entry} is never equal.
     */
    public final boolean equals(Object o) {
        if (o == this) {
            return true;
        }
        if (!(o instanceof Map.Entry)) {
            return false;
        }
        Map.Entry<?,?> other = (Map.Entry<?,?>) o;
        return Objects.equals(key, other.getKey())
            && Objects.equals(value, other.getValue());
    }
}

重要屬性

static final int DEFAULT_INITIAL_CAPACITY = 1 << 4; // default bucket-array capacity (1 << 4 = 16)

static final int MAXIMUM_CAPACITY = 1 << 30;// largest bucket-array capacity; the (int, float) constructor clamps larger requests to this value

static final float DEFAULT_LOAD_FACTOR = 0.75f;// load factor used by the constructors that are not given one

static final int TREEIFY_THRESHOLD = 8;// putVal calls treeifyBin when it appends to a list bin that already held at least this many nodes

// NOTE(review): not referenced by the code shown in this article. Per the JDK documentation, a tree bin
// that shrinks to this many nodes or fewer while being split during resize is converted back to a list.
static final int UNTREEIFY_THRESHOLD = 6;

// NOTE(review): not referenced by the code shown in this article. Per the JDK documentation this is the
// smallest bucket-array capacity (not a per-bucket node count) at which bins may be treeified; below it
// the table is resized instead.
static final int MIN_TREEIFY_CAPACITY = 64;

transient Node<K,V>[] table;// the bucket array; putVal treats null or zero length as "not allocated yet"

transient int size;// number of key/value mappings; incremented by putVal when a new node is added

transient int modCount;// count of structural modifications; putVal increments it when it adds a node, not when it replaces a value

int threshold;// size above which putVal calls resize(); the (int, float) constructor stores tableSizeFor(initialCapacity) here

final float loadFactor;// load factor, assigned once by every constructor

構造方法

構造方法不會立即分配桶數組，只記錄加載因子和擴容閾值（threshold）；HashMap(Map)除外，它會接着把傳入map中的元素逐個插入。

HashMap(int,float)

/**
 * Creates an empty map from an initial capacity and a load factor.
 * No bucket array is allocated here; only {@code loadFactor} and
 * {@code threshold} are set.
 *
 * @param initialCapacity requested capacity; values above
 *                        {@code MAXIMUM_CAPACITY} are clamped to it
 * @param loadFactor      load factor; must be a positive number
 * @throws IllegalArgumentException if {@code initialCapacity} is negative,
 *         or if {@code loadFactor} is non-positive or NaN
 */
public HashMap(int initialCapacity, float loadFactor) {
    if (initialCapacity < 0) {
        throw new IllegalArgumentException("Illegal initial capacity: " +
                                           initialCapacity);
    }
    if (Float.isNaN(loadFactor) || loadFactor <= 0) {
        throw new IllegalArgumentException("Illegal load factor: " +
                                           loadFactor);
    }
    int cappedCapacity = Math.min(initialCapacity, MAXIMUM_CAPACITY);
    this.loadFactor = loadFactor;
    // The result of tableSizeFor is kept in threshold until a table exists.
    this.threshold = tableSizeFor(cappedCapacity);
}

HashMap(int)

/**
 * Creates an empty map with the given initial capacity by delegating to
 * HashMap(int, float) with DEFAULT_LOAD_FACTOR.
 */
public HashMap(int initialCapacity) {
this(initialCapacity, DEFAULT_LOAD_FACTOR);// the load factor is the default, 0.75f
}

HashMap()

/**
 * Creates an empty map. Only the load factor is assigned; every other field
 * keeps its default value, so no bucket array is allocated by this constructor.
 */
public HashMap() {
        this.loadFactor = DEFAULT_LOAD_FACTOR; // default load factor (0.75f); threshold and table are left untouched
}

HashMap(Map)

/**
 * Creates a map holding the mappings of {@code m}: sets the default load
 * factor, then hands {@code m} to putMapEntries.
 */
public HashMap(Map<? extends K, ? extends V> m) {
this.loadFactor = DEFAULT_LOAD_FACTOR;// the load factor is the default, 0.75f
putMapEntries(m, false);// copy every mapping of m into this map (evict = false)
}



/**
 * Copies every mapping of {@code m} into this map through {@code putVal}.
 * Does nothing when {@code m} is empty.
 *
 * @param m     the source map
 * @param evict forwarded unchanged to {@code putVal}
 */
final void putMapEntries(Map<? extends K, ? extends V> m, boolean evict) {
    int incoming = m.size();
    if (incoming <= 0) {
        return;
    }

    if (table == null) {
        // No table yet: pre-size the threshold so that the incoming
        // mappings fit under the load factor.
        float needed = ((float) incoming / loadFactor) + 1.0F;
        int target;
        if (needed < (float) MAXIMUM_CAPACITY) {
            target = (int) needed;
        } else {
            target = MAXIMUM_CAPACITY;
        }
        if (target > threshold) {
            threshold = tableSizeFor(target);
        }
    } else if (incoming > threshold) {
        // A table exists and the incoming count alone exceeds the threshold.
        resize();
    }

    for (Map.Entry<? extends K, ? extends V> entry : m.entrySet()) {
        K entryKey = entry.getKey();
        V entryValue = entry.getValue();
        putVal(hash(entryKey), entryKey, entryValue, false, evict);
    }
}

主要的幾個方法分析

get(Object)

/**
 * Looks up the value stored for {@code key}.
 *
 * @return the stored value, or {@code null} when {@code getNode} finds no
 *         node for the key (a node whose value is {@code null} also yields
 *         {@code null})
 */
public V get(Object key) {
    Node<K,V> found = getNode(hash(key), key);
    if (found == null) {
        return null;
    }
    return found.value;
}


/**
 * Computes the hash used to pick a bucket: the key's {@code hashCode()}
 * XORed with its own upper 16 bits shifted down, so the high bits also
 * influence the low bits. A {@code null} key hashes to 0.
 */
static final int hash(Object key) {
    if (key == null) {
        return 0;
    }
    int code = key.hashCode();
    return code ^ (code >>> 16);
}


/**
 * Finds the node that holds {@code key}.
 *
 * @param hash the hash for the key, as computed by {@code hash(key)}
 * @param key  the key to look for (may be {@code null})
 * @return the matching node, or {@code null} if the table is missing or
 *         empty, the bucket is empty, or no node in the bucket matches
 */
final Node<K,V> getNode(int hash, Object key) {
    Node<K,V>[] buckets = table;
    if (buckets == null) {
        return null;
    }
    int capacity = buckets.length;
    if (capacity <= 0) {
        return null;
    }
    // The bucket index is the hash masked by (capacity - 1).
    Node<K,V> head = buckets[(capacity - 1) & hash];
    if (head == null) {
        return null;
    }

    // The first node of the bucket is always checked first.
    if (head.hash == hash) {
        K headKey = head.key;
        if (headKey == key || (key != null && key.equals(headKey))) {
            return head;
        }
    }

    if (head.next == null) {
        return null;
    }
    // A tree bin is searched by the tree node itself.
    if (head instanceof TreeNode) {
        return ((TreeNode<K,V>) head).getTreeNode(hash, key);
    }
    // Otherwise walk the remainder of the linked list.
    for (Node<K,V> cursor = head.next; cursor != null; cursor = cursor.next) {
        if (cursor.hash != hash) {
            continue;
        }
        K candidate = cursor.key;
        if (candidate == key || (key != null && key.equals(candidate))) {
            return cursor;
        }
    }
    return null;
}

put(K,V)

/**
 * Associates {@code value} with {@code key} by delegating to putVal with
 * onlyIfAbsent = false and evict = true.
 *
 * @return whatever putVal returns: the value previously stored for the key,
 *         or null when a new node was added
 */
public V put(K key, V value) {
return putVal(hash(key), key, value, false, true);
}


/**
 * Inserts a new mapping or updates the existing one for {@code key}.
 *
 * @param hash         the hash for the key (put passes hash(key))
 * @param key          the key
 * @param value        the value to store
 * @param onlyIfAbsent if true, an existing non-null value is left unchanged
 * @param evict        forwarded unchanged to afterNodeInsertion
 * @return the value previously stored for the key (possibly null), or null
 *         when the key was not present and a new node was added
 */
final V putVal(int hash, K key, V value, boolean onlyIfAbsent,
           boolean evict) {
Node<K,V>[] tab; Node<K,V> p; int n, i;
if ((tab = table) == null || (n = tab.length) == 0)
    n = (tab = resize()).length;// table not allocated yet (null or zero length): obtain one from resize()
if ((p = tab[i = (n - 1) & hash]) == null)
    tab[i] = newNode(hash, key, value, null);// target bucket is empty: the new node becomes its first node
else {// bucket is occupied: look for the key in the existing list/tree
    Node<K,V> e; K k;
    if (p.hash == hash &&
        ((k = p.key) == key || (key != null && key.equals(k))))
        e = p;// the first node already holds this key; remember it so its value is updated below
    else if (p instanceof TreeNode)
        e = ((TreeNode<K,V>)p).putTreeVal(this, tab, hash, key, value);// tree bin: delegate to putTreeVal
    else {// linked-list bin: walk towards the tail, stopping early if the key is found
        for (int binCount = 0; ; ++binCount) {
            if ((e = p.next) == null) {
                // reached the tail without finding the key: append a new node (e stays null)
                p.next = newNode(hash, key, value, null);
                // the bin already held at least TREEIFY_THRESHOLD nodes before this append: hand it to treeifyBin
                if (binCount >= TREEIFY_THRESHOLD - 1) // -1 for 1st
                    treeifyBin(tab, hash);
                break;
            }
            if (e.hash == hash &&
                ((k = e.key) == key || (key != null && key.equals(k))))
                break;
            p = e;
        }
    }
    if (e != null) { // existing mapping for key
        V oldValue = e.value;
        if (!onlyIfAbsent || oldValue == null)
            e.value = value;
        afterNodeAccess(e);
        return oldValue;// returns before modCount/size are touched: replacing a value is not a structural change
    }
}
++modCount;
if (++size > threshold)// the new size exceeds the threshold: let resize() rebuild the table
    resize();
afterNodeInsertion(evict);
return null;
}

hash（）

/**
 * Spreads the key's {@code hashCode()} by XORing its upper 16 bits into
 * its lower 16 bits. A {@code null} key hashes to 0.
 */
static final int hash(Object key) {
    if (key == null) {
        return 0;
    }
    int raw = key.hashCode();
    int upperHalf = raw >>> 16;
    return raw ^ upperHalf;
}

h是key的hashCode()返回的原始hash值，是int類型，int取值範圍：-2147483648到2147483647，前後加起來大概四十億的映射空間。只要hash函數映射得比較鬆散，一般是很難出現碰撞的。
但是考慮到實際內存的大小，很難放下這麼大的數組。

所以出於空間上的考慮，實際是用hash值與數組長度減一做與運算（(n - 1) & hash）來定位桶，只有hash值的低位參與運算。上述的擾動函數把原始計算出來的hash值（int 四個字節32位）右移16位，讓自己的高半區和低半區做異或，就是爲了混合原始hash值的高位和低位，以此來加大低位的隨機性。而且混合後的低位摻雜了高位的部分特徵，這樣高位的信息也被變相地保留下來了。

線程安全性

hashmap是線程不安全的，如果要使用線程安全的hashmap，建議使用ConcurrentHashMap。

參考：

hash（）原理: https://www.zhihu.com/questio...

關注個人公衆號第一時間閱讀有趣的技術故事
掃碼關注：
也能夠在微信搜索公衆號便可關注我：codexiulian 渴望與你一塊兒成長進步！

相關標籤/搜索

每日一句

每一个你不满意的现在，都有一个你没有努力的曾经。