ConcurrentHashMap source-code analysis (JDK 7)

/** JDK7 */
public class ConcurrentHashMap<K, V> extends AbstractMap<K, V> implements ConcurrentMap<K, V>, Serializable {

private static final long serialVersionUID = 7249069246763182397L;

/**
 * The default initial capacity is 16.
 */
static final int DEFAULT_INITIAL_CAPACITY = 16;

/**
 * The default load factor is 0.75.
 */
static final float DEFAULT_LOAD_FACTOR = 0.75f;

/**
 * The default concurrency level is 16.
 */
static final int DEFAULT_CONCURRENCY_LEVEL = 16;

/**
 * The maximum capacity, used if a higher value is implicitly specified by either of the constructors with arguments.
 * MUST be a power of two <= 1<<30 to ensure that entries are indexable using ints.
 */
static final int MAXIMUM_CAPACITY = 1 << 30;

/**
 * The minimum capacity for per-segment tables.
 * Must be a power of two, at least two to avoid immediate resizing on next use after lazy construction.
 */
static final int MIN_SEGMENT_TABLE_CAPACITY = 2;

/**
 * The maximum number of segments to allow; used to bound constructor arguments.
 * Must be power of two less than 1 << 24.
 */
static final int MAX_SEGMENTS = 1 << 16; // slightly conservative

/**
 * Number of unsynchronized retries in size and containsValue methods before resorting to locking. 
 * This is used to avoid unbounded retries if tables undergo continuous modification which would make it impossible to obtain an accurate result.
 */
static final int RETRIES_BEFORE_LOCK = 2;

/**
 * Mask used when computing the segment index: hashValue & segmentMask.
 * 
 * segmentMask = segments.length - 1
 */
final int segmentMask;

/**
 * Shift value for indexing within segments.
 */
final int segmentShift;

/**
 * The Segment array. Each Segment behaves like a small Hashtable.
 * 
 */
final Segment<K,V>[] segments;

/**
 * ConcurrentHashMap constructor.
 * Parameters:
 * 	initialCapacity:	the initial capacity of the ConcurrentHashMap
 * 	loadFactor:		the load factor of each Segment (the Segment array itself never grows;
 * 				the map grows only by resizing the table inside individual Segments)
 * 	concurrencyLevel:	the concurrency level, 16 by default. The Segment array length is derived
 * 				from it, must be a power of two, and is fixed once set.
 * 		e.g. for concurrencyLevel = 17 the Segment array length becomes 32.
 *
 */
@SuppressWarnings("unchecked")
public ConcurrentHashMap(int initialCapacity, float loadFactor, int concurrencyLevel) {
	
    if (!(loadFactor > 0) || initialCapacity < 0 || concurrencyLevel <= 0) throw new IllegalArgumentException();
	
    if (concurrencyLevel > MAX_SEGMENTS) concurrencyLevel = MAX_SEGMENTS;
	
	// Compute a power of two >= concurrencyLevel to use as the Segment array length.
    // Find power-of-two sizes best matching arguments
    int sshift = 0;		// the exponent of two
    int ssize = 1;		// Segment array length: ssize = 2^sshift
    while (ssize < concurrencyLevel) {
        ++sshift;
        ssize <<= 1;
    }
	
    this.segmentShift = 32 - sshift;
    this.segmentMask = ssize - 1;
	
	// Determine the HashEntry table length for the first Segment (s0).
    if (initialCapacity > MAXIMUM_CAPACITY) initialCapacity = MAXIMUM_CAPACITY;
    int c = initialCapacity / ssize;
    if (c * ssize < initialCapacity) ++c;
    int cap = MIN_SEGMENT_TABLE_CAPACITY;	// HashEntry table length, at least 2 (so inserting a single element does not immediately trigger a resize)
    while (cap < c)
        cap <<= 1;
	
    // Create the prototype Segment object
    Segment<K,V> s0 = new Segment<K,V>(loadFactor, (int)(cap * loadFactor), (HashEntry<K,V>[])new HashEntry[cap]);
	
	// Create the Segment array, of length ssize
    Segment<K,V>[] ss = (Segment<K,V>[])new Segment[ssize];
	
	// Publish s0 at slot 0. The remaining slots are populated lazily by ensureSegment() during put().
    UNSAFE.putOrderedObject(ss, SBASE, s0); // ordered write of segments[0]
    this.segments = ss;
}


/**
 * Neither key nor value may be null; otherwise a NullPointerException is thrown.
 *
 */
@SuppressWarnings("unchecked")
public V put(K key, V value) {
    Segment<K,V> s;
    if (value == null) throw new NullPointerException();
	
	// Compute the Segment array index j from the key, analogous to HashMap's index
	// computation: hashValue & (2^n - 1).
    int hash = hash(key);
    int j = (hash >>> segmentShift) & segmentMask;	// segmentMask = segments.length - 1, similar to HashMap's h & (length-1)
	
	// Lazily initialize segments[j] if it has not been created yet
    if ((s = (Segment<K,V>)UNSAFE.getObject(segments, (j << SSHIFT) + SBASE)) == null) // nonvolatile; recheck; in ensureSegment
        s = ensureSegment(j);
	
	// Delegate to the segment, which stores the key-value pair in its HashEntry table.
    return s.put(key, hash, value, false);
}



/**
 * Returns the segment for the given index, creating it and
 * recording in segment table (via CAS) if not already present.
 *
 * @param k the index
 * @return the segment
 */
@SuppressWarnings("unchecked")
private Segment<K,V> ensureSegment(int k) {
    final Segment<K,V>[] ss = this.segments;
    long u = (k << SSHIFT) + SBASE; // raw offset
    Segment<K,V> seg;
    if ((seg = (Segment<K,V>)UNSAFE.getObjectVolatile(ss, u)) == null) {
		
        Segment<K,V> proto = ss[0]; // use segments[0] as the prototype
        int cap = proto.table.length;
        float lf = proto.loadFactor;
        int threshold = (int)(cap * lf);
        HashEntry<K,V>[] tab = (HashEntry<K,V>[])new HashEntry[cap];
		
        if ((seg = (Segment<K,V>)UNSAFE.getObjectVolatile(ss, u)) == null) { // recheck
		
			// Build a new Segment whose load factor equals segments[0]'s and whose table capacity
			// equals segments[0]'s CURRENT table capacity.
			// Note: segments[0] may already have been resized several times by now.
            Segment<K,V> s = new Segment<K,V>(lf, threshold, tab);
			
			// Install the new Segment at slot k of the segments array; the loop plus CAS keeps this safe under concurrency.
            while ((seg = (Segment<K,V>)UNSAFE.getObjectVolatile(ss, u)) == null) {
                if (UNSAFE.compareAndSwapObject(ss, u, null, seg = s))
                    break;
            }
        }
    }
    return seg;
}

/**
 * 
 * Returns the value to which the specified key is mapped, or null if this map contains no mapping for the key.
 * Lock-free: the segment and the bucket's first entry are both read with volatile semantics via Unsafe.
 */
public V get(Object key) {
    Segment<K,V> s; // manually integrate access methods to reduce overhead
    HashEntry<K,V>[] tab;
    int h = hash(key);
    long u = (((h >>> segmentShift) & segmentMask) << SSHIFT) + SBASE;
    if ((s = (Segment<K,V>)UNSAFE.getObjectVolatile(segments, u)) != null && (tab = s.table) != null) {
        for (HashEntry<K,V> e = (HashEntry<K,V>) UNSAFE.getObjectVolatile(tab, ((long)(((tab.length - 1) & h)) << TSHIFT) + TBASE); e != null; e = e.next) {
            K k;
            if ((k = e.key) == key || (e.hash == h && key.equals(k)))
                return e.value;
        }
    }
    return null;
}


// ************************************************ Supplement: the get() path in JDK 1.6's ConcurrentHashMap ************************************************
	
	/**
	 * JDK 1.6 ConcurrentHashMap.get():
	 *     1) Locate the Segment for the key and delegate to its get(); if no matching
	 *        HashEntry exists there, the result is null.
	 *     2) The segment first reads value without locking (HashEntry.value is volatile),
	 *        returning it directly when non-null.
	 *     3) A null value (possible only through instruction reordering while another
	 *        thread constructs the entry) is re-read under the segment lock.
	 */
	public V get(Object key) {
		int h = hash(key.hashCode());
		return segmentFor(h).get(key, h);
	}

	/**
	 * ConcurrentHashMap.Segment's get method: an optimistic, lock-free read with a locked fallback.
	 * 
	 * Note the concurrent scenario that must be considered:
	 * 	put() creates a new entry with: tab[index] = new HashEntry<K,V>(key, hash, first, value); 
	 * 		1) Conceptually this statement breaks down into three steps:
	 * 			(1) allocate memory for the new object,
	 * 			(2) initialize the object (assign key, hash, next, value),
	 * 			(3) point tab[index] at the newly allocated address.
	 * 		2) Steps (2) and (3) may be reordered:
	 * 			a compiler happens to reorder a HashEntry initialization with its table assignment
	 *			allocate --> assign tab[index] --> initialize the HashEntry's key, hash, next, value.
	 *		3) If another thread executing put() has already assigned tab[index] plus the entry's
	 *		  key and hash but not yet its value, a thread running get() concurrently may observe:
	 *		  e (i.e. tab[index]) != null and e.hash == hash && key.equals(e.key) but e.value == null.
	 *		  Hence after reading e.value we must null-check it; if it is null, re-read under the lock.
	 */
	V get(Object key, int hash) {
		if (count != 0) { 		// read-volatile	(transient volatile int count;)
			HashEntry<K,V> e = getFirst(hash);
			while (e != null) {
				if (e.hash == hash && key.equals(e.key)) { // found the HashEntry for this key
					V v = e.value;	
					if (v != null)  // non-null: safe to return directly (value is volatile)
						return v;
					return readValueUnderLock(e); // recheck: lock and re-read
				}
				e = e.next;
			}
		}
		return null;
	}

	/**
	 * ConcurrentHashMap.Segment's readValueUnderLock method.
	 * 
	 * Reads value field of an entry under lock. Called if value field ever appears to be null. 
	 * This is possible only if a compiler happens to reorder a HashEntry initialization with its table assignment, which is legal under memory model but is not known to ever occur.
	 */
	V readValueUnderLock(HashEntry<K,V> e) {
		lock();
		try {
			return e.value;
		} finally {
			unlock();
		}
	}
	
	/**
	 * Returns the Segment that is responsible for the given hash value
	 * (index = high bits of the hash, masked to the segment-array length).
	 */
	final Segment<K,V> segmentFor(int hash) {
		int index = (hash >>> segmentShift) & segmentMask;
		return segments[index];
	}
	
	/**
	 * ConcurrentHashMap list entry. Note that this is never exported out as a user-visible Map.Entry.
	 *
	 * Because the value field is volatile, not final, it is legal wrt the Java Memory Model for an unsynchronized reader to see null instead of initial value when read via a data race.  
	 * Although a reordering leading to this is not likely to ever actually occur, 
	 * the Segment.readValueUnderLock method is used as a backup in case a null (pre-initialized) value is ever seen in an unsynchronized access method.
	 */
	static final class HashEntry<K,V> {
		final K key;
		final int hash;
		volatile V value;			// volatile: if another thread modifies this value, a concurrent get() is guaranteed to observe the latest write.
		final HashEntry<K,V> next; 

		HashEntry(K key, int hash, HashEntry<K,V> next, V value) {
			this.key = key;
			this.hash = hash;
			this.next = next;
			this.value = value;
		}

		@SuppressWarnings("unchecked")
		static final <K,V> HashEntry<K,V>[] newArray(int i) {
			return new HashEntry[i];
		}
	}
	
// ************************************************ End of the JDK 1.6 get() supplement ************************************************


/**
 * A Segment behaves like a small Hashtable.
 * 
 * Segments are specialized versions of hash tables.  
 * This subclasses from ReentrantLock opportunistically, just to simplify some locking and avoid separate construction.
 */
static final class Segment<K,V> extends ReentrantLock implements Serializable {

    private static final long serialVersionUID = 2249069246763182397L;

    /**
     * The maximum number of times to tryLock in a prescan before possibly blocking on acquire in preparation for a locked segment operation. 
     * On multiprocessors, using a bounded number of retries maintains cache acquired while locating nodes.
     */
    static final int MAX_SCAN_RETRIES =
        Runtime.getRuntime().availableProcessors() > 1 ? 64 : 1;

    /**
     * The entry array that stores this segment's data.
     * The per-segment table. Elements are accessed via entryAt/setEntryAt providing volatile semantics.
     */
    transient volatile HashEntry<K,V>[] table;

    /**
     * The number of elements in this Segment.
     * 
     * The number of elements. 
     * Accessed only either within locks or among other volatile reads that maintain visibility.
     */
    transient int count;

    /**
     * The number of operations (e.g. put, remove) that affected the size of the table.
     * 
     * The total number of mutative operations in this segment.
     * Even though this may overflows 32 bits, it provides sufficient accuracy for stability checks in CHM isEmpty() and size() methods.  
     * Accessed only either within locks or among other volatile reads that maintain visibility.
     */
    transient int modCount;

    /**
     * This Segment's resize threshold: threshold = capacity * loadFactor
     */
    transient int threshold;

    /**
     * This Segment's load factor.
     */
    final float loadFactor;

    Segment(float lf, int threshold, HashEntry<K,V>[] tab) {
        this.loadFactor = lf;
        this.threshold = threshold;
        this.table = tab;
    }

    final V put(K key, int hash, V value, boolean onlyIfAbsent) {
		
		// Acquire this Segment's exclusive lock. If tryLock() succeeds immediately, node is null;
		// otherwise scanAndLockForPut() spins/blocks for the lock and returns a speculatively
		// created HashEntry (non-null only when no entry for the key was seen while scanning).
        HashEntry<K,V> node = tryLock() ? null : scanAndLockForPut(key, hash, value);
        V oldValue;
        try {
            HashEntry<K,V>[] tab = table;
            int index = (tab.length - 1) & hash;
            HashEntry<K,V> first = entryAt(tab, index);
            for (HashEntry<K,V> e = first;;) {
                if (e != null) {
                    K k;
                    if ((k = e.key) == key ||
                        (e.hash == hash && key.equals(k))) {
                        oldValue = e.value;
                        if (!onlyIfAbsent) {
                            e.value = value;
                            ++modCount;
                        }
                        break;
                    }
                    e = e.next;
                } else {
                    if (node != null)			// reuse the node pre-created by scanAndLockForPut() while waiting for the lock
                        node.setNext(first);
                    else						// no pre-created node: build the new HashEntry here
                        node = new HashEntry<K,V>(hash, key, value, first); 
						
                    int c = count + 1;
                    if (c > threshold && tab.length < MAXIMUM_CAPACITY)	// past the threshold: resize (rehash also inserts node)
                        rehash(node);
                    else
                        setEntryAt(tab, index, node);					// below the threshold: link node at the head of the bucket
                    ++modCount;
                    count = c;
                    oldValue = null;
                    break;
                }
            }
        } finally {
			// Release this Segment's exclusive lock
            unlock();
        }
        return oldValue;
    }

    /**
     * Looks for the entry matching the given key while acquiring the lock. Returns null if an entry
     * for the key was seen; otherwise returns a freshly created HashEntry.
     * When this method returns, the current thread is guaranteed to hold this Segment's lock.
     * 
     * Scans for a node containing given key while trying to acquire lock, creating and returning one if not found. 
     * Upon return, guarantees that lock is held. 
     *
     * @return a new node if key not found, else null
     */
    private HashEntry<K,V> scanAndLockForPut(K key, int hash, V value) {
        HashEntry<K,V> first = entryForHash(this, hash); // "this" is the current Segment
        HashEntry<K,V> e = first;
        HashEntry<K,V> node = null;
        int retries = -1; // negative while locating node
		
        while (!tryLock()) {	// keep spinning on tryLock() until the Segment lock is obtained
            HashEntry<K,V> f; // to recheck first below
            if (retries < 0) {
                if (e == null) {
                    if (node == null) // speculatively create node
                        node = new HashEntry<K,V>(hash, key, value, null);
                    retries = 0;
                }
                else if (key.equals(e.key))
                    retries = 0;
                else
                    e = e.next;
            }
			// If the retry count exceeds MAX_SCAN_RETRIES (1 on a single core, 64 on multicore),
			// fall back to a blocking lock().
            else if (++retries > MAX_SCAN_RETRIES) {
                lock();
                break;
            }
			// If a new entry was prepended to this bucket meanwhile, restart the scan.
            else if ((retries & 1) == 0 && (f = entryForHash(this, hash)) != first) { 
                e = first = f; // re-traverse if entry changed
                retries = -1;
            }
        }
        return node;
    }
	

    /**
     * Grows the table to twice its previous size.
     * Doubles size of table and repacks entries, also adding the given node to new table
     */
    @SuppressWarnings("unchecked")
    private void rehash(HashEntry<K,V> node) {
        /*
         * Reclassify nodes in each list to new table.  Because we
         * are using power-of-two expansion, the elements from each bin must either stay at same index, or move with a power of two offset. 
		 * We eliminate unnecessary node
         * creation by catching cases where old nodes can be
         * reused because their next fields won't change.
         * Statistically, at the default threshold, only about
         * one-sixth of them need cloning when a table
         * doubles. The nodes they replace will be garbage
         * collectable as soon as they are no longer referenced by
         * any reader thread that may be in the midst of
         * concurrently traversing table. Entry accesses use plain
         * array indexing because they are followed by volatile
         * table write.
         */
        HashEntry<K,V>[] oldTable = table;
        int oldCapacity = oldTable.length;
        int newCapacity = oldCapacity << 1;		// double the previous capacity
        threshold = (int)(newCapacity * loadFactor);
        HashEntry<K,V>[] newTable = (HashEntry<K,V>[]) new HashEntry[newCapacity];
        int sizeMask = newCapacity - 1;
        for (int i = 0; i < oldCapacity ; i++) {
            HashEntry<K,V> e = oldTable[i];
            if (e != null) {
                HashEntry<K,V> next = e.next;
                int idx = e.hash & sizeMask;
                if (next == null)   //  single-node bucket: move the node as-is
                    newTable[idx] = e;
                else { // Reuse consecutive sequence at same slot
                    HashEntry<K,V> lastRun = e;
                    int lastIdx = idx;
                    for (HashEntry<K,V> last = next; last != null; last = last.next) {
                        int k = last.hash & sizeMask;
                        if (k != lastIdx) {
                            lastIdx = k;
                            lastRun = last;
                        }
                    }
                    newTable[lastIdx] = lastRun;
                    // Clone remaining nodes
                    for (HashEntry<K,V> p = e; p != lastRun; p = p.next) {
                        V v = p.value;
                        int h = p.hash;
                        int k = h & sizeMask;
                        HashEntry<K,V> n = newTable[k];
                        newTable[k] = new HashEntry<K,V>(h, p.key, v, n);
                    }
                }
            }
        }
        int nodeIndex = node.hash & sizeMask; // add the new node
        node.setNext(newTable[nodeIndex]);
        newTable[nodeIndex] = node;
        table = newTable;
    }


    /**
     * Scans for a node containing the given key while trying to acquire lock for a remove or replace operation. 
	 * Upon return, guarantees that lock is held.  
	 * Note that we must lock even if the key is not found, to ensure sequential consistency of updates.
     */
    private void scanAndLock(Object key, int hash) {
        // similar to but simpler than scanAndLockForPut
        HashEntry<K,V> first = entryForHash(this, hash);
        HashEntry<K,V> e = first;
        int retries = -1;
        while (!tryLock()) {
            HashEntry<K,V> f;
            if (retries < 0) {
                if (e == null || key.equals(e.key))
                    retries = 0;
                else
                    e = e.next;
            }
            else if (++retries > MAX_SCAN_RETRIES) {
                lock();
                break;
            }
            else if ((retries & 1) == 0 &&
                     (f = entryForHash(this, hash)) != first) {
                e = first = f;
                retries = -1;
            }
        }
    }
}

/**
 * 
 * value is volatile: if another thread modifies this entry's value, a concurrent get() is
 * guaranteed to observe the most recent write.
 * 
 * ConcurrentHashMap list entry.
 */
static final class HashEntry<K,V> {
    final int hash;
    final K key;
    volatile V value;
    volatile HashEntry<K,V> next;

    HashEntry(int hash, K key, V value, HashEntry<K,V> next) {
        this.hash = hash;
        this.key = key;
        this.value = value;
        this.next = next;
    }

    /**
     * Sets next field with volatile write semantics.  (See above
     * about use of putOrderedObject.)
     */
    final void setNext(HashEntry<K,V> n) {
        UNSAFE.putOrderedObject(this, nextOffset, n);
    }

    // Unsafe mechanics
    static final sun.misc.Unsafe UNSAFE;
    static final long nextOffset;
    static {
        try {
            UNSAFE = sun.misc.Unsafe.getUnsafe();
            Class k = HashEntry.class;
            nextOffset = UNSAFE.objectFieldOffset
                (k.getDeclaredField("next"));
        } catch (Exception e) {
            throw new Error(e);
        }
    }
}


/**
 * 
 * First performs up to three unlocked passes (note: JDK 6 used two); if two consecutive passes
 * observe the same total modCount, that result is returned directly. Otherwise every Segment is
 * locked and the count is recomputed.
 * 
 * Returns the number of key-value mappings in this map.  
 * If the map contains more than <tt>Integer.MAX_VALUE</tt> elements, returns Integer.MAX_VALUE.
 */
public int size() {
    // Try a few times to get accurate count. On failure due to continuous async changes in table, resort to locking.
    final Segment<K,V>[] segments = this.segments;
    int size;
    boolean overflow; // true if size overflows 32 bits
    long sum;         // sum of modCounts
    long last = 0L;   // the sum observed on the previous pass
    int retries = -1; // number of passes so far (0 = first, 1 = second, 2 = third)
    try {
        for (;;) {
			
			// After the unlocked passes are exhausted, lock every Segment before recounting.
            if (retries++ == RETRIES_BEFORE_LOCK) {  // RETRIES_BEFORE_LOCK=2
                for (int j = 0; j < segments.length; ++j)
                    ensureSegment(j).lock(); 
            }
			
            sum = 0L;
            size = 0;
            overflow = false;
			
            for (int j = 0; j < segments.length; ++j) {
                Segment<K,V> seg = segmentAt(segments, j);
                if (seg != null) {
                    sum += seg.modCount;			// the map's modCount is the sum of all Segment modCounts
                    int c = seg.count;
                    if (c < 0 || (size += c) < 0)	// the map's size is the sum of all Segment counts: size += c
                        overflow = true;
                }
            }
			
			// If this pass saw the same modCount sum as the previous pass, the result is stable: stop.
            if (sum == last)  break;
            last = sum;
        }
    } finally {
		// If the Segments were locked above, unlock them all.
        if (retries > RETRIES_BEFORE_LOCK) {	
            for (int j = 0; j < segments.length; ++j)
                segmentAt(segments, j).unlock();
        }
    }
    return overflow ? Integer.MAX_VALUE : size;
}

// ...

}

Related articles
Related tags / search