cache_t的結構分析

時間 2020-01-09

標籤 cache 結構分析简体版

原文原文鏈接

一.探索前需知

上篇文章已經討論過類的結構是個結構體，裏面有 isa、superclass、cache_t cache、class_data_bits_t bits，這樣咱們能夠經過 lldb 的 x/4gx Class 看到類的內存信息，首地址(isa)偏移16字節（isa 與 superclass 各佔 8 字節）便獲得cache_t所在的地址.

struct objc_class : objc_object {    // Class ISA;    Class superclass;    cache_t cache;             // formerly cache pointer and vtable    class_data_bits_t bits;    // class_rw_t * plus custom rr/alloc flags複製代碼

二.cache_t 的初步瞭解

1.cache_t 的底層結構

// Per-class method cache as declared in the quoted runtime source.
// Member order is kept exactly as quoted because it defines the memory layout.
struct cache_t {
    struct bucket_t *_buckets;   // bucket array holding the cached methods
    mask_t _mask;                // written by setBucketsAndMask(); reallocate() passes capacity - 1
    mask_t _occupied;            // bumped by incrementOccupied() when an empty bucket is filled

public:
    // Accessors for the three fields above.
    struct bucket_t *buckets();
    mask_t mask();
    mask_t occupied();

    // Mutators used while filling or replacing the cache.
    void incrementOccupied();
    void setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask);
    void initializeToEmpty();

    // Capacity and state queries.
    mask_t capacity();
    bool isConstantEmptyCache();
    bool canBeFreed();

    static size_t bytesForCapacity(uint32_t cap);
    static struct bucket_t * endMarker(struct bucket_t *b, uint32_t cap);

    // Growth and lookup.
    void expand();
    void reallocate(mask_t oldCapacity, mask_t newCapacity);
    struct bucket_t * find(cache_key_t key, id receiver);

    static void bad_cache(id receiver, SEL sel, Class isa) __attribute__((noreturn));
};
複製代碼

能夠看到cache_t 是個結構體，裏面有buckets(能夠理解爲整個類裏面的緩存池),而緩存池裏有許許多多的bucket，這bucket就是每一個方法的緩存，_mask 和_occupied這個咱們下面再說，public 裏面是些關於整個緩存流程裏的公共方法.

經過lldb 進行調試:

此時可能會有些人疑問，明明調了alloc和class的方法爲何緩存裏沒有東西，其實若是看過上篇文章的朋友就會知道這些方法是類進行調用的，存在元類的緩存區裏.而斷點打在sayHello上還沒來得及緩存，下面咱們把斷點打在sayCode上.

cache_t裏面的 buckets緩存池裏的的確確緩存了sayHello的方法.可是每次使用lldb進行調試確實比較麻煩，咱們能夠換種方式進行調試.在下面先調用下三個對象方法，以下：

// Stand-in types that mirror the layout of the runtime's class structures, so a
// Class pointer can be reinterpreted and its method cache read field by field.
// Field order and sizes must match the runtime build being inspected.
typedef unsigned long  uintptr_t;   // declared first: cache_key_t is built on it
typedef uint32_t mask_t;
typedef uintptr_t cache_key_t;


// One cache entry (selector key + implementation).
// The runtime stores the IMP first only on arm64; other architectures store the
// key first. With the wrong order the two printed columns are swapped: in the
// run shown in this article the "%p" column held consecutive selector-name
// addresses (spaced by strlen(name) + 1) rather than function addresses.
struct lg_bucket_t {
#if __arm64__
    IMP _imp;
    cache_key_t _key;
#else
    cache_key_t _key;
    IMP _imp;
#endif
};

// Mirror of cache_t: bucket array, mask (capacity - 1) and number of used buckets.
struct lg_cache_t {
    struct lg_bucket_t *_buckets;
    mask_t _mask;
    mask_t _occupied;
};

// Mirror of class_data_bits_t: a single pointer-sized word.
struct lg_class_data_bits_t {
    uintptr_t bits;
};

// Mirror of objc_class: isa and superclass come first, so the cache starts
// two pointers into the object.
struct lg_objc_class {
    Class ISA;
    Class superclass;
    struct lg_cache_t cache;             // formerly cache pointer and vtable
    struct lg_class_data_bits_t bits;    // class_rw_t * plus custom rr/alloc flags
};

// Demo: send three instance methods, then dump every bucket of LGPerson's
// method cache through the lg_* mirror structs.
int main(int argc, const char * argv[]) {
    @autoreleasepool {
        LGPerson *person = [LGPerson alloc];
        Class pClass = [LGPerson class];
        // Question explored here: why is cache_t still empty at the first call?
        [person sayHello];
        [person sayCode];
        // Third cached method: reaches the 3/4 threshold of the initial capacity
        // of 4 (3 <= 4 / 4 * 3), so it still fits; one more would force an expand.
        [person sayNB];

        struct lg_objc_class *lg_pClass = (__bridge struct lg_objc_class *)(pClass);
        // _mask is capacity - 1, so the array holds _mask + 1 buckets. Looping
        // while i < _mask would silently skip the last bucket. The count is
        // widened to size_t so that + 1 cannot wrap.
        size_t bucketCount = (size_t)lg_pClass->cache._mask + 1;
        for (size_t i = 0; i < bucketCount; i++) {
            struct lg_bucket_t bucket = lg_pClass->cache._buckets[i];
            // %p expects a data pointer; cast the function pointer explicitly.
            NSLog(@"%lu - %p",bucket._key,(void *)bucket._imp);
        }
        NSLog(@"%@ - %p",person,pClass);
    }
    return 0;
}
複製代碼

咱們看下循環裏的打印結果：

2020-01-01 22:29:06.985587+0800 LGTest[2357:230137] 4294970368 - 0x100000e5c
2020-01-01 22:29:06.985647+0800 LGTest[2357:230137] 4294970416 - 0x100000e65
2020-01-01 22:29:06.985689+0800 LGTest[2357:230137] 4294970512 - 0x100000e6d
複製代碼

若是是調用四個對象方法會怎麼樣呢？

#import <Foundation/Foundation.h>
#import "LGPerson.h"
#import <objc/runtime.h>

// Stand-in types that mirror the layout of the runtime's class structures, so a
// Class pointer can be reinterpreted and its method cache read field by field.
// Field order and sizes must match the runtime build being inspected.
typedef unsigned long  uintptr_t;   // declared first: cache_key_t is built on it
typedef uint32_t mask_t;
typedef uintptr_t cache_key_t;


// One cache entry (selector key + implementation).
// The runtime stores the IMP first only on arm64; other architectures store the
// key first. With the wrong order the two printed columns are swapped: in the
// run shown in this article the "%p" column held a selector-name address
// rather than a function address.
struct lg_bucket_t {
#if __arm64__
    IMP _imp;
    cache_key_t _key;
#else
    cache_key_t _key;
    IMP _imp;
#endif
};

// Mirror of cache_t: bucket array, mask (capacity - 1) and number of used buckets.
struct lg_cache_t {
    struct lg_bucket_t *_buckets;
    mask_t _mask;
    mask_t _occupied;
};

// Mirror of class_data_bits_t: a single pointer-sized word.
struct lg_class_data_bits_t {
    uintptr_t bits;
};

// Mirror of objc_class: isa and superclass come first, so the cache starts
// two pointers into the object.
struct lg_objc_class {
    Class ISA;
    Class superclass;
    struct lg_cache_t cache;             // formerly cache pointer and vtable
    struct lg_class_data_bits_t bits;    // class_rw_t * plus custom rr/alloc flags
};

// Demo: send four instance methods, then dump every bucket of LGPerson's
// method cache through the lg_* mirror structs.
int main(int argc, const char * argv[]) {
    @autoreleasepool {
        LGPerson *person = [LGPerson alloc];
        Class pClass = [LGPerson class];
        // Question explored here: why is cache_t still empty at the first call?
        [person sayHello];
        [person sayCode];
        [person sayNB]; // threshold: the initial cache (capacity 4) is now 3/4 full
        // Fourth method: newOccupied exceeds 3/4 of the capacity, so the cache is
        // expanded and the previously cached entries are dropped.
        [person sayMaster];

        // Author's lecture notes, translated:
        //   - some value between 1,000 and 100,000
        //   - waste / dynamic methods
        //   - cache_t mask
        //   - how method caching works: shows there is something to handle
        //   - each method is cached as it arrives, with one special case
        //   - we cannot know which bucket holds the method, so take the
        //     "god's-eye view" and dump them all

        struct lg_objc_class *lg_pClass = (__bridge struct lg_objc_class *)(pClass);
        // _mask is capacity - 1, so the array holds _mask + 1 buckets. Looping
        // while i < _mask would silently skip the last bucket. The count is
        // widened to size_t so that + 1 cannot wrap.
        size_t bucketCount = (size_t)lg_pClass->cache._mask + 1;
        for (size_t i = 0; i < bucketCount; i++) {
            struct lg_bucket_t bucket = lg_pClass->cache._buckets[i];
            // %p expects a data pointer; cast the function pointer explicitly.
            NSLog(@"%lu - %p",bucket._key,(void *)bucket._imp);
        }
        NSLog(@"%@ - %p",person,pClass);
    }
    return 0;
}

複製代碼

輸出：

2020-01-01 22:37:43.437962+0800 LGTest[2385:234120] 0 - 0x0
2020-01-01 22:37:43.438056+0800 LGTest[2385:234120] 0 - 0x0
2020-01-01 22:37:43.438163+0800 LGTest[2385:234120] 0 - 0x0
2020-01-01 22:37:43.438252+0800 LGTest[2385:234120] 4294970464 - 0x100000e73
2020-01-01 22:37:43.438302+0800 LGTest[2385:234120] 0 - 0x0
2020-01-01 22:37:43.438379+0800 LGTest[2385:234120] 0 - 0x0
2020-01-01 22:37:43.438458+0800 LGTest[2385:234120] 0 - 0x0
複製代碼

其中cache_t裏的_mask爲7（即緩存池容量爲 _mask + 1 = 8），可是打印出來的bucket中只有一個地方有值，等於只有一個方法被存儲了，這是爲何呢？咱們只能在源碼裏找到答案.

三.cache_t 的底層探索

// Inserts the (sel -> imp) pair into cls's method cache, replacing or growing
// the cache first when needed. The caller must already hold cacheUpdateLock;
// this function only asserts that it does.
static void cache_fill_nolock(Class cls, SEL sel, IMP imp, id receiver)
{
    cacheUpdateLock.assertLocked();

    // Never cache before +initialize is done
    if (!cls->isInitialized()) return;

    // Make sure the entry wasn't added to the cache by some other thread
    // before we grabbed the cacheUpdateLock.
    if (cache_getImp(cls, sel)) return;

    cache_t *cache = getCache(cls);
    cache_key_t key = getKey(sel);

    // Use the cache as-is if it is less than 3/4 full
    mask_t newOccupied = cache->occupied() + 1;
    mask_t capacity = cache->capacity();
    if (cache->isConstantEmptyCache()) {
        // Cache is read-only. Replace it.
        cache->reallocate(capacity, capacity ?: INIT_CACHE_SIZE);
    }
    else if (newOccupied <= capacity / 4 * 3) {
        // Cache is less than 3/4 full. Use it as-is.
    }
    else {
        // Cache is too full. Expand it.
        cache->expand();
    }

    // Scan for the first unused slot and insert there.
    // There is guaranteed to be an empty slot because the
    // minimum size is 4 and we resized at 3/4 full.
    bucket_t *bucket = cache->find(key, receiver);
    if (bucket->key() == 0) cache->incrementOccupied();
    bucket->set(key, imp);
}

首先 cacheUpdateLock.assertLocked(); 並不是在這裏加鎖，而是斷言調用方已經持有 cacheUpdateLock（函數名中的 _nolock 也說明加鎖由調用者負責）.由於緩存填充會被很頻繁地調用，因此必須在持鎖的狀態下操做，避免多線程同時修改cache形成混亂.

if (cache_getImp(cls, sel)) return 進行下判斷當前的方法有沒有以前被緩存過.

cache_t *cache = getCache(cls);cache_key_t key = getKey(sel);

這步操做是獲取當前類的緩存，並將sel 強轉成cache_key_t 類型的key.

mask_t newOccupied = cache->occupied() + 1;

cache->occupied() 獲取當前類中已經存儲的方法個數，由於如今正在執行存儲的操做因此 newOccupied = cache->occupied() + 1

mask_t capacity = cache->capacity();

獲取當前類的存儲空間.

下一步就要進行緩存了這又分爲了三種狀況：

3.1 當前這個類以前沒有進行存儲也就是說當前代碼剛剛走到調用第一個實例方法的時候.(cache->occupied() = 0 , cache->capacity() = 0) cache->isConstantEmptyCache() 判斷爲YES，進入到 cache->reallocate(capacity, capacity ?: INIT_CACHE_SIZE);的流程。

由於 cache->capacity() = 0，因此 newCapacity = INIT_CACHE_SIZE (1 << INIT_CACHE_SIZE_LOG2) 也就是等於4

void cache_t::reallocate(mask_t oldCapacity, mask_t newCapacity)
{
    bool freeOld = canBeFreed();

    bucket_t *oldBuckets = buckets();
    bucket_t *newBuckets = allocateBuckets(newCapacity);

    // Cache's old contents are not propagated. // This is thought to save cache memory at the cost of extra cache fills. // fixme re-measure this assert(newCapacity > 0); assert((uintptr_t)(mask_t)(newCapacity-1) == newCapacity-1); setBucketsAndMask(newBuckets, newCapacity - 1); if (freeOld) { cache_collect_free(oldBuckets, oldCapacity); cache_collect(false); } } bool cache_t::canBeFreed(){ return !isConstantEmptyCache();}複製代碼

第一個判斷就是判斷是不是類第一次進行存儲，若是是第一次存儲 freeOld 會返回false，不然返回yes。

bucket_t *oldBuckets = buckets();

bucket_t *newBuckets = allocateBuckets(newCapacity);

這個就是獲取舊的緩存池和設置新的緩存池(設置緩存池的空間有多少)

setBucketsAndMask(newBuckets, newCapacity - 1);

在這裏設置緩存池裏的 mask 爲緩存空間 - 1，因此當第一個方法存儲完以後 mask會爲3 ，這和上面lldb所打印的正好是吻合的，

bucket_t *bucket = cache->find(key, receiver);

if (bucket->key() == 0) cache->incrementOccupied();

bucket->set(key, imp);

最後經過 cache 的哈希算法，在buckets（緩存池）裏找到最適合的bucket，將方法實現imp和key 關聯起來.

關於這方法的具體實現以下:

// Looks up the bucket for key k by probing the bucket array.
// Returns the bucket whose key equals k, or the first bucket found with key 0.
// If the probe comes back to its starting index without finding either, the
// cache is treated as corrupt and bad_cache() is called, which is declared
// noreturn.
bucket_t * cache_t::find(cache_key_t k, id receiver)
{
    assert(k != 0);

    bucket_t *b = buckets();
    mask_t m = mask();
    // cache_hash maps key k to the starting index begin (the article gives it as begin = k & m); the probe starts there
    mask_t begin = cache_hash(k, m);
    // i is the moving probe index, initialised to begin
    mask_t i = begin;
    do {
        if (b[i].key() == 0  ||  b[i].key() == k) {
            // Bucket i matches when its key equals k: the lookup succeeded, so return that bucket.
            // A key of 0 means nothing has been cached at index i yet; that bucket is returned as well, which ends the probe.
            return &b[i];
        }
    } while ((i = cache_next(i, m)) != begin);
    
    // Per the article, cache_next amounts to i = i - 1: the loop then compares the key in the previous slot of the table,
    // and when i reaches 0 it is reset to mask, i.e. the last slot, so the table is walked backwards as a ring.
    // Starting from begin, the probe must come back to begin after one full lap; if no bucket with key k and no
    // empty bucket was seen by then, the loop ends, the lookup has failed, and bad_cache is called.
    // NOTE(review): cache_next is not shown here — confirm the probe direction for the target architecture.
 
    // hack
    Class cls = (Class)((uintptr_t)this - offsetof(objc_class, cache));
    cache_t::bad_cache(receiver, (SEL)k, cls);
}

複製代碼

3.2 當 newOccupied <= capacity / 4 * 3

這是什麼意思呢，就是第一個方法執行完以後，Occupied = 1 ，mask = 3，capacity = 4
進入第二個方法 newOccupied = Occupied + 1 爲2 2小於緩存池整個空間的3/4，等於說整個緩存空間還能夠容納第二個方法，因此就在buckets（緩存池中）找到最合適的bucket（緩存桶）和上面經過cache 哈希尋找是同樣的，將方法實現imp和key 關聯起來.

3.3 當 newOccupied > capacity / 4 * 3

在這種狀況下就須要擴容，擴大整個緩存池、擴大的空間爲以前的兩倍.

void cache_t::expand()
{
    cacheUpdateLock.assertLocked();
    
    uint32_t oldCapacity = capacity();
    uint32_t newCapacity = oldCapacity ? oldCapacity*2 : INIT_CACHE_SIZE;

    if ((uint32_t)(mask_t)newCapacity != newCapacity) {
        // mask overflow - can't grow further // fixme this wastes one bit of mask newCapacity = oldCapacity; } reallocate(oldCapacity, newCapacity); } 複製代碼

// Replaces the bucket array with a freshly allocated one of newCapacity
// entries and stores newCapacity - 1 as the mask. Existing entries are not
// copied over. The old array is handed to the collector only when
// canBeFreed() reports that it may be freed.
void cache_t::reallocate(mask_t oldCapacity, mask_t newCapacity)
{
    // Must be sampled before the buckets are swapped below.
    bool freeOld = canBeFreed();

    bucket_t *oldBuckets = buckets();
    bucket_t *newBuckets = allocateBuckets(newCapacity);

    // Cache's old contents are not propagated.
    // This is thought to save cache memory at the cost of extra cache fills.
    // fixme re-measure this

    assert(newCapacity > 0);
    assert((uintptr_t)(mask_t)(newCapacity-1) == newCapacity-1);

    setBucketsAndMask(newBuckets, newCapacity - 1);

    if (freeOld) {
        cache_collect_free(oldBuckets, oldCapacity);
        cache_collect(false);
    }
}

空間擴展完以後設置新的緩存池，而且會把以前的緩存給清除，最後在buckets（緩存池中）找到最合適的bucket（緩存桶）和上面經過cache 哈希尋找是同樣的，將方法實現imp和key 關聯起來.

四.總結

OC 中實例方法緩存在類上面，類方法緩存在元類上面。

cache_t 緩存會提早進行擴容防止溢出。

方法緩存是爲了最大化的提升程序的執行效率。

蘋果在方法緩存這裏用的是開放尋址法來解決哈希衝突。