5.分配slab對象
/**
 * kmem_cache_alloc - Allocate an object from this cache.
 * @cachep: the cache to allocate from.
 * @flags:  GFP allocation flags.
 *
 * Thin traced wrapper around slab_alloc().
 * Return: pointer to the new object, or NULL on failure.
 */
void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
	void *objp = slab_alloc(cachep, flags, _RET_IP_);

	trace_kmem_cache_alloc(_RET_IP_, objp, cachep->object_size,
			       cachep->size, flags);

	return objp;
}
slab對象的分配是從這個函數開始的,它的核心函數是slab_alloc。
/*
 * slab_alloc - common entry point for object allocation.
 *
 * Masks @flags against gfp_allowed_mask, runs the pre-alloc hook (which
 * may fail or redirect the cache), then performs the actual allocation
 * with local interrupts disabled, and finally runs the debug and
 * post-alloc hooks.
 *
 * Fix: removed a stray "???" token left between statements by the
 * original transcription — it is not valid C and broke compilation.
 *
 * Returns the new object, zeroed if __GFP_ZERO was requested, or NULL.
 */
static __always_inline void *
slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
{
	unsigned long save_flags;
	void *objp;

	flags &= gfp_allowed_mask;
	cachep = slab_pre_alloc_hook(cachep, flags);
	if (unlikely(!cachep))
		return NULL;

	cache_alloc_debugcheck_before(cachep, flags);
	/* allocation itself runs with local interrupts off */
	local_irq_save(save_flags);
	objp = __do_cache_alloc(cachep, flags);
	local_irq_restore(save_flags);
	objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
	prefetchw(objp);

	if (unlikely(flags & __GFP_ZERO) && objp)
		memset(objp, 0, cachep->object_size);

	slab_post_alloc_hook(cachep, flags, 1, &objp);
	return objp;
}

/* Forwards to the local-node fast path (____cache_alloc). */
static __always_inline void *
__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
	return ____cache_alloc(cachep, flags);
}
這個函數中的核心函數就是__do_cache_alloc->____cache_alloc。調用以前會先關閉本地中斷。
/*
 * Fast-path allocation: pop one object off this CPU's array_cache;
 * fall back to cache_alloc_refill() when the local cache is empty.
 * Must be called with local interrupts disabled (check_irq_off()).
 */
static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
	void *objp;
	struct array_cache *ac;

	check_irq_off();
	ac = cpu_cache_get(cachep);	/* get this CPU's local array_cache */
	if (likely(ac->avail)) {	/* any free objects cached locally? */
		ac->touched = 1;
		objp = ac->entry[--ac->avail];	/* pop one free object */

		STATS_INC_ALLOCHIT(cachep);
		goto out;
	}

	STATS_INC_ALLOCMISS(cachep);
	/* local cache empty: refill it and hand back one object */
	objp = cache_alloc_refill(cachep, flags);
	/*
	 * 'ac' may have been updated by cache_alloc_refill(),
	 * and kmemleak_erase() requires its correct value.
	 */
	ac = cpu_cache_get(cachep);

out:
	/*
	 * To avoid a false negative, if an object that is in one of the
	 * per-CPU caches is leaked, we need to make sure kmemleak doesn't
	 * treat the array pointer as a reference to the object.
	 */
	if (objp)
		kmemleak_erase(&ac->entry[ac->avail]);
	return objp;
}
若是沒有可用的對象時,會調用cache_alloc_refill來申請新的內存緩存
/*
 * Refill the per-CPU array_cache: first from the node's shared cache,
 * then from the partial/free slab lists, and finally by growing the
 * cache with fresh pages.  Returns one object, or NULL on failure.
 * Called with local interrupts disabled.
 */
static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
{
	int batchcount;
	struct kmem_cache_node *n;
	struct array_cache *ac, *shared;
	int node;
	void *list = NULL;
	struct page *page;

	check_irq_off();
	node = numa_mem_id();

	ac = cpu_cache_get(cachep);	/* this CPU's local object cache */
	batchcount = ac->batchcount;
	if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
		/*
		 * If there was little recent activity on this cache, then
		 * perform only a partial refill.  Otherwise we could
		 * generate refill bouncing.
		 */
		batchcount = BATCHREFILL_LIMIT;
	}
	n = get_node(cachep, node);	/* slab node for this NUMA node */

	BUG_ON(ac->avail > 0 || !n);
	shared = READ_ONCE(n->shared);	/* node-wide shared object cache */
	if (!n->free_objects && (!shared || !shared->avail))
		goto direct_grow;

	spin_lock(&n->list_lock);
	shared = READ_ONCE(n->shared);

	/* See if we can refill from the shared array */
	if (shared && transfer_objects(ac, shared, batchcount)) {
		/* moved up to batchcount objects into the local cache */
		shared->touched = 1;
		goto alloc_done;
	}

	while (batchcount > 0) {
		/* Get slab alloc is to come from. */
		page = get_first_slab(n, false);
		if (!page)
			goto must_grow;

		check_spinlock_acquired(cachep);

		batchcount = alloc_block(cachep, ac, page, batchcount);
		fixup_slab_list(cachep, n, page, &list);
	}

must_grow:
	n->free_objects -= ac->avail;
alloc_done:
	spin_unlock(&n->list_lock);
	fixup_objfreelist_debug(cachep, &list);

direct_grow:
	if (unlikely(!ac->avail)) {
		/* Check if we can use obj in pfmemalloc slab */
		if (sk_memalloc_socks()) {
			void *obj = cache_alloc_pfmemalloc(cachep, n, flags);

			if (obj)
				return obj;
		}

		/* no free objects anywhere: grow the cache with new pages */
		page = cache_grow_begin(cachep, gfp_exact_node(flags), node);

		/*
		 * cache_grow_begin() can reenable interrupts,
		 * then ac could change.
		 */
		ac = cpu_cache_get(cachep);
		if (!ac->avail && page)
			alloc_block(cachep, ac, page, batchcount);
		cache_grow_end(cachep, page);

		if (!ac->avail)
			return NULL;
	}
	ac->touched = 1;

	return ac->entry[--ac->avail];
}
若是共享緩衝池中沒有空閒對象,會先查看slabs_partial鏈表和slabs_free鏈表。
/*
 * Pick a slab page to allocate from: prefer a partially-used slab, and
 * only fall back to a completely free one when no partial slab exists.
 * Caller must hold n->list_lock.
 */
static struct page *get_first_slab(struct kmem_cache_node *n, bool pfmemalloc)
{
	struct page *page;

	assert_spin_locked(&n->list_lock);
	/* fill partial slabs first before breaking into free slabs */
	page = list_first_entry_or_null(&n->slabs_partial, struct page, lru);
	if (!page) {
		n->free_touched = 1;
		page = list_first_entry_or_null(&n->slabs_free, struct page,
						lru);
		if (page)
			n->free_slabs--;	/* page leaves the free list */
	}

	if (sk_memalloc_socks())
		/* may need to filter out (or accept) a pfmemalloc slab */
		page = get_valid_first_slab(n, page, pfmemalloc);

	return page;
}
申請分配新的slab。
/*
 * Begin growing the cache: allocate fresh pages from the buddy system,
 * compute the slab's colour offset, set up freelist management and
 * initialize every object.  Returns the new slab page or NULL.
 * May temporarily re-enable local interrupts if the flags allow
 * blocking.
 */
static struct page *cache_grow_begin(struct kmem_cache *cachep,
				gfp_t flags, int nodeid)
{
	void *freelist;
	size_t offset;
	gfp_t local_flags;
	int page_node;
	struct kmem_cache_node *n;
	struct page *page;

	/*
	 * Be lazy and only check for valid flags here, keeping it out of
	 * the critical path in kmem_cache_alloc().
	 */
	if (unlikely(flags & GFP_SLAB_BUG_MASK)) {
		gfp_t invalid_mask = flags & GFP_SLAB_BUG_MASK;

		flags &= ~GFP_SLAB_BUG_MASK;
		pr_warn("Unexpected gfp: %#x (%pGg). Fixing up to gfp: %#x (%pGg). Fix your code!\n",
				invalid_mask, &invalid_mask, flags, &flags);
		dump_stack();
	}
	WARN_ON_ONCE(cachep->ctor && (flags & __GFP_ZERO));
	local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);

	check_irq_off();
	if (gfpflags_allow_blocking(local_flags))
		local_irq_enable();

	/*
	 * Get mem for the objs.  Attempt to allocate a physical page
	 * from 'nodeid'.
	 */
	page = kmem_getpages(cachep, local_flags, nodeid);
	if (!page)
		goto failed;

	page_node = page_to_nid(page);
	n = get_node(cachep, page_node);

	/* Get colour for the slab, and calculate the next value. */
	n->colour_next++;
	if (n->colour_next >= cachep->colour)
		n->colour_next = 0;

	offset = n->colour_next;
	if (offset >= cachep->colour)
		offset = 0;

	offset *= cachep->colour_off;	/* byte offset of the first object */

	/* Get slab management. */
	/* compute cache colour and freelist, i.e. the object layout */
	freelist = alloc_slabmgmt(cachep, page, offset,
			local_flags & ~GFP_CONSTRAINT_MASK, page_node);
	if (OFF_SLAB(cachep) && !freelist)
		goto opps1;

	/* link page -> cache/freelist so free can find its metadata */
	slab_map_pages(cachep, page, freelist);

	kasan_poison_slab(page);
	cache_init_objs(cachep, page);

	if (gfpflags_allow_blocking(local_flags))
		local_irq_disable();

	return page;

opps1:
	kmem_freepages(cachep, page);
failed:
	if (gfpflags_allow_blocking(local_flags))
		local_irq_disable();
	return NULL;
}

/*
 * Set up freelist management for a new slab page.  Depending on the
 * cache type the freelist lives nowhere (OBJFREELIST_SLAB), in a
 * separate off-slab allocation, or in the last freelist_size bytes of
 * the slab itself.
 */
static void *alloc_slabmgmt(struct kmem_cache *cachep,
				   struct page *page, int colour_off,
				   gfp_t local_flags, int nodeid)
{
	void *freelist;
	void *addr = page_address(page);

	page->s_mem = addr + colour_off;	/* first object starts here */
	page->active = 0;			/* no objects handed out yet */

	if (OBJFREELIST_SLAB(cachep))
		freelist = NULL;
	else if (OFF_SLAB(cachep)) {
		/* Slab management obj is off-slab. */
		freelist = kmem_cache_alloc_node(cachep->freelist_cache,
					      local_flags, nodeid);
		if (!freelist)
			return NULL;
	} else {
		/* We will use last bytes at the slab for freelist */
		freelist = addr + (PAGE_SIZE << cachep->gfporder) -
				cachep->freelist_size;
	}

	return freelist;
}
freelist能夠當作一個char型的數組,每一個對象佔用一個元素來保存對象序號。s_mem就是第一個對象開始的地址。若是freelist不在slab上,就會從freelist_cache中從新申請一塊內存來存放freelist。不然就取內存塊最後的freelist_size大小的內存用來存放freelist。
/*
 * Record in the page struct which cache this slab belongs to and where
 * its freelist lives, so a later free can map an object address back
 * to its slab metadata.
 */
static void slab_map_pages(struct kmem_cache *cache, struct page *page,
			   void *freelist)
{
	page->freelist = freelist;
	page->slab_cache = cache;
}
slab_map_pages函數會把freelist和kmem_cache賦給這個page。
/*
 * Initialize every object in a freshly grown slab page: run the
 * constructor (with kasan poisoning around it) and build the freelist
 * index array, optionally in randomized order.
 */
static void cache_init_objs(struct kmem_cache *cachep,
			    struct page *page)
{
	int i;
	void *objp;
	bool shuffled;

	cache_init_objs_debug(cachep, page);

	/* Try to randomize the freelist if enabled */
	shuffled = shuffle_freelist(cachep, page);

	if (!shuffled && OBJFREELIST_SLAB(cachep)) {
		/* the freelist is stored inside the slab's last object */
		page->freelist = index_to_obj(cachep, page, cachep->num - 1) +
						obj_offset(cachep);
	}

	for (i = 0; i < cachep->num; i++) {
		objp = index_to_obj(cachep, page, i);	/* i-th object addr */
		kasan_init_slab_obj(cachep, objp);

		/* constructor could break poison info */
		if (DEBUG == 0 && cachep->ctor) {
			kasan_unpoison_object_data(cachep, objp);
			cachep->ctor(objp);
			kasan_poison_object_data(cachep, objp);
		}

		if (!shuffled)
			set_free_obj(page, i, i);	/* sequential order */
	}
}

/* Store object index 'val' in slot 'idx' of the page's freelist array. */
static inline void set_free_obj(struct page *page,
					unsigned int idx, freelist_idx_t val)
{
	((freelist_idx_t *)(page->freelist))[idx] = val;
}
接下來,在cache_init_objs函數中,若是是OBJFREELIST_SLAB且沒有隨機化,page->freelist先指向了最後一個obj的位置。而後遍歷全部的obj,獲取這個obj的地址,並將這個obj的序號填入freelist中。
這時候,在cache_alloc_refill函數中再次去判斷array_cache中是否有可用的對象,若是沒有的話調用alloc_block。
/*
 * Move up to 'batchcount' free objects from the slab page into the
 * per-CPU array_cache.  Returns the remaining (unused) batchcount.
 */
static __always_inline int alloc_block(struct kmem_cache *cachep,
		struct array_cache *ac, struct page *page, int batchcount)
{
	/*
	 * There must be at least one object available for
	 * allocation.
	 */
	BUG_ON(page->active >= cachep->num);

	while (page->active < cachep->num && batchcount--) {
		STATS_INC_ALLOCED(cachep);
		STATS_INC_ACTIVE(cachep);
		STATS_SET_HIGH(cachep);

		/* pop one object off the slab into the local cache */
		ac->entry[ac->avail++] = slab_get_obj(cachep, page);
	}

	return batchcount;
}
若是page中活躍的obj數量小於kmem_cache中的對象總數,就將page中的空閒對象寫入本地對象緩衝池array_cache中。
/*
 * Finish growing the cache: hook the new slab page into the node's
 * slab lists and account its remaining free objects.
 */
static void cache_grow_end(struct kmem_cache *cachep, struct page *page)
{
	struct kmem_cache_node *n;
	void *list = NULL;

	check_irq_off();

	if (!page)
		return;

	INIT_LIST_HEAD(&page->lru);
	n = get_node(cachep, page_to_nid(page));

	spin_lock(&n->list_lock);
	n->total_slabs++;	/* node now owns one more slab */
	if (!page->active) {
		/* nothing allocated yet: whole slab goes on the free list */
		list_add_tail(&page->lru, &(n->slabs_free));
		n->free_slabs++;
	} else
		/* some objects were already handed out during the grow */
		fixup_slab_list(cachep, n, page, &list);

	STATS_INC_GROWN(cachep);
	/* credit the node with the objects still free in this slab */
	n->free_objects += cachep->num - page->active;
	spin_unlock(&n->list_lock);

	fixup_objfreelist_debug(cachep, &list);
}
接下來執行cache_grow_end函數,先獲取到這個頁的node節點,該節點上的slab數量加1。若是page中的對象都是空閒的,就將page的lru鏈表加在node的slabs_free鏈表後面。若是已經有對象被申請過了,就執行fixup_slab_list。最後,node中的可用obj的數量要加上這個kmem_cache的總數減去已經在使用的。
/*
 * Move the slab page onto the node list matching its state:
 * slabs_full when every object is in use, slabs_partial otherwise.
 * Caller holds n->list_lock.
 */
static inline void fixup_slab_list(struct kmem_cache *cachep,
				struct kmem_cache_node *n, struct page *page,
				void **list)
{
	/* move slabp to correct slabp list: */
	list_del(&page->lru);
	if (page->active == cachep->num) {
		list_add(&page->lru, &n->slabs_full);
		if (OBJFREELIST_SLAB(cachep)) {
#if DEBUG
			/* Poisoning will be done without holding the lock */
			if (cachep->flags & SLAB_POISON) {
				void **objp = page->freelist;

				*objp = *list;
				*list = objp;
			}
#endif
			/* freelist lived in the last object; it's in use now */
			page->freelist = NULL;
		}
	} else
		list_add(&page->lru, &n->slabs_partial);
}
若是kmem_cache中的全部對象都在使用了,就把這個page->lru添加到node的slabs_full鏈表中去;若是是OBJFREELIST_SLAB,還要將page的freelist置爲空。不然的話,就將page->lru加入到node的slabs_partial鏈表中去。
這時,在cache_alloc_refill函數最後就能夠返回avail所指向的對象。就是可用的對象。