【內核源碼學習筆記】slab分配器(3)建立slab描述符

4.建立slab描述符

struct kmem_cache *
kmem_cache_create(const char *name, unsigned int size, unsigned int align,
		slab_flags_t flags, void (*ctor)(void *))
{
	/*
	 * A plain cache is just a usercopy cache with an empty usercopy
	 * window (useroffset 0, usersize 0).
	 */
	struct kmem_cache *s;

	s = kmem_cache_create_usercopy(name, size, align, flags, 0, 0,
				       ctor);
	return s;
}

首先會先查找是否有已經建立的描述符能夠直接使用:

struct kmem_cache *
__kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
		   slab_flags_t flags, void (*ctor)(void *))
{
	/* Try to reuse a compatible existing cache instead of creating one. */
	struct kmem_cache *cachep = find_mergeable(size, align, flags, name, ctor);

	if (!cachep)
		return NULL;

	cachep->refcount++;

	/*
	 * Grow object_size so that kzalloc() through this alias clears the
	 * full object the new caller asked for.
	 */
	cachep->object_size = max_t(int, cachep->object_size, size);

	return cachep;
}


/*
 * Look for an existing cache that a new (size, align, flags) request can
 * be merged into.  Returns the mergeable cache, or NULL if none fits.
 */
struct kmem_cache *find_mergeable(unsigned int size, unsigned int align,
		slab_flags_t flags, const char *name, void (*ctor)(void *))
{
	struct kmem_cache *s;

	/* Merging can be disabled globally (slab_nomerge boot option). */
	if (slab_nomerge)
		return NULL;

	/* A constructor makes object contents cache-specific: never merge. */
	if (ctor)
		return NULL;

	size = ALIGN(size, sizeof(void *));
	align = calculate_alignment(flags, align, size);
	size = ALIGN(size, align);
	flags = kmem_cache_flags(size, flags, name, NULL);

	if (flags & SLAB_NEVER_MERGE)
		return NULL;

	list_for_each_entry_reverse(s, &slab_root_caches, root_caches_node) { //walk the root caches looking for a fitting size
		if (slab_unmergeable(s))
			continue;

		if (size > s->size)
			continue;

		if ((flags & SLAB_MERGE_SAME) != (s->flags & SLAB_MERGE_SAME))
			continue;
		/*
		 * Check if alignment is compatible.
		 * Courtesy of Adrian Drzewiecki
		 */
		if ((s->size & ~(align - 1)) != s->size)
			continue;

		/* Reject if more than a pointer's worth of space is wasted. */
		if (s->size - size >= sizeof(void *))
			continue;

		if (IS_ENABLED(CONFIG_SLAB) && align &&
			(align > s->align || s->align % align))
			continue;

		return s;
	}
	return NULL;
}

若是沒有找到,就會調用create_cache函數建立新的kmem_cache。

/*
 * Allocate and initialise a new kmem_cache descriptor, build its slab
 * structures, and link it onto the global slab_caches list.
 * Returns the new cache or an ERR_PTR() on failure.
 */
static struct kmem_cache *create_cache(const char *name,
		unsigned int object_size, unsigned int align,
		slab_flags_t flags, unsigned int useroffset,
		unsigned int usersize, void (*ctor)(void *),
		struct mem_cgroup *memcg, struct kmem_cache *root_cache)
{
	struct kmem_cache *s;
	int err;

	/* A usercopy window past the end of the object is invalid; drop it. */
	if (WARN_ON(useroffset + usersize > object_size))
		useroffset = usersize = 0;

	err = -ENOMEM;
	s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL); //allocate the kmem_cache descriptor itself
	if (!s)
		goto out;

	//record the caller-supplied name/size/align/... in the descriptor
	s->name = name;
	s->size = s->object_size = object_size;
	s->align = align;
	s->ctor = ctor;
	s->useroffset = useroffset;
	s->usersize = usersize;

	err = init_memcg_params(s, root_cache);
	if (err)
		goto out_free_cache;

	//build the slab buffers behind the descriptor
	err = __kmem_cache_create(s, flags);
	if (err)
		goto out_free_cache;

	s->refcount = 1;
	//publish the new cache on the global slab_caches list
	list_add(&s->list, &slab_caches);
	memcg_link_cache(s, memcg);
out:
	if (err)
		return ERR_PTR(err);
	return s;

out_free_cache:
	destroy_memcg_params(s);
	kmem_cache_free(kmem_cache, s);
	goto out;
}

這裏會先調用kmem_cache_zalloc申請一個kmem_cache數據結構,而後調用__kmem_cache_create()建立緩衝區,最後將緩衝區s->list加入到全局鏈表slab_caches中。

/*
 * Finish creation of a SLAB cache: compute the final alignment and object
 * size, pick a freelist layout (in a free object, off-slab, or on-slab),
 * and set up the per-CPU caches.  Returns 0 on success, negative errno
 * otherwise.
 */
int __kmem_cache_create(struct kmem_cache *cachep, slab_flags_t flags)
{
	size_t ralign = BYTES_PER_WORD;
	gfp_t gfp;
	int err;
	unsigned int size = cachep->size;

	/*
	 * Check that size is in unit of words.  This is needed to avoid
	 * unaligned accesses for some archs when redzoning is used, and makes
	 * sure any on-slab bufctl's are also correctly aligned.
	 */
	size = ALIGN(size, BYTES_PER_WORD); //round the size up to the machine word size

	if (flags & SLAB_RED_ZONE) {
		ralign = REDZONE_ALIGN;
		/* If redzoning, ensure that the second redzone is suitably
		 * aligned, by adjusting the object size accordingly. */
		size = ALIGN(size, REDZONE_ALIGN);
	}

	/* 3) caller mandated alignment */
	if (ralign < cachep->align) { //honour a stricter caller-requested alignment
		ralign = cachep->align;
	}
	/* disable debug if necessary */
	if (ralign > __alignof__(unsigned long long))
		flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
	/*
	 * 4) Store it.
	 */
	cachep->align = ralign;
	cachep->colour_off = cache_line_size(); //base colour offset is the L1 cache line size
	/* Offset must be a multiple of the alignment. */
	if (cachep->colour_off < cachep->align)
		cachep->colour_off = cachep->align;

	if (slab_is_available())
		gfp = GFP_KERNEL; //allocation mask once the allocator is up
	else
		gfp = GFP_NOWAIT;


	kasan_cache_create(cachep, &size, &flags);

	size = ALIGN(size, cachep->align); //final object size, rounded to the chosen alignment
	/*
	 * We should restrict the number of objects in a slab to implement
	 * byte sized index. Refer comment on SLAB_OBJ_MIN_SIZE definition.
	 */
	if (FREELIST_BYTE_INDEX && size < SLAB_OBJ_MIN_SIZE)
		size = ALIGN(SLAB_OBJ_MIN_SIZE, cachep->align);


	/* Try the three freelist layouts, from most to least preferred. */
	if (set_objfreelist_slab_cache(cachep, size, flags)) {
		flags |= CFLGS_OBJFREELIST_SLAB;
		goto done;
	}

	if (set_off_slab_cache(cachep, size, flags)) {
		flags |= CFLGS_OFF_SLAB;
		goto done;
	}

	if (set_on_slab_cache(cachep, size, flags))
		goto done;

	return -E2BIG;

done:
	cachep->freelist_size = cachep->num * sizeof(freelist_idx_t); //space taken by the freelist indexes
	cachep->flags = flags;
	cachep->allocflags = __GFP_COMP;
	if (flags & SLAB_CACHE_DMA)
		cachep->allocflags |= GFP_DMA;
	if (flags & SLAB_CACHE_DMA32)
		cachep->allocflags |= GFP_DMA32;
	if (flags & SLAB_RECLAIM_ACCOUNT)
		cachep->allocflags |= __GFP_RECLAIMABLE;
	cachep->size = size;
	cachep->reciprocal_buffer_size = reciprocal_value(size);


	if (OFF_SLAB(cachep)) {
		/* Off-slab freelists get their own backing kmalloc cache. */
		cachep->freelist_cache =
			kmalloc_slab(cachep->freelist_size, 0u);
	}

	err = setup_cpu_cache(cachep, gfp); //set up the per-CPU and per-node caches
	if (err) {
		__kmem_cache_release(cachep);
		return err;
	}

	return 0;
}

這裏咱們先拿到對象的大小size,檢查其是否與系統的WORD長度對齊,並設置kmem_cache的colour_off爲L1緩存行的大小。

下面咱們會先調用set_objfreelist_slab_cache函數。

/*
 * Try the OBJFREELIST layout: the freelist is stored inside one of the
 * (currently free) objects, so no extra space is reserved for it.
 * On success, fills cachep->num/gfporder/colour and returns true.
 */
static bool set_objfreelist_slab_cache(struct kmem_cache *cachep,
			size_t size, slab_flags_t flags)
{
	size_t left;

	cachep->num = 0;

	/*
	 * Caches with a constructor or RCU type-safety must not scribble
	 * over free objects, so this layout cannot be used for them.
	 */
	if (cachep->ctor || flags & SLAB_TYPESAFE_BY_RCU)
		return false;

	left = calculate_slab_order(cachep, size,
			flags | CFLGS_OBJFREELIST_SLAB);
	if (!cachep->num)
		return false;

	/* The whole freelist must fit inside a single object. */
	if (cachep->num * sizeof(freelist_idx_t) > cachep->object_size)
		return false;

	/* Leftover bytes fund the cache colouring. */
	cachep->colour = left / cachep->colour_off;

	return true;
}

在這個函數中,咱們先計算slab的order和left空間。kmem_cache的着色區數量爲left/colour_off。

/*
 * Pick the smallest acceptable page order for this cache and compute how
 * many objects fit per slab (stored in cachep->num/gfporder).  Returns
 * the leftover bytes, which the caller turns into cache colouring.
 */
static size_t calculate_slab_order(struct kmem_cache *cachep,
				size_t size, slab_flags_t flags)
{
	size_t left_over = 0;
	int gfporder;

	for (gfporder = 0; gfporder <= KMALLOC_MAX_ORDER; gfporder++) { //try orders from 0 up to the kmalloc maximum
		unsigned int num;
		size_t remainder;

		num = cache_estimate(gfporder, size, flags, &remainder); //objects that fit in 2^gfporder pages; the rest is colour space
		if (!num)
			continue;

		/* Can't handle more than SLAB_OBJ_MAX_NUM objects per slab */
		if (num > SLAB_OBJ_MAX_NUM)
			break;

		if (flags & CFLGS_OFF_SLAB) {
			struct kmem_cache *freelist_cache;
			size_t freelist_size;

			freelist_size = num * sizeof(freelist_idx_t);
			freelist_cache = kmalloc_slab(freelist_size, 0u);
			if (!freelist_cache)
				continue;

			/* Avoid a possible looping condition in cache_grow_begin() */
			if (OFF_SLAB(freelist_cache))
				continue;

			/* check if off slab has enough benefit */
			if (freelist_cache->size > cachep->size / 2)
				continue;
		}

		/* Found something acceptable - save it away */
		cachep->num = num;
		cachep->gfporder = gfporder;
		left_over = remainder;

		/*
		 * A VFS-reclaimable slab tends to have most allocations as
		 * GFP_NOFS, and we really don't want to allocate higher-order
		 * pages when we are unable to shrink the dcache.
		 */
		if (flags & SLAB_RECLAIM_ACCOUNT)
			break;

		/*
		 * Large numbers of objects are good, but very large slabs are
		 * currently bad for the gfp()s.
		 */
		if (gfporder >= slab_max_order)
			break;

		/* Acceptable internal fragmentation? (under 1/8 of the slab) */
		if (left_over * 8 <= (PAGE_SIZE << gfporder))
			break;
	}
	return left_over;
}

計算slab的order時,是從0開始嘗試,一直到gfporder的最大值。針對每個order值,先估算在當前的2^order數量個頁面中,能夠容納多少個對象。個數不能大於SLAB能夠容納的最大值。這裏咱們的flag應該不會進入CFLGS_OFF_SLAB分支。那麼就設置kmem_cache的對象個數與gfporder。剩餘空間若是小於頁面的1/8,那麼這個內碎片也是能夠接受的。

計算申請頁面能夠放多少個object須要調用cache_estimate函數。

/*
 * Estimate how many objects of @buffer_size fit into a slab of
 * 2^@gfporder pages; the leftover bytes are reported via @left_over.
 *
 * Fix: the explanatory text between the locals and the first `if` was
 * bare prose with no comment delimiters (a syntax error as written);
 * it is restored as a proper block comment.
 */
static unsigned int cache_estimate(unsigned long gfporder, size_t buffer_size,
		slab_flags_t flags, size_t *left_over)
{
	unsigned int num;
	size_t slab_size = PAGE_SIZE << gfporder;

	/*
	 * The slab management structure can be either off the slab or on it.
	 * If it is on the slab, the allocated memory holds, per object,
	 * buffer_size bytes plus one freelist index.  The freelist needs no
	 * extra alignment because it is placed at the end of the slab page,
	 * so every object remains correctly aligned.
	 * If it is off the slab (or embedded in a free object), any required
	 * alignment is already folded into buffer_size, and since slabs are
	 * page aligned the objects come out correctly aligned as well.
	 */
	if (flags & (CFLGS_OBJFREELIST_SLAB | CFLGS_OFF_SLAB)) {
		num = slab_size / buffer_size;
		*left_over = slab_size % buffer_size;
	} else {
		num = slab_size / (buffer_size + sizeof(freelist_idx_t));
		*left_over = slab_size %
			(buffer_size + sizeof(freelist_idx_t));
	}

	return num;
}

set_objfreelist_slab_cache會嘗試把freelist放進slab內的一個空閒object中,若是一個object放不下freelist index,就表示這樣作不合適。set_objfreelist_slab_cache若是執行失敗,會調用set_off_slab_cache,把freelist放在slab外面:先找一個合適的kmem_cache來存放freelist,找不到就算失敗;找到後再判斷slab的剩餘空間能不能放下freelist,若是能放下,說明放在slab外面收益不大,也算失敗。若是上述兩種方式都不行,最後調用set_on_slab_cache,把freelist放在slab內部,這個函數不會再判斷freelist index與object的大小關係。

最後會進入done標籤中。kmem_cache中的freelist大小爲對象個數*index。若是管理結構是在slab以外,那麼會給freelist_cache單獨申請一塊內存,用來放free_list。debug

最後調用setup_cpu_cache函數配置slab描述符。

/*
 * Set up the per-CPU array caches and per-node structures for @cachep.
 * After boot (slab_state >= FULL) this simply tunes the cpu caches;
 * during boot it bootstraps the node structures by hand depending on how
 * far initialisation has progressed.  Returns 0 on success.
 */
static int __ref setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
{
	if (slab_state >= FULL) //FULL means the slab machinery is fully initialised
		return enable_cpucache(cachep, gfp);

	/* Boot path: minimal per-CPU cache (limit 1, batchcount 1). */
	cachep->cpu_cache = alloc_kmem_cache_cpus(cachep, 1, 1);
	if (!cachep->cpu_cache)
		return 1;

	if (slab_state == DOWN) {
		/* Creation of first cache (kmem_cache). */
		set_up_node(kmem_cache, CACHE_CACHE);
	} else if (slab_state == PARTIAL) {
		/* For kmem_cache_node */
		set_up_node(cachep, SIZE_NODE);
	} else {
		int node;

		/* Later boot stages can already kmalloc the node structures. */
		for_each_online_node(node) {
			cachep->node[node] = kmalloc_node(
				sizeof(struct kmem_cache_node), gfp, node);
			BUG_ON(!cachep->node[node]);
			kmem_cache_node_init(cachep->node[node]);
		}
	}

	/* Stagger the first reap time so caches don't all expire together. */
	cachep->node[numa_mem_id()]->next_reap =
			jiffies + REAPTIMEOUT_NODE +
			((unsigned long)cachep) % REAPTIMEOUT_NODE;

	cpu_cache_get(cachep)->avail = 0;
	cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES;
	cpu_cache_get(cachep)->batchcount = 1;
	cpu_cache_get(cachep)->touched = 0;
	cachep->batchcount = 1;
	cachep->limit = BOOT_CPUCACHE_ENTRIES;
	return 0;
}

若是此時slab_state的狀態爲FULL,表示slab機制已經初始化完成了,調用enable_cpucache函數,使能cpu_cache。若是狀態是PARTIAL_NODE或UP,會遍歷全部的節點,申請kmem_cache_node結構,寫到node節點中,並調用kmem_cache_node_init對這個節點進行初始化。

/* Called with slab_mutex held always */
static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
{
	int err;
	int limit = 0;
	int shared = 0;
	int batchcount = 0;

	err = cache_random_seq_create(cachep, cachep->num, gfp);
	if (err)
		goto end;

	/* A memcg child cache inherits its root cache's tuning. */
	if (!is_root_cache(cachep)) {
		struct kmem_cache *root = memcg_root_cache(cachep);
		limit = root->limit;
		shared = root->shared;
		batchcount = root->batchcount;
	}

	if (limit && shared && batchcount)
		goto skip_setup;
	/*
	 * The head array serves three purposes:
	 * - create a LIFO ordering, i.e. return objects that are cache-warm
	 * - reduce the number of spinlock operations
	 * - reduce the number of linked list operations on the slab and
	 *   bufctl chains: array operations are cheaper.
	 * The numbers are guessed; we should auto-tune as described by
	 * Bonwick.
	 */
	//pick the free-object limit from the object size; 120 is the default
	if (cachep->size > 131072)
		limit = 1;
	else if (cachep->size > PAGE_SIZE)
		limit = 8;
	else if (cachep->size > 1024)
		limit = 24;
	else if (cachep->size > 256)
		limit = 54;
	else
		limit = 120;

	/*
	 * CPU bound tasks (e.g. network routing) can exhibit cpu bound
	 * allocation behaviour: most allocations on one cpu, most free
	 * operations on another.  For these cases an efficient object
	 * passing between cpus is necessary.  This is provided by a shared
	 * array, which replaces Bonwick's magazine layer.  On a
	 * uniprocessor it is functionally equivalent (but less efficient)
	 * to a larger limit, so it is disabled by default.
	 */
	shared = 0;
	//objects no larger than a page on SMP get a shared pool of 8
	if (cachep->size <= PAGE_SIZE && num_possible_cpus() > 1)
		shared = 8;

	batchcount = (limit + 1) / 2;
skip_setup:
	//batchcount: objects moved per refill/flush between per-CPU and shared pools
	err = do_tune_cpucache(cachep, limit, batchcount, shared, gfp);
end:
	if (err)
		pr_err("enable_cpucache failed for %s, error %d\n",
		       cachep->name, -err);
	return err;
}

在enable_cpucache函數中。會根據對象的大小來計算空閒對象的最大閾值。設置shared,batchcount大小,而後調用do_tune_cpucache

/*
 * Apply the new (limit, batchcount, shared) tuning to @cachep and, once
 * the allocator is fully up, propagate it to all memcg child caches.
 */
static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
				int batchcount, int shared, gfp_t gfp)
{
	int ret;
	struct kmem_cache *c;

	//retune this cache's per-CPU and per-node structures
	ret = __do_tune_cpucache(cachep, limit, batchcount, shared, gfp);

	if (slab_state < FULL)
		return ret;

	if ((ret < 0) || !is_root_cache(cachep))
		return ret;

	lockdep_assert_held(&slab_mutex);
	for_each_memcg_cache(c, cachep) {
		/* return value determined by the root cache only */
		__do_tune_cpucache(c, limit, batchcount, shared, gfp);
	}

	return ret;
}

在這個函數中,首先會調用__do_tune_cpucache來配置slab描述符;若是slab狀態已是FULL且當前是root cache,還會對它的每一個memcg子緩存再次調用__do_tune_cpucache。

/* Always called with the slab_mutex held. */
static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
				int batchcount, int shared, gfp_t gfp)
{
	struct array_cache __percpu *cpu_cache, *prev;
	int cpu;

	//allocate the per-CPU struct array_cache (per-CPU object pools)
	cpu_cache = alloc_kmem_cache_cpus(cachep, limit, batchcount);
	if (!cpu_cache)
		return -ENOMEM;

	prev = cachep->cpu_cache;
	cachep->cpu_cache = cpu_cache;
	/*
	 * Without a previous cpu_cache there are no remote cpus to sync
	 * with, so skip the IPI.
	 */
	if (prev)
		kick_all_cpus_sync();

	check_irq_on();
	cachep->batchcount = batchcount;
	cachep->limit = limit;
	cachep->shared = shared;

	if (!prev)
		goto setup_node;

	/* Drain each old per-CPU pool back to its home node, then free it. */
	for_each_online_cpu(cpu) {
		LIST_HEAD(list);
		int node;
		struct kmem_cache_node *n;
		struct array_cache *ac = per_cpu_ptr(prev, cpu);

		node = cpu_to_mem(cpu);
		n = get_node(cachep, node);
		spin_lock_irq(&n->list_lock);
		free_block(cachep, ac->entry, ac->avail, node, &list);
		spin_unlock_irq(&n->list_lock);
		slabs_destroy(cachep, &list);
	}
	free_percpu(prev);

setup_node:
	//(re)initialise the cachep->node[] kmem_cache_node structures
	return setup_kmem_cache_nodes(cachep, gfp);
}

在__do_tune_cpucache函數中,會先調用alloc_kmem_cache_cpus申請cpu_cache,這裏還會設置kmem_cache中的limit、shared、batchcount等值。而後遍歷每個在線的CPU,讀取它的array_cache,再獲取node ID,找到它的kmem_cache_node,調用free_block函數釋放裏面的對象。最後調用setup_kmem_cache_nodes函數初始化kmem_cache_node緩衝區。

/*
 * Initialise the kmem_cache_node of every online NUMA node.  If a node
 * fails and the cache is not yet active, roll back the nodes set up so
 * far.  Returns 0 on success, -ENOMEM on failure.
 */
static int setup_kmem_cache_nodes(struct kmem_cache *cachep, gfp_t gfp)
{
	int ret;
	int node;
	struct kmem_cache_node *n;

	for_each_online_node(node) { 
		//walk all online NUMA nodes
		ret = setup_kmem_cache_node(cachep, node, gfp, true);
		if (ret)
			goto fail;

	}

	return 0;

fail:
	if (!cachep->list.next) {
		/* Cache is not active yet. Roll back what we did */
		node--;
		while (node >= 0) {
			n = get_node(cachep, node);
			if (n) {
				kfree(n->shared);
				free_alien_cache(n->alien);
				kfree(n);
				cachep->node[node] = NULL;
			}
			node--;
		}
	}
	return -ENOMEM;
}

/*
 * Set up (or retune) one NUMA node of @cachep: optional alien caches,
 * an optional shared object pool, and the kmem_cache_node itself.
 */
static int setup_kmem_cache_node(struct kmem_cache *cachep,
				int node, gfp_t gfp, bool force_change)
{
	int ret = -ENOMEM;
	struct kmem_cache_node *n; //this node's slab lists and pools
	struct array_cache *old_shared = NULL;
	struct array_cache *new_shared = NULL;
	struct alien_cache **new_alien = NULL;
	LIST_HEAD(list);

	if (use_alien_caches) {
		new_alien = alloc_alien_cache(node, cachep->limit, gfp);
		if (!new_alien)
			goto fail;
	}

	//shared is non-zero on multi-core systems
	if (cachep->shared) {
		//shared pool of free objects, used by all CPUs of this node
		new_shared = alloc_arraycache(node,
			cachep->shared * cachep->batchcount, 0xbaadf00d, gfp);
		if (!new_shared)
			goto fail;
	}

	ret = init_cache_node(cachep, node, gfp);
	if (ret)
		goto fail;

	n = get_node(cachep, node);
	spin_lock_irq(&n->list_lock);
	if (n->shared && force_change) {
		/* Flush the old shared pool before swapping it out. */
		free_block(cachep, n->shared->entry,
				n->shared->avail, node, &list);
		n->shared->avail = 0;
	}

	if (!n->shared || force_change) {
		old_shared = n->shared;
		n->shared = new_shared;
		new_shared = NULL;
	}

	if (!n->alien) {
		n->alien = new_alien;
		new_alien = NULL;
	}

	spin_unlock_irq(&n->list_lock);
	slabs_destroy(cachep, &list);

	/*
	 * To protect lockless access to n->shared during irq-disabled
	 * context: if n->shared is not NULL in such a context, it is
	 * guaranteed to stay valid until irqs are re-enabled, because it
	 * will only be freed after synchronize_rcu().
	 */
	if (old_shared && force_change)
		synchronize_rcu();

fail:
	kfree(old_shared);
	kfree(new_shared);
	free_alien_cache(new_alien);

	return ret;
}

在這裏會遍歷全部的numa節點,調用setup_kmem_cache_node函數進行初始化。若是這個kmem_cache須要配置共享緩衝池,就申請一個array_cache結構。

static struct array_cache *alloc_arraycache(int node, int entries,
					    int batchcount, gfp_t gfp)
{
	/* Header plus one object pointer per entry. */
	size_t bytes = sizeof(struct array_cache) + sizeof(void *) * entries;
	struct array_cache *ac;

	ac = kmalloc_node(bytes, gfp, node);
	/*
	 * The array_cache holds pointers to free objects.  Those pointers
	 * are not cleared when an object is allocated or moved to another
	 * cache, so a kmemleak scan could mistake them for live references;
	 * exclude the array from scanning.
	 */
	kmemleak_no_scan(ac);
	init_arraycache(ac, entries, batchcount);
	return ac;
}

static void init_arraycache(struct array_cache *ac, int limit, int batch)
{
	/* Tolerate a NULL pointer from a failed allocation in the caller. */
	if (!ac)
		return;

	ac->avail = 0;
	ac->touched = 0;
	ac->limit = limit;
	ac->batchcount = batch;
}

申請的大小是指定的entrys大小加上array_cache自己的大小。申請了以後會將各個成員初始化。

/*
 * Ensure @cachep has a kmem_cache_node for @node, allocating one if it
 * does not exist yet, and refresh its free_limit.  Returns 0 on success
 * or -ENOMEM on allocation failure.
 */
static int init_cache_node(struct kmem_cache *cachep, int node, gfp_t gfp)
{
	struct kmem_cache_node *n;

	/*
	 * Set up the kmem_cache_node for the cpu before we can do anything.
	 * Make sure some other cpu on this node has not already allocated
	 * this.
	 */
	n = get_node(cachep, node);
	if (n) {
		spin_lock_irq(&n->list_lock);
		n->free_limit = (1 + nr_cpus_node(node)) * cachep->batchcount +
				cachep->num;
		spin_unlock_irq(&n->list_lock);

		return 0;
	}

	n = kmalloc_node(sizeof(struct kmem_cache_node), gfp, node);
	if (!n)
		return -ENOMEM;

	kmem_cache_node_init(n);
	/* Stagger the first reap time per cache. */
	n->next_reap = jiffies + REAPTIMEOUT_NODE +
		    ((unsigned long)cachep) % REAPTIMEOUT_NODE;

	n->free_limit =
		(1 + nr_cpus_node(node)) * cachep->batchcount + cachep->num;

	/*
	 * The kmem_cache_nodes don't come and go as CPUs come and go.
	 * slab_mutex is sufficient protection here.
	 */
	cachep->node[node] = n;

	return 0;
}

static void kmem_cache_node_init(struct kmem_cache_node *parent)
{
	/* Fresh node: empty slab lists, no free objects, no extra pools. */
	spin_lock_init(&parent->list_lock);
	INIT_LIST_HEAD(&parent->slabs_full);
	INIT_LIST_HEAD(&parent->slabs_partial);
	INIT_LIST_HEAD(&parent->slabs_free);
	parent->total_slabs = 0;
	parent->free_slabs = 0;
	parent->free_objects = 0;
	parent->free_touched = 0;
	parent->colour_next = 0;
	parent->shared = NULL;
	parent->alien = NULL;
}

而後會調用init_cache_node函數,若是這個node已經存在的話,找到這個node id對應的kmem_cache_node節點,設置它的free_limit值。若是不存在,就新申請一個node,把它填入kmem_cache的node數組中。

如今咱們保證了能夠拿到node節點;若是是強制更新,還會先清空並釋放舊的共享緩衝池。以上就完成了slab的初始化。

 

kmem_cache的銷燬是調用kmem_cache_destroy函數

/*
 * Drop a reference on @s; when the last reference goes away, tear the
 * cache down (memcg child caches first, then the cache itself).
 */
void kmem_cache_destroy(struct kmem_cache *s)
{
	int err;

	if (unlikely(!s)) //nothing to do for a NULL cache
		return;

	get_online_cpus(); //paired with put_online_cpus below
	get_online_mems();

	mutex_lock(&slab_mutex);

	s->refcount--; //drop one reference on the cache
	if (s->refcount) //still referenced by someone else: just return
		goto out_unlock;

#ifdef CONFIG_MEMCG_KMEM
	memcg_set_kmem_cache_dying(s);

	mutex_unlock(&slab_mutex);

	put_online_mems();
	put_online_cpus();

	flush_memcg_workqueue(s);

	get_online_cpus();
	get_online_mems();

	mutex_lock(&slab_mutex);
#endif
	//refcount hit zero: shut down the memcg children first, then the cache itself
	err = shutdown_memcg_caches(s);
	if (!err)
		err = shutdown_cache(s);

	if (err) {
		pr_err("kmem_cache_destroy %s: Slab cache still has objects\n",
		       s->name);
		dump_stack();
	}
out_unlock:
	mutex_unlock(&slab_mutex);

	put_online_mems();
	put_online_cpus();
}

釋放緩存

/*
 * Release everything the cache owns and unlink it from the global lists.
 * RCU-typesafe caches are queued to a work item for deferred freeing.
 * Returns 0 on success, -EBUSY if the cache still has live objects.
 */
static int shutdown_cache(struct kmem_cache *s)
{
	/* free asan quarantined objects */
	kasan_cache_shutdown(s);

	//release all resources held by the slabs
	if (__kmem_cache_shutdown(s) != 0)
		return -EBUSY;

	memcg_unlink_cache(s);
	//unlink from the global slab_caches list
	list_del(&s->list);

	if (s->flags & SLAB_TYPESAFE_BY_RCU) {
#ifdef SLAB_SUPPORTS_SYSFS
		sysfs_slab_unlink(s);
#endif
		//RCU caches are freed later by slab_caches_to_rcu_destroy_work
		list_add_tail(&s->list, &slab_caches_to_rcu_destroy);
		schedule_work(&slab_caches_to_rcu_destroy_work);
	} else {
#ifdef SLAB_SUPPORTS_SYSFS
		sysfs_slab_unlink(s);
		sysfs_slab_release(s);
#else
		//free the cache immediately
		slab_kmem_cache_release(s);
#endif
	}

	return 0;
}


/*
 * Final teardown: free the per-CPU/per-node structures, the memcg
 * parameters, the (possibly duplicated) name, and the descriptor itself.
 */
void slab_kmem_cache_release(struct kmem_cache *s)
{
	__kmem_cache_release(s);
	destroy_memcg_params(s);
	kfree_const(s->name);
	kmem_cache_free(kmem_cache, s); //return the descriptor to the kmem_cache cache
}


/*
 * Free the per-CPU array caches and every per-node structure of @cachep.
 */
void __kmem_cache_release(struct kmem_cache *cachep)
{
	int i;
	struct kmem_cache_node *n;

	cache_random_seq_destroy(cachep);

	free_percpu(cachep->cpu_cache); //free the per-CPU object pools

	/* NUMA: free the node structures */
	for_each_kmem_cache_node(cachep, i, n) {
		kfree(n->shared); //free the shared object pool
		free_alien_cache(n->alien);
		kfree(n); //free the kmem_cache_node itself
		cachep->node[i] = NULL;
	}
}
相關文章
相關標籤/搜索