Under a NUMA architecture, the SLAB allocator obtains an object as follows:
3192 static __always_inline void *
3193 __do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
3194 {
3195         void *objp;
...
3202         objp = ____cache_alloc(cache, flags);
3203
3204         /*
3205          * We may just have run out of memory on the local node.
3206          * ____cache_alloc_node() knows how to locate memory on other nodes
3207          */
3208         if (!objp)
3209                 objp = ____cache_alloc_node(cache, flags, numa_mem_id());
3210
3211   out:
3212         return objp;
3213 }
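For context, this path is entered through the public slab API: kmem_cache_alloc() ends up in __do_cache_alloc() on the caller's local node, while kmem_cache_alloc_node() asks for a specific node. The fragment below is only a hypothetical caller to show where the walk-through starts; struct demo, demo_cache and the function names are made up for illustration and are not part of mm/slab.c.

#include <linux/errno.h>
#include <linux/slab.h>

struct demo {
        int id;
        char payload[120];
};

static struct kmem_cache *demo_cache;

static int demo_setup(void)
{
        /* One cache, objects aligned to the hardware cache line. */
        demo_cache = kmem_cache_create("demo", sizeof(struct demo), 0,
                                       SLAB_HWCACHE_ALIGN, NULL);
        return demo_cache ? 0 : -ENOMEM;
}

static void demo_alloc_free(void)
{
        /* Takes the fast path through the per-CPU array cache when it can. */
        struct demo *d = kmem_cache_alloc(demo_cache, GFP_KERNEL);

        if (d)
                kmem_cache_free(demo_cache, d);
}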
First, ____cache_alloc is called to do the allocation. That function is implemented as follows:
2920 static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
2921 {
2922         void *objp;
2923         struct array_cache *ac;
2924         bool force_refill = false;
2925
2926         check_irq_off();
2927
2928         ac = cpu_cache_get(cachep);
2929         if (likely(ac->avail)) {
2930                 ac->touched = 1;
2931                 objp = ac_get_obj(cachep, ac, flags, false);
2932
2933                 /*
2934                  * Allow for the possibility all avail objects are not allowed
2935                  * by the current flags
2936                  */
2937                 if (objp) {
2938                         STATS_INC_ALLOCHIT(cachep);
2939                         goto out;
2940                 }
2941                 force_refill = true;
2942         }
2943
2944         STATS_INC_ALLOCMISS(cachep);
2945         objp = cache_alloc_refill(cachep, flags, force_refill);
2946         /*
2947          * the 'ac' may be updated by cache_alloc_refill(),
2948          * and kmemleak_erase() requires its correct value.
2949          */
2950         ac = cpu_cache_get(cachep);
2951
2952 out:
2953         /*
2954          * To avoid a false negative, if an object that is in one of the
2955          * per-CPU caches is leaked, we need to make sure kmemleak doesn't
2956          * treat the array pointers as a reference to the object.
2957          */
2958         if (objp)
2959                 kmemleak_erase(&ac->entry[ac->avail]);
2960         return objp;
2961 }
1. Look in the per-CPU array cache first; if an object is found there, return it, otherwise go to step 2 (a simplified sketch of this array cache follows after this list).
2. Call cache_alloc_refill to take objects from the node's shared array cache, or from the node's partial/free slab lists, and refill the CPU's array cache.
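To make step 1 concrete, the per-CPU front end is essentially a LIFO stack of object pointers. The sketch below is simplified and not taken verbatim from mm/slab.c: the field layout of struct array_cache varies slightly between kernel versions, and ac_pop here stands in for the fast path of ac_get_obj() with the debug and pfmemalloc handling omitted.

struct array_cache {
        unsigned int avail;      /* number of object pointers currently cached */
        unsigned int limit;      /* capacity of entry[] */
        unsigned int batchcount; /* how many objects to refill or flush at once */
        unsigned int touched;    /* "recently used" hint for the cache reaper */
        void *entry[];           /* the cached object pointers, used as a stack */
};

/* Fast-path pop: hand out the most recently freed (cache-hot) object. */
static inline void *ac_pop(struct array_cache *ac)
{
        if (!ac->avail)
                return NULL;    /* empty: caller must go through cache_alloc_refill() */
        ac->touched = 1;
        return ac->entry[--ac->avail];
}

Because the stack is popped from the top, a just-freed object is handed back first, which keeps the fast path cache-hot.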
cache_alloc_refill is implemented as follows:
2751 static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags,
2752                                 bool force_refill)
2753 {
2754         int batchcount;
2755         struct kmem_cache_node *n;
2756         struct array_cache *ac;
2757         int node;
2758
2759         check_irq_off();
2760         node = numa_mem_id();
2761         if (unlikely(force_refill))
2762                 goto force_grow;
2763 retry:
2764         ac = cpu_cache_get(cachep);
2765         batchcount = ac->batchcount;
2766         if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
2767                 /*
2768                  * If there was little recent activity on this cache, then
2769                  * perform only a partial refill.  Otherwise we could generate
2770                  * refill bouncing.
2771                  */
2772                 batchcount = BATCHREFILL_LIMIT;
2773         }
2774         n = get_node(cachep, node);
2775
2776         BUG_ON(ac->avail > 0 || !n);
2777         spin_lock(&n->list_lock);
2778
2779         /* See if we can refill from the shared array */
2780         if (n->shared && transfer_objects(ac, n->shared, batchcount)) {
2781                 n->shared->touched = 1;
2782                 goto alloc_done;
2783         }
2784
2785         while (batchcount > 0) {
2786                 struct list_head *entry;
2787                 struct page *page;
2788                 /* Get slab alloc is to come from. */
2789                 entry = n->slabs_partial.next;
2790                 if (entry == &n->slabs_partial) {
2791                         n->free_touched = 1;
2792                         entry = n->slabs_free.next;
2793                         if (entry == &n->slabs_free)
2794                                 goto must_grow;
2795                 }
2796
2797                 page = list_entry(entry, struct page, lru);
2798                 check_spinlock_acquired(cachep);
2799
2800                 /*
2801                  * The slab was either on partial or free list so
2802                  * there must be at least one object available for
2803                  * allocation.
2804                  */
2805                 BUG_ON(page->active >= cachep->num);
2806
2807                 while (page->active < cachep->num && batchcount--) {
2808                         STATS_INC_ALLOCED(cachep);
2809                         STATS_INC_ACTIVE(cachep);
2810                         STATS_SET_HIGH(cachep);
2811
2812                         ac_put_obj(cachep, ac, slab_get_obj(cachep, page,
2813                                                             node));
2814                 }
2815
2816                 /* move slabp to correct slabp list: */
2817                 list_del(&page->lru);
2818                 if (page->active == cachep->num)
2819                         list_add(&page->lru, &n->slabs_full);
2820                 else
2821                         list_add(&page->lru, &n->slabs_partial);
2822         }
2823
2824 must_grow:
2825         n->free_objects -= ac->avail;
2826 alloc_done:
2827         spin_unlock(&n->list_lock);
2828
2829         if (unlikely(!ac->avail)) {
2830                 int x;
2831 force_grow:
2832                 x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);
2833
2834                 /* cache_grow can reenable interrupts, then ac could change. */
2835                 ac = cpu_cache_get(cachep);
2836                 node = numa_mem_id();
2837
2838                 /* no objects in sight? abort */
2839                 if (!x && (ac->avail == 0 || force_refill))
2840                         return NULL;
2841
2842                 if (!ac->avail)         /* objects refilled by interrupt? */
2843                         goto retry;
2844         }
2845         ac->touched = 1;
2846
2847         return ac_get_obj(cachep, ac, flags, force_refill);
2848 }
3. If nr (nr > 0) objects can be transferred from n->shared, return; the allocation succeeds.
4. If n->shared has no usable objects either, objects are taken from the node's partial/free slab lists to fill the ac (a simplified sketch of this step follows below).
page->active is the number of objects already in use in that slab.
ac->avail is the index of the next available object in the ac; it is decremented as objects are handed out.
Note line 2825, n->free_objects -= ac->avail;: once the ac has been refilled, the node regards those objects as already allocated.
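For step 4, the inner loop above calls ac_put_obj(cachep, ac, slab_get_obj(cachep, page, node)). Below is a simplified sketch of what slab_get_obj does; it is not the exact mm/slab.c code (debug checks are dropped), and it assumes freelist_idx_t, the small integer type the allocator uses for freelist indexes.

/*
 * Simplified sketch of pulling one object out of a slab page during refill.
 * page->freelist is an array of free object indexes; page->s_mem is the
 * address of the first object; cachep->size is the per-object stride.
 */
static void *slab_get_obj_sketch(struct kmem_cache *cachep, struct page *page)
{
        /* Index of the next free object in this slab. */
        unsigned int idx = ((freelist_idx_t *)page->freelist)[page->active];
        void *objp = page->s_mem + (size_t)cachep->size * idx;

        page->active++;         /* one more object of this slab is now in use */
        return objp;
}

Each object obtained this way is pushed onto ac->entry[] by ac_put_obj(), which is why the whole batch is subtracted from n->free_objects at line 2825 once the loop is done.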
If neither step 3 nor step 4 manages to transfer objects into the ac, a fresh slab has to be allocated, as done in cache_grow:
2588 static int cache_grow(struct kmem_cache *cachep,
2589                 gfp_t flags, int nodeid, struct page *page)
2590 {
2591         void *freelist;
2592         size_t offset;
2593         gfp_t local_flags;
2594         struct kmem_cache_node *n;
2595
2596         /*
2597          * Be lazy and only check for valid flags here, keeping it out of the
2598          * critical path in kmem_cache_alloc().
2599          */
2600         BUG_ON(flags & GFP_SLAB_BUG_MASK);
2601         local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
2602
2603         /* Take the node list lock to change the colour_next on this node */
2604         check_irq_off();
2605         n = get_node(cachep, nodeid);
2606         spin_lock(&n->list_lock);
2607
2608         /* Get colour for the slab, and cal the next value. */
2609         offset = n->colour_next;
2610         n->colour_next++;
2611         if (n->colour_next >= cachep->colour)
2612                 n->colour_next = 0;
2613         spin_unlock(&n->list_lock);
2614
2615         offset *= cachep->colour_off;
2616
2617         if (local_flags & __GFP_WAIT)
2618                 local_irq_enable();
2619
2620         /*
2621          * The test for missing atomic flag is performed here, rather than
2622          * the more obvious place, simply to reduce the critical path length
2623          * in kmem_cache_alloc(). If a caller is seriously mis-behaving they
2624          * will eventually be caught here (where it matters).
2625          */
2626         kmem_flagcheck(cachep, flags);
2627
2628         /*
2629          * Get mem for the objs. Attempt to allocate a physical page from
2630          * 'nodeid'.
2631          */
2632         if (!page)
2633                 page = kmem_getpages(cachep, local_flags, nodeid);
2634         if (!page)
2635                 goto failed;
2636
2637         /* Get slab management. */
2638         freelist = alloc_slabmgmt(cachep, page, offset,
2639                         local_flags & ~GFP_CONSTRAINT_MASK, nodeid);
2640         if (!freelist)
2641                 goto opps1;
2642
2643         slab_map_pages(cachep, page, freelist);
2644
2645         cache_init_objs(cachep, page);
2646
2647         if (local_flags & __GFP_WAIT)
2648                 local_irq_disable();
2649         check_irq_off();
2650         spin_lock(&n->list_lock);
2651
2652         /* Make slab active. */
2653         list_add_tail(&page->lru, &(n->slabs_free));
2654         STATS_INC_GROWN(cachep);
2655         n->free_objects += cachep->num;
2656         spin_unlock(&n->list_lock);
2657         return 1;
2658 opps1:
2659         kmem_freepages(cachep, page);
2660 failed:
2661         if (local_flags & __GFP_WAIT)
2662                 local_irq_disable();
2663         return 0;
2664 }
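Lines 2608-2615 implement slab colouring: every new slab on a node takes the next colour in round-robin order, so objects in different slabs start at different cache-line offsets. The helper below is only an illustrative rewrite of that fragment (the real code does this inline under n->list_lock), and the numbers in the comment assume colour = 4 and colour_off = 64.

/*
 * Illustrative rewrite of the colour selection in cache_grow().  With an
 * assumed cachep->colour of 4 and cachep->colour_off of 64 (one cache line),
 * successive slabs place their first object at offsets 0, 64, 128, 192,
 * 0, 64, ... into the freshly allocated pages.
 */
static size_t next_colour_offset(struct kmem_cache_node *n,
                                 struct kmem_cache *cachep)
{
        size_t colour = n->colour_next;         /* colour index for this new slab */

        n->colour_next++;                       /* advance, wrapping at cachep->colour */
        if (n->colour_next >= cachep->colour)
                n->colour_next = 0;

        return colour * cachep->colour_off;     /* bytes of padding before the slab data */
}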
After the pages have been allocated, the slab management structure (slabmgmt, i.e. the freelist) is allocated, as follows:
2445 static void *alloc_slabmgmt(struct kmem_cache *cachep,
2446                                    struct page *page, int colour_off,
2447                                    gfp_t local_flags, int nodeid)
2448 {
2449         void *freelist;
2450         void *addr = page_address(page);
2451
2452         if (OFF_SLAB(cachep)) {
2453                 /* Slab management obj is off-slab. */
2454                 freelist = kmem_cache_alloc_node(cachep->freelist_cache,
2455                                                  local_flags, nodeid);
2456                 if (!freelist)
2457                         return NULL;
2458         } else {
2459                 freelist = addr + colour_off;
2460                 colour_off += cachep->freelist_size;
2461         }
2462         page->active = 0;
2463         page->s_mem = addr + colour_off;
2464         return freelist;
2465 }
The slabmgmt can live either inside the slab (on-slab) or outside it (off-slab). The condition for placing it off-slab is as follows:
2194         /*
2195          * Determine if the slab management is 'on' or 'off' slab.
2196          * (bootstrapping cannot cope with offslab caches so don't do
2197          * it too early on. Always use on-slab management when
2198          * SLAB_NOLEAKTRACE to avoid recursive calls into kmemleak)
2199          */
2200         if ((size >= (PAGE_SIZE >> 5)) && !slab_early_init &&
2201             !(flags & SLAB_NOLEAKTRACE))
2202                 /*
2203                  * Size is large, assume best to place the slab management obj
2204                  * off-slab (should allow better packing of objs).
2205                  */
2206                 flags |= CFLGS_OFF_SLAB;
2207
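As a worked example of this threshold (assuming PAGE_SIZE is 4096, so PAGE_SIZE >> 5 is 128 bytes), the standalone sketch below shows which object sizes end up with off-slab management, provided slab_early_init is finished and SLAB_NOLEAKTRACE is not set:

#include <stdio.h>

int main(void)
{
        const unsigned long page_size = 4096;            /* assumed PAGE_SIZE */
        const unsigned long threshold = page_size >> 5;  /* 128 bytes */
        const unsigned long sizes[] = { 32, 96, 128, 256, 1024 };
        unsigned int i;

        for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
                printf("object size %4lu -> %s-slab management\n",
                       sizes[i], sizes[i] >= threshold ? "off" : "on");
        return 0;
}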
+------------+---------------+-----+-----+-----+
| colour_off | freelist_size | obj | obj | ... |
+------------+---------------+-----+-----+-----+
If the management structure is on-slab, the layout is as shown above. If CONFIG_DEBUG_SLAB_LEAK is enabled, per-object state is additionally stored after the freelist_size area.
Finally, slab_map_pages links the page to the cache and its freelist, and cache_init_objs initializes the page's objects.
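A simplified sketch of that last initialization step is shown below; it is not the verbatim mm/slab.c code (debug poisoning is omitted and set_free_obj() is written inline), but it shows the idea: in a brand-new slab every object is free, so slot i of the freelist simply holds index i, and the cache's constructor, if any, runs on each object.

static void cache_init_objs_sketch(struct kmem_cache *cachep, struct page *page)
{
        unsigned int i;

        for (i = 0; i < cachep->num; i++) {
                /* Address of object i, counted from page->s_mem. */
                void *objp = page->s_mem + (size_t)cachep->size * i;

                if (cachep->ctor)
                        cachep->ctor(objp);     /* construct the object once, up front */

                /* set_free_obj(page, i, i): freelist slot i records free index i. */
                ((freelist_idx_t *)page->freelist)[i] = i;
        }
}

Together with the slab_get_obj sketch earlier, this shows why page->active can double as the cursor into the freelist array: entries below page->active have been handed out, and entries at or above it are still free.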