/* An object allocator for Python.

   Here is an introduction to the layers of the Python memory architecture,
   showing where the object allocator is actually used (layer +2). It is
   called for every object allocation and deallocation (PyObject_New/Del),
   unless the object-specific allocators implement a proprietary allocation
   scheme (ex.: ints use a simple free list). This is also the place where
   the cyclic garbage collector operates selectively on container objects.


    Object-specific allocators
    _____   ______   ______       ________
   [ int ] [ dict ] [ list ] ... [ string ]       Python core         |
+3 | <----- Object-specific memory -----> | <-- Non-object memory --> |
    _______________________________       |                           |
   [   Python's object allocator   ]      |                           |
+2 | ####### Object memory ####### | <------ Internal buffers ------> |
    ______________________________________________________________    |
   [          Python's raw memory allocator (PyMem_ API)          ]   |
+1 | <----- Python memory (under PyMem manager's control) ------> |   |
    __________________________________________________________________
   [    Underlying general-purpose allocator (ex: C library malloc)   ]
 0 | <------ Virtual memory allocated for the python process -------> |

   =========================================================================
    _______________________________________________________________________
   [                OS-specific Virtual Memory Manager (VMM)               ]
-1 | <--- Kernel dynamic storage allocation & management (page-based) ---> |
    __________________________________   __________________________________
   [                                  ] [                                  ]
-2 | <-- Physical memory: ROM/RAM --> | | <-- Secondary storage (swap) --> |

*/
reference: Objects/obmalloc.c
layer 3: Object-specific memory (int/dict/list/string ...)
    由 Python 實現並維護, 是用戶對 Python 對象的直接操作層; 主要是各種特定對象的緩衝池機制, 例如小整數對象池等等
layer 2: Python's object allocator
    實現了創建/銷燬 Python 對象的接口 (PyObject_New/Del), 涉及對象參數/引用計數等
layer 1: Python's raw memory allocator (PyMem_ API)
    包裝了第0層的內存管理接口, 提供統一的 raw memory 管理接口
    封裝的原因: 不同操作系統上 C 庫的行爲不一致, 封裝後可保證可移植性, 相同語義對應相同行爲
layer 0: Underlying general-purpose allocator (ex: C library malloc)
    操作系統(C 運行庫)提供的內存管理接口, 由操作系統實現並管理, Python 不能干涉這一層的行爲; 大內存的分配直接調用 malloc 函數
Python中的內存分爲大內存和小內存, 以512字節(SMALL_REQUEST_THRESHOLD)爲分界線
大內存使用系統malloc進行分配
小內存使用python內存池進行分配
1. 若是要分配的內存空間大於 SMALL_REQUEST_THRESHOLD bytes(512 bytes), 將直接使用layer 1的內存分配接口進行分配 2. 不然, 使用不一樣的block來知足分配需求
申請一塊大小28字節的內存, 實際從內存中劃到32字節的一個block (從size class index爲3的pool裏面劃出)
內存塊block 是python內存的最小單位
/*
 * For small requests we have the following table:
 *
 * Request in bytes     Size of allocated block      Size class idx
 * ----------------------------------------------------------------
 *        1-8                     8                       0
 *        9-16                   16                       1
 *       17-24                   24                       2
 *       25-32                   32                       3
 *       33-40                   40                       4
 *       41-48                   48                       5
 *       49-56                   56                       6
 *       57-64                   64                       7
 *       65-72                   72                       8
 *        ...                   ...                     ...
 *      497-504                 504                      62
 *      505-512                 512                      63
 *
 *      0, SMALL_REQUEST_THRESHOLD + 1 and up: routed to the underlying
 *      allocator.
 */
pool內存池, 管理block: 一個pool管理着一堆固定大小的內存塊. 在Python中, 一個pool的大小一般爲一個系統內存頁, 即4KB
/*
 * Pool geometry.
 *
 * A pool is exactly one (assumed) system page.  POOL_SIZE must be a power of
 * two so that POOL_SIZE_MASK (= POOL_SIZE - 1) can be used as a bit mask to
 * test or round addresses against pool boundaries.
 *
 * Each directive must sit on its own line: when several #define's share one
 * physical line, everything after the first macro name becomes part of that
 * macro's replacement text and the remaining macros are never defined.
 */
#define SYSTEM_PAGE_SIZE        (4 * 1024)
#define SYSTEM_PAGE_SIZE_MASK   (SYSTEM_PAGE_SIZE - 1)
#define POOL_SIZE               SYSTEM_PAGE_SIZE        /* must be 2^N */
#define POOL_SIZE_MASK          SYSTEM_PAGE_SIZE_MASK
pool的4KB內存 = pool_header + block集合(N個大小相同的block)
/* A block is addressed as raw bytes; block pointers are byte pointers. */
typedef uint8_t block;

/* Pool for small blocks.
 *
 * Header stored at the start of every pool.  It links the pool to other
 * pools of the same size class and tracks the blocks inside the pool.
 *
 * NOTE: the field order matters.  The PTA() macro used to initialise
 * usedpools[] steps back 2*sizeof(block *) bytes from a table slot, i.e. it
 * relies on `ref` and `freeblock` being the two pointer-sized members that
 * precede `nextpool`.  Do not reorder or resize the leading members.
 */
struct pool_header {
    /* `_padding` makes the union pointer-sized; only `count` is used. */
    union { block *_padding;
            uint count; } ref;          /* number of allocated blocks    */
    block *freeblock;                   /* pool's free list head         */
    struct pool_header *nextpool;       /* next pool of this size class  */
    struct pool_header *prevpool;       /* previous pool       ""        */
    uint arenaindex;                    /* index into arenas of base adr */
    uint szidx;                         /* block size class index        */
    uint nextoffset;                    /* bytes to virgin block         */
    uint maxnextoffset;                 /* largest valid nextoffset      */
};
pool_header 的作用
1. 與其餘pool連接, 組成雙向鏈表 2. 維護pool中可用的block, 單鏈表 3. 保存 szidx , 這個和該pool中block的大小有關係, (block size=8, szidx=0), (block size=16, szidx=1)...用於內存分配時匹配到擁有對應大小block的pool
void * PyObject_Malloc(size_t nbytes) { ... init_pool: // 1. 鏈接到 used_pools 雙向鏈表, 做爲表頭 // 注意, 這裏 usedpools[0] 保存着 block size = 8 的全部used_pools的表頭 /* Frontlink to used pools. */ next = usedpools[size + size]; /* == prev */ pool->nextpool = next; pool->prevpool = next; next->nextpool = pool; next->prevpool = pool; pool->ref.count = 1; // 若是已經初始化過了...這裏看初始化, 跳過 if (pool->szidx == size) { /* Luckily, this pool last contained blocks * of the same size class, so its header * and free list are already initialized. */ bp = pool->freeblock; pool->freeblock = *(block **)bp; UNLOCK(); return (void *)bp; } /* * Initialize the pool header, set up the free list to * contain just the second block, and return the first * block. */ // 開始初始化pool_header // 這裏 size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT; 實際上是Size class idx, 即szidx pool->szidx = size; // 計算得到每一個block的size size = INDEX2SIZE(size); // 注意 #define POOL_OVERHEAD ROUNDUP(sizeof(struct pool_header)) // bp => 初始化爲pool + pool_header size, 跳過pool_header的內存 bp = (block *)pool + POOL_OVERHEAD; // 計算偏移量, 這裏的偏移量是絕對值 // #define POOL_SIZE SYSTEM_PAGE_SIZE /* must be 2^N */ // POOL_SIZE = 4kb, POOL_OVERHEAD = pool_header size // 下一個偏移位置: pool_header size + 2 * size pool->nextoffset = POOL_OVERHEAD + (size << 1); // 4kb - size pool->maxnextoffset = POOL_SIZE - size; // freeblock指向 bp + size = pool_header size + size pool->freeblock = bp + size; // 賦值NULL *(block **)(pool->freeblock) = NULL; UNLOCK(); return (void *)bp; }
if (pool != pool->nextpool) { // /* * There is a used pool for this size class. * Pick up the head block of its free list. */ ++pool->ref.count; bp = pool->freeblock; // 指針指向空閒block起始位置 assert(bp != NULL); // 代碼-1 // 調整 pool->freeblock (假設A節點)指向鏈表下一個, 即bp首字節指向的下一個節點(假設B節點) , 若是此時!= NULL // 表示 A節點可用, 直接返回 if ((pool->freeblock = *(block **)bp) != NULL) { UNLOCK(); return (void *)bp; } // 代碼-2 /* * Reached the end of the free list, try to extend it. */ // 有足夠的空間, 分配一個, pool->freeblock 指向後移 if (pool->nextoffset <= pool->maxnextoffset) { /* There is room for another block. */ // 變動位置信息 pool->freeblock = (block*)pool + pool->nextoffset; pool->nextoffset += INDEX2SIZE(size); *(block **)(pool->freeblock) = NULL; // 注意, 指向NULL UNLOCK(); // 返回bp return (void *)bp; } // 代碼-3 /* Pool is full, unlink from used pools. */ // 滿了, 須要從下一個pool獲取 next = pool->nextpool; pool = pool->prevpool; next->prevpool = pool; pool->nextpool = next; UNLOCK(); return (void *)bp; }
內存塊還沒有分配完, 且此時不存在回收的block, 全新進來的時候, 分配第一塊block
(pool->freeblock = *(block **)bp) == NULL
當進入代碼邏輯2時,表示有空閒的block, 代碼2的執行流程圖以下
回收涉及的代碼:
void PyObject_Free(void *p) { poolp pool; block *lastfree; poolp next, prev; uint size; pool = POOL_ADDR(p); if (Py_ADDRESS_IN_RANGE(p, pool)) { /* We allocated this address. */ LOCK(); /* Link p to the start of the pool's freeblock list. Since * the pool had at least the p block outstanding, the pool * wasn't empty (so it's already in a usedpools[] list, or * was full and is in no list -- it's not in the freeblocks * list in any case). */ assert(pool->ref.count > 0); /* else it was empty */ // p被釋放, p的第一個字節值被設置爲當前freeblock的值 *(block **)p = lastfree = pool->freeblock; // freeblock被更新爲指向p的首地址 pool->freeblock = (block *)p; // 至關於往list中頭插入了一個節點 ... } }
每釋放一個block,該block就會變成pool->freeblock
的頭結點, 假設已經連續分配了5塊, 第1塊和第4塊被釋放,此時的內存圖示以下:
此時再一個block分配調用進來, 執行分配, 進入的邏輯是代碼-1
bp = pool->freeblock; // 指針指向空閒block起始位置 // 代碼-1 // 調整 pool->freeblock (假設A節點)指向鏈表下一個, 即bp首字節指向的下一個節點(假設B節點) , 若是此時!= NULL // 表示 A節點可用, 直接返回 if ((pool->freeblock = *(block **)bp) != NULL) { UNLOCK(); return (void *)bp; }
pool中內存空間都用完了, 進入代碼-3
/* Pool is full, unlink from used pools. */ // 滿了, 須要從下一個pool獲取 next = pool->nextpool; pool = pool->prevpool; next->prevpool = pool; pool->nextpool = next; UNLOCK(); return (void *)bp;
arena: 多個pool聚合的結果, 可放置64個pool
/* Size in bytes of one arena: 256 KiB. */
#define ARENA_SIZE              (256 * 1024)
一個完整的arena = arena_object + pool集合
/* Record keeping for arenas.
 *
 * One arena_object describes one arena (or is unassociated, in which case
 * `address` is 0).  It tracks the pools still available in the arena and
 * links the record into one of the arena_object lists.
 */
struct arena_object {
    /* The address of the arena, as returned by malloc.  Note that 0
     * will never be returned by a successful malloc, and is used
     * here to mark an arena_object that doesn't correspond to an
     * allocated arena.
     */
    uintptr_t address;

    /* Pool-aligned pointer to the next pool to be carved off. */
    block* pool_address;

    /* The number of available pools in the arena:  free pools + never-
     * allocated pools.
     */
    uint nfreepools;

    /* The total number of pools in the arena, whether or not available. */
    uint ntotalpools;

    /* Singly-linked list of available pools. */
    struct pool_header* freepools;

    /* Whenever this arena_object is not associated with an allocated
     * arena, the nextarena member is used to link all unassociated
     * arena_objects in the singly-linked `unused_arena_objects` list.
     * The prevarena member is unused in this case.
     *
     * When this arena_object is associated with an allocated arena
     * with at least one available pool, both members are used in the
     * doubly-linked `usable_arenas` list, which is maintained in
     * increasing order of `nfreepools` values.
     *
     * Else this arena_object is associated with an allocated arena
     * all of whose pools are in use.  `nextarena` and `prevarena`
     * are both meaningless in this case.
     */
    struct arena_object* nextarena;
    struct arena_object* prevarena;
};
arena_object的做用 1. 與其餘arena鏈接, 組成雙向鏈表 2. 維護arena中可用的pool, 單鏈表
uint maxnextoffset; /* largest valid nextoffset */
/* The head of the singly-linked, NULL-terminated list of available
 * arena_objects (records not currently associated with an arena).
 */
static struct arena_object* unused_arena_objects = NULL;

/* The head of the doubly-linked, NULL-terminated at each end, list of
 * arena_objects associated with arenas that have pools available.
 */
static struct arena_object* usable_arenas = NULL;
* Allocate a new arena. If we run out of memory, return NULL. Else * allocate a new arena, and return the address of an arena_object * describing the new arena. It's expected that the caller will set * `usable_arenas` to the return value. */ static struct arena_object* new_arena(void) { struct arena_object* arenaobj; uint excess; /* number of bytes above pool alignment */ void *address; static int debug_stats = -1; if (debug_stats == -1) { const char *opt = Py_GETENV("PYTHONMALLOCSTATS"); debug_stats = (opt != NULL && *opt != '\0'); } if (debug_stats) _PyObject_DebugMallocStats(stderr); // 判斷是否須要擴充"未使用"的arena_object列表 if (unused_arena_objects == NULL) { uint i; uint numarenas; size_t nbytes; /* Double the number of arena objects on each allocation. * Note that it's possible for `numarenas` to overflow. */ // 肯定須要申請的個數, 首次初始化, 16, 以後每次翻倍 numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS; if (numarenas <= maxarenas) return NULL; /* overflow */ #if SIZEOF_SIZE_T <= SIZEOF_INT if (numarenas > SIZE_MAX / sizeof(*arenas)) return NULL; /* overflow */ #endif nbytes = numarenas * sizeof(*arenas); // 申請內存 arenaobj = (struct arena_object *)PyMem_RawRealloc(arenas, nbytes); if (arenaobj == NULL) return NULL; arenas = arenaobj; /* We might need to fix pointers that were copied. However, * new_arena only gets called when all the pages in the * previous arenas are full. Thus, there are *no* pointers * into the old array. Thus, we don't have to worry about * invalid pointers. Just to be sure, some asserts: */ assert(usable_arenas == NULL); assert(unused_arena_objects == NULL); /* Put the new arenas on the unused_arena_objects list. */ for (i = maxarenas; i < numarenas; ++i) { arenas[i].address = 0; /* mark as unassociated */ // 新申請的一概爲0, 標識着這個arena處於"未使用" arenas[i].nextarena = i < numarenas - 1 ? &arenas[i+1] : NULL; } // 將其放入unused_arena_objects鏈表中 // unused_arena_objects 爲新分配內存空間的開頭 /* Update globals. 
*/ unused_arena_objects = &arenas[maxarenas]; maxarenas = numarenas; } /* Take the next available arena object off the head of the list. */ assert(unused_arena_objects != NULL); // 從unused_arena_objects中, 獲取一個未使用的object arenaobj = unused_arena_objects; unused_arena_objects = arenaobj->nextarena; // 更新鏈表 assert(arenaobj->address == 0); // 申請內存, 256KB, 內存地址賦值給arena的address. 這塊內存可用 address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE); if (address == NULL) { /* The allocation failed: return NULL after putting the * arenaobj back. */ arenaobj->nextarena = unused_arena_objects; unused_arena_objects = arenaobj; return NULL; } arenaobj->address = (uintptr_t)address; ++narenas_currently_allocated; ++ntimes_arena_allocated; if (narenas_currently_allocated > narenas_highwater) narenas_highwater = narenas_currently_allocated; arenaobj->freepools = NULL; /* pool_address <- first pool-aligned address in the arena nfreepools <- number of whole pools that fit after alignment */ arenaobj->pool_address = (block*)arenaobj->address; arenaobj->nfreepools = MAX_POOLS_IN_ARENA; // 將pool的起始地址調整爲系統頁的邊界 // 申請到 256KB, 放棄了一些內存, 而將可以使用的內存邊界pool_address調整到了與系統頁對齊 excess = (uint)(arenaobj->address & POOL_SIZE_MASK); if (excess != 0) { --arenaobj->nfreepools; arenaobj->pool_address += POOL_SIZE - excess; } arenaobj->ntotalpools = arenaobj->nfreepools; return arenaobj; }
從arenas取一個arena進行初始化
new一個全新的arena
static void* pymalloc_alloc(void *ctx, size_t nbytes) { // 剛開始沒有可用的arena if (usable_arenas == NULL) { // new一個, 做爲雙向鏈表的表頭 usable_arenas = new_arena(); if (usable_arenas == NULL) { UNLOCK(); goto redirect; } usable_arenas->nextarena = usable_arenas->prevarena = NULL; } ....... // 從arena中獲取一個pool pool = (poolp)usable_arenas->pool_address; assert((block*)pool <= (block*)usable_arenas->address + ARENA_SIZE - POOL_SIZE); pool->arenaindex = usable_arenas - arenas; assert(&arenas[pool->arenaindex] == usable_arenas); pool->szidx = DUMMY_SIZE_IDX; // 更新 pool_address 向下一個節點 usable_arenas->pool_address += POOL_SIZE; // 可用節點數量-1 --usable_arenas->nfreepools; }
從全新的arena中獲取一個pool
假設arena是舊的, 怎麼分配的pool, 跟pool分配block原理同樣,使用單鏈表記錄freepools
pool = usable_arenas->freepools; if (pool != NULL) {
當arena中一整塊pool被釋放的時候
/* Free a memory block allocated by pymalloc_alloc(). Return 1 if it was freed. Return 0 if the block was not allocated by pymalloc_alloc(). */ static int pymalloc_free(void *ctx, void *p) { struct arena_object* ao; uint nf; /* ao->nfreepools */ /* Link the pool to freepools. This is a singly-linked * list, and pool->prevpool isn't used there. */ ao = &arenas[pool->arenaindex]; pool->nextpool = ao->freepools; ao->freepools = pool; nf = ++ao->nfreepools; }
在pool整塊被釋放的時候, 會將pool加入到arena->freepools
做爲單鏈表的表頭, 而後, 在從非全新arena中分配pool時, 優先從arena->freepools
裏面取, 若是取不到, 再從arena內存塊裏面獲取
注: 上圖中nfreepools = n - 2
當arena1用完了,獲取arena1指向的下一個節點arena2
static void* pymalloc_alloc(void *ctx, size_t nbytes) { // 當發現用完了最後一個pool!!!!!!!!!!! // nfreepools = 0 if (usable_arenas->nfreepools == 0) { assert(usable_arenas->nextarena == NULL || usable_arenas->nextarena->prevarena == usable_arenas); /* Unlink the arena: it is completely allocated. */ // 找到下一個節點! usable_arenas = usable_arenas->nextarena; // 右下一個 if (usable_arenas != NULL) { usable_arenas->prevarena = NULL; // 更新下一個節點的prevarens assert(usable_arenas->address != 0); } // 沒有下一個, 此時 usable_arenas = NULL, 下次進行內存分配的時候, 就會從arenas數組中取一個 } }
注意: 這裏有個邏輯, 就是每分配一個pool, 就檢查是否是用到了最後一個, 若是是, 須要變動usable_arenas
到下一個可用的節點, 若是沒有可用的, 那麼下次進行內存分配的時候, 會斷定從arenas數組中取一個
內存分配和回收最小單位是block, 當一個block被回收的時候, 可能觸發pool被回收, pool被回收, 將會觸發arena的回收機制
關注點:如何尋找到一塊可用的nbytes的block內存?
pool = usedpools[size + size]
if pool:
pool 沒滿,取一個block返回
pool 滿了,從下一個pool取一個block返回
else:
獲取arena, 從裏面初始化一個pool, 拿到第一個block返回
進行內存分配和銷燬, 全部操做都是在pool上進行的
問題: pool中全部block的size同樣, 可是在arena中, 每一個pool的size均可能不同, 那麼最終這些pool是怎麼維護的? 怎麼根據大小找到須要的block所在的pool? =>
usedpools
Python內部維護的usedpools數組是一個很是巧妙的實現,維護着全部的處於used狀態的pool,當申請內存時,python就會經過usedpools尋找到一個可用的pool(處於used狀態)
,從中分配一個block。所以咱們想,必定有一個usedpools相關聯的機制,完成從申請的內存的大小到size class index之間的轉換,不然python就沒法找到最合適的pool了。這種機制和usedpools的結構有着密切的關係,咱們看一下它的結構
usedpools數組: 維護着全部處於used狀態的pool, 當申請內存的時候, 會經過usedpools尋找到一塊可用的(處於used狀態的)pool, 從中分配一個block。
//obmalloc.c typedef uint8_t block; #define PTA(x) ((poolp )((uint8_t *)&(usedpools[2*(x)]) - 2*sizeof(block *))) #define PT(x) PTA(x), PTA(x) //在我當前的機器就是512/8=64個,對應的size class index就是從0到63 #define NB_SMALL_SIZE_CLASSES (SMALL_REQUEST_THRESHOLD / ALIGNMENT) static poolp usedpools[2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8] = { PT(0), PT(1), PT(2), PT(3), PT(4), PT(5), PT(6), PT(7) #if NB_SMALL_SIZE_CLASSES > 8 , PT(8), PT(9), PT(10), PT(11), PT(12), PT(13), PT(14), PT(15) #if NB_SMALL_SIZE_CLASSES > 16 , PT(16), PT(17), PT(18), PT(19), PT(20), PT(21), PT(22), PT(23) #if NB_SMALL_SIZE_CLASSES > 24 , PT(24), PT(25), PT(26), PT(27), PT(28), PT(29), PT(30), PT(31) #if NB_SMALL_SIZE_CLASSES > 32 , PT(32), PT(33), PT(34), PT(35), PT(36), PT(37), PT(38), PT(39) #if NB_SMALL_SIZE_CLASSES > 40 , PT(40), PT(41), PT(42), PT(43), PT(44), PT(45), PT(46), PT(47) #if NB_SMALL_SIZE_CLASSES > 48 , PT(48), PT(49), PT(50), PT(51), PT(52), PT(53), PT(54), PT(55) #if NB_SMALL_SIZE_CLASSES > 56 , PT(56), PT(57), PT(58), PT(59), PT(60), PT(61), PT(62), PT(63) #if NB_SMALL_SIZE_CLASSES > 64 #error "NB_SMALL_SIZE_CLASSES should be less than 64" #endif /* NB_SMALL_SIZE_CLASSES > 64 */ #endif /* NB_SMALL_SIZE_CLASSES > 56 */ #endif /* NB_SMALL_SIZE_CLASSES > 48 */ #endif /* NB_SMALL_SIZE_CLASSES > 40 */ #endif /* NB_SMALL_SIZE_CLASSES > 32 */ #endif /* NB_SMALL_SIZE_CLASSES > 24 */ #endif /* NB_SMALL_SIZE_CLASSES > 16 */ #endif /* NB_SMALL_SIZE_CLASSES > 8 */ };
若是正在申請28字節, python首先會獲取(size class index) size = (uint )(nbytes - 1) >> ALIGNMENT_SHIFT
顯然這裏size=3
, 那麼在usedpools中,尋找第3+3=6個元素,發現usedpools[6]的值是指向usedpools[4]的地址
//obmalloc.c /* Pool for small blocks. */ struct pool_header { union { block *_padding; uint count; } ref; /* 固然pool裏面的block數量 */ block *freeblock; /* 一個鏈表,指向下一個可用的block */ struct pool_header *nextpool; /* 指向下一個pool */ struct pool_header *prevpool; /* 指向上一個pool "" */ uint arenaindex; /* 在area裏面的索引 */ uint szidx; /* block的大小(固定值?後面說) */ uint nextoffset; /* 下一個可用block的內存偏移量 */ uint maxnextoffset; /* 最後一個block距離開始位置的距離 */ };
usedpools[6]->nextpool 所在的位置, 顯然是從usedpools[6]的值
(即地址usedpools+4)
開始向後偏移 2*sizeof(block *) 個字節(一個ref的大小加上一個freeblock的大小; 32位平臺上是8個字節, 64位平臺上是16個字節)後的內存,正好是usedpools[6]的地址(即usedpools+6)
,這是python內部的trick
當咱們要申請一個size class爲32字節的pool,想要將其放入這個usedpools中時,要怎麼作呢?從上面的描述咱們知道,只須要進行usedpools[i+i] -> nextpool = pool
便可,其中i爲size class index,對應於32字節,這個i爲3.當下次須要訪問size class 爲32字節(size class index爲3)
的pool時,只須要簡單地訪問usedpools[3+3]就能夠獲得了。python正是使用這個usedpools快速地從衆多的pool中快速地尋找到一個最適合當前內存需求的pool,從中分配一塊block。
//obmalloc.c static int pymalloc_alloc(void *ctx, void **ptr_p, size_t nbytes) { block *bp; poolp pool; poolp next; uint size; ... LOCK(); //得到size class index size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT; //直接經過usedpools[size+size],這裏的size不就是咱們上面說的i嗎? pool = usedpools[size + size]; //若是usedpools中有可用的pool if (pool != pool->nextpool) { ... //有可用pool } ... //無可用pool,嘗試獲取empty狀態的pool }
參考: