#include <iostream> #include <vector> #include <list> #include <boost/pool/object_pool.hpp> #include <boost/pool/pool_alloc.hpp> #include <boost/timer/timer.hpp> using namespace std; using namespace boost; const int MAXLENGTH = 100000; class A { public: A() { cout << "Construct: " << endl; } A ( int a ) { cout << "Construct: " << a << endl; } ~A() { cout << "Destruct" << endl; } }; function<void ( void ) > pool_sample = []() { cout << "==============================\n"; boost::object_pool<A> p; A *ptr = p.construct ( 1 ); p.destroy ( ptr ); }; function<void ( void ) > pool_sample_1 = []() { cout << "==============================\n"; boost::object_pool<A> p; A *ptr = p.malloc(); cout << "malloc doesn't invoke constructor and destructor.\n"; ptr = new ( ptr ) A ( 1 ); ptr->~A(); p.free ( ptr ); }; auto test_pool_alloc = []() { cout << "==============================\n"; vector<int, pool_allocator<int>> vec1; vector<int> vec2; { cout << "USE pool_allocator:\n"; boost::timer::auto_cpu_timer t1; for ( int i = 0; i < MAXLENGTH; ++i ) { vec1.push_back ( i ); vec1.pop_back(); } } { cout << "USE STL allocator:\n"; boost::timer::auto_cpu_timer t2; for ( int i = 0; i < MAXLENGTH; ++i ) { vec2.push_back ( i ); vec2.pop_back(); } } }; auto test_fast_pool_alloc = []() { cout << "==============================\n"; list<int, fast_pool_allocator<int>> vec1; list<int> vec2; { cout << "USE fast_pool_allocator:\n"; boost::timer::auto_cpu_timer t1; for ( int i = 0; i < MAXLENGTH; ++i ) { vec1.push_back ( i ); vec1.pop_back(); } } { cout << "USE STL allocator:\n"; boost::timer::auto_cpu_timer t2; for ( int i = 0; i < MAXLENGTH; ++i ) { vec2.push_back ( i ); vec2.pop_back(); } } }; int main() { pool_sample(); pool_sample_1(); test_pool_alloc(); test_fast_pool_alloc(); system ( "pause" ); }
pool會按照一定的增長規則,從操作系統申請一大塊內存,稱爲block,源碼中用PODptr表示。
這個PODptr結構將block分爲三塊,第一塊是大塊數據區,第二塊只有sizeof(void*) 個字節,即指針大小,保存下一個PODptr的指針,第三塊保存下一個PODptr的長度。最後一個PODptr的next指針爲空。
PODptr的數據區被simple_segregated_storage格式化爲許多個小塊,稱爲chunk。一個chunk的大小是定義boost::object_pool
pool::free(ptr)操作就是找到ptr屬於哪一個PODptr,然後把ptr添加到單向鏈表頭。
pool::ordered_free(ptr)找到ptr屬於哪一個PODptr,然後通過插入排序把ptr添加到單向鏈表。
/* Member helper of simple_segregated_storage.
 *
 * A free chunk stores the free-list "next" link in its own first
 * sizeof(void*) bytes. A void* cannot be dereferenced directly (that would
 * yield the forbidden type void), so the chunk address is first viewed as a
 * void** and then dereferenced, producing a mutable reference to the
 * embedded link — usable on both sides of an assignment.
 */
static void *&nextof(void *const ptr)
{
    void **link = static_cast<void **>(ptr);
    return *link;
}
//segregate會把給的一個sz大小的內存塊,拆分爲每一個partition_sz大小的多個chunk單元, //每一個chunk的前4字節指向下一個chunk(做爲鏈表的next),而最後一個chunk頭指向end。 template <typename SizeType> void * simple_segregated_storage<SizeType>::segregate( void * const block, const size_type sz, const size_type partition_sz, void * const end) { //找到最後一個chunk char * old = static_cast<char *>(block) + ((sz - partition_sz) / partition_sz) * partition_sz; nextof(old) = end;//把最後一個chunk指向end if (old == block) return block;//若是這塊內存只有一個chunk就返回 //格式化其餘的chunk,使每一個chunk的前4字節指向下一個chunk for (char * iter = old - partition_sz; iter != block; old = iter, iter -= partition_sz) nextof(iter) = old; nextof(block) = old; return block; } //添加一個block時,會把這該塊分解成chunk,添加到鏈表的頭部。由於無序,因此複雜度O(1) void add_block(void * const block, const size_type nsz, const size_type npartition_sz) { first = segregate(block, nsz, npartition_sz, first); } //經過find_prev找到這個內存塊對應的位置,而後添加進去。複雜度O(n) void add_ordered_block(void * const block, const size_type nsz, const size_type npartition_sz) { void * const loc = find_prev(block); if (loc == 0) add_block(block, nsz, npartition_sz); else nextof(loc) = segregate(block, nsz, npartition_sz, nextof(loc)); } //這個沒什麼好說的,經過比較地址,找到ptr在當前block中的位置,相似插入排序。 template <typename SizeType> void * simple_segregated_storage<SizeType>::find_prev(void * const ptr) { if (first == 0 || std::greater<void *>()(first, ptr)) return 0; void * iter = first; while (true) { if (nextof(iter) == 0 || std::greater<void *>()(nextof(iter), ptr)) return iter; iter = nextof(iter); } } //simple_segregated_storage成員變量。 鏈表頭指針。 void * first;
下段代碼從simple_segregated_storage鏈表中獲取內存:
// Removes n address-contiguous chunks of partition_size bytes from the free
// list and returns a pointer to the first of them, or 0 if no such run
// exists.
template <typename SizeType>
void * simple_segregated_storage<SizeType>::malloc_n(const size_type n,
    const size_type partition_size)
{
  if(n == 0)
    return 0;
  void * start = &first;
  void * iter;
  do
  {
    if (nextof(start) == 0)
      return 0;
    // try_malloc_n scans forward from start (exclusive) for n contiguous
    // chunks of partition_size bytes each; it returns the last chunk of the
    // run, or 0 after advancing start.
    iter = try_malloc_n(start, n, partition_size);
  } while (iter == 0);
  // First chunk of the found run -- this is what the caller receives.
  void * const ret = nextof(start);
  // Classic singly-linked-list removal: link the node before the run to the
  // node after its last chunk, unhooking the whole run from the free list.
  nextof(start) = nextof(iter);
  return ret;
}

// On success, start points at the chunk immediately preceding a run of n
// free, address-contiguous chunks of partition_size bytes, and the last
// chunk of that run is returned. On failure, start is advanced and 0 is
// returned so the caller can retry from there.
template <typename SizeType>
void * simple_segregated_storage<SizeType>::try_malloc_n(
    void * & start, size_type n, const size_type partition_size)
{
  void * iter = nextof(start);
  // Verify the n chunks after start are contiguous in memory.
  while (--n != 0)
  {
    void * next = nextof(iter);
    // If next != iter + partition_size, the following chunk is either in
    // use or we have reached the end of this memory block.
    if (next != static_cast<char *>(iter) + partition_size)
    {
      // next == 0 (end-of-list) or non-contiguous chunk found
      start = iter;
      return 0;
    }
    iter = next;
  }
  return iter;
}
如上圖,類PODptr指示了一個block結構,這個block大小不一定相同,但都由 chunk data + next ptr + next block size 三部分組成。
void * pool<UserAllocator>::malloc_need_resize()
//pool 從simple_segregated_storage派生 template <typename UserAllocator> class pool: protected simple_segregated_storage < typename UserAllocator::size_type >; //返回父類指針以便調用父類函數,其實就是類型轉換 simple_segregated_storage<size_type> & store() { //! \returns pointer to store. return *this; }
在調用pool::malloc只申請一個chunk時,若是有足夠空間,使用父類指針調用malloc返回內存,不然就從新申請一個大block。代碼簡單,就不貼了。
下面代碼是申請n個連續的chunk。若是沒有連續的n個內存就須要從新分配內存了。分配好的內存,經過add_ordered_block添加到chunks的有序鏈表,並經過地址大小把剛申請的block放到PODptr鏈表的排序位置。
// Allocates n * requested_size bytes as a run of contiguous chunks. If the
// free list cannot supply the run, a new block is allocated from the
// system; the unused tail is threaded into the ordered free list, and the
// block itself is inserted at its address-sorted position in the block
// list.
template <typename UserAllocator>
void * pool<UserAllocator>::ordered_malloc(const size_type n)
{ //! Gets address of a chunk n, allocating new memory if not already available.
  //! \returns Address of chunk n if allocated ok.
  //! \returns 0 if not enough memory for n chunks.

  const size_type partition_size = alloc_size();
  // Round the request up to a whole number of chunks.
  const size_type total_req_size = n * requested_size;
  const size_type num_chunks = total_req_size / partition_size +
      ((total_req_size % partition_size) ? true : false);

  void * ret = store().malloc_n(num_chunks, partition_size);

#ifdef BOOST_POOL_INSTRUMENT
  std::cout << "Allocating " << n << " chunks from pool of size " << partition_size << std::endl;
#endif
  if ((ret != 0) || (n == 0))
    return ret;

#ifdef BOOST_POOL_INSTRUMENT
  std::cout << "Cache miss, allocating another chunk...\n";
#endif

  // Not enough memory in our storages; make a new storage,
  BOOST_USING_STD_MAX();
  // Compute the size of the next system allocation -- essentially doubling
  // each time. integer::static_lcm computes the least common multiple, used
  // so the block's trailing next-pointer/next-size fields stay aligned.
  next_size = max BOOST_PREVENT_MACRO_SUBSTITUTION(next_size, num_chunks);
  size_type POD_size = static_cast<size_type>(next_size * partition_size +
      integer::static_lcm<sizeof(size_type), sizeof(void *)>::value + sizeof(size_type));
  char * ptr = (UserAllocator::malloc)(POD_size);
  if (ptr == 0)
  {
     if(num_chunks < next_size)
     {
        // Try again with just enough memory to do the job, or at least whatever we
        // allocated last time:
        next_size >>= 1;
        next_size = max BOOST_PREVENT_MACRO_SUBSTITUTION(next_size, num_chunks);
        POD_size = static_cast<size_type>(next_size * partition_size +
            integer::static_lcm<sizeof(size_type), sizeof(void *)>::value + sizeof(size_type));
        ptr = (UserAllocator::malloc)(POD_size);
     }
     if(ptr == 0)
       return 0;
  }
  const details::PODptr<size_type> node(ptr, POD_size);

  // Split up block so we can use what wasn't requested.
  if (next_size > num_chunks)
    store().add_ordered_block(node.begin() + num_chunks * partition_size,
        node.element_size() - num_chunks * partition_size, partition_size);

  BOOST_USING_STD_MIN();
  if(!max_size)
    next_size <<= 1;
  else if( next_size*partition_size/requested_size < max_size)
    next_size = min BOOST_PREVENT_MACRO_SUBSTITUTION(next_size << 1, max_size*requested_size/ partition_size);

  // insert it into the list,
  // handle border case.
  // Keep the big-block list sorted by address.
  if (!list.valid() || std::greater<void *>()(list.begin(), node.begin()))
  {
    node.next(list);
    list = node;
  }
  else
  {
    details::PODptr<size_type> prev = list;
    while (true)
    {
      // if we're about to hit the end, or if we've found where "node" goes.
      if (prev.next_ptr() == 0
          || std::greater<void *>()(prev.next_ptr(), node.begin()))
        break;
      prev = prev.next();
    }
    node.next(prev.next());
    prev.next(node);
  }
  // and return it.
  return node.begin();
}
下面代碼是釋放未被佔用的塊。(一個block任何一個chunk被佔用就不會釋放)
// Releases every block in which no chunk is allocated (a block with even
// one chunk in use is kept).
template <typename UserAllocator>
bool pool<UserAllocator>::release_memory()
{ //! pool must be ordered. Frees every memory block that doesn't have any allocated chunks.
  //! \returns true if at least one memory block was freed.

  // ret is the return value: it will be set to true when we actually call
  //  UserAllocator::free(..)
  bool ret = false;

  // This is a current & previous iterator pair over the memory block list
  details::PODptr<size_type> ptr = list;
  details::PODptr<size_type> prev;

  // This is a current & previous iterator pair over the free memory chunk list
  //  Note that "prev_free" in this case does NOT point to the previous memory
  //  chunk in the free list, but rather the last free memory chunk before the
  //  current block.
  void * free_p = this->first;
  void * prev_free_p = 0;

  const size_type partition_size = alloc_size();

  // Search through all the all the allocated memory blocks
  while (ptr.valid())
  {
    // At this point:
    //  ptr points to a valid memory block
    //  free_p points to either:
    //    0 if there are no more free chunks
    //    the first free chunk in this or some next memory block
    //  prev_free_p points to either:
    //    the last free chunk in some previous memory block
    //    0 if there is no such free chunk
    //  prev is either:
    //    the PODptr whose next() is ptr
    //    !valid() if there is no such PODptr

    // If there are no more free memory chunks, then every remaining
    //  block is allocated out to its fullest capacity, and we can't
    //  release any more memory
    if (free_p == 0)
      break;

    // We have to check all the chunks. If they are *all* free (i.e., present
    //  in the free list), then we can free the block.
    bool all_chunks_free = true;

    // Iterate 'i' through all chunks in the memory block
    // if free starts in the memory block, be careful to keep it there
    void * saved_free = free_p;
    for (char * i = ptr.begin(); i != ptr.end(); i += partition_size)
    {
      // If this chunk is not free
      if (i != free_p)
      {
        // We won't be able to free this block
        all_chunks_free = false;
        // free_p might have travelled outside ptr
        free_p = saved_free;
        // Abort searching the chunks; we won't be able to free this
        //  block because a chunk is not free.
        break;
      }
      // We do not increment prev_free_p because we are in the same block
      free_p = nextof(free_p);
    }

    // post: if the memory block has any chunks, free_p points to one of them
    // otherwise, our assertions above are still valid

    const details::PODptr<size_type> next = ptr.next();

    if (!all_chunks_free)
    {
      if (is_from(free_p, ptr.begin(), ptr.element_size()))
      {
        std::less<void *> lt;
        void * const end = ptr.end();
        do
        {
          prev_free_p = free_p;
          free_p = nextof(free_p);
        } while (free_p && lt(free_p, end));
      }
      // This invariant is now restored:
      //  free_p points to the first free chunk in some next memory block, or
      //  0 if there is no such chunk.
      //  prev_free_p points to the last free chunk in this memory block.

      // We are just about to advance ptr. Maintain the invariant:
      //  prev is the PODptr whose next() is ptr, or !valid()
      //  if there is no such PODptr
      prev = ptr;
    }
    else
    {
      // All chunks from this block are free

      // Remove block from list
      if (prev.valid())
        prev.next(next);
      else
        list = next;

      // Remove all entries in the free list from this block
      // Key point: once a block is removed, the last free chunk before it is
      //  re-linked to the first free chunk after it, splicing the released
      //  chunks out of the free list.
      if (prev_free_p != 0)
        nextof(prev_free_p) = free_p;
      else
        this->first = free_p;

      // And release memory
      (UserAllocator::free)(ptr.begin());
      ret = true;
    }

    // Increment ptr
    ptr = next;
  }

  next_size = start_size;
  return ret;
}
pool的實現基本就是利用simple_segregated_storage內部實現的維護chunk的鏈表來實現內存管理的。simple_segregated_storage能夠說是pool的核心。pool內部一共維護了兩個鏈表:
details::PODptr<size_type> list;
用來維護一個大塊內存block的鏈表。能夠知道,一個block內部是連續的,但block之間能夠認爲是不連續的內存。這個鏈表至關於一個內存地址索引,主要是爲了提升查找效率:對於有序排列的內存池,歸還內存時,用來快速判斷是屬於哪一個塊的。若是沒有這個鏈表,就須要挨個chunk去判斷地址大小。

class object_pool: protected pool<UserAllocator>;
object_pool繼承自pool,但和pool的區別是,pool用於申請固定大小的內存,而object_pool用於申請固定類型的內存,並會調用構造函數和析構函數。主要的函數就兩個:
調用構造函數,用到了一個placement new的方式,老生常談。
惟一須要注意的是construct和destroy調用的malloc和free,都是調用的 ordered_malloc
和 ordered_free
。
// Overloads taking constructor arguments also exist in the real source:
//   element_type * construct(Arg1 &, ... ArgN &) { ... }
// (The original text garbled this signature with stray backticks; it is not
// compilable code, so it is shown here as a comment.)

// Allocates a chunk from the pool and constructs an element in it with
// placement new. If the constructor throws, the chunk is returned to the
// pool before the exception propagates, so nothing leaks.
element_type * construct()
{
  element_type * const ret = (malloc)();
  if (ret == 0)
    return ret;
  try { new (ret) element_type(); }
  catch (...) { (free)(ret); throw; }
  return ret;
}

// Raw allocation: an ordered malloc on the underlying store, with no
// constructor call. The parentheses around malloc defeat macro expansion.
element_type * malloc BOOST_PREVENT_MACRO_SUBSTITUTION()
{
  return static_cast<element_type *>(store().ordered_malloc());
}
destroy顯式調用析構函數去析構,而後把內存還給鏈表維護。
// Explicitly runs the destructor, then hands the chunk back to the pool's
// ordered free list.
void destroy(element_type * const chunk)
{
  chunk->~T();
  (free)(chunk);
}

// Returns a raw chunk to the underlying store via ordered_free (keeps the
// free list sorted by address); no destructor is invoked here.
void free BOOST_PREVENT_MACRO_SUBSTITUTION(element_type * const chunk)
{
  store().ordered_free(chunk);
}
單例內存池的實現,值得注意的有以下幾點:
#if !defined(BOOST_HAS_THREADS) || defined(BOOST_NO_MT) || defined(BOOST_POOL_NO_MT) typedef null_mutex default_mutex;
static object_creator create_object;
// Excerpt of singleton_pool. The pool instance lives in static aligned raw
// storage and is placement-new'ed lazily by get_pool(); the static
// object_creator instance forces get_pool() to run during static
// initialization, i.e. before main() and before any threads can race on
// the unguarded `f` flag below.
class singleton_pool
{
public:
  ...
private:
    // Raw, suitably-aligned storage for one pool_type object.
    typedef boost::aligned_storage<sizeof(pool_type), boost::alignment_of<pool_type>::value> storage_type;
    static storage_type storage;

    static pool_type& get_pool()
    {
      // NOTE(review): this guard is not itself thread-safe; it relies on the
      //  create_object trick below to run before main().
      static bool f = false;
      if(!f)
      {
        // This code *must* be called before main() starts,
        //  and when only one thread is executing.
        f = true;
        new (&storage) pool_type;
      }

      // The following line does nothing else than force the instantiation
      //  of singleton<T>::create_object, whose constructor is
      //  called before main() begins.
      create_object.do_nothing();

      return *static_cast<pool_type*>(static_cast<void*>(&storage));
    }

    struct object_creator
    {
      object_creator()
      { // This constructor does nothing more than ensure that instance()
        //  is called before main() begins, thus creating the static
        //  T object before multithreading race issues can come up.
        singleton_pool<Tag, RequestedSize, UserAllocator, Mutex, NextSize, MaxSize>::get_pool();
      }
      inline void do_nothing() const
      {
      }
    };
    static object_creator create_object;
};
boost::pool 的源代碼一共就幾個文件,簡潔明瞭,讀起來也不很難。因爲代碼時間遠早於現代C++(C++11以後)成型,兼容編譯器的代碼建議忽略。由於重要的是其設計思想:如何經過自構兩個鏈表來提高內存管理效率的。
數據結構很簡單。適用場景比較狹窄,跟GC無法比。