體系結構相關代碼需要在啓動期間創建如下信息:
1.系統中各個內存域的頁幀邊界,保存在max_zone_pfn數組中。
2.各個結點頁幀的分配狀況,保存在全局變量early_node_map中。
從內核版本2.6.10開始提供一個通用的框架,用於將上述信息轉換爲夥伴系統預期的結點和內存域數據結構。在這之前,各個體系結構必須自行創建相關結構。如今,體系結構相關代碼只需要創建前述的簡單結構,將繁重的工作留給free_area_init_nodes即可。圖1給出了該過程概述,圖2給出了free_area_init_nodes的代碼流程圖。
圖1:free_area_init_nodes過程概述
圖2:free_area_init_nodes代碼流程圖
free_area_init_nodes的源代碼的詳細分析如下:
- void __init free_area_init_nodes(unsigned long *max_zone_pfn)
- {
- unsigned long nid;
- enum zone_type i;
-
- /* Sort early_node_map as initialisation assumes it is sorted */
- sort_node_map();//排序使得後續的任務稍微容易些,排序自己並不特別複雜
-
- /* Record where the zone boundaries are */
- memset(arch_zone_lowest_possible_pfn, 0,
- sizeof(arch_zone_lowest_possible_pfn));//全局數組arch_zone_lowest_possible_pfn用來存儲各個內存域可以使用的最低內存頁幀編號
- memset(arch_zone_highest_possible_pfn, 0,
- sizeof(arch_zone_highest_possible_pfn));//全局數組arch_zone_highest_possible_pfn用來存儲各個內存域可以使用的最高內存頁幀編號
- arch_zone_lowest_possible_pfn[0] = find_min_pfn_with_active_regions();//輔助函數find_min_pfn_with_active_regions用於找到註冊的最低內存域中可用的編號最小的頁幀
-
- arch_zone_highest_possible_pfn[0] = max_zone_pfn[0];//max_zone_pfn記錄了各個內存域包含的最大頁幀號
- for (i = 1; i < MAX_NR_ZONES; i++) {//依次遍歷,肯定各個內存域的邊界
- if (i == ZONE_MOVABLE)//因爲ZONE_MOVABLE是一個虛擬內存域,不與真正的硬件內存域關聯,該內存域的邊界老是設置爲0,如後面的代碼所示
- continue;
- arch_zone_lowest_possible_pfn[i] =
- arch_zone_highest_possible_pfn[i-1];//第n個內存域的最小頁幀,即前一個(第n-1個)內存域的最大頁幀
- arch_zone_highest_possible_pfn[i] =
- max(max_zone_pfn[i], arch_zone_lowest_possible_pfn[i]);//不出意外,當前內存域的最大頁幀由max_zone_pfn給出
- }
- arch_zone_lowest_possible_pfn[ZONE_MOVABLE] = 0;
- arch_zone_highest_possible_pfn[ZONE_MOVABLE] = 0;
-
- /* Find the PFNs that ZONE_MOVABLE begins at in each node */
- memset(zone_movable_pfn, 0, sizeof(zone_movable_pfn));
- find_zone_movable_pfns_for_nodes(zone_movable_pfn);//用於計算進入ZONE_MOVABLE的內存數量,詳細分析見下文
-
- /* Print out the zone ranges */
- printk("Zone PFN ranges:\n");
- for (i = 0; i < MAX_NR_ZONES; i++) {//將各個內存域的最大、最小頁幀號顯示出來
- if (i == ZONE_MOVABLE)
- continue;
- printk(" %-8s %8lu -> %8lu\n",
- zone_names[i],
- arch_zone_lowest_possible_pfn[i],
- arch_zone_highest_possible_pfn[i]);
- }
-
- /* Print out the PFNs ZONE_MOVABLE begins at in each node */
- printk("Movable zone start PFN for each node\n");
- for (i = 0; i < MAX_NUMNODES; i++) {
- if (zone_movable_pfn[i])//對每一個結點來講,zone_movable_pfn[node_id]表示ZONE_MOVABLE在movable_zone內存域中所取得內存的起始地址。內核確保這些頁將用於知足符合ZONE_MOVABLE職責的內存分配
- printk(" Node %d: %lu\n", i, zone_movable_pfn[i]);
- }
-
- /* Print out the early_node_map[] */
- printk("early_node_map[%d] active PFN ranges\n", nr_nodemap_entries);
- for (i = 0; i < nr_nodemap_entries; i++)//顯示各個內存域的分配狀況
- printk(" %3d: %8lu -> %8lu\n", early_node_map[i].nid,
- early_node_map[i].start_pfn,
- early_node_map[i].end_pfn);
-
- /* Initialise every node */
- setup_nr_node_ids();
- for_each_online_node(nid) {//代碼遍歷全部的活動結點,並分別對各個結點調用free_area_init_node創建數據結構,該函數須要結點第一個可用的頁幀做爲一個參數,而find_min_pfn_for_node則從early_node_map數組提取該信息
- pg_data_t *pgdat = NODE_DATA(nid);
- free_area_init_node(nid, pgdat, NULL,
- find_min_pfn_for_node(nid), NULL);
-
- /* Any memory on that node */
- if (pgdat->node_present_pages)// 根據node_present_pages字段判斷結點具備內存,則在結點位圖中設置N_HIGH_MEMORY標誌,該標誌只表示結點上存在普通或高端 內存,所以check_for_regular_memory進一步檢查低於ZONE_HIGHMEM的內存域中是否有內存,並據此在結點位圖中相應地設 置N_NORMAL_MEMORY
- node_set_state(nid, N_HIGH_MEMORY);
- check_for_regular_memory(pgdat);
- }
- }
free_area_init_node源代碼詳細分析:
- void __meminit free_area_init_node(int nid, struct pglist_data *pgdat,
- unsigned long *zones_size, unsigned long node_start_pfn,
- unsigned long *zholes_size)
- {
- pgdat->node_id = nid;
- pgdat->node_start_pfn = node_start_pfn;
- calculate_node_totalpages(pgdat, zones_size, zholes_size);//首先累計各個內存域的頁數,計算結點中頁的總數。對連續內存模型而言,這能夠經過zone_sizes_init完成,但calculate_node_totalpages還考慮了內存空洞
-
- alloc_node_mem_map(pgdat);//分配了該節點的頁面描述符數組[pgdat->node_mem_map數組的內存分配]
-
- free_area_init_core(pgdat, zones_size, zholes_size);//對該節點的每一個區[DMA,NORMAL,HIGH]的的結構進行初始化
- }
calculate_node_totalpages源代碼詳細分析:
- static void __meminit calculate_node_totalpages(struct pglist_data *pgdat,
- unsigned long *zones_size, unsigned long *zholes_size)
- {
- unsigned long realtotalpages, totalpages = 0;
- enum zone_type i;
-
- for (i = 0; i < MAX_NR_ZONES; i++)
- totalpages += zone_spanned_pages_in_node(pgdat->node_id, i,
- zones_size);//累計計算各個內存域包含空洞的內存總頁數
- pgdat->node_spanned_pages = totalpages;
-
- realtotalpages = totalpages;
- for (i = 0; i < MAX_NR_ZONES; i++)
- realtotalpages -=
- zone_absent_pages_in_node(pgdat->node_id, i,
- zholes_size)//;以包含空洞的內存總頁數累計減去各個內存域中空洞的數量,就能夠得出實際可用的內存頁數
- pgdat->node_present_pages = realtotalpages;
- printk(KERN_DEBUG "On node %d totalpages: %lu\n", pgdat->node_id,
- realtotalpages);
- }
alloc_node_mem_map源代碼詳細分析:
- static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
- {
- /* Skip empty nodes */
- if (!pgdat->node_spanned_pages)//若是內存結點沒有沒存頁,直接返回
- return;
-
- #ifdef CONFIG_FLAT_NODE_MEM_MAP
- /* ia64 gets its own node_mem_map, before this, without bootmem */
- if (!pgdat->node_mem_map) {//若是尚未爲結點分配mem_map,則須要爲結點分配mem_map
- unsigned long size, start, end;
- struct page *map;
-
- /*
- * The zone's endpoints aren't required to be MAX_ORDER
- * aligned but the node_mem_map endpoints must be in order
- * for the buddy allocator to function correctly.
- */
- start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1);//肯定起點,以MAX_ORDER_NR_PAGES的大小對齊
- end = pgdat->node_start_pfn + pgdat->node_spanned_pages;//計算結束點
- end = ALIGN(end, MAX_ORDER_NR_PAGES);//以MAX_ORDER_NR_PAGES對齊,與上面的功能一致,將內存映射對齊到夥伴系統的最大分配階
- size = (end - start) * sizeof(struct page);//計算所需內存的大小
- map = alloc_remap(pgdat->node_id, size);//爲內存映射分配內存
- if (!map)//若是分配不成功,則使用普通的自舉內存分配器進行分配
- map = alloc_bootmem_node(pgdat, size);
- pgdat->node_mem_map = map + (pgdat->node_start_pfn - start);
- }
- #ifndef CONFIG_NEED_MULTIPLE_NODES
- /*
- * With no DISCONTIG, the global mem_map is just set as node 0's
- */
- if (pgdat == NODE_DATA(0)) {
- mem_map = NODE_DATA(0)->node_mem_map;
- #ifdef CONFIG_ARCH_POPULATES_NODE_MAP
- if (page_to_pfn(mem_map) != pgdat->node_start_pfn)
- mem_map -= (pgdat->node_start_pfn - ARCH_PFN_OFFSET);
- #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
- }
- #endif
- #endif /* CONFIG_FLAT_NODE_MEM_MAP */
- }
free_area_init_core源代碼詳細分析:
- static void __meminit free_area_init_core(struct pglist_data *pgdat,
- unsigned long *zones_size, unsigned long *zholes_size)
- {
- enum zone_type j;
- int nid = pgdat->node_id;
- unsigned long zone_start_pfn = pgdat->node_start_pfn;
- int ret;
-
- pgdat_resize_init(pgdat);
- pgdat->nr_zones = 0;
- init_waitqueue_head(&pgdat->kswapd_wait);
- pgdat->kswapd_max_order = 0;
-
- for (j = 0; j < MAX_NR_ZONES; j++) {
- struct zone *zone = pgdat->node_zones + j;
- unsigned long size, realsize, memmap_pages;
-
- size = zone_spanned_pages_in_node(nid, j, zones_size);//內存域跨域的頁數
- realsize = size - zone_absent_pages_in_node(nid, j,
- zholes_size);//內存域的可用長度,可經過跨域的頁數減去空洞覆蓋的頁數而獲得
-
- /*
- * Adjust realsize so that it accounts for how much memory
- * is used by this zone for memmap. This affects the watermark
- * and per-cpu initialisations
- */
- memmap_pages = (size * sizeof(struct page)) >> PAGE_SHIFT;//用於內存映射須要的頁數
- if (realsize >= memmap_pages) {若是內存域的可用長度大於用於內存映射須要的頁數
- realsize -= memmap_pages;//則將須要映射的頁數分配出去
- printk(KERN_DEBUG
- " %s zone: %lu pages used for memmap\n",
- zone_names[j], memmap_pages);
- } else//不然,顯示警告信息,可用內存不足
- printk(KERN_WARNING
- " %s zone: %lu pages exceeds realsize %lu\n",
- zone_names[j], memmap_pages, realsize);
-
- /* Account for reserved pages */
- if (j == 0 && realsize > dma_reserve) {
- realsize -= dma_reserve;
- printk(KERN_DEBUG " %s zone: %lu pages reserved\n",
- zone_names[0], dma_reserve);
- }//除去用於保留的內存頁
-
- if (!is_highmem_idx(j))
- nr_kernel_pages += realsize;//nr_kernel_pages表示不包含高端內存的系統內存共有的內存頁面數,用於統計全部一致映射的頁
- nr_all_pages += realsize;
-
- zone->spanned_pages = size;//跨域的內存頁
- zone->present_pages = realsize;//通過一系列初始化以後,還可以使用的內存頁
- #ifdef CONFIG_NUMA
- zone->node = nid;
- zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio)/ 100;//這句話不理解,請指教
- zone->min_slab_pages = (realsize * sysctl_min_slab_ratio) / 100;//這句話不理解,請指教
- #endif
- zone->name = zone_names[j];
- spin_lock_init(&zone->lock);//關於鎖機制,本身尚未學到,後面會詳細介紹鎖機制
- spin_lock_init(&zone->lru_lock);
- zone_seqlock_init(zone);
- zone->zone_pgdat = pgdat;
-
- zone->prev_priority = DEF_PRIORITY;
-
- zone_pcp_init(zone);//初始化該內存域的per_cpu緩存
- INIT_LIST_HEAD(&zone->active_list);
- INIT_LIST_HEAD(&zone->inactive_list);
- zone->nr_scan_active = 0;
- zone->nr_scan_inactive = 0;
- zap_zone_vm_stats(zone);
- zone->flags = 0;
- if (!size)
- continue;
-
- set_pageblock_order(pageblock_default_order());
- setup_usemap(pgdat, zone, size);
- ret = init_currently_empty_zone(zone, zone_start_pfn,
- size, MEMMAP_EARLY);//init_currently_empty_zone用於初始化free_area列表,並將屬於該內存域的全部page實例都設置爲初始默認值
- BUG_ON(ret);
- zone_start_pfn += size;
- }
- }
check_for_regular_memory源代碼詳細分析:
- static void check_for_regular_memory(pg_data_t *pgdat)
- {
- #ifdef CONFIG_HIGHMEM
- enum zone_type zone_type;
-
- for (zone_type = 0; zone_type <= ZONE_NORMAL; zone_type++) {//進一步檢查低於ZONE_HIGHMEM的內存域中是否有內存
-
- struct zone *zone = &pgdat->node_zones[zone_type];
- if (zone->present_pages)
- node_set_state(zone_to_nid(zone), N_NORMAL_MEMORY);//並根據上面的檢查在結點位圖中相應地設置N_NORMAL_MEMORY
-
- }
#endif this
- }