Aside: my understanding of the Linux kernel is still fairly shallow, so corrections are welcome if anything here is off, and questions and discussion are welcome too!
First, let's understand how each process maintains its own independent address space; my machine, for reference, has 8 GB of RAM. Anyone who has looked into this knows that virtual memory is the technique that solves the problem, but what concrete model does Linux use to meet this operating-system design requirement? Let's start from fragments of the Linux source. (All kernel source below comes from the 3.19.3 kernel of a 64-bit Fedora 21 system.)
<include/linux/mm_types.h> defines struct page, the descriptor for a physical page frame. (Despite the loose habit of calling it "the page table", it is really the per-frame metadata the kernel keeps, not the page table itself.) Most of the helper functions that operate on the individual fields/bits of this structure live in <include/linux/mm.h>.
struct page {
    /* First double word block */
    unsigned long flags;                /* Atomic flags, some possibly
                                         * updated asynchronously */
    union {
        struct address_space *mapping;  /* If low bit clear, points to
                                         * inode address_space, or NULL.
                                         * If page mapped as anonymous
                                         * memory, low bit is set, and
                                         * it points to anon_vma object:
                                         * see PAGE_MAPPING_ANON below.
                                         */
        void *s_mem;                    /* slab first object */
    };

    /* Second double word */
    struct {
        union {
            pgoff_t index;              /* Our offset within mapping. */
            void *freelist;             /* sl[aou]b first free object */
            bool pfmemalloc;            /* If set by the page allocator,
                                         * ALLOC_NO_WATERMARKS was set
                                         * and the low watermark was not
                                         * met implying that the system
                                         * is under some pressure. The
                                         * caller should try ensure
                                         * this page is only used to
                                         * free other pages.
                                         */
        };

        union {
#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
    defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
            /* Used for cmpxchg_double in slub */
            unsigned long counters;
#else
            /*
             * Keep _count separate from slub cmpxchg_double data.
             * As the rest of the double word is protected by
             * slab_lock but _count is not.
             */
            unsigned counters;
#endif

            struct {

                union {
                    /*
                     * Count of ptes mapped in
                     * mms, to show when page is
                     * mapped & limit reverse map
                     * searches.
                     *
                     * Used also for tail pages
                     * refcounting instead of
                     * _count. Tail pages cannot
                     * be mapped and keeping the
                     * tail page _count zero at
                     * all times guarantees
                     * get_page_unless_zero() will
                     * never succeed on tail
                     * pages.
                     */
                    atomic_t _mapcount;

                    struct { /* SLUB */
                        unsigned inuse:16;
                        unsigned objects:15;
                        unsigned frozen:1;
                    };
                    int units;          /* SLOB */
                };
                atomic_t _count;        /* Usage count, see below. */
            };
            unsigned int active;        /* SLAB */
        };
    };

    /* Third double word block */
    union {
        struct list_head lru;           /* Pageout list, eg. active_list
                                         * protected by zone->lru_lock !
                                         * Can be used as a generic list
                                         * by the page owner.
                                         */
        struct {                        /* slub per cpu partial pages */
            struct page *next;          /* Next partial slab */
#ifdef CONFIG_64BIT
            int pages;                  /* Nr of partial slabs left */
            int pobjects;               /* Approximate # of objects */
#else
            short int pages;
            short int pobjects;
#endif
        };

        struct slab *slab_page;         /* slab fields */
        struct rcu_head rcu_head;       /* Used by SLAB
                                         * when destroying via RCU
                                         */
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS
        pgtable_t pmd_huge_pte;         /* protected by page->ptl */
#endif
    };

    /* Remainder is not double word aligned */
    union {
        unsigned long private;          /* Mapping-private opaque data:
                                         * usually used for buffer_heads
                                         * if PagePrivate set; used for
                                         * swp_entry_t if PageSwapCache;
                                         * indicates order in the buddy
                                         * system if PG_buddy is set.
                                         */
#if USE_SPLIT_PTE_PTLOCKS
#if ALLOC_SPLIT_PTLOCKS
        spinlock_t *ptl;
#else
        spinlock_t ptl;
#endif
#endif
        struct kmem_cache *slab_cache;  /* SL[AU]B: Pointer to slab */
        struct page *first_page;        /* Compound tail pages */
    };

#ifdef CONFIG_MEMCG
    struct mem_cgroup *mem_cgroup;
#endif

    /*
     * On machines where all RAM is mapped into kernel address space,
     * we can simply calculate the virtual address. On machines with
     * highmem some memory is mapped into kernel virtual memory
     * dynamically, so we need a place to store that address.
     * Note that this field could be 16 bits on x86 ... ;)
     *
     * Architectures with slow multiplication can define
     * WANT_PAGE_VIRTUAL in asm/page.h
     */
#if defined(WANT_PAGE_VIRTUAL)
    void *virtual;                      /* Kernel virtual address (NULL if
                                           not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */

#ifdef CONFIG_KMEMCHECK
    /*
     * kmemcheck wants to track the status of each byte in a page; this
     * is a pointer to such a status block. NULL if not tracked.
     */
    void *shadow;
#endif

#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
    int _last_cpupid;
#endif
};
The comments inside the struct page definition already explain each field in detail, but a few important ones are worth repeating:
(1) void *virtual: the page's kernel virtual address. (On a 64-bit system a C void * is 64 bits wide, giving a nominal 2^64 address space that far exceeds the physical RAM of today's mainstream machines, around 8 GB or 16 GB; this gap is exactly what makes virtual addressing and swapping possible.) Translating a virtual address into the physical frame behind it goes through the four-level page tables.
(2) pgoff_t index: this field shares a union with freelist; index is the page's offset within its mapping and is used by several modules of the memory-management subsystem, most notably the page cache.
(3) unsigned long flags: a flags field is rarely declared as long, which hints at how much information it packs. It stores the page's status bits, such as locked/unlocked, swapped out (for virtual memory), active, and so on. A short sketch right after this list shows all three of these fields in action.
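To make the three fields above a bit more concrete, here is a hedged, kernel-module-style sketch (the helper names are the standard ones from this kernel generation, written from memory rather than quoted from the tree above): a page-cache page is found by its (mapping, index) pair, a couple of its flags are inspected, and it is given a temporary kernel virtual address via kmap(), since a highmem page has no permanent one.

/* Sketch: page->index, page->flags and the kernel virtual address in action. */
#include <linux/highmem.h>
#include <linux/page-flags.h>
#include <linux/pagemap.h>
#include <linux/printk.h>

static void inspect_cached_page(struct address_space *mapping, pgoff_t index)
{
    struct page *page = find_get_page(mapping, index);  /* looks up by page->index */
    void *vaddr;

    if (!page)
        return;

    if (PageLocked(page))           /* status bits live in page->flags */
        pr_info("page %lu is locked\n", (unsigned long)index);
    if (PageDirty(page))
        pr_info("page %lu is dirty\n", (unsigned long)index);

    vaddr = kmap(page);             /* temporary kernel virtual mapping */
    pr_info("first byte: %02x\n", *(unsigned char *)vaddr);
    kunmap(page);                   /* tear the mapping down again */

    page_cache_release(page);       /* drop the reference taken by find_get_page() */
}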
Before going further into the memory-management machinery, one point is essential: how processes and memory correspond to each other in Linux.
Every process in Linux is described by a PCB (process control block), and that PCB is the task_struct defined in include/linux/sched.h. Inside this structure the following fields are defined:
struct mm_struct *mm, *active_mm;
This is one of the bridges between a process and memory management, and it also shows that the relationship between a process and memory blocks/pages is one-to-many (or many-to-many once shared memory is taken into account). When a process is loaded, what the operating system really does is map only part of the memory described through task_struct into physical memory; pages that are not mapped yet are handled later by demand paging and swapping. Compare this with program loading on Windows: there the loader does the work, parsing the PE executable's binary structures (the import directory, the IAT, and so on) and mapping the image in, and in most cases the executable is mapped at the same preferred base address, 0x00400000 (the vast majority of images land there). In Linux, by contrast, the layout of a process's pages is decided at run time rather than being fixed in the binary, and the buddy allocator hands out the underlying physical page frames in power-of-two blocks, keeping physical memory as tidy as it can. (One thing I am still unclear about: if Linux assigns memory this dynamically, how does it handle dynamically loaded libraries? Windows DLLs are relocated by computing offsets; what does Linux do?) A process starts executing on the pages that are already resident in physical memory; as soon as it jumps to a virtual address whose page is not resident, a page fault is raised, the fault triggers the page-in/swap process, and the program can continue. That is the essence of virtual memory.
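Demand paging is easy to observe from user space. The following is a small, self-contained sketch (a hypothetical demo program, not from the kernel source above): it reserves a large anonymous mapping, which costs almost no physical memory until the pages are actually touched; each first touch causes a minor page fault that the kernel services by wiring in a physical frame.

/* demand_paging.c - hypothetical demo: map a lot, touch a little.
 * Build: gcc -O2 demand_paging.c -o demand_paging */
#include <stdio.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/time.h>

int main(void)
{
    size_t len = 1UL << 30;             /* reserve 1 GiB of virtual address space */
    struct rusage ru;

    char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (p == MAP_FAILED) {
        perror("mmap");
        return 1;
    }

    getrusage(RUSAGE_SELF, &ru);
    printf("after mmap : minor faults = %ld\n", ru.ru_minflt);

    /* Touch one byte in each of the first 1024 pages (assuming 4 KiB pages):
     * every first touch is a minor fault that makes the kernel allocate a frame. */
    for (size_t i = 0; i < 1024; i++)
        p[i * 4096] = 1;

    getrusage(RUSAGE_SELF, &ru);
    printf("after touch: minor faults = %ld\n", ru.ru_minflt);

    munmap(p, len);
    return 0;
}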
struct task_struct {
    volatile long state;        /* -1 unrunnable, 0 runnable, >0 stopped */
    void *stack;
    atomic_t usage;
    unsigned int flags;         /* per process flags, defined below */
    unsigned int ptrace;

#ifdef CONFIG_SMP
    struct llist_node wake_entry;
    int on_cpu;
    struct task_struct *last_wakee;
    unsigned long wakee_flips;
    unsigned long wakee_flip_decay_ts;

    int wake_cpu;
#endif
    int on_rq;

    int prio, static_prio, normal_prio;
    unsigned int rt_priority;
    const struct sched_class *sched_class;
    struct sched_entity se;
    struct sched_rt_entity rt;
#ifdef CONFIG_CGROUP_SCHED
    struct task_group *sched_task_group;
#endif
    struct sched_dl_entity dl;

#ifdef CONFIG_PREEMPT_NOTIFIERS
    /* list of struct preempt_notifier: */
    struct hlist_head preempt_notifiers;
#endif

#ifdef CONFIG_BLK_DEV_IO_TRACE
    unsigned int btrace_seq;
#endif

    unsigned int policy;
    int nr_cpus_allowed;
    cpumask_t cpus_allowed;

#ifdef CONFIG_PREEMPT_RCU
    int rcu_read_lock_nesting;
    union rcu_special rcu_read_unlock_special;
    struct list_head rcu_node_entry;
#endif /* #ifdef CONFIG_PREEMPT_RCU */
#ifdef CONFIG_PREEMPT_RCU
    struct rcu_node *rcu_blocked_node;
#endif /* #ifdef CONFIG_PREEMPT_RCU */
#ifdef CONFIG_TASKS_RCU
    unsigned long rcu_tasks_nvcsw;
    bool rcu_tasks_holdout;
    struct list_head rcu_tasks_holdout_list;
    int rcu_tasks_idle_cpu;
#endif /* #ifdef CONFIG_TASKS_RCU */

#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
    struct sched_info sched_info;
#endif

    struct list_head tasks;
#ifdef CONFIG_SMP
    struct plist_node pushable_tasks;
    struct rb_node pushable_dl_tasks;
#endif

    struct mm_struct *mm, *active_mm;
#ifdef CONFIG_COMPAT_BRK
    unsigned brk_randomized:1;
#endif
    /* per-thread vma caching */
    u32 vmacache_seqnum;
    struct vm_area_struct *vmacache[VMACACHE_SIZE];
#if defined(SPLIT_RSS_COUNTING)
    struct task_rss_stat rss_stat;
#endif
    /* task state */
    int exit_state;
    int exit_code, exit_signal;
    int pdeath_signal;          /* The signal sent when the parent dies */
    unsigned int jobctl;        /* JOBCTL_*, siglock protected */

    /* Used for emulating ABI behavior of previous Linux versions */
    unsigned int personality;

    unsigned in_execve:1;       /* Tell the LSMs that the process is doing an
                                 * execve */
    unsigned in_iowait:1;

    /* Revert to default priority/policy when forking */
    unsigned sched_reset_on_fork:1;
    unsigned sched_contributes_to_load:1;

#ifdef CONFIG_MEMCG_KMEM
    unsigned memcg_kmem_skip_account:1;
#endif

    unsigned long atomic_flags; /* Flags needing atomic access. */

    pid_t pid;
    pid_t tgid;

#ifdef CONFIG_CC_STACKPROTECTOR
    /* Canary value for the -fstack-protector gcc feature */
    unsigned long stack_canary;
#endif
    /*
     * pointers to (original) parent process, youngest child, younger sibling,
     * older sibling, respectively.  (p->father can be replaced with
     * p->real_parent->pid)
     */
    struct task_struct __rcu *real_parent;  /* real parent process */
    struct task_struct __rcu *parent;       /* recipient of SIGCHLD, wait4() reports */
    /*
     * children/sibling forms the list of my natural children
     */
    struct list_head children;          /* list of my children */
    struct list_head sibling;           /* linkage in my parent's children list */
    struct task_struct *group_leader;   /* threadgroup leader */

    /*
     * ptraced is the list of tasks this task is using ptrace on.
     * This includes both natural children and PTRACE_ATTACH targets.
     * p->ptrace_entry is p's link on the p->parent->ptraced list.
     */
    struct list_head ptraced;
    struct list_head ptrace_entry;

    /* PID/PID hash table linkage. */
    struct pid_link pids[PIDTYPE_MAX];
    struct list_head thread_group;
    struct list_head thread_node;

    struct completion *vfork_done;      /* for vfork() */
    int __user *set_child_tid;          /* CLONE_CHILD_SETTID */
    int __user *clear_child_tid;        /* CLONE_CHILD_CLEARTID */

    cputime_t utime, stime, utimescaled, stimescaled;
    cputime_t gtime;
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
    struct cputime prev_cputime;
#endif
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
    seqlock_t vtime_seqlock;
    unsigned long long vtime_snap;
    enum {
        VTIME_SLEEPING = 0,
        VTIME_USER,
        VTIME_SYS,
    } vtime_snap_whence;
#endif
    unsigned long nvcsw, nivcsw;    /* context switch counts */
    u64 start_time;                 /* monotonic time in nsec */
    u64 real_start_time;            /* boot based time in nsec */
    /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
    unsigned long min_flt, maj_flt;

    struct task_cputime cputime_expires;
    struct list_head cpu_timers[3];

    /* process credentials */
    const struct cred __rcu *real_cred; /* objective and real subjective task
                                         * credentials (COW) */
    const struct cred __rcu *cred;      /* effective (overridable) subjective task
                                         * credentials (COW) */
    char comm[TASK_COMM_LEN];   /* executable name excluding path
                                   - access with [gs]et_task_comm (which lock
                                     it with task_lock())
                                   - initialized normally by setup_new_exec */
    /* file system info */
    int link_count, total_link_count;
#ifdef CONFIG_SYSVIPC
    /* ipc stuff */
    struct sysv_sem sysvsem;
    struct sysv_shm sysvshm;
#endif
#ifdef CONFIG_DETECT_HUNG_TASK
    /* hung task detection */
    unsigned long last_switch_count;
#endif
    /* CPU-specific state of this task */
    struct thread_struct thread;
    /* filesystem information */
    struct fs_struct *fs;
    /* open file information */
    struct files_struct *files;
    /* namespaces */
    struct nsproxy *nsproxy;
    /* signal handlers */
    struct signal_struct *signal;
    struct sighand_struct *sighand;

    sigset_t blocked, real_blocked;
    sigset_t saved_sigmask;     /* restored if set_restore_sigmask() was used */
    struct sigpending pending;

    unsigned long sas_ss_sp;
    size_t sas_ss_size;
    int (*notifier)(void *priv);
    void *notifier_data;
    sigset_t *notifier_mask;
    struct callback_head *task_works;

    struct audit_context *audit_context;
#ifdef CONFIG_AUDITSYSCALL
    kuid_t loginuid;
    unsigned int sessionid;
#endif
    struct seccomp seccomp;

    /* Thread group tracking */
    u32 parent_exec_id;
    u32 self_exec_id;
    /* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed,
     * mempolicy */
    spinlock_t alloc_lock;

    /* Protection of the PI data structures: */
    raw_spinlock_t pi_lock;

#ifdef CONFIG_RT_MUTEXES
    /* PI waiters blocked on a rt_mutex held by this task */
    struct rb_root pi_waiters;
    struct rb_node *pi_waiters_leftmost;
    /* Deadlock detection and priority inheritance handling */
    struct rt_mutex_waiter *pi_blocked_on;
#endif

#ifdef CONFIG_DEBUG_MUTEXES
    /* mutex deadlock detection */
    struct mutex_waiter *blocked_on;
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
    unsigned int irq_events;
    unsigned long hardirq_enable_ip;
    unsigned long hardirq_disable_ip;
    unsigned int hardirq_enable_event;
    unsigned int hardirq_disable_event;
    int hardirqs_enabled;
    int hardirq_context;
    unsigned long softirq_disable_ip;
    unsigned long softirq_enable_ip;
    unsigned int softirq_disable_event;
    unsigned int softirq_enable_event;
    int softirqs_enabled;
    int softirq_context;
#endif
#ifdef CONFIG_LOCKDEP
# define MAX_LOCK_DEPTH 48UL
    u64 curr_chain_key;
    int lockdep_depth;
    unsigned int lockdep_recursion;
    struct held_lock held_locks[MAX_LOCK_DEPTH];
    gfp_t lockdep_reclaim_gfp;
#endif

    /* journalling filesystem info */
    void *journal_info;

    /* stacked block device info */
    struct bio_list *bio_list;

#ifdef CONFIG_BLOCK
    /* stack plugging */
    struct blk_plug *plug;
#endif

    /* VM state */
    struct reclaim_state *reclaim_state;

    struct backing_dev_info *backing_dev_info;

    struct io_context *io_context;

    unsigned long ptrace_message;
    siginfo_t *last_siginfo;    /* For ptrace use. */
    struct task_io_accounting ioac;
#if defined(CONFIG_TASK_XACCT)
    u64 acct_rss_mem1;          /* accumulated rss usage */
    u64 acct_vm_mem1;           /* accumulated virtual memory usage */
    cputime_t acct_timexpd;     /* stime + utime since last update */
#endif
#ifdef CONFIG_CPUSETS
    nodemask_t mems_allowed;        /* Protected by alloc_lock */
    seqcount_t mems_allowed_seq;    /* Seqence no to catch updates */
    int cpuset_mem_spread_rotor;
    int cpuset_slab_spread_rotor;
#endif
#ifdef CONFIG_CGROUPS
    /* Control Group info protected by css_set_lock */
    struct css_set __rcu *cgroups;
    /* cg_list protected by css_set_lock and tsk->alloc_lock */
    struct list_head cg_list;
#endif
#ifdef CONFIG_FUTEX
    struct robust_list_head __user *robust_list;
#ifdef CONFIG_COMPAT
    struct compat_robust_list_head __user *compat_robust_list;
#endif
    struct list_head pi_state_list;
    struct futex_pi_state *pi_state_cache;
#endif
#ifdef CONFIG_PERF_EVENTS
    struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
    struct mutex perf_event_mutex;
    struct list_head perf_event_list;
#endif
#ifdef CONFIG_DEBUG_PREEMPT
    unsigned long preempt_disable_ip;
#endif
#ifdef CONFIG_NUMA
    struct mempolicy *mempolicy;    /* Protected by alloc_lock */
    short il_next;
    short pref_node_fork;
#endif
#ifdef CONFIG_NUMA_BALANCING
    int numa_scan_seq;
    unsigned int numa_scan_period;
    unsigned int numa_scan_period_max;
    int numa_preferred_nid;
    unsigned long numa_migrate_retry;
    u64 node_stamp;             /* migration stamp */
    u64 last_task_numa_placement;
    u64 last_sum_exec_runtime;
    struct callback_head numa_work;

    struct list_head numa_entry;
    struct numa_group *numa_group;

    /*
     * numa_faults is an array split into four regions:
     * faults_memory, faults_cpu, faults_memory_buffer, faults_cpu_buffer
     * in this precise order.
     *
     * faults_memory: Exponential decaying average of faults on a per-node
     * basis. Scheduling placement decisions are made based on these
     * counts. The values remain static for the duration of a PTE scan.
     * faults_cpu: Track the nodes the process was running on when a NUMA
     * hinting fault was incurred.
     * faults_memory_buffer and faults_cpu_buffer: Record faults per node
     * during the current scan window. When the scan completes, the counts
     * in faults_memory and faults_cpu decay and these values are copied.
     */
    unsigned long *numa_faults;
    unsigned long total_numa_faults;

    /*
     * numa_faults_locality tracks if faults recorded during the last
     * scan window were remote/local. The task scan period is adapted
     * based on the locality of the faults with different weights
     * depending on whether they were shared or private faults
     */
    unsigned long numa_faults_locality[2];

    unsigned long numa_pages_migrated;
#endif /* CONFIG_NUMA_BALANCING */

    struct rcu_head rcu;

    /*
     * cache last used pipe for splice
     */
    struct pipe_inode_info *splice_pipe;

    struct page_frag task_frag;

#ifdef CONFIG_TASK_DELAY_ACCT
    struct task_delay_info *delays;
#endif
#ifdef CONFIG_FAULT_INJECTION
    int make_it_fail;
#endif
    /*
     * when (nr_dirtied >= nr_dirtied_pause), it's time to call
     * balance_dirty_pages() for some dirty throttling pause
     */
    int nr_dirtied;
    int nr_dirtied_pause;
    unsigned long dirty_paused_when;    /* start of a write-and-pause period */

#ifdef CONFIG_LATENCYTOP
    int latency_record_count;
    struct latency_record latency_record[LT_SAVECOUNT];
#endif
    /*
     * time slack values; these are used to round up poll() and
     * select() etc timeout values. These are in nanoseconds.
     */
    unsigned long timer_slack_ns;
    unsigned long default_timer_slack_ns;

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
    /* Index of current stored address in ret_stack */
    int curr_ret_stack;
    /* Stack of return addresses for return function tracing */
    struct ftrace_ret_stack *ret_stack;
    /* time stamp for last schedule */
    unsigned long long ftrace_timestamp;
    /*
     * Number of functions that haven't been traced
     * because of depth overrun.
     */
    atomic_t trace_overrun;
    /* Pause for the tracing */
    atomic_t tracing_graph_pause;
#endif
#ifdef CONFIG_TRACING
    /* state flags for use by tracers */
    unsigned long trace;
    /* bitmask and counter of trace recursion */
    unsigned long trace_recursion;
#endif /* CONFIG_TRACING */
#ifdef CONFIG_MEMCG
    struct memcg_oom_info {
        struct mem_cgroup *memcg;
        gfp_t gfp_mask;
        int order;
        unsigned int may_oom:1;
    } memcg_oom;
#endif
#ifdef CONFIG_UPROBES
    struct uprobe_task *utask;
#endif
#if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE)
    unsigned int sequential_io;
    unsigned int sequential_io_avg;
#endif
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
    unsigned long task_state_change;
#endif
};
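Of everything in this structure, the pair that matters for our topic is mm and active_mm. One well-known consequence of the split, shown here as a hedged sketch of a standard kernel idiom (not code quoted from the listing above): kernel threads own no user address space of their own, so their mm is NULL and they merely borrow the previous task's address space via active_mm.

/* Sketch: telling user tasks and kernel threads apart by their mm. */
#include <linux/printk.h>
#include <linux/sched.h>

static void report_address_space(struct task_struct *t)
{
    if (t->mm)
        /* user task: t->mm describes its own virtual address space */
        pr_info("%s owns an mm_struct\n", t->comm);
    else
        /* kernel thread: no user address space, it borrows active_mm */
        pr_info("%s is a kernel thread (mm == NULL)\n", t->comm);
}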
Silly question 1:
The MMU is a hardware unit designed specifically to translate virtual addresses into physical addresses, so why does it show up in the Linux source code at all? Why does the mapping have to be described once more in software? The short answer is that the hardware only walks the page tables; it is the kernel that has to build and maintain those tables in memory, which is why the pgd/pud/pmd/pte types appear in the source.
Translating a virtual address into a physical one (at present) takes a walk through four levels of page-table indices. The mm_struct definition contains a pointer of type pgd_t * named pgd (Page Global Directory); starting from it, the walk descends through the pud (Page Upper Directory) and pmd (Page Middle Directory) down to the pte (Page Table Entry), the last level being the actual page table. Unfortunately I have not yet located the definition of pte_t in the 3.19 source, though books describe the last level as pointing at the physical page. (In fact pte_t is defined per architecture, for x86 in arch/x86/include/asm/pgtable_types.h, and an entry stores the physical frame number plus permission/status bits rather than a pointer to a struct page array.)
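To make the four-level walk concrete, here is a hedged sketch of how kernel code typically descends from an mm_struct to the pte for one user virtual address. The helper names are the standard ones of this kernel generation; error handling is reduced to the bare minimum, so treat it as an illustration rather than production code.

/* Sketch: software walk of the four page-table levels for one address. */
#include <linux/mm.h>
#include <asm/pgtable.h>

static pte_t *walk_to_pte(struct mm_struct *mm, unsigned long addr)
{
    pgd_t *pgd = pgd_offset(mm, addr);  /* level 1: Page Global Directory */
    pud_t *pud;
    pmd_t *pmd;

    if (pgd_none(*pgd) || pgd_bad(*pgd))
        return NULL;

    pud = pud_offset(pgd, addr);        /* level 2: Page Upper Directory */
    if (pud_none(*pud) || pud_bad(*pud))
        return NULL;

    pmd = pmd_offset(pud, addr);        /* level 3: Page Middle Directory */
    if (pmd_none(*pmd) || pmd_bad(*pmd))
        return NULL;

    return pte_offset_map(pmd, addr);   /* level 4: the Page Table Entry
                                         * (caller must pte_unmap() it) */
}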
So we can summarize it like this: a running program performs a great many jumps, and every jump takes an address operand, which is a virtual address. The MMU translates that virtual address by walking the page tables; the walk ends at a page-table entry, which first tells us whether the page is already present in physical memory. If it is not, a page-in/swap operation is performed, as described above; once the page has been brought in, the translation can proceed, and the same virtual address now reaches a different physical page, namely the one that was just swapped in.
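In code, the "is the page already in physical memory" decision at the end of the walk boils down to a test on the entry's present bit. A hedged fragment building on the walk sketch above:

/* Sketch: deciding between "resident" and "needs a fault" from the PTE. */
#include <linux/mm.h>

static bool page_is_resident(pte_t *pte)
{
    /* pte_present() tests the present bit: if it is clear, touching the
     * address faults, and the fault handler must bring the page in
     * (allocating a fresh frame or swapping the old contents back in). */
    return pte && pte_present(*pte);
}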
Silly question 2:
The virtual-memory mechanism looks as if physical memory and external storage were given one joint address encoding, and that joint encoding is the virtual address. The "encoding" need not be sequential or one-to-one, but the mapping from virtual addresses onto page-table indices is certainly surjective.
That was my initial understanding of virtual memory, and on the surface it looks fine. But as soon as you consider that each process has its own independent address space, a problem appears: the same address in two different processes should be able to map to different physical locations, yet under a single joint address encoding there would be no way for identical logical addresses to map to different physical addresses.
The answer is actually one simple sentence: every process maintains its own page tables!
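A tiny user-space sketch (a hypothetical demo, not from the kernel source) makes this tangible: after fork(), parent and child print the same virtual address for the same variable, yet writes through that address land in different physical pages, because each process walks its own page tables (copy-on-write does the actual splitting).

/* same_address.c - hypothetical demo: one virtual address, two physical pages.
 * Build: gcc same_address.c -o same_address */
#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

int value = 1;          /* lives at the same virtual address in both processes */

int main(void)
{
    pid_t pid = fork();

    if (pid == 0) {     /* child: write through the shared virtual address */
        value = 42;
        printf("child : &value = %p, value = %d\n", (void *)&value, value);
        return 0;
    }

    wait(NULL);         /* parent: same address, still sees its own copy */
    printf("parent: &value = %p, value = %d\n", (void *)&value, value);
    return 0;
}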
Finally, one big diagram to sum up everything above: