1、實驗部分:分析Linux內核建立一個新進程的過程。
【第一部分】 根據要求完成第一部分,步驟如下:
cd LinuxKernel
rm menu -rf //強制刪除
git clone https://github.com/mengning/menu.git //將menu更新
cd menu
mv test_fork.c test.c //更新test.c
make rootfs //運行腳本,自動編譯並自動生成根文件系統,同時啓動;輸入fork命令後,子進程和父進程都會輸出
qemu -kernel linux-3.18.6/arch/x86/boot/bzImage -initrd rootfs.img -s -S
gdb
file linux-3.18.6/vmlinux
target remote:1234
b sys_clone
b do_fork
b dup_task_struct
b copy_process
b copy_thread
b ret_from_fork
【第二部分】分析代碼
/*
 * fork() demonstration.
 *
 * Calls fork() once and then branches on its return value:
 *   - negative: the fork failed; report it on stderr and exit with -1;
 *   - zero:     this is the child; print a message and finish;
 *   - positive: this is the parent; print a message, wait for a child
 *               to terminate, then print a completion message.
 *
 * argc and argv are accepted but not used.
 */
int main(int argc, char * argv[])
{
    /* fork another process */
    int fork_result = fork();

    /* error occurred */
    if (fork_result < 0) {
        fprintf(stderr,"Fork Failed!");
        exit(-1);
    }

    /* child process */
    if (fork_result == 0) {
        printf("This is Child Process!\n");
        return 0;
    }

    /* parent process */
    printf("This is Parent Process!\n");
    /* parent will wait for the child to complete */
    wait(NULL);
    printf("Child Complete!\n");
    return 0;
}
/*
 * fork, vfork and clone system call entry points.
 *
 * All three are thin wrappers that forward to do_fork(); they differ only
 * in the flags and arguments they pass.
 */

/*
 * fork: calls do_fork() with SIGCHLD as the only flag and every other
 * argument zero/NULL. Without CONFIG_MMU it returns -EINVAL instead.
 */
#ifdef __ARCH_WANT_SYS_FORK
SYSCALL_DEFINE0(fork)
{
#ifdef CONFIG_MMU
	return do_fork(SIGCHLD, 0, 0, NULL, NULL);
#else
	/* can not support in nommu mode */
	return -EINVAL;
#endif
}
#endif

/*
 * vfork: same as fork, but additionally sets CLONE_VFORK and CLONE_VM.
 */
#ifdef __ARCH_WANT_SYS_VFORK
SYSCALL_DEFINE0(vfork)
{
	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, 0, NULL, NULL);
}
#endif

/*
 * clone: the caller supplies the flags, the new stack pointer and the two
 * tid pointers. The order (and number) of the parameters depends on which
 * CONFIG_CLONE_BACKWARDS* option is set; the body is shared by all
 * variants. Only clone_flags, newsp, parent_tidptr and child_tidptr are
 * forwarded to do_fork(); stack_size is always passed as 0.
 */
#ifdef __ARCH_WANT_SYS_CLONE
#ifdef CONFIG_CLONE_BACKWARDS
SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
		 int __user *, parent_tidptr,
		 int, tls_val,
		 int __user *, child_tidptr)
#elif defined(CONFIG_CLONE_BACKWARDS2)
SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags,
		 int __user *, parent_tidptr,
		 int __user *, child_tidptr,
		 int, tls_val)
#elif defined(CONFIG_CLONE_BACKWARDS3)
SYSCALL_DEFINE6(clone, unsigned long, clone_flags, unsigned long, newsp,
		 int, stack_size,
		 int __user *, parent_tidptr,
		 int __user *, child_tidptr,
		 int, tls_val)
#else
SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
		 int __user *, parent_tidptr,
		 int __user *, child_tidptr,
		 int, tls_val)
#endif
{
	return do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr);
}
#endif
/*
 * do_fork - common implementation behind the fork, vfork and clone
 * system calls.
 *
 * @clone_flags:   CLONE_* flags combined with the exit signal (CSIGNAL bits)
 * @stack_start:   passed through unchanged to copy_process()
 * @stack_size:    passed through unchanged to copy_process()
 * @parent_tidptr: user pointer that receives the new pid number when
 *                 CLONE_PARENT_SETTID is set
 * @child_tidptr:  passed through unchanged to copy_process()
 *
 * Returns the value of pid_vnr() for the new task on success, or the
 * error encoded in the pointer returned by copy_process() on failure.
 */
long do_fork(unsigned long clone_flags,
	      unsigned long stack_start,
	      unsigned long stack_size,
	      int __user *parent_tidptr,
	      int __user *child_tidptr)
{
	struct task_struct *p;	/* task descriptor of the new process */
	int trace = 0;
	long nr;		/* return value: new pid number or error code */

	/*
	 * Determine whether and which event to report to ptracer.  When
	 * called from kernel_thread or CLONE_UNTRACED is explicitly
	 * requested, no event is reported; otherwise, report if the event
	 * for the type of forking is enabled.
	 */
	if (!(clone_flags & CLONE_UNTRACED)) {
		if (clone_flags & CLONE_VFORK)
			trace = PTRACE_EVENT_VFORK;
		else if ((clone_flags & CSIGNAL) != SIGCHLD)
			trace = PTRACE_EVENT_CLONE;
		else
			trace = PTRACE_EVENT_FORK;

		if (likely(!ptrace_event_enabled(current, trace)))
			trace = 0;
	}

	/*
	 * Copy the process descriptor; copy_process() returns a pointer to
	 * the newly created task_struct (or an error pointer).
	 */
	p = copy_process(clone_flags, stack_start, stack_size,
			 child_tidptr, NULL, trace);
	/*
	 * Do this prior waking up the new thread - the thread pointer
	 * might get invalid after that point, if the thread exits quickly.
	 */
	if (!IS_ERR(p)) {
		struct completion vfork;
		struct pid *pid;

		trace_sched_process_fork(current, p);

		/* Fetch the pid stored in the new task structure. */
		pid = get_task_pid(p, PIDTYPE_PID);
		nr = pid_vnr(pid);

		if (clone_flags & CLONE_PARENT_SETTID)
			put_user(nr, parent_tidptr);

		/*
		 * For vfork a completion is set up here, before the child is
		 * woken, so that the parent can wait on it below and only
		 * continue after the child.
		 */
		if (clone_flags & CLONE_VFORK) {
			p->vfork_done = &vfork;
			init_completion(&vfork);
			get_task_struct(p);
		}

		/*
		 * Hand the child to the scheduler's queue so that it gets a
		 * chance to run on a CPU.
		 */
		wake_up_new_task(p);

		/* forking complete and child started to run, tell ptracer */
		if (unlikely(trace))
			ptrace_event_pid(trace, pid);

		/*
		 * With CLONE_VFORK the parent is put on a wait queue and
		 * suspended until the child releases its memory space, which
		 * guarantees the child runs before the parent.
		 */
		if (clone_flags & CLONE_VFORK) {
			if (!wait_for_vfork_done(p, &vfork))
				ptrace_event_pid(PTRACE_EVENT_VFORK_DONE, pid);
		}

		put_pid(pid);
	} else {
		nr = PTR_ERR(p);
	}
	return nr;
}
/* 建立進程描述符以及子進程所須要的其餘全部數據結構 爲子進程準備運行環境 */ static struct task_struct *copy_process(unsigned long clone_flags, unsigned long stack_start, unsigned long stack_size, int __user *child_tidptr, struct pid *pid, int trace) { ... int retval; struct task_struct *p; ... // 分配一個新的task_struct,此時的p與當前進程的task,僅僅是stack地址不一樣 p = dup_task_struct(current); if (!p) goto fork_out; ··· retval = -EAGAIN; // 檢查該用戶的進程數是否超過限制 if (atomic_read(&p->real_cred->user->processes) >= task_rlimit(p, RLIMIT_NPROC)) { // 檢查該用戶是否具備相關權限,不必定是root if (p->real_cred->user != INIT_USER && !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) goto bad_fork_free; } current->flags &= ~PF_NPROC_EXCEEDED; retval = copy_creds(p, clone_flags); if (retval < 0) goto bad_fork_free; /* * If multiple threads are within copy_process(), then this check * triggers too late. This doesn't hurt, the check is only there * to stop root fork bombs. */ retval = -EAGAIN; // 檢查進程數量是否超過 max_threads,後者取決於內存的大小 if (nr_threads >= max_threads) goto bad_fork_cleanup_count; if (!try_module_get(task_thread_info(p)->exec_domain->module)) goto bad_fork_cleanup_count; delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER); // 代表子進程尚未調用exec系統調用 p->flags |= PF_FORKNOEXEC; INIT_LIST_HEAD(&p->children); INIT_LIST_HEAD(&p->sibling); rcu_copy_process(p); p->vfork_done = NULL; // 初始化自旋鎖 spin_lock_init(&p->alloc_lock); // 初始化掛起信號 init_sigpending(&p->pending); // 初始化定時器 p->utime = p->stime = p->gtime = 0; p->utimescaled = p->stimescaled = 0; #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE p->prev_cputime.utime = p->prev_cputime.stime = 0; #endif #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN seqlock_init(&p->vtime_seqlock); p->vtime_snap = 0; p->vtime_snap_whence = VTIME_SLEEPING; #endif ... #ifdef CONFIG_DEBUG_MUTEXES p->blocked_on = NULL; /* not blocked yet */ #endif #ifdef CONFIG_BCACHE p->sequential_io = 0; p->sequential_io_avg = 0; #endif /* Perform scheduler related setup. Assign this task to a CPU. 
*/ // 完成對新進程調度程序數據結構的初始化,並把新進程的狀態設置爲TASK_RUNNING // 同時將thread_info中得preempt_count置爲1,禁止內核搶佔 retval = sched_fork(clone_flags, p); if (retval) goto bad_fork_cleanup_policy; retval = perf_event_init_task(p); if (retval) goto bad_fork_cleanup_policy; retval = audit_alloc(p); if (retval) goto bad_fork_cleanup_perf; /* copy all the process information */ // 複製全部的進程信息 shm_init_task(p); retval = copy_semundo(clone_flags, p); if (retval) goto bad_fork_cleanup_audit; retval = copy_files(clone_flags, p); if (retval) goto bad_fork_cleanup_semundo; ... // 初始化子進程的內核棧 retval = copy_thread(clone_flags, stack_start, stack_size, p); if (retval) goto bad_fork_cleanup_io; if (pid != &init_struct_pid) { retval = -ENOMEM; // 這裏爲子進程分配了新的pid號 pid = alloc_pid(p->nsproxy->pid_ns_for_children); if (!pid) goto bad_fork_cleanup_io; } ... // 清除子進程thread_info結構的 TIF_SYSCALL_TRACE,防止 ret_from_fork將系統調用消息通知給調試進程 clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE); #ifdef TIF_SYSCALL_EMU clear_tsk_thread_flag(p, TIF_SYSCALL_EMU); #endif clear_all_latency_tracing(p); /* ok, now we should be set up.. */ // 設置子進程的pid p->pid = pid_nr(pid); // 若是是建立線程 if (clone_flags & CLONE_THREAD) { p->exit_signal = -1; // 線程組的leader設置爲當前線程的leader p->group_leader = current->group_leader; // tgid是當前線程組的id,也就是main進程的pid p->tgid = current->tgid; } else { if (clone_flags & CLONE_PARENT) p->exit_signal = current->group_leader->exit_signal; else p->exit_signal = (clone_flags & CSIGNAL); // 建立的是進程,本身是一個單獨的線程組 p->group_leader = p; // tgid和pid相同 p->tgid = p->pid; } ... if (likely(p->pid)) { ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); init_task_pid(p, PIDTYPE_PID, pid); if (thread_group_leader(p)) { ... // 將pid加入散列表 attach_pid(p, PIDTYPE_PGID); attach_pid(p, PIDTYPE_SID); __this_cpu_inc(process_counts); } else { ... 
} // 將pid加入PIDTYPE_PID這個散列表 attach_pid(p, PIDTYPE_PID); // 遞增 nr_threads的值 nr_threads++; } total_forks++; spin_unlock(&current->sighand->siglock); syscall_tracepoint_update(p); write_unlock_irq(&tasklist_lock); ... // 返回被建立的task結構體指針 return p; ...
static struct task_struct *dup_task_struct(struct task_struct *orig) { struct task_struct *tsk; struct thread_info *ti; int node = tsk_fork_get_node(orig); int err; //分配一個 task_struct 節點 tsk = alloc_task_struct_node(node); if (!tsk) return NULL; //分配一個 thread_info 節點,包含進程的內核棧,ti 爲棧底 ti = alloc_thread_info_node(tsk, node); if (!ti) goto free_tsk; //將棧底的值賦給新節點的棧 tsk->stack = ti; //…… return tsk; }
總結 |
2、讀書筆記
【第一部分】 定時器和時間管理
#define HZ 1000 //內核時間頻率
unsigned long delay = jiffies + 2*HZ ; //延遲2秒;延遲時間必須是節拍的整數倍
while(time_before(jiffies,delay))
    ;
void udelay(unsigned long usecs) void mdelay(unsigned long msecs)
【第二部分】內存管理:虛擬內存機制