應用調試(三)oops
[TOC]
引入
在驅動程序調試中,發生段錯誤後內核會打印出oops信息,包括pc值、寄存器值和棧信息:
Unable to handle kernel paging request at virtual address 56000050 .....
可是我們在應用程序中故意引入一個錯誤(向地址0寫數據)時,只提示段錯誤,沒有其他信息打印,那麼如何打開這個選項呢?
配置內核打開用戶oops
搜索字符串 Unable to handle kernel,可以在 arch\arm\mm\fault.c 中找到:
__do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, struct pt_regs *regs) { ... printk(KERN_ALERT "Unable to handle kernel %s at virtual address %08lx\n", (addr < PAGE_SIZE) ? "NULL pointer dereference" : "paging request", addr); die("Oops", regs, fsr); ... }
也可以搜索一下內核態處理函數的調用位置,看到:
/*
 * do_bad_area - route a fault to the user-mode or kernel-mode handler.
 * @addr: faulting virtual address, passed through to the handler
 * @fsr:  fault status value, passed through to the handler
 * @regs: register state at the time of the fault; user_mode(regs) selects
 *        which handler runs
 *
 * A user-mode fault goes to __do_user_fault() with SIGSEGV/SEGV_MAPERR.
 * Anything else goes to __do_kernel_fault().
 */
void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
	struct task_struct *tsk = current;
	struct mm_struct *mm = tsk->active_mm;

	/*
	 * If we are in kernel mode at this point, we
	 * have no context to handle this fault with.
	 */
	if (user_mode(regs))
		__do_user_fault(tsk, addr, fsr, SIGSEGV, SEGV_MAPERR, regs);
	else
		__do_kernel_fault(mm, addr, fsr, regs);
}
同時在它的下方就能看到打印用戶態oops的代碼:
/*
 * __do_user_fault - report a user-mode fault and send the task a signal.
 * @tsk:  task that faulted
 * @addr: faulting virtual address
 * @fsr:  fault status value, logged and stored as the thread's error code
 * @sig:  signal number to deliver
 * @code: value placed in si_code
 * @regs: register state at the time of the fault
 *
 * When built with CONFIG_DEBUG_USER and the UDBG_SEGV bit is set in
 * user_debug, a diagnostic line, the page-table entry for @addr and the
 * registers are printed first. The signal is sent in every case.
 */
static void
__do_user_fault(struct task_struct *tsk, unsigned long addr,
		unsigned int fsr, unsigned int sig, int code,
		struct pt_regs *regs)
{
	struct siginfo si;

#ifdef CONFIG_DEBUG_USER
	/* Verbose output is opt-in at run time through the user_debug bits. */
	if (user_debug & UDBG_SEGV) {
		printk(KERN_DEBUG "%s: unhandled page fault (%d) at 0x%08lx, code 0x%03x\n",
		       tsk->comm, sig, addr, fsr);
		show_pte(tsk->mm, addr);
		show_regs(regs);
	}
#endif

	/* Record the fault details in the task's thread state. */
	tsk->thread.address = addr;
	tsk->thread.error_code = fsr;
	/* NOTE(review): the meaning of 14 is not visible here - confirm. */
	tsk->thread.trap_no = 14;

	/* Describe the fault in siginfo and force-deliver the signal. */
	si.si_signo = sig;
	si.si_errno = 0;
	si.si_code = code;
	si.si_addr = (void __user *)addr;
	force_sig_info(sig, &si, tsk);
}
從代碼上可以看到需要滿足兩個條件:
- 需要打開 CONFIG_DEBUG_USER 配置
- 需要設置 user_debug 變量
CONFIG_DEBUG_USER
搜索內核配置項 DEBUG_USER,發現已經打開了:
│ Symbol: DEBUG_USER [=y] │ │ Prompt: Verbose user fault messages │ │ Defined at arch/arm/Kconfig.debug:18 │ │ Location: │ │ -> Kernel hacking
user_debug
在 arch\arm\kernel\traps.c 中搜索這個變量,可以看到如下代碼,很明顯可以通過啓動參數來設置它:
#ifdef CONFIG_DEBUG_USER
/* Bit mask tested by the fault handlers (e.g. user_debug & UDBG_SEGV). */
unsigned int user_debug;

/*
 * user_debug_setup - parse the value of the "user_debug=" boot parameter.
 * @str: text following "user_debug=" on the kernel command line
 *
 * Stores the parsed value in user_debug via get_option(). Always returns 1.
 */
static int __init user_debug_setup(char *str)
{
	get_option(&str, &user_debug);
	return 1;
}
/* Bind the handler to the "user_debug=" command-line prefix. */
__setup("user_debug=", user_debug_setup);
#endif
設置啓動參數
set bootargs noinitrd root=/dev/mtdblock3 init=/linuxrc console=ttySAC0 user_debug=0xff
boot
mount -t nfs -o nolock,vers=2 192.168.95.222:/home/book/stu /mnt
運行出錯的程序,可以看到打印出了寄存器值和pc值:
# ./test_debug a = 0x12 pgd = c2cac000 [00000000] *pgd=32cca031, *pte=00000000, *ppte=00000000 Pid: 792, comm: test_debug CPU: 0 Not tainted (2.6.22.6 #9) PC is at 0x84ac LR is at 0x84d0 pc : [<000084ac>] lr : [<000084d0>] psr: 60000010 sp : bed7ae40 ip : bed7ae54 fp : bed7ae50 r10: 4013365c r9 : 00000000 r8 : 00008514 r7 : 00000001 r6 : 000085cc r5 : 00008568 r4 : bed7aec4 r3 : 00000012 r2 : 00000000 r1 : 00001000 r0 : 00000000 Flags: nZCv IRQs on FIQs on Mode USER_32 Segment user Control: c000717f Table: 32cac000 DAC: 00000015 [<c002cd1c>] (show_regs+0x0/0x4c) from [<c0031a98>] (__do_user_fault+0x5c/0xa4) r4:c06c30a0 [<c0031a3c>] (__do_user_fault+0x0/0xa4) from [<c0031d38>] (do_page_fault+0x1dc/0x20c) r7:c0026520 r6:c3333860 r5:c06c30a0 r4:ffffffec [<c0031b5c>] (do_page_fault+0x0/0x20c) from [<c002b224>] (do_DataAbort+0x3c/0xa0) [<c002b1e8>] (do_DataAbort+0x0/0xa0) from [<c002be48>] (ret_from_exception+0x0/0x10) Exception stack(0xc2ca1fb0 to 0xc2ca1ff8) 1fa0: 00000000 00001000 00000000 00000012 1fc0: bed7aec4 00008568 000085cc 00000001 00008514 00000000 4013365c bed7ae50 1fe0: bed7ae54 bed7ae40 000084d0 000084ac 60000010 ffffffff r8:00008514 r7:00000001 r6:000085cc r5:00008568 r4:c039bfc8 Segmentation fault
這裏其實是有 stack 的,但這個棧實際上是寄存器的值而非程序的棧,可以在notepad++上雙擊數字,通過高亮看到對應關係:
Exception stack(0xc2ca1fb0 to 0xc2ca1ff8) r0 1fa0: 00000000 00001000 00000000 00000012 1fc0: bed7aec4 00008568 000085cc 00000001 00008514 00000000 4013365c bed7ae50 psr 1fe0: bed7ae54 bed7ae40 000084d0 000084ac 60000010 ffffffff r8:00008514 r7:00000001 r6:000085cc r5:00008568 r4:c039bfc8
打印用戶堆棧
可以看到內核態打印棧的調用過程:
__do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, struct pt_regs *regs) >die("Oops", regs, fsr); >show_stack_log_lvl(current, regs->sp, regs, KERN_EMERG) >dump_mem("Stack: ", log_lvl, sp,THREAD_SIZE + (unsigned long)tinfo); >show_trace_log_lvl(tsk, (unsigned long *)sp, regs, log_lvl);
試了下直接調用內核的打印棧的方式並不能成功,哈哈
添加代碼爲 { printk("Stack by die: \n"); die("Oops", regs, fsr); } 只會提示這個 Stack by die: Internal error: Oops: 817 [#1]
我們這裏就手動把用戶態的棧複製出來打印好了,更改後的代碼如下:
static void __do_user_fault(struct task_struct *tsk, unsigned long addr, unsigned int fsr, unsigned int sig, int code, struct pt_regs *regs) { struct siginfo si; #ifdef CONFIG_DEBUG_USER if (user_debug & UDBG_SEGV) { printk(KERN_DEBUG "%s: unhandled page fault (%d) at 0x%08lx, code 0x%03x\n", tsk->comm, sig, addr, fsr); show_pte(tsk->mm, addr); show_regs(regs); } #endif ///////////////////////////////////////////////////////////////////////////// if(0) { printk("Stack by die: \n"); die("Oops", regs, fsr); } if (1) { unsigned long i=0,val=0; printk("Stack: \n"); while(i<1024) { /* copy_from_user()只是用來檢測該地址是否有效,若有效,便獲取地址數據,不然break */ if(copy_from_user(&val, (const void __user *)(regs->ARM_sp+i*4), 4)) break; printk("%08x ",val); i++; if(i%8==0) printk("\n"); } printk("\n END of Stack\n"); } ///////////////////////////////////////////////////////////////////////////// tsk->thread.address = addr; tsk->thread.error_code = fsr; tsk->thread.trap_no = 14; si.si_signo = sig; si.si_errno = 0; si.si_code = code; si.si_addr = (void __user *)addr; force_sig_info(sig, &si, tsk); }
可以看到已經打印出棧了:
# /mnt/code/test_debug a = 0x12 pgd = c2cc4000 [00000000] *pgd=32c82031, *pte=00000000, *ppte=00000000 Pid: 789, comm: test_debug CPU: 0 Not tainted (2.6.22.6 #6) PC is at 0x84ac LR is at 0x84d0 pc : [<000084ac>] lr : [<000084d0>] psr: 60000010 sp : be87ee50 ip : be87ee64 fp : be87ee60 r10: 4013365c r9 : 00000000 r8 : 00008514 r7 : 00000001 r6 : 000085cc r5 : 00008568 r4 : be87eed4 r3 : 00000012 r2 : 00000000 r1 : 00001000 r0 : 00000000 Flags: nZCv IRQs on FIQs on Mode USER_32 Segment user Control: c000717f Table: 32cc4000 DAC: 00000015 [<c002cd1c>] (show_regs+0x0/0x4c) from [<c0031a9c>] (__do_user_fault+0x60/0x148) r4:c04a8800 [<c0031a3c>] (__do_user_fault+0x0/0x148) from [<c0031ddc>] (do_page_fault+0x1dc/0x20c) [<c0031c00>] (do_page_fault+0x0/0x20c) from [<c002b224>] (do_DataAbort+0x3c/0xa0) [<c002b1e8>] (do_DataAbort+0x0/0xa0) from [<c002be48>] (ret_from_exception+0x0/0x10) Exception stack(0xc2cb1fb0 to 0xc2cb1ff8) 1fa0: 00000000 00001000 00000000 00000012 1fc0: be87eed4 00008568 000085cc 00000001 00008514 00000000 4013365c be87ee60 1fe0: be87ee64 be87ee50 000084d0 000084ac 60000010 ffffffff r8:00008514 r7:00000001 r6:000085cc r5:00008568 r4:c039bfc8 Stack: 00000000 be87ee74 be87ee64 000084d0 000084a0 00000000 be87ee88 be87ee78 000084f0 000084c4 00000000 be87eea8 be87ee8c 00008554 000084e4 00000000 00000012 be87eed4 00000001 00000000 be87eeac 40034f14 00008524 00000000 00000000 0000839c 00000000 00000000 4001d594 000083c4 000085cc 4000c02c be87eed4 be87ef7f 00000000 be87ef94 be87ef9e be87efa5 be87efb0 be87efd3 be87efe1 00000000 00000010 00000003 00000006 00001000 00000011 00000064 00000003 00008034 00000004 00000020 00000005 00000006 00000007 40000000 00000008 00000000 00000009 0000839c 0000000b 00000000 0000000c 00000000 0000000d 00000000 0000000e 00000000 00000017 00000000 0000000f be87ef7b 00000000 00000000 76000000 2f006c34 2f746e6d 65646f63 7365742f 65645f74 00677562 52455355 6f6f723d 4f480074 2f3d454d 52455400 74763d4d 00323031 48544150 62732f3d 2f3a6e69 
2f727375 6e696273 69622f3a 752f3a6e 622f7273 53006e69 4c4c4548 69622f3d 68732f6e 44575000 2f002f3d 2f746e6d 65646f63 7365742f 65645f74 00677562 00000000 END of Stack Segmentation fault
分析棧
同樣可以先反彙編:arm-linux-objdump -D test_debug > test_debug.dis
PC is at 0x84ac LR is at 0x84d0 Stack: 00000000 be87ee74 be87ee64 000084d0 000084a0 00000000 be87ee88 be87ee78 C_sp ldr ↑ B_sp 000084f0 000084c4 00000000 be87eea8 be87ee8c 00008554 000084e4 00000000 ldr ↑ A_sp ldr ↑ main_sp 00000012 be87eed4 00000001 00000000 be87eeac 40034f14 00008524 00000000 ldr ↑ 00000000 0000839c 00000000 00000000 4001d594 000083c4 000085cc 4000c02c be87eed4 be87ef7f 00000000 be87ef94 be87ef9e be87efa5 be87efb0 be87efd3
這裏的main函數最後會返回到 40034f14,這個地址位於動態庫中。動態庫不太好分析,我們重新編譯爲靜態鏈接,分析一下main被誰調用。
main的調用
使用動態庫時,可以查看 /proc/<pid>/maps 看到程序中各動態庫的加載地址。可是這個程序直接段錯誤退出了,所以無法查看,可以先看一下別的進程(例如shell)的:
# cat /proc/772/maps 00008000-000bf000 r-xp 00000000 1f:03 646 /bin/busybox 000c7000-000c8000 rw-p 000b7000 1f:03 646 /bin/busybox 000c8000-000ec000 rwxp 000c8000 00:00 0 [heap] 40000000-40015000 r-xp 00000000 1f:03 733 /lib/ld-2.3.6.so 40015000-40017000 rw-p 40015000 00:00 0 4001d000-4001e000 rw-p 00015000 1f:03 733 /lib/ld-2.3.6.so 4001e000-40023000 r-xp 00000000 1f:03 691 /lib/libcrypt-2.3.6.so 40023000-4002a000 ---p 00005000 1f:03 691 /lib/libcrypt-2.3.6.so 4002a000-4002b000 rw-p 00004000 1f:03 691 /lib/libcrypt-2.3.6.so 4002b000-40052000 rw-p 4002b000 00:00 0 40052000-400f9000 r-xp 00000000 1f:03 708 /lib/libm-2.3.6.so 400f9000-40101000 ---p 000a7000 1f:03 708 /lib/libm-2.3.6.so 40101000-40102000 rw-p 000a7000 1f:03 708 /lib/libm-2.3.6.so 40102000-4020d000 r-xp 00000000 1f:03 734 /lib/libc-2.3.6.so 4020d000-40215000 ---p 0010b000 1f:03 734 /lib/libc-2.3.6.so 40215000-40219000 rw-p 0010b000 1f:03 734 /lib/libc-2.3.6.so 40219000-4021b000 rw-p 40219000 00:00 0 bed05000-bed1a000 rwxp bed05000 00:00 0 [stack]
那麼我們可以靜態編譯這個文件,看看main被誰調用:
arm-linux-gcc -static -o test_debug_static test_debug.c arm-linux-objdump -D test_debug_static > test_debug_static.dis
運行後打印了錯誤信息
# /mnt/code/test_debug_static <Physical Layer error> <Physical Layer error> a = 0x12 pgd = c2cd4000 [00000000] *pgd=32c94031, *pte=00000000, *ppte=00000000 Pid: 789, comm: test_debug_stat CPU: 0 Not tainted (2.6.22.6 #6) PC is at 0x81e0 LR is at 0x8204 pc : [<000081e0>] lr : [<00008204>] psr: 60000010 ..... Stack: 00000000 beeeac94 beeeac84 00008204 000081d4 00000000 beeeaca8 beeeac98 C_Sp ldr B_Sp 00008224 000081f8 00000000 beeeacc8 beeeacac 00008288 00008218 00000000 ldr A_Sp ldr main_Sp 00000012 beeeaec4 00000001 00000000 beeeaccc 000084ac 00008258 756e694c ldr ↑ 00000078 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 6f6e2800 0029656e 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 2e320000 32322e36 0000362e 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 23000000 61532036 614a2074 3931206e 3a333120 303a3630 53432032 30322054 00003931 ....... END of Stack Segmentation fault
跳過一些棧,直接看到
00008248 <main>: 能夠看到main的返回 000084ac 0000829c <__libc_start_main>: 84a4: e1a0e00f mov lr, pc ;這個就是main 84ac: eb0000fc bl 88a4 <exit>
也就是最終的關係是
__libc_start_main > main > exit