關鍵詞:/dev/mem、/dev/kmem、mmap、__va、__pa、remap_pfn_range等等。
在平常工作中常有直接操作寄存器或者某一物理地址的需求,busybox中提供了devmem,通過它可以讀寫物理內存。
它的實現藉助mmap和/dev/mem:通過mmap將/dev/mem中的物理地址映射到用戶空間,devmem就可以像操作虛擬地址一樣進行讀寫。
hexdump同樣也可以實現類似devmem的功能。
如果需要在用戶空間獲取內核某個變量的值,可以使用devkmem通過/dev/kmem進行。
下面分別介紹這三種工具。
用戶空間是無法直接操作物理地址的;但是平常工作中常需要對某一物理地址進行讀寫,尤其是寄存器。
devmem可以實現這個功能。那麼devmem做了什麼?/dev/mem在內核中又是如何實現的呢?
devmem使用介紹如下:
BusyBox v1.27.2 (2019-04-16 17:00:28 CST) multi-call binary.

Usage: devmem ADDRESS [WIDTH [VALUE]]

Read/write from physical address

	ADDRESS	Address to act upon
	WIDTH	Width (8/16/...)
	VALUE	Data to be written
devmem的能力有限,一次只能讀寫最大64位(8字節)的數據。
下面向0xfc20700這個地址寫入32位數據0x12345678:
devmem 0xfc20700 32 0x12345678
然後從0xfc20700讀取進行驗證(讀取時不帶VALUE參數,第二行是命令的輸出):
devmem 0xfc20700 32
0x12345678
從下面的代碼可知,devmem先解析參數,然後將地址轉換成頁面對齊的地址。mmap將/dev/mem中該地址所在的頁面映射到用戶空間,然後在映射區域內按頁內偏移讀寫數值。
int devmem_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; int devmem_main(int argc UNUSED_PARAM, char **argv) { void *map_base, *virt_addr; uint64_t read_result; uint64_t writeval = writeval; /* for compiler */ off_t target; unsigned page_size, mapped_size, offset_in_page; int fd; unsigned width = 8 * sizeof(int); /* ADDRESS */ if (!argv[1]) bb_show_usage(); errno = 0; target = bb_strtoull(argv[1], NULL, 0); /* allows hex, oct etc */---------------第一個參數是地址 /* WIDTH */ if (argv[2]) {------------------------------------------------------------------第二個參數,在寫的狀況下,須要知道寫數據的位寬。 if (isdigit(argv[2][0]) || argv[2][1]) width = xatou(argv[2]); else { static const char bhwl[] ALIGN1 = "bhwl"; static const uint8_t sizes[] ALIGN1 = { 8 * sizeof(char), 8 * sizeof(short), 8 * sizeof(int), 8 * sizeof(long), 0 /* bad */ }; width = strchrnul(bhwl, (argv[2][0] | 0x20)) - bhwl; width = sizes[width]; } /* VALUE */ if (argv[3])-----------------------------------------------------------------第三個參數,待寫入數值。 writeval = bb_strtoull(argv[3], NULL, 0); } else { /* argv[2] == NULL */ /* make argv[3] to be a valid thing to fetch */ argv--; } if (errno) bb_show_usage(); /* one of bb_strtouXX failed */ fd = xopen("/dev/mem", argv[3] ? (O_RDWR | O_SYNC) : (O_RDONLY | O_SYNC));-------根據第三個參數肯定是以只讀形式打開,仍是以讀寫形式打開。/dev/mem表明整個內核空間。 mapped_size = page_size = getpagesize(); offset_in_page = (unsigned)target & (page_size - 1);-----------------------------對地址進行也對齊。 if (offset_in_page + width > page_size) {----------------------------------------若是跨頁,則mapped_size編程兩個頁面。 /* This access spans pages. * Must map two pages to make it possible: */ mapped_size *= 2; } map_base = mmap(NULL, mapped_size, argv[3] ? 
(PROT_READ | PROT_WRITE) : PROT_READ, MAP_SHARED, fd, target & ~(off_t)(page_size - 1));---------------------------------------將/dev/mem文件的從target的頁對齊偏移開始,映射mapped_size塊大小內存。映射結果是map_base。 if (map_base == MAP_FAILED) bb_perror_msg_and_die("mmap"); // printf("Memory mapped at address %p.\n", map_base); virt_addr = (char*)map_base + offset_in_page; if (!argv[3]) { switch (width) { case 8: read_result = *(volatile uint8_t*)virt_addr; break; case 16: read_result = *(volatile uint16_t*)virt_addr; break; case 32: read_result = *(volatile uint32_t*)virt_addr; break; case 64: read_result = *(volatile uint64_t*)virt_addr; break; default: bb_error_msg_and_die("bad width"); } // printf("Value at address 0x%"OFF_FMT"X (%p): 0x%llX\n", // target, virt_addr, // (unsigned long long)read_result); /* Zero-padded output shows the width of access just done */ printf("0x%0*llX\n", (width >> 2), (unsigned long long)read_result);------------讀取數據並打印。 } else { switch (width) { case 8: *(volatile uint8_t*)virt_addr = writeval; // read_result = *(volatile uint8_t*)virt_addr; break; case 16: *(volatile uint16_t*)virt_addr = writeval; // read_result = *(volatile uint16_t*)virt_addr; break; case 32: *(volatile uint32_t*)virt_addr = writeval; // read_result = *(volatile uint32_t*)virt_addr; break; case 64: *(volatile uint64_t*)virt_addr = writeval; // read_result = *(volatile uint64_t*)virt_addr; break; default: bb_error_msg_and_die("bad width"); } // printf("Written 0x%llX; readback 0x%llX\n", // (unsigned long long)writeval, // (unsigned long long)read_result); } if (ENABLE_FEATURE_CLEAN_UP) { if (munmap(map_base, mapped_size) == -1) bb_perror_msg_and_die("munmap"); close(fd); } return EXIT_SUCCESS; }
/dev/mem在chr_dev_init()中創建,需要創建的節點在devlist[]中。
/*
 * Table of memory character devices; the article states these nodes are
 * created from chr_dev_init(). Each entry gives the node name, a mode value,
 * the file_operations used for the node, and extra fmode flags.
 * NOTE(review): the designated index ([1], [2]) is presumably the device
 * minor number -- confirm against chr_dev_init().
 */
static const struct memdev {
	const char *name;
	umode_t mode;
	const struct file_operations *fops;
	fmode_t fmode;
} devlist[] = {
#ifdef CONFIG_DEVMEM
	 [1] = { "mem", 0, &mem_fops, FMODE_UNSIGNED_OFFSET },
#endif
#ifdef CONFIG_DEVKMEM
	 [2] = { "kmem", 0, &kmem_fops, FMODE_UNSIGNED_OFFSET },
#endif
/* The "..." below is the article's elision marker for the remaining entries. */
...
};
其中mem_fops對應/dev/mem節點的操作函數。
static const struct file_operations __maybe_unused mem_fops = { .llseek = memory_lseek, .read = read_mem, .write = write_mem,-------------------------------直接對/dev/mem進行讀寫。 .mmap = mmap_mem,---------------------------------對/dev/mem進行mmap映射。 .open = open_mem,---------------------------------主要檢查權限是否知足CAP_SYS_RAWIO。 #ifndef CONFIG_MMU .get_unmapped_area = get_unmapped_area_mem, .mmap_capabilities = memory_mmap_capabilities, #endif }; static int open_port(struct inode *inode, struct file *filp) { return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; } #define open_mem open_port
mmap_mem()是mmap()內存映射的執行者,通過它將/dev/mem對應的物理地址映射到用戶空間虛擬地址。
static int mmap_mem(struct file *file, struct vm_area_struct *vma) { size_t size = vma->vm_end - vma->vm_start; phys_addr_t offset = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT; /* It's illegal to wrap around the end of the physical address space. */ if (offset + (phys_addr_t)size - 1 < offset) return -EINVAL; if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size))return -EINVAL; if (!private_mapping_ok(vma)) return -ENOSYS; if (!range_is_allowed(vma->vm_pgoff, size)) return -EPERM; if (!phys_mem_access_prot_allowed(file, vma->vm_pgoff, size, &vma->vm_page_prot)) return -EINVAL; vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff, size, vma->vm_page_prot); vma->vm_ops = &mmap_mem_ops; /* Remap-pfn-range will mark the range VM_IO */ if (remap_pfn_range(vma,---------------------------------------------將內核中vma->vm_pgoff對應的size個頁面,映射到vma區域,返回的虛擬空間起始地址是vma->vm_start。 vma->vm_start, vma->vm_pgoff, size, vma->vm_page_prot)) { return -EAGAIN; } return 0; }
下面兩個函數分別對應read()和write()兩個系統調用。
/*
 * read() handler for /dev/mem: copy `count` bytes of physical memory starting
 * at physical address *ppos to the user buffer, one page-bounded chunk at a
 * time.
 *
 * Returns the number of bytes read (and advances *ppos by it), 0 if *ppos
 * does not fit in phys_addr_t, -EFAULT for an invalid range or failed copy,
 * or -EPERM for a page that may not be accessed.
 */
static ssize_t read_mem(struct file *file, char __user *buf,
			size_t count, loff_t *ppos)
{
	phys_addr_t p = *ppos;
	ssize_t read, sz;
	void *ptr;

	/* The file offset was truncated when stored in phys_addr_t. */
	if (p != *ppos)
		return 0;

	/*
	 * Validate physical address + length; per the article this keeps the
	 * access inside the low_memory range.
	 */
	if (!valid_phys_addr_range(p, count))
		return -EFAULT;
	read = 0;
	/* ... (code elided in the article) ... */

	while (count > 0) {
		unsigned long remaining;
		int allowed;

		/* Limit this chunk so it does not cross a page boundary. */
		sz = size_inside_page(p, count);

		allowed = page_is_allowed(p >> PAGE_SHIFT);
		if (!allowed)
			return -EPERM;
		if (allowed == 2) {
			/* Show zeros for restricted memory. */
			remaining = clear_user(buf, sz);
		} else {
			/*
			 * Translate the physical address to a kernel virtual
			 * address. Per the article this is done with __va(),
			 * so only addresses in the directly mapped region
			 * can be translated.
			 */
			ptr = xlate_dev_mem_ptr(p);
			if (!ptr)
				return -EFAULT;

			/* Copy the memory contents out to user space. */
			remaining = copy_to_user(buf, ptr, sz);

			unxlate_dev_mem_ptr(p, ptr);
		}

		if (remaining)
			return -EFAULT;

		buf += sz;
		p += sz;
		count -= sz;
		read += sz;
	}

	*ppos += read;
	return read;
}

/*
 * write() handler for /dev/mem: copy `count` bytes from the user buffer to
 * physical memory starting at physical address *ppos, one page-bounded chunk
 * at a time. Pages for which page_is_allowed() returns a value other than 1
 * are skipped without being written but still counted.
 *
 * Returns the number of bytes consumed (and advances *ppos by it), -EFBIG if
 * *ppos does not fit in phys_addr_t, -EFAULT for an invalid range or when
 * nothing could be written, or -EPERM for a page that may not be accessed.
 */
static ssize_t write_mem(struct file *file, const char __user *buf,
			 size_t count, loff_t *ppos)
{
	phys_addr_t p = *ppos;
	ssize_t written, sz;
	unsigned long copied;
	void *ptr;

	/* The file offset was truncated when stored in phys_addr_t. */
	if (p != *ppos)
		return -EFBIG;

	/* Per the article: ensures the range lies inside low_memory. */
	if (!valid_phys_addr_range(p, count))
		return -EFAULT;

	written = 0;

#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
	/* we don't have page 0 mapped on sparc and m68k.. */
	if (p < PAGE_SIZE) {
		sz = size_inside_page(p, count);
		/* Hmm. Do something? */
		buf += sz;
		p += sz;
		count -= sz;
		written += sz;
	}
#endif

	while (count > 0) {
		int allowed;

		/* Limit this chunk so it does not cross a page boundary. */
		sz = size_inside_page(p, count);

		allowed = page_is_allowed(p >> PAGE_SHIFT);
		if (!allowed)
			return -EPERM;

		/* Skip actual writing when a page is marked as restricted. */
		if (allowed == 1) {
			/*
			 * On ia64 if a page has been mapped somewhere as
			 * uncached, then it must also be accessed uncached
			 * by the kernel or data corruption may occur.
			 *
			 * Per the article, the physical-to-virtual
			 * translation here is done with __va().
			 */
			ptr = xlate_dev_mem_ptr(p);
			if (!ptr) {
				if (written)
					break;
				return -EFAULT;
			}

			copied = copy_from_user(ptr, buf, sz);
			unxlate_dev_mem_ptr(p, ptr);
			if (copied) {
				/* Partial copy: report what was written so far. */
				written += sz - copied;
				if (written)
					break;
				return -EFAULT;
			}
		}

		buf += sz;
		p += sz;
		count -= sz;
		written += sz;
	}

	*ppos += written;
	return written;
}
對比mmap和read()/write()兩種方式可知:
devmem一次讀寫的內容有限,hexdump可以一次dump大量數據。
但是hexdump是通過read()/write()來獲取數據,可訪問的物理地址範圍受到限制(需在low_memory範圍內);而devmem通過mmap()則沒有這一限制。
hexdump -s 0x10000000 -n 256 /dev/mem 10000000 0005 1908 fc11 18ff edf7 03fe e914 020b 10000010 0d00 1fe4 f202 1601 f703 0412 e814 1cfb 10000020 09fc 000b 06eb 07f0 ec12 01e6 11e9 03f7 10000030 1a2d 11eb f700 ece9 eef3 05f7 0009 eb03 10000040 ff1a e50b 1e08 0f16 0cfa 13fb 0b06 0a1b 10000050 0401 fefd fd1e 0b05 f317 f9ea f00a 3ef5 10000060 f118 fe02 f606 0f02 f1ec f4fe 0216 eefb 10000070 0c02 eefd f8ff 06eb 08fc f603 05fb f80e 10000080 f6fb 2503 f207 0a19 12ee fb0d 0512 09f8 10000090 fbfa 1303 f9fe 0dfc f2fa 06fb fef4 04fa 100000a0 2007 170e 1a05 f3f6 0c2d 0601 0f0b 061f 100000b0 1108 0b18 f80d ebef 05f8 f3eb 0207 e8ff 100000c0 fb07 fdea 0efd fb02 0f10 f8f8 f016 f8f2 100000d0 130f 0803 0909 0100 0b03 fc06 0307 1e10 100000e0 011b 2814 f7f3 fc01 f6f9 03ec 0afb ecf1 100000f0 05fb 070a f904 fbf5 f7fa 0304 f502 0d02
某些情況下需要讀取內核某個變量的值,這時候可以通過/dev/kmem。
要使用/dev/kmem就需要在內核中打開CONFIG_DEVKMEM,menuconfig路徑爲:Device Drivers->Character devices->/dev/kmem virtual device support。
static const struct file_operations __maybe_unused kmem_fops = { .llseek = memory_lseek, .read = read_kmem, .write = write_kmem, .mmap = mmap_kmem, .open = open_kmem, #ifndef CONFIG_MMU .get_unmapped_area = get_unmapped_area_mem, .mmap_capabilities = memory_mmap_capabilities, #endif }; static int mmap_kmem(struct file *file, struct vm_area_struct *vma) { unsigned long pfn; /* Turn a kernel-virtual address into a physical page frame */ pfn = __pa((u64)vma->vm_pgoff << PAGE_SHIFT) >> PAGE_SHIFT;------------------將內核虛擬地址經過__pa()轉換成物理地址。 if (!pfn_valid(pfn)) return -EIO; vma->vm_pgoff = pfn; return mmap_mem(file, vma); }
read_kmem()和write_kmem()需要對low_memory和high_memory進行區別對待。
對low_memory需要經過xlate_dev_kmem_ptr()後進行讀寫;對high_memory則通過vread()/vwrite()進行讀寫。
/*
 * read() handler for /dev/kmem: copy `count` bytes starting at kernel virtual
 * address *ppos to the user buffer.
 *
 * Addresses below high_memory are read through xlate_dev_kmem_ptr();
 * anything at or above high_memory must be a vmalloc/module address and is
 * read with vread() through a bounce page.
 *
 * Returns the number of bytes read if any were read, otherwise 0 or the
 * error recorded in the second phase; the first phase returns -ENXIO or
 * -EFAULT directly. On the normal exit path *ppos is set to the address
 * following the last byte read.
 */
static ssize_t read_kmem(struct file *file, char __user *buf,
			 size_t count, loff_t *ppos)
{
	unsigned long p = *ppos;
	ssize_t low_count, read, sz;
	char *kbuf; /* k-addr because vread() takes vmlist_lock rwlock */
	int err = 0;

	read = 0;
	/* Part of the request that lies below high_memory (low memory). */
	if (p < (unsigned long) high_memory) {
		low_count = count;
		/* Clamp to the number of bytes that are below high_memory. */
		if (count > (unsigned long)high_memory - p)
			low_count = (unsigned long)high_memory - p;

		/* ... (code elided in the article) ... */

		while (low_count > 0) {
			sz = size_inside_page(p, low_count);

			kbuf = xlate_dev_kmem_ptr((void *)p);
			/*
			 * Per the article: valid means the address lies
			 * between PAGE_OFFSET and high_memory.
			 */
			if (!virt_addr_valid(kbuf))
				return -ENXIO;

			if (copy_to_user(buf, kbuf, sz))
				return -EFAULT;
			buf += sz;
			p += sz;
			read += sz;
			low_count -= sz;
			count -= sz;
		}
	}

	/* Whatever is left lies at or above high_memory. */
	if (count > 0) {
		kbuf = (char *)__get_free_page(GFP_KERNEL);
		if (!kbuf)
			return -ENOMEM;
		while (count > 0) {
			/*
			 * sz is count if the range stays inside p's page,
			 * otherwise only the remainder of that page, so each
			 * chunk fits in the one-page bounce buffer.
			 */
			sz = size_inside_page(p, count);
			if (!is_vmalloc_or_module_addr((void *)p)) {
				err = -ENXIO;
				break;
			}
			sz = vread(kbuf, (char *)p, sz);
			if (!sz)
				break;
			if (copy_to_user(buf, kbuf, sz)) {
				err = -EFAULT;
				break;
			}
			count -= sz;
			buf += sz;
			read += sz;
			p += sz;
		}
		free_page((unsigned long)kbuf);
	}
	*ppos = p;
	return read ? read : err;
}

/*
 * Write `count` bytes from the user buffer to the kernel virtual address `p`
 * through xlate_dev_kmem_ptr(), one page-bounded chunk at a time.
 *
 * Returns the number of bytes written (and advances *ppos by it), -ENXIO if
 * a translated address is not valid, or -EFAULT if nothing could be copied.
 */
static ssize_t do_write_kmem(unsigned long p, const char __user *buf,
				size_t count, loff_t *ppos)
{
	ssize_t written, sz;
	unsigned long copied;

	written = 0;
	/* ... (code elided in the article) ... */

	while (count > 0) {
		void *ptr;

		sz = size_inside_page(p, count);

		ptr = xlate_dev_kmem_ptr((void *)p);
		if (!virt_addr_valid(ptr))
			return -ENXIO;

		copied = copy_from_user(ptr, buf, sz);
		if (copied) {
			/* Partial copy: report what was written so far. */
			written += sz - copied;
			if (written)
				break;
			return -EFAULT;
		}
		buf += sz;
		p += sz;
		count -= sz;
		written += sz;
	}

	*ppos += written;
	return written;
}

/*
 * write() handler for /dev/kmem: copy `count` bytes from the user buffer to
 * kernel virtual address *ppos.
 *
 * The part below high_memory goes through do_write_kmem(); the rest must be
 * vmalloc/module addresses and is written with vwrite() via a bounce page.
 *
 * Returns the total number of bytes written if non-zero, otherwise the
 * recorded error (or 0). A short or failed low-memory write returns
 * do_write_kmem()'s result directly.
 */
static ssize_t write_kmem(struct file *file, const char __user *buf,
			  size_t count, loff_t *ppos)
{
	unsigned long p = *ppos;
	ssize_t wrote = 0;
	ssize_t virtr = 0;
	char *kbuf; /* k-addr because vwrite() takes vmlist_lock rwlock */
	int err = 0;

	if (p < (unsigned long) high_memory) {
		/* Bytes of the request that lie below high_memory. */
		unsigned long to_write = min_t(unsigned long, count,
					       (unsigned long)high_memory - p);
		wrote = do_write_kmem(p, buf, to_write, ppos);
		if (wrote != to_write)
			return wrote;
		p += wrote;
		buf += wrote;
		count -= wrote;
	}

	if (count > 0) {
		kbuf = (char *)__get_free_page(GFP_KERNEL);
		if (!kbuf)
			return wrote ? wrote : -ENOMEM;
		while (count > 0) {
			unsigned long sz = size_inside_page(p, count);
			unsigned long n;

			if (!is_vmalloc_or_module_addr((void *)p)) {
				err = -ENXIO;
				break;
			}
			/* Stage the user data in the bounce page first. */
			n = copy_from_user(kbuf, buf, sz);
			if (n) {
				err = -EFAULT;
				break;
			}
			vwrite(kbuf, (char *)p, sz);
			count -= sz;
			buf += sz;
			virtr += sz;
			p += sz;
		}
		free_page((unsigned long)kbuf);
	}

	*ppos = p;
	/* GNU "?:" shorthand: the byte total if non-zero, else err. */
	return virtr + wrote ? : err;
}
類似於devmem之於/dev/mem,devkmem通過將/dev/kmem映射到用戶空間,然後讀取內容。
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#define DEVKMEM "/dev/kmem"

/*
 * devkmem HEX_ADDR
 *
 * Print the 32-bit value stored at kernel virtual address HEX_ADDR by
 * mmap()ing the page(s) of /dev/kmem that contain it.
 *
 * argv[1] is parsed as a hexadecimal address (an optional "0x" prefix is
 * accepted by strtoul). Returns EXIT_SUCCESS after printing the address, the
 * mapping base and the value; returns EXIT_FAILURE on a usage error or when
 * the device cannot be opened or mapped.
 */
int main(int argc, char *argv[])
{
	if (argc != 2) {
		fprintf(stderr, "Usage: %s HEX_KERNEL_ADDRESS\n",
			(argc > 0 && argv[0]) ? argv[0] : "devkmem");
		return EXIT_FAILURE;
	}

	/* unsigned long so that 64-bit kernel addresses are not truncated */
	char *end = NULL;
	errno = 0;
	unsigned long var_addr = strtoul(argv[1], &end, 16);
	if (end == argv[1] || *end != '\0' || errno == ERANGE) {
		fprintf(stderr, "invalid address: %s\n", argv[1]);
		return EXIT_FAILURE;
	}

	/* Ask the system for the page size instead of assuming 4 KiB. */
	long page = sysconf(_SC_PAGESIZE);
	if (page <= 0) {
		perror("sysconf");
		return EXIT_FAILURE;
	}
	unsigned long page_size = (unsigned long)page;
	unsigned long in_page = var_addr & (page_size - 1);
	unsigned long page_base = var_addr & ~(page_size - 1);

	/* Map a second page when the 4-byte value straddles a page boundary. */
	size_t map_len = page_size;
	if (in_page + sizeof(unsigned int) > page_size)
		map_len += page_size;

	int fd = open(DEVKMEM, O_RDONLY);
	if (fd == -1) {
		perror("open");
		return EXIT_FAILURE;
	}

	void *mbase = mmap(NULL, map_len, PROT_READ, MAP_SHARED, fd,
			   (off_t)page_base);
	if (mbase == MAP_FAILED) {
		/* Must not fall through: dereferencing MAP_FAILED would crash. */
		fprintf(stderr, "map failed %s\n", strerror(errno));
		close(fd);
		return EXIT_FAILURE;
	}

	/* memcpy avoids a misaligned load when the address is not 4-aligned. */
	unsigned int value;
	memcpy(&value, (const char *)mbase + in_page, sizeof value);

	printf("varAddr = 0x%lX \n", var_addr);
	printf("mapbase = 0x%lX \n", (unsigned long)mbase);
	printf("value   = 0x%X \n", value);

	munmap(mbase, map_len);
	close(fd);
	return EXIT_SUCCESS;
}
devkmem需要輸入地址,但由於輸入的是內核虛擬地址,憑空給出一個地址是沒有意義的。
需要通過/proc/kallsyms根據符號找到對應的內核虛擬地址,然後再通過devkmem查看其值。
比如想查看sysctl_sched_rt_runtime的值,首先查看其在內核中的虛擬地址:
cat /proc/kallsyms | grep sysctl_sched_rt_runtime
808eb544 D sysctl_sched_rt_runtime
然後查看該虛擬地址處的值:
./devkmem 808eb544
varAddr = 0x808EB544
mapbase = 0x2ABFB000
value = 0xE7EF0
換算成10進制就是950000。
那麼這個值對不對呢?cat /proc/sys/kernel/sched_rt_runtime_us的輸出表明結果正確。
改進點:
1.直接輸入符號,顯示全部符號的值。
2.支持不同的輸出格式,16進制、10進制等等。
參考文檔: