關鍵詞:ramdisk、rdinit、.init.ramfs、__initramfs_start、__initramfs_size、rootfs、ramfs、populate_rootfs()、gzip、actions[]、free_initmem()、run_init_process()等等。
本着瞭解ramdisk,帶着以下幾個問題進行分析:
如果要使用ramdisk功能,需要做兩步工作:一是修改Kernel的bootargs,增加rdinit選項;二是在編譯uImage的時候將rootfs.cpio嵌入。
下面是使用ramdisk啓動和使用eMMC作爲啓動介質的兩種配置,ramdisk需要指定rdinit選項,並且root設備變成了/dev/ram0。
bootargs = "console=ttyS0,115200 rdinit=/sbin/init root=/dev/ram0 quiet"; bootargs = "console=ttyS0,115200 root=/dev/mmcblk1p2 rw rootfstype=ext4 rootflags=data=journal,barrier=1 rootwait";
需要將rootfs.cpio嵌入到kernel image,可以通過buildroot配置:
config BR2_TARGET_ROOTFS_INITRAMFS bool "initial RAM filesystem linked into linux kernel" depends on BR2_LINUX_KERNEL select BR2_TARGET_ROOTFS_CPIO help Integrate the root filesystem generated by Buildroot as an initramfs inside the kernel image. This integration will take place automatically. A rootfs.cpio file will be generated in the images/ directory. This is the archive that will be included in the kernel image. The default rootfs compression set in the kernel configuration is used, regardless of how buildroot's cpio archive is configured. Note that enabling initramfs together with another filesystem formats doesn't make sense: you would end up having two identical root filesystems, one embedded inside the kernel image, and one separately.
還可以在編譯內核的時候通過如下編譯選項來達到:
make uImage -j16 CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="${BR_BINARIES_DIR}/rootfs.cpio" KCPPFLAGS=-DCONFIG_BLK_DEV_INITRD
看看rdinit和root在內核中是如何被處理的:如果bootargs設置了rdinit和root,那麼內核在啓動階段解析並分別賦給ramdisk_execute_command和saved_root_name。
在後面分析內核啓動的過程中,這兩個重要的參數會被用到。
static int __init rdinit_setup(char *str)
{
unsigned int i; ramdisk_execute_command = str;--------------------------------此例中ramdisk_execute_command對應/sbin/init。 /* See "auto" comment in init_setup */ for (i = 1; i < MAX_INIT_ARGS; i++) argv_init[i] = NULL; return 1; } __setup("rdinit=", rdinit_setup); static int __init root_dev_setup(char *line) { strlcpy(saved_root_name, line, sizeof(saved_root_name));------saved_root_name對應/dev/ram0。 return 1; } __setup("root=", root_dev_setup);
從vmlinux.lds.h文件可知,ramfs根據CONFIG_BLK_DEV_INITRD定義是否使用。
INIT_RAM_FS存放ramfs相關內容,包括.init.ramfs和.init.ramfs.info兩個段。
SECTIONS { . = PAGE_OFFSET + PHYS_OFFSET_OFFSET; _stext = .; __init_begin = .; ... INIT_DATA_SECTION(PAGE_SIZE) ... . = ALIGN(PAGE_SIZE); __init_end = .;------------------------------從__init_begin到__init_end部分的空間會在free_initmem()中被釋放。 .text : AT(ADDR(.text) - LOAD_OFFSET) { ... } = 0 _etext = .; ... } #define INIT_DATA_SECTION(initsetup_align) \ .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { \ ... INIT_RAM_FS \ } #ifdef CONFIG_BLK_DEV_INITRD #define INIT_RAM_FS \ . = ALIGN(4); \ VMLINUX_SYMBOL(__initramfs_start) = .; \ KEEP(*(.init.ramfs)) \ . = ALIGN(8); \ KEEP(*(.init.ramfs.info)) #else #define INIT_RAM_FS #endif
.init.ramfs和.init.ramfs.info兩個段在initramfs_data.S中定義。
.section .init.ramfs,"a" __irf_start: .incbin __stringify(INITRAMFS_IMAGE)------------------原封不動的將INITRAMFS_IMAGE對應的二進制文件編譯到當前文件中。 __irf_end: .section .init.ramfs.info,"a" .globl VMLINUX_SYMBOL(__initramfs_size) VMLINUX_SYMBOL(__initramfs_size): #ifdef CONFIG_64BIT .quad __irf_end - __irf_start #else .long __irf_end - __irf_start #endif
INITRAMFS_IMAGE從哪裏來?需要查看內核源碼usr/目錄下的Makefile。
從Makefile中可知,以CONFIG_INITRAMFS_SOURCE對應的rootfs.cpio文件作爲輸入,調用gen_init_cpio和gen_initramfs_list.sh生成initramfs_data.cpio.gz文件。
然後INITRAMFS_IMAGE對應usr/initramfs_data.cpio$(suffix_y)文件。
最終經過.incbin將INITRAMFS_IMAGE編譯到initramfs_data.o文件中,即對應.init.ramfs段。
800308cc T __security_initcall_start
800308d0 T __initramfs_start
800308d0 t __irf_start---------------------------ramfs區域起始地址。
800308d0 T __security_initcall_end
814ed9c0 T __initramfs_size----------------------ramfs文件大小。
814ed9c0 t __irf_end-----------------------------ramfs區域結束地址。
814ee000 T __init_end
ramfs作爲init數據的一部分,位於__init_begin和__init_end之間區域的末端,在free_initmem()中被釋放。
ramfs是以壓縮包的形式存放在從__initramfs_start開始、大小爲__initramfs_size的區域中,在kernel_init()-->kernel_init_freeable()-->do_basic_setup()-->populate_rootfs()中調用unpack_to_rootfs()解壓。
kernel_init() -->kernel_init_freeable()-------------------------------在執行完do_basic_setup(),即完成各類initcall以後,判斷ramdisk_execute_command命令。
-->free_initmem()---------------------------------------釋放__init_begin到__init_end之間的內存。
-->do_basic_setup()
-->populate_rootfs()---------------------------------解壓__initramfs_start包含的ramdisk到rootfs中。
-->run_init_process(ramdisk_execute_command)------------執行ramdisk_execute_command命令替代當前進程。
在start_kernel()以前,從dts中解析出initrd和root相關參數。
調用early_init_dt_scan()-->early_init_dt_scan_nodes()-->early_init_dt_scan_chosen()-->early_init_dt_check_for_initrd():
void __init early_init_dt_scan_nodes(void) { /* Retrieve various information from the /chosen node */ of_scan_flat_dt(early_init_dt_scan_chosen, boot_command_line); ... } int __init early_init_dt_scan_chosen(unsigned long node, const char *uname, int depth, void *data) { ... early_init_dt_check_for_initrd(node); ... } static void __init early_init_dt_check_for_initrd(unsigned long node) { u64 start, end; int len; const __be32 *prop; pr_debug("Looking for initrd properties... "); prop = of_get_flat_dt_prop(node, "linux,initrd-start", &len); if (!prop) return; start = of_read_number(prop, len/4); prop = of_get_flat_dt_prop(node, "linux,initrd-end", &len); if (!prop) return; end = of_read_number(prop, len/4); __early_init_dt_declare_initrd(start, end); pr_debug("initrd_start=0x%llx initrd_end=0x%llx\n", (unsigned long long)start, (unsigned long long)end); }
關於initrd_start和initrd_end,從early_init_dt_check_for_initrd()可知,如果dts中沒有設置"linux,initrd-start"和"linux,initrd-end",那麼initrd_start和initrd_end這兩個參數都是原始值0。
#ifdef CONFIG_BLK_DEV_INITRD #ifndef __early_init_dt_declare_initrd static void __early_init_dt_declare_initrd(unsigned long start, unsigned long end) { initrd_start = (unsigned long)__va(start); initrd_end = (unsigned long)__va(end); initrd_below_start_ok = 1; } #endif
rootfs其實不是一種實際的文件系統,它根據實際情況可能使用ramfs或者tmpfs。
這裏分析rootfs是如何對應ramfs的,並且簡單介紹ramfs。
在start_kernel()-->vfs_caches_init()-->mnt_init()中,註冊rootfs類型的文件系統。
void __init mnt_init(void) { ... fs_kobj = kobject_create_and_add("fs", NULL); if (!fs_kobj) printk(KERN_WARNING "%s: kobj create error\n", __func__); init_rootfs(); init_mount_tree(); } int __init init_rootfs(void) { int err = register_filesystem(&rootfs_fs_type); if (err) return err; if (IS_ENABLED(CONFIG_TMPFS) && !saved_root_name[0] && (!root_fs_names || strstr(root_fs_names, "tmpfs"))) {---------沒有指定saved_root_name而且root_fs_names爲tmpfs時候,初始化tmpfs文件系統。 err = shmem_init();-------------------------------------------初始化tmpfs文件系統。 is_tmpfs = true;----------------------------------------------後面rootfs_mount()會須要判斷是使用tmpfs仍是ramfs做爲文件系統類型。 } else { err = init_ramfs_fs();----------------------------------------初始化ramfs文件系統。 } ... } static void __init init_mount_tree(void) { struct vfsmount *mnt; struct mnt_namespace *ns; struct path root; struct file_system_type *type; type = get_fs_type("rootfs");-------------------------------------獲取rootfs對應的file_system_type,這裏對應的是ramfs操做函數。 if (!type) panic("Can't find rootfs type"); mnt = vfs_kern_mount(type, 0, "rootfs", NULL);--------------------這裏會調用mount_fs(),進而調用rootfs_fs_type->mount(),即rootfs_mount()。 put_filesystem(type); if (IS_ERR(mnt)) panic("Can't create rootfs"); ns = create_mnt_ns(mnt); if (IS_ERR(ns)) panic("Can't allocate initial namespace"); init_task.nsproxy->mnt_ns = ns; get_mnt_ns(ns); root.mnt = mnt; root.dentry = mnt->mnt_root; mnt->mnt_flags |= MNT_LOCKED; set_fs_pwd(current->fs, &root); set_fs_root(current->fs, &root); }
下面來看看rootfs文件系統是如何掛載的?rootfs沒有自己的固定類型,或者使用ramfs或者使用tmpfs。
static bool is_tmpfs; static struct dentry *rootfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { static unsigned long once; void *fill = ramfs_fill_super; if (test_and_set_bit(0, &once)) return ERR_PTR(-ENODEV); if (IS_ENABLED(CONFIG_TMPFS) && is_tmpfs) fill = shmem_fill_super; return mount_nodev(fs_type, flags, data, fill);--------------這裏的fill究竟用的是ramfs仍是tmpfs,在init_roofs()中已經決定。 } static struct file_system_type rootfs_fs_type = { .name = "rootfs", .mount = rootfs_mount, .kill_sb = kill_litter_super, }; struct dentry *mount_nodev(struct file_system_type *fs_type, int flags, void *data, int (*fill_super)(struct super_block *, void *, int)) { int error; struct super_block *s = sget(fs_type, NULL, set_anon_super, flags, NULL); if (IS_ERR(s)) return ERR_CAST(s); error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);------調用ramfs_fill_super()或者shmem_fill_super()。 if (error) { deactivate_locked_super(s); return ERR_PTR(error); } s->s_flags |= MS_ACTIVE; return dget(s->s_root); } int ramfs_fill_super(struct super_block *sb, void *data, int silent) { struct ramfs_fs_info *fsi; struct inode *inode; int err; save_mount_options(sb, data); fsi = kzalloc(sizeof(struct ramfs_fs_info), GFP_KERNEL); sb->s_fs_info = fsi; if (!fsi) return -ENOMEM; err = ramfs_parse_options(data, &fsi->mount_opts); if (err) return err; sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = RAMFS_MAGIC; sb->s_op = &ramfs_ops;--------------------------rootfs最終使用的仍是ramfs文件系統類型的操做函數,若是是tmpfs則使用shmem_ops。 sb->s_time_gran = 1; inode = ramfs_get_inode(sb, NULL, S_IFDIR | fsi->mount_opts.mode, 0); sb->s_root = d_make_root(inode);-----------------------建立根節點"/"。 if (!sb->s_root) return -ENOMEM; return 0; } struct dentry *d_make_root(struct inode *root_inode) { struct dentry *res = NULL; if (root_inode) { res = __d_alloc(root_inode->i_sb, NULL);-----------在name參數爲NULL的時候,即建立根節點"/"。 if (res) d_instantiate(res, 
root_inode); else iput(root_inode); } return res; }
綜上所述,在內核啓動時init_rootfs()首先根據參數來確定是使用tmpfs還是ramfs,然後在init_mount_tree()中進行掛載。
ramfs根據請求的mode類型選擇合適的inode或者file操做類型。
struct inode *ramfs_get_inode(struct super_block *sb, const struct inode *dir, umode_t mode, dev_t dev) { struct inode * inode = new_inode(sb); printk("lubaoquan %s line=%d\n", __func__, __LINE__); if (inode) { inode->i_ino = get_next_ino(); inode_init_owner(inode, dir, mode); inode->i_mapping->a_ops = &ramfs_aops; mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER); mapping_set_unevictable(inode->i_mapping); inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); switch (mode & S_IFMT) { default: init_special_inode(inode, mode, dev);---------------------處理char、block、pipefifo等類型的文件。 break; case S_IFREG:-------------------------------------------------處理普通文件。 inode->i_op = &ramfs_file_inode_operations; inode->i_fop = &ramfs_file_operations; break; case S_IFDIR:-------------------------------------------------處理目錄。 inode->i_op = &ramfs_dir_inode_operations; inode->i_fop = &simple_dir_operations; /* directory inodes start off with i_nlink == 2 (for "." entry) */ inc_nlink(inode); break; case S_IFLNK:-------------------------------------------------處理link文件。 inode->i_op = &page_symlink_inode_operations; inode_nohighmem(inode); break; } } return inode; } void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev) { inode->i_mode = mode; if (S_ISCHR(mode)) { inode->i_fop = &def_chr_fops; inode->i_rdev = rdev; } else if (S_ISBLK(mode)) { inode->i_fop = &def_blk_fops; inode->i_rdev = rdev; } else if (S_ISFIFO(mode)) inode->i_fop = &pipefifo_fops; else if (S_ISSOCK(mode)) ; /* leave it no_open_fops */ else printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for" " inode %s:%lu\n", mode, inode->i_sb->s_id, inode->i_ino); } const struct file_operations ramfs_file_operations = { .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .fsync = noop_fsync, .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, .llseek = generic_file_llseek, .get_unmapped_area = 
ramfs_mmu_get_unmapped_area, }; const struct inode_operations ramfs_file_inode_operations = { .setattr = simple_setattr, .getattr = simple_getattr, }; static const struct inode_operations ramfs_dir_inode_operations = { .create = ramfs_create, .lookup = simple_lookup, .link = simple_link, .unlink = simple_unlink, .symlink = ramfs_symlink, .mkdir = ramfs_mkdir, .rmdir = simple_rmdir, .mknod = ramfs_mknod, .rename = simple_rename, }; const struct inode_operations page_symlink_inode_operations = { .readlink = generic_readlink, .get_link = page_get_link, };
根據inode->i_mode的不同類型,採用不同的inode->i_fop和inode->i_op。
所有的initcall在start_kernel()-->rest_init()-->kernel_init()-->kernel_init_freeable()-->do_basic_setup()中依次被調用。
其中rootfs_initcall()在fs_initcall()以後,在device_initcall()以前。
#define fs_initcall(fn) __define_initcall(fn, 5) #define fs_initcall_sync(fn) __define_initcall(fn, 5s) #define rootfs_initcall(fn) __define_initcall(fn, rootfs) #define device_initcall(fn) __define_initcall(fn, 6) #define device_initcall_sync(fn) __define_initcall(fn, 6s)
rootfs_initcall()在沒有定義CONFIG_BLK_DEV_INITRD的情況下,調用default_rootfs()。
default_rootfs()主要生成兩個目錄/dev和/root,以及一個設備文件/dev/console。
static int __init default_rootfs(void) { int err; err = sys_mkdir((const char __user __force *) "/dev", 0755); if (err < 0) goto out; err = sys_mknod((const char __user __force *) "/dev/console", S_IFCHR | S_IRUSR | S_IWUSR, new_encode_dev(MKDEV(5, 1))); if (err < 0) goto out; err = sys_mkdir((const char __user __force *) "/root", 0700); if (err < 0) goto out; return 0; out: printk(KERN_WARNING "Failed to create a rootfs\n"); return err; }
在定義CONFIG_BLK_DEV_INITRD的情況下,調用populate_rootfs()將ramdisk解壓到RAM中。
unpack_to_rootfs()根據參數__initramfs_start和__initramfs_size,從頭部獲取decompress的類型;而後調用decompress_fn進行解壓縮。
static int __init populate_rootfs(void) { char *err = unpack_to_rootfs(__initramfs_start, __initramfs_size);
if (err) panic("%s", err); /* Failed to decompress INTERNAL initramfs */ if (initrd_start) {---------------------------------------------判斷是否特別指定了initrd_start。若是指定,就對initrd進行單獨處理。 #ifdef CONFIG_BLK_DEV_RAM int fd; printk(KERN_INFO "Trying to unpack rootfs image as initramfs...\n"); err = unpack_to_rootfs((char *)initrd_start, initrd_end - initrd_start);-----------------------------判斷加載的是否是initramfs CPIO文件。 if (!err) { free_initrd();------------------------------------------若是解壓成功,釋放image中initrd對應內存。 goto done; } else { clean_rootfs(); unpack_to_rootfs(__initramfs_start, __initramfs_size);--多是initrd文件。 } printk(KERN_INFO "rootfs image is not initramfs (%s)" "; looks like an initrd\n", err); fd = sys_open("/initrd.image", O_WRONLY|O_CREAT, 0700);--------------------------建立文件/initrd.image。 if (fd >= 0) { ssize_t written = xwrite(fd, (char *)initrd_start, initrd_end - initrd_start);-----------------將intird_start到initrd_end內容保存到/initrd.image文件中。 if (written != initrd_end - initrd_start) pr_err("/initrd.image: incomplete write (%zd != %ld)\n", written, initrd_end - initrd_start); sys_close(fd); free_initrd();------------------------------------------關閉文件並釋放image中initrd對應內存。 } done: #else printk(KERN_INFO "Unpacking initramfs...\n"); err = unpack_to_rootfs((char *)initrd_start, initrd_end - initrd_start); if (err) printk(KERN_EMERG "Initramfs unpacking failed: %s\n", err); free_initrd(); #endif load_default_modules(); } return 0; } static char * __init unpack_to_rootfs(char *buf, unsigned long len) { long written; decompress_fn decompress; const char *compress_name; static __initdata char msg_buf[64]; header_buf = kmalloc(110, GFP_KERNEL); symlink_buf = kmalloc(PATH_MAX + N_ALIGN(PATH_MAX) + 1, GFP_KERNEL); name_buf = kmalloc(N_ALIGN(PATH_MAX), GFP_KERNEL); if (!header_buf || !symlink_buf || !name_buf) panic("can't allocate buffers"); state = Start; this_header = 0; message = NULL; while (!message && len) { ... 
decompress = decompress_method(buf, len, &compress_name);------根據buf的第一、2個字節的magic來判斷decompress類型。好比這裏對應gzip,因此返回值decompress及對應gunzip()。 pr_debug("Detected %s compressed data\n", compress_name); if (decompress) { int res = decompress(buf, len, NULL, flush_buffer, NULL, &my_inptr, error); if (res) error("decompressor failed"); } else if (compress_name) { ... } else error("junk in compressed archive"); if (state != Reset) error("junk in compressed archive"); this_header = saved_offset + my_inptr; buf += my_inptr; len -= my_inptr; } dir_utime(); kfree(name_buf); kfree(symlink_buf); kfree(header_buf); return message; }
內核中支持的decompressor用struct compress_format表示,核心是decompress_fn()函數。
struct compress_format { unsigned char magic[2]; const char *name; decompress_fn decompressor; }; typedef int (*decompress_fn) (unsigned char *inbuf, long len, long (*fill)(void*, unsigned long), long (*flush)(void*, unsigned long), unsigned char *outbuf, long *posp, void(*error)(char *x)); /* inbuf - input buffer *len - len of pre-read data in inbuf *fill - function to fill inbuf when empty *flush - function to write out outbuf *outbuf - output buffer *posp - if non-null, input position (number of bytes read) will be * returned here
decompress_method根據傳入的inbuf頭部兩字節來判斷對應空間所採起的decompressor。
compressed_formats[]保存了系統支持的decompressor類型。
static const struct compress_format compressed_formats[] __initconst = { { {0x1f, 0x8b}, "gzip", gunzip }, { {0x1f, 0x9e}, "gzip", gunzip }, { {0x42, 0x5a}, "bzip2", bunzip2 }, { {0x5d, 0x00}, "lzma", unlzma }, { {0xfd, 0x37}, "xz", unxz }, { {0x89, 0x4c}, "lzo", unlzo }, { {0x02, 0x21}, "lz4", unlz4 }, { {0, 0}, NULL, NULL } }; decompress_fn __init decompress_method(const unsigned char *inbuf, long len, const char **name) { ... pr_debug("Compressed data magic: %#.2x %#.2x\n", inbuf[0], inbuf[1]); for (cf = compressed_formats; cf->name; cf++) { if (!memcmp(inbuf, cf->magic, 2))------------------------遍歷compressed_formats[]知道找到吻合的magic做爲後續ramfs解壓工具。 break; } if (name) *name = cf->name; return cf->decompressor; }
gzip類型對應的decompress_fn爲gunzip(),這裏不深入研究,但是入參flush()函數跟ramfs密切相關。
STATIC int INIT gunzip(unsigned char *buf, long len, long (*fill)(void*, unsigned long), long (*flush)(void*, unsigned long), unsigned char *out_buf, long *pos, void (*error)(char *x)) { return __gunzip(buf, len, fill, flush, out_buf, 0, pos, error); } STATIC int INIT __gunzip(unsigned char *buf, long len, long (*fill)(void*, unsigned long), long (*flush)(void*, unsigned long), unsigned char *out_buf, long out_len, long *pos, void(*error)(char *x)) { u8 *zbuf; struct z_stream_s *strm; int rc; rc = -1; if (flush) { out_len = 0x8000; /* 32 K */ out_buf = malloc(out_len);-----------------------以32K爲單位進行處理。 } else { if (!out_len) out_len = ((size_t)~0) - (size_t)out_buf; /* no limit */ } ... while (rc == Z_OK) { ... rc = zlib_inflate(strm, 0); /* Write any data generated */ if (flush && strm->next_out > out_buf) { long l = strm->next_out - out_buf; if (l != flush(out_buf, l)) {-----------------將解壓後的數據刷出,這裏即調用flush_buffer()進行處理。 rc = -1; error("write error"); break; } strm->next_out = out_buf; strm->avail_out = out_len; } /* after Z_FINISH, only Z_STREAM_END is "we unpacked it all" */ if (rc == Z_STREAM_END) { rc = 0; break; } else if (rc != Z_OK) { error("uncompression error"); rc = -1; } } zlib_inflateEnd(strm); if (pos) /* add + 8 to skip over trailer */ *pos = strm->next_in - zbuf+8; gunzip_5: free(strm->workspace); gunzip_nomem4: free(strm); gunzip_nomem3: if (!buf) free(zbuf); gunzip_nomem2: if (flush) free(out_buf); gunzip_nomem1: return rc; /* returns Z_OK (0) if successful */ }
由以上分析可知rootfs採用了ramfs文件系統類型。
ramfs部分通過gzip進行解壓縮,然後將解壓的內容通過flush_buffer()刷出。
下面就來看看flush_buffer()是如何將__initramfs_start開始__initramfs_size大小的內存刷成rootfs文件系統的。
flush_buffer()調用write_buffer()進行處理,這裏的核心是根據狀態機的不同state調用不同的actions[state]進行處理。
static long __init write_buffer(char *buf, unsigned long len) { byte_count = len; victim = buf; while (!actions[state]()) ; return len - byte_count; } static long __init flush_buffer(void *bufv, unsigned long len) { char *buf = (char *) bufv; long written; long origLen = len; if (message) return -1; while ((written = write_buffer(buf, len)) < len && !message) { ... } return origLen; }
actions[]可以說是將解壓後數據轉換並生成rootfs的核心。
actions[]調用相應的系統調用,按照解壓數據一步一步生成整個文件系統。
static __initdata int (*actions[])(void) = { [Start] = do_start, [Collect] = do_collect, [GotHeader] = do_header, [SkipIt] = do_skip, [GotName] = do_name, [CopyFile] = do_copy, [GotSymlink] = do_symlink, [Reset] = do_reset, }; static int __init do_start(void) { read_into(header_buf, 110, GotHeader);----------------------讀取開頭110字節,用於解析cpio文件頭。 return 0; } static int __init do_collect(void) { unsigned long n = remains; if (byte_count < n) n = byte_count; memcpy(collect, victim, n); eat(n); collect += n; if ((remains -= n) != 0) return 1; state = next_state; return 0; } static int __init do_header(void) { if (memcmp(collected, "070707", 6)==0) {---------------------cpio文件的magic,開頭6個字節「070707」或者「070701」。 error("incorrect cpio method used: use -H newc option"); return 1; } if (memcmp(collected, "070701", 6)) { error("no cpio magic"); return 1; } parse_header(collected); next_header = this_header + N_ALIGN(name_len) + body_len; next_header = (next_header + 3) & ~3; state = SkipIt; if (name_len <= 0 || name_len > PATH_MAX) return 0; if (S_ISLNK(mode)) { if (body_len > PATH_MAX) return 0; collect = collected = symlink_buf; remains = N_ALIGN(name_len) + body_len; next_state = GotSymlink; state = Collect; return 0; } if (S_ISREG(mode) || !body_len) read_into(name_buf, N_ALIGN(name_len), GotName); return 0; } static int __init do_skip(void) { if (this_header + byte_count < next_header) { eat(byte_count); return 1; } else { eat(next_header - this_header); state = next_state; return 0; } } static int __init do_reset(void) { while (byte_count && *victim == '\0') eat(1); if (byte_count && (this_header & 3)) error("broken padding"); return 1; } static int __init maybe_link(void) { if (nlink >= 2) { char *old = find_link(major, minor, ino, mode, collected); if (old) return (sys_link(old, collected) < 0) ? 
-1 : 1; } return 0; } static void __init clean_path(char *path, umode_t fmode) { struct stat st; if (!sys_newlstat(path, &st) && (st.st_mode ^ fmode) & S_IFMT) { if (S_ISDIR(st.st_mode))--------------------------------刪除目錄,若是確實是一個目錄調用sys_rmdir();若是是一個link,只須要sys_unlink()。 sys_rmdir(path); else sys_unlink(path); } } static __initdata int wfd; static int __init do_name(void) { state = SkipIt; next_state = Reset; if (strcmp(collected, "TRAILER!!!") == 0) { free_hash(); return 0; } clean_path(collected, mode); if (S_ISREG(mode)) {---------------------------------------若是是一個普通文件,調用sys_open()建立文件,而且經過sys_fchown()和sys_fchmod()等進行屬性修改。 int ml = maybe_link(); if (ml >= 0) { int openflags = O_WRONLY|O_CREAT; if (ml != 1) openflags |= O_TRUNC; wfd = sys_open(collected, openflags, mode); if (wfd >= 0) { sys_fchown(wfd, uid, gid); sys_fchmod(wfd, mode); if (body_len) sys_ftruncate(wfd, body_len); vcollected = kstrdup(collected, GFP_KERNEL); state = CopyFile;-----------------------------而後進行do_copy()將gzip解壓的數據寫入wfd中。 } } } else if (S_ISDIR(mode)) {-------------------------------若是是一個目錄則調用sys_mkdir()建立目錄。 sys_mkdir(collected, mode); sys_chown(collected, uid, gid); sys_chmod(collected, mode); dir_add(collected, mtime); } else if (S_ISBLK(mode) || S_ISCHR(mode) || S_ISFIFO(mode) || S_ISSOCK(mode)) { if (maybe_link() == 0) { sys_mknod(collected, mode, rdev); sys_chown(collected, uid, gid); sys_chmod(collected, mode); do_utime(collected, mtime); } } return 0; } static int __init do_copy(void) { if (byte_count >= body_len) {-----------------------------將數據寫入wfd中,若是遇到寫完則關閉文件,而且更新do_utime()。 if (xwrite(wfd, victim, body_len) != body_len) error("write error"); sys_close(wfd); do_utime(vcollected, mtime); kfree(vcollected); eat(body_len); state = SkipIt; return 0; } else { if (xwrite(wfd, victim, byte_count) != byte_count) error("write error"); body_len -= byte_count; eat(byte_count); return 1; } } static int __init do_symlink(void) { collected[N_ALIGN(name_len) + body_len] = '\0'; 
clean_path(collected, 0); sys_symlink(collected + N_ALIGN(name_len), collected);-------對於符號連接調用sys_symlink()建立符號。 sys_lchown(collected, uid, gid); do_utime(collected, mtime); state = SkipIt; next_state = Reset; return 0; }
通過上面一系列actions[]函數可知,gzip解壓後的數據經過複雜的mode跳轉到不同函數處理buffer。
最終還是通過在內核中調用與open()/write()/close()/mkdir()等系統調用功能相同的函數,創建完整的rootfs。
在全部的initcall執行完畢後,調用free_initmem()來釋放內存。
void free_initmem(void) { unsigned long addr; addr = (unsigned long) &__init_begin; while (addr < (unsigned long) &__init_end) { ClearPageReserved(virt_to_page(addr)); init_page_count(virt_to_page(addr)); free_page(addr);---------------------每次釋放一個頁面。 totalram_pages++;--------------------totalram_pages遞增。 addr += PAGE_SIZE;-------------------addr後移一個頁面。 } pr_info("Freeing unused kernel memory: %dk freed\n", ((unsigned int)&__init_end - (unsigned int)&__init_begin) >> 10); }
由於存放ramdisk的段.init.ramfs在__init_begin和__init_end之間,所以也會被一同釋放。
kernel_init()所在的內核線程最終會成爲用戶空間第一個進程,其中和ramdisk相關的有:ramfs文件系統類型準備;ramdisk解壓;啓動ramdisk_execute_command來替代當前進程。
static int __ref kernel_init(void *unused) { int ret; kernel_init_freeable();--------執行各類initcall,包括對ramfs註冊和populate_rootfs()解壓ramdisk;以及判斷ramdisk_execute_command是否存在,不然prepare_namespace() ... if (ramdisk_execute_command) { ret = run_init_process(ramdisk_execute_command); if (!ret) return 0; pr_err("Failed to execute %s (error %d)\n", ramdisk_execute_command, ret); }... panic("No working init found. Try passing init= option to kernel. " "See Linux Documentation/init.txt for guidance."); }
kernel_init_freeable()中註冊ramfs文件系統類型,並且將vmlinux中從__initramfs_start開始、大小爲__initramfs_size的數據解壓到rootfs。
然後sys_access()檢查rootfs中是否存在ramdisk_execute_command,沒有則需要prepare_namespace()準備rootfs。
static noinline void __init kernel_init_freeable(void) { ... if (!ramdisk_execute_command) ramdisk_execute_command = "/init"; if (sys_access((const char __user *) ramdisk_execute_command, 0) != 0) { ramdisk_execute_command = NULL; prepare_namespace(); } ... }
run_init_process()根據init_filename從rootfs中啓動,替代當前進程,作爲用戶空間第一個進程。
static int run_init_process(const char *init_filename) { argv_init[0] = init_filename; return do_execve(getname_kernel(init_filename),--------------------------init_filename對應/sbin/init。 (const char __user *const __user *)argv_init,------------------------argv_init[0]對應/sbin/init,其餘爲空。 (const char __user *const __user *)envp_init);-----------------------envp_init[0]對應"HOME=/",envp_init[1]對應"TERM=linux"。 } int do_execve(struct filename *filename, const char __user *const __user *__argv, const char __user *const __user *__envp) { struct user_arg_ptr argv = { .ptr.native = __argv }; struct user_arg_ptr envp = { .ptr.native = __envp }; return do_execveat_common(AT_FDCWD, filename, argv, envp, 0); } static int do_execveat_common(int fd, struct filename *filename, struct user_arg_ptr argv, struct user_arg_ptr envp, int flags) { char *pathbuf = NULL; struct linux_binprm *bprm; struct file *file; struct files_struct *displaced; int retval; if (IS_ERR(filename)) return PTR_ERR(filename); if ((current->flags & PF_NPROC_EXCEEDED) && atomic_read(&current_user()->processes) > rlimit(RLIMIT_NPROC)) { retval = -EAGAIN; goto out_ret; } current->flags &= ~PF_NPROC_EXCEEDED; retval = unshare_files(&displaced); if (retval) goto out_ret; retval = -ENOMEM; bprm = kzalloc(sizeof(*bprm), GFP_KERNEL); if (!bprm) goto out_files; retval = prepare_bprm_creds(bprm); if (retval) goto out_free; check_unsafe_exec(bprm); current->in_execve = 1; file = do_open_execat(fd, filename, flags); retval = PTR_ERR(file); if (IS_ERR(file)) goto out_unmark; sched_exec(); bprm->file = file; if (fd == AT_FDCWD || filename->name[0] == '/') { bprm->filename = filename->name; } else { if (filename->name[0] == '\0') pathbuf = kasprintf(GFP_TEMPORARY, "/dev/fd/%d", fd); else pathbuf = kasprintf(GFP_TEMPORARY, "/dev/fd/%d/%s", fd, filename->name); if (!pathbuf) { retval = -ENOMEM; goto out_unmark; } if (close_on_exec(fd, rcu_dereference_raw(current->files->fdt))) bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE; 
bprm->filename = pathbuf; } bprm->interp = bprm->filename; retval = bprm_mm_init(bprm); if (retval) goto out_unmark; bprm->argc = count(argv, MAX_ARG_STRINGS); if ((retval = bprm->argc) < 0) goto out; bprm->envc = count(envp, MAX_ARG_STRINGS); if ((retval = bprm->envc) < 0) goto out; retval = prepare_binprm(bprm); if (retval < 0) goto out; retval = copy_strings_kernel(1, &bprm->filename, bprm); if (retval < 0) goto out; bprm->exec = bprm->p; retval = copy_strings(bprm->envc, envp, bprm); if (retval < 0) goto out; retval = copy_strings(bprm->argc, argv, bprm); if (retval < 0) goto out; would_dump(bprm, bprm->file); retval = exec_binprm(bprm); if (retval < 0) goto out; /* execve succeeded */ current->fs->in_exec = 0; current->in_execve = 0; acct_update_integrals(current); task_numa_free(current); free_bprm(bprm); kfree(pathbuf); putname(filename); if (displaced) put_files_struct(displaced); return retval; out: if (bprm->mm) { acct_arg_size(bprm, 0); mmput(bprm->mm); } out_unmark: current->fs->in_exec = 0; current->in_execve = 0; out_free: free_bprm(bprm); kfree(pathbuf); out_files: if (displaced) reset_files_struct(displaced); out_ret: putname(filename); return retval; }
綜上所述,在buildroot或者kernel編譯時打開ramdisk功能後,ramdisk會嵌入在vmlinux中。
在Linux啓動階段,通過populate_rootfs()將ramdisk從內核鏡像中讀出。然後調用gzip decompressor解壓到RAM中,解壓後的數據經過actions[]解析轉換成rootfs文件系統。
在init初始化完成後,ramfs相關內存隨着init內存一塊兒釋放,迴歸totalram_pages。
在kernel_init()最後階段通過run_init_process()執行ramdisk中的init進程,作爲用戶空間第一個進程。