2017-03-13
上文針對VFS的基本信息作了介紹,並簡單介紹了VFS涉及的幾個數據結構,本節結合Linux源碼,對各個結構之間的關係進行分析。
1、整體架構圖
整體架構圖如上圖所示,結合進程訪問文件的實際情況,根據上圖進行細節化的描述。進程通過其結構中的files_struct結構和文件建立聯繫,看下files_struct結構:
/*
 * Open-file bookkeeping that a process uses to reach its files.
 */
struct files_struct {
	/*
	 * read mostly part
	 */
	atomic_t count;
	struct fdtable __rcu *fdt;
	struct fdtable fdtab;		/* the table that actually manages the descriptors */

	/*
	 * written part on a separate cache line in SMP
	 */
	spinlock_t file_lock ____cacheline_aligned_in_smp;
	int next_fd;			/* next usable descriptor, for fast allocation on open */
	unsigned long close_on_exec_init[1];	/* bitmap */
	unsigned long open_fds_init[1];		/* bitmap */
	struct file __rcu *fd_array[NR_OPEN_DEFAULT];	/* initial descriptor array */
};
首先是一個原子變量count,它是該files_struct的引用計數,記錄共享這張文件描述符表的進程數(而不是打開文件的個數);注意表中管理的不僅是普通文件,還包括設備文件等其他文件。next_fd記錄當前下一個可用的文件描述符,用於在下次進程打開文件時快速分配。而close_on_exec_init和open_fds_init是位圖。fd_array是初始化狀態的文件描述符數組,而fdtab是真正管理文件描述符的結構。看下fdtable結構:
/*
 * Descriptor table: maps descriptor numbers to struct file pointers and
 * tracks, via bitmaps, which descriptors are in use.
 */
struct fdtable {
	unsigned int max_fds;		/* maximum number of open files; can be changed */
	struct file __rcu **fd;		/* current fd array */
	unsigned long *close_on_exec;
	unsigned long *open_fds;	/* a set bit means that descriptor is in use */
	struct rcu_head rcu;
};
max_fds表示最大的打開文件數,可以更改。fd是一個指向文件描述符數組的指針,初始化爲files_struct結構中fd_array數組的地址,close_on_exec指向files_struct結構中的位圖。open_fds是一個指向位圖的指針,管理着當前打開的所有描述符,如果位圖中的對應位被置位,表示該描述符在使用中。
關於描述符表擴展的情況,最後進行解釋。上面的描述符表中,都是指向file結構的指針。進程每打開一個文件,就會有一個file結構與之對應。換句話說,file結構記錄的是進程對文件的某一次打開操作的信息。看下file結構:
struct file { /* * fu_list becomes invalid after file_free is called and queued via * fu_rcuhead for RCU freeing */ union { struct list_head fu_list; struct rcu_head fu_rcuhead; } f_u; struct path f_path; #define f_dentry f_path.dentry struct inode *f_inode; /* cached value */ const struct file_operations *f_op; /* * Protects f_ep_links, f_flags, f_pos vs i_size in lseek SEEK_CUR. * Must not be taken from IRQ context. */ spinlock_t f_lock; #ifdef CONFIG_SMP int f_sb_list_cpu; #endif atomic_long_t f_count; unsigned int f_flags; fmode_t f_mode; loff_t f_pos; struct fown_struct f_owner; const struct cred *f_cred; struct file_ra_state f_ra; u64 f_version; #ifdef CONFIG_SECURITY void *f_security; #endif /* needed for tty driver, and maybe others */ void *private_data; #ifdef CONFIG_EPOLL /* Used by fs/eventpoll.c to link all the hooks to this file */ struct list_head f_ep_links; struct list_head f_tfile_llink; #endif /* #ifdef CONFIG_EPOLL */ struct address_space *f_mapping; #ifdef CONFIG_DEBUG_WRITECOUNT unsigned long f_mnt_write_state; #endif };
同一個超級塊下打開的所有文件都會通過雙鏈表連接起來。另外,file結構中還有對應文件的inode緩存(f_inode),下次訪問時就不需要通過dentry查找inode了。還有一個path結構,該結構記錄當前文件的目錄項(dentry)和vfsmount結構。其他字段記錄文件的權限模式、讀寫位置等信息,還有一個重要的函數表,保存操作文件的一些函數的指針。關鍵是一個進程在這裏可以根據dentry查找到inode。到底是如何查找的呢?看下dentry的結構:
struct dentry { /* RCU lookup touched fields */ unsigned int d_flags; /* protected by d_lock */ seqcount_t d_seq; /* per dentry seqlock */ struct hlist_bl_node d_hash; /* lookup hash list */ struct dentry *d_parent; /* parent directory */ struct qstr d_name; struct inode *d_inode; /* Where the name belongs to - NULL is * negative */ unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */ /* Ref lookup also touches following */ unsigned int d_count; /* protected by d_lock */ spinlock_t d_lock; /* per dentry lock */ const struct dentry_operations *d_op; struct super_block *d_sb; /* The root of the dentry tree */ unsigned long d_time; /* used by d_revalidate */ void *d_fsdata; /* fs-specific data */ struct list_head d_lru; /* LRU list */ /* * d_child and d_rcu can share memory */ union { struct list_head d_child; /* child of parent list */ struct rcu_head d_rcu; } d_u; struct list_head d_subdirs; /* our children */ struct hlist_node d_alias; /* inode alias list */ };
dentry結構中有指向當前操作文件的inode指針、父目錄的dentry,當然還包括文件名信息。注意這裏文件名並沒有作爲屬性保存在inode節點中,而是保存在dentry結構中。由於文件名對於系統來說主要用來查找inode,而通過dentry可以查找到inode,因此找到dentry之後其實就不需要文件名了。通過dentry還可以定位所屬的超級塊。該結構中也有個函數表dentry_operations,主要是針對dentry的操作,如增加、刪除dentry。一個目錄下的所有子目錄會形成一個鏈表,d_subdirs是鏈表頭。而d_child作爲一個節點,連接到父目錄的子鏈表中。上節已經提到,系統中所有的dentry通過一個hash表維護起來,以便於查找。表頭是全局變量dentry_hashtable。而對於未使用的dentry,內核使用dentry_unused全局鏈表來組織。由於每個父目錄均會有一條自己子目錄的鏈表,因此系統中還存在一個dentry樹。
到目前爲止已經找到了具體的inode,inode記錄文件的真實屬性信息、修改時間、是否爲髒,以及在內存中的映射信息。看下inode結構:
struct inode { umode_t i_mode; unsigned short i_opflags; kuid_t i_uid; kgid_t i_gid; unsigned int i_flags; #ifdef CONFIG_FS_POSIX_ACL struct posix_acl *i_acl; struct posix_acl *i_default_acl; #endif const struct inode_operations *i_op; struct super_block *i_sb; struct address_space *i_mapping; #ifdef CONFIG_SECURITY void *i_security; #endif /* Stat data, not accessed from path walking */ unsigned long i_ino; /* * Filesystems may only read i_nlink directly. They shall use the * following functions for modification: * * (set|clear|inc|drop)_nlink * inode_(inc|dec)_link_count */ union { const unsigned int i_nlink; unsigned int __i_nlink; }; dev_t i_rdev; loff_t i_size; struct timespec i_atime; struct timespec i_mtime; struct timespec i_ctime; spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ unsigned short i_bytes; unsigned int i_blkbits; blkcnt_t i_blocks; #ifdef __NEED_I_SIZE_ORDERED seqcount_t i_size_seqcount; #endif /* Misc */ unsigned long i_state; struct mutex i_mutex; unsigned long dirtied_when; /* jiffies of first dirtying */ struct hlist_node i_hash; struct list_head i_wb_list; /* backing dev IO list */ struct list_head i_lru; /* inode LRU list */ struct list_head i_sb_list; union { struct hlist_head i_dentry; struct rcu_head i_rcu; }; u64 i_version; atomic_t i_count; atomic_t i_dio_count; atomic_t i_writecount; const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ struct file_lock *i_flock; struct address_space i_data; #ifdef CONFIG_QUOTA struct dquot *i_dquot[MAXQUOTAS]; #endif struct list_head i_devices; union { struct pipe_inode_info *i_pipe; struct block_device *i_bdev; struct cdev *i_cdev; }; __u32 i_generation; #ifdef CONFIG_FSNOTIFY __u32 i_fsnotify_mask; /* all events this inode cares about */ struct hlist_head i_fsnotify_marks; #endif #ifdef CONFIG_IMA atomic_t i_readcount; /* struct files open RO */ #endif void *i_private; /* fs or device private pointer */ };
inode是一個比較龐大的結構,開頭記錄了文件的權限信息,如用戶、用戶組等。該結構中有個函數表inode_operations,記錄針對inode的一些操作。inode中還有指向當前文件所屬文件系統的超級塊結構的指針(i_sb)。當然一個至關重要的就是address_space類型的i_mapping指針了。其指向一個address_space結構,記錄當前文件在內存中的映射情況。這點等會再分析。除此之外,inode還記錄文件的一些時間信息。前文說過,inode在內存中有三種類型:位於內存中但未使用的;位於內存中正在使用的;位於內存中已經發生變化即需要寫回到磁盤的,前兩種都是全局鏈表,第三種特定於超級塊結構。除此之外,inode還在一個hash表中出現,表頭是inode_hashtable,支持根據inode編號和超級塊快速訪問inode。
到此進程已經找到了具體的inode節點,後來又是如何把文件映射到內存中呢?一個核心結構就是address_space,先看下該結構
struct address_space { struct inode *host; /* owner: inode, block_device */ struct radix_tree_root page_tree; /* radix tree of all pages */ spinlock_t tree_lock; /* and lock protecting it */ unsigned int i_mmap_writable;/* count VM_SHARED mappings */ struct rb_root i_mmap; /* tree of private and shared mappings */ struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ struct mutex i_mmap_mutex; /* protect tree, count, list */ /* Protected by tree_lock together with the radix tree */ unsigned long nrpages; /* number of total pages */ pgoff_t writeback_index;/* writeback starts here */ const struct address_space_operations *a_ops; /* methods */ unsigned long flags; /* error bits/gfp mask */ struct backing_dev_info *backing_dev_info; /* device readahead, etc */ spinlock_t private_lock; /* for use by the address_space */ struct list_head private_list; /* ditto */ void *private_data; /* ditto */ } __attribute__((aligned(sizeof(long))));
該結構特定於inode節點存在,多個進程可以共享同一個文件,因此在特定於進程的file結構中,有一個指向該inode的address_space的指針f_mapping。具體的訪問位置記錄在file結構中。address_space僅僅負責對文件的映射,該結構管理了對應文件映射的所有內存區域vm_area_struct實例。上面的i_mmap作爲一個紅黑樹根,關聯所有的vm_area_struct,而i_mmap_nonlinear是一個雙向鏈表,關聯所有非線性映射的vm_area_struct實例。該結構中還記錄了所屬inode節點的指針host、區域包含的頁面的數量nrpages,當然還有一組操作函數,用於和設備交互,如讀取一個頁或者寫入一個頁、設置頁面爲髒等。關於進程虛擬內存的管理,參考另外一篇文章: