readdir函數解析

函數原型:

struct dirent *readdir(DIR *dirp);

首先糾正一個不少人都誤解的事實:平常使用的readdir不是系統調用,而是glibc的封裝函數;不過名爲readdir的系統調用確實存在,原型如下:

int readdir(unsigned int fd, struct old_linux_dirent *dirp, unsigned int count);

glibc的readdir所調用的系統調用不是readdir而是getdents。此處說明一下爲何封裝getdents而不是readdir系統調用:最重要的理由是readdir系統調用每次只會讀入一個目錄項,而getdents會一次讀入盡可能多的目錄項至緩衝區。下面先分析readdir系統調用的實現,具體的代碼如下:

複製代碼

 1 SYSCALL_DEFINE3(old_readdir, unsigned int, fd,
 2                 struct old_linux_dirent __user *, dirent, unsigned int, count)
 3 {
 4         int error;
 5         struct fd f = fdget(fd);
 6         struct readdir_callback buf = {
 7                 .ctx.actor = fillonedir,
 8                 .dirent = dirent
 9         };
10 
11         if (!f.file)
12                 return -EBADF;
13 
14         error = iterate_dir(f.file, &buf.ctx);
15         if (buf.result)
16                 error = buf.result;
17 
18         fdput(f);
19         return error;
20 }

複製代碼

6-9行:設置目錄項填充函數爲fillonedir,fillonedir的具體實現不分析,只需知道它每次只填充一個目錄項即可(作爲替代,後面將會分析更加複雜的filldir函數)

14行:iterate_dir是vfs的封裝函數,該函數調用具體文件系統的iterate函數來填充目錄項

    注: 3.11之前的內核並不使用iterate作爲讀目錄的函數,而是使用readdir函數

總結:readdir系統調用忽略了count參數,而且每次只讀一個目錄項

接下來分析glibc的readdir函數實現,這個過程可能比較複雜,有興趣的可以看看,首先給出readdir的實現:

複製代碼

 1 DIRENT_TYPE *
 2 __READDIR (DIR *dirp)
 3 {
 4   DIRENT_TYPE *dp;
 5   int saved_errno = errno;
 6 
 7 #ifndef NOT_IN_libc
 8   __libc_lock_lock (dirp->lock);
 9 #endif
10 
11   do
12     {
13       size_t reclen;
14 
15       if (dirp->offset >= dirp->size) 
16         {
17           /* We've emptied out our buffer.  Refill it.  */
18 
19           size_t maxread;
20           ssize_t bytes;
21 
22 #ifndef _DIRENT_HAVE_D_RECLEN
23           /* Fixed-size struct; must read one at a time (see below).  */
24           maxread = sizeof *dp;
25 #else
26           maxread = dirp->allocation;
27 #endif
28 
29           bytes = __GETDENTS (dirp->fd, dirp->data, maxread);
30           if (bytes <= 0)
31             {
32               /* On some systems getdents fails with ENOENT when the
33                  open directory has been rmdir'd already.  POSIX.1
34                  requires that we treat this condition like normal EOF.  */
35               if (bytes < 0 && errno == ENOENT)
36                 bytes = 0;
37 
38               /* Don't modifiy errno when reaching EOF.  */
39               if (bytes == 0)
40                 __set_errno (saved_errno);
41       dp = NULL;
42               break;
43             }
44           dirp->size = (size_t) bytes;
45 
46           /* Reset the offset into the buffer.  */
47           dirp->offset = 0;
48         }
49 
50       dp = (DIRENT_TYPE *) &dirp->data[dirp->offset];
51 
52 #ifdef _DIRENT_HAVE_D_RECLEN
53       reclen = dp->d_reclen;
54 #else
55       /* The only version of `struct dirent*' that lacks `d_reclen'
56          is fixed-size.  */
57       assert (sizeof dp->d_name > 1);
58       reclen = sizeof *dp;
59       /* The name is not terminated if it is the largest possible size.
60          Clobber the following byte to ensure proper null termination.  We
61          read jst one entry at a time above so we know that byte will not
62          be used later.  */
63       dp->d_name[sizeof dp->d_name] = '\0';
64 #endif
65 
66       dirp->offset += reclen;
67 
68 #ifdef _DIRENT_HAVE_D_OFF
69       dirp->filepos = dp->d_off;
70 #else
71       dirp->filepos += reclen;
72 #endif
73 
74       /* Skip deleted files.  */
75     } while (dp->d_ino == 0);
76  #ifndef NOT_IN_libc
77   __libc_lock_unlock (dirp->lock);
78 #endif
79 
80   return dp;
81 }

複製代碼

7-9行:對互斥量dirp->lock加鎖

11-75行:一個do while循環,該循環用於過濾已經刪除的目錄項(d_ino爲0的目錄項)

15-48行:具體的讀目錄項代碼,調用getdents系統調用,盡可能多地讀入目錄項至dirp->data緩衝區

總結:代碼並不是特別複雜,自己閱讀應該可以理解。readdir函數的邏輯是:分配一個緩衝區,然後每次盡可能多地讀取目錄項至緩衝區,之後從緩衝區中逐項返回,緩衝區讀完了再調用getdents繼續讀目錄項至緩衝區

接下來分析最重要的getdents系統調用,代碼如下:

複製代碼

 1 SYSCALL_DEFINE3(getdents, unsigned int, fd,
 2                 struct linux_dirent __user *, dirent, unsigned int, count)
 3 {
 4         struct fd f;
 5         struct linux_dirent __user * lastdirent;
 6         struct getdents_callback buf = {
 7                 .ctx.actor = filldir,
 8                 .count = count,
 9                 .current_dir = dirent
10         };
11         int error;
12 
13         if (!access_ok(VERIFY_WRITE, dirent, count))
14                 return -EFAULT;
15 
16         f = fdget(fd);
17         if (!f.file)
18                 return -EBADF;
19 
20         error = iterate_dir(f.file, &buf.ctx);
21         if (error >= 0)
22                 error = buf.error;
23         lastdirent = buf.previous;
24         if (lastdirent) {
25                 if (put_user(buf.ctx.pos, &lastdirent->d_off))
26                         error = -EFAULT;
27                 else
28                         error = count - buf.count;
29         }
30         fdput(f);
31         return error;
32 }

複製代碼

6-9行:設置填充函數爲filldir,等會分析該函數

20行:調用iterate_dir函數,該函數會調用具體文件系統的iterate函數。接下來作爲例子給出PFS的實現(PFS是本人設計的一個文件系統,對PFS的linux driver有興趣的可以去https://sourceforge.net/projects/pfspfs看看)

總結:在分析了iterate和filldir之後,再回頭分析getdents系統調用

iterate源碼如下:(此處採用pfs的實現)

複製代碼

 1 static int
 2 pfs_readdir(struct file *file, struct dir_context *ctx)
 3 {
 4         int64_t dno;
 5         unsigned long off;
 6         struct buffer_head *bh;
 7         struct pfs_dir_entry *de;
 8         struct inode *inode = file_inode(file);
 9 
10         if(ctx->pos == 0)
11                 ctx->pos = PFS_DIRHASHSIZ * sizeof(int64_t) + sizeof(int64_t);
12         for(off = ctx->pos & (PFS_BLOCKSIZ - 1); ctx->pos < inode->i_size; off = ctx->pos & (PFS_BLOCKSIZ - 1)){
13                 if(!(dno = pfs_get_block_number(inode, pfs_block_number(ctx->pos), 0)))
14                         goto skip;
15                 if(!(bh = sb_bread(inode->i_sb, dno / PFS_STRS_PER_BLOCK))){
16                         pr_err("pfs: device %s: %s: failed to read block %lld of dir %lld\n",
17                                 inode->i_sb->s_id, "pfs_readdir", pfs_block_number(ctx->pos), PFS_I(inode)->i_ino);
18                         goto skip;
19                 }
20                 do{
21                         de = (struct pfs_dir_entry *)((char *)bh->b_data + off);
22                         if(de->d_ino){
23                                 if(!(dir_emit(ctx, pfs_get_de_name(de), de->d_len, (int32_t)le64_to_cpu(de->d_ino), DT_UNKNOWN))){
24                                         brelse(bh);
25                                         return 0;
26                                 }
27                         }
28                         off += pfs_get_de_size(de);
29                         ctx->pos += pfs_get_de_size(de);
30                 }while(off < PFS_BLOCKSIZ && ctx->pos < inode->i_size);
31                 brelse(bh);
32                 continue;
33 skip:
34                 ctx->pos += PFS_BLOCKSIZ - off;
35         }
36         return 0;
37 }

複製代碼

12-30行:完整地分析這段代碼需要讀者熟悉linux內核,因此此處不逐行分析,只給出代碼的邏輯:pfs_readdir不斷地讀取目錄項並調用dir_emit填充,直到dir_emit失敗或ctx->pos到達inode->i_size爲止。dir_emit是一個封裝函數,實現爲filldir() == 0,所以filldir成功時dir_emit返回1,失敗時返回0

總結:不同文件系統的目錄iterate實現各不相同,不過大致都差不多:都是讀目錄項,然後調用dir_emit函數填充至用戶空間

filldir函數的代碼如下:

複製代碼

 1 static int filldir(struct dir_context *ctx, const char *name, int namlen,
 2                    loff_t offset, u64 ino, unsigned int d_type)
 3 {
 4         struct linux_dirent __user * dirent;
 5         struct getdents_callback *buf =
 6                 container_of(ctx, struct getdents_callback, ctx);
 7         unsigned long d_ino;
 8         int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2,
 9                 sizeof(long));
10 
11         buf->error = -EINVAL;   /* only used if we fail.. */
12         if (reclen > buf->count)
13                 return -EINVAL;
14         d_ino = ino;
15         if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
16                 buf->error = -EOVERFLOW;
17                 return -EOVERFLOW;
18         }
19         dirent = buf->previous;
20         if (dirent) {
21                 if (__put_user(offset, &dirent->d_off))
22                         goto efault;
23         }
24         dirent = buf->current_dir;
25         if (__put_user(d_ino, &dirent->d_ino))
26                 goto efault;
27         if (__put_user(reclen, &dirent->d_reclen))
28                 goto efault;
29         if (copy_to_user(dirent->d_name, name, namlen))
30                 goto efault;
31         if (__put_user(0, dirent->d_name + namlen))
32                 goto efault;
33         if (__put_user(d_type, (char __user *) dirent + reclen - 1))
34                 goto efault;
35       buf->previous = dirent;
36         dirent = (void __user *)dirent + reclen;
37         buf->current_dir = dirent;
38         buf->count -= reclen;
39         return 0;
40 efault:
41         buf->error = -EFAULT;
42         return -EFAULT;
43 }

複製代碼

函數解釋:filldir先將上一個已填充目錄項的d_off設置爲當前的偏移,然後填充當前的目錄項,將buf->previous設置爲dirent,最後將buf->current_dir指向下一個可用的空間,並從buf->count中減去reclen

總結:最後作爲一個貫穿整個過程的示例,給出telldir函數的解釋。telldir返回dirp->filepos,而dirp->filepos在glibc的readdir函數中設置(dirp->filepos = dp->d_off);正如filldir的20到23行所示,dp->d_off是讀下一個目錄項時的偏移。

相關文章
相關標籤/搜索