函數原型:
/* glibc library wrapper (not the raw system call): takes a directory stream
   and returns a pointer to a struct dirent. */
struct dirent *readdir(DIR *dirp);
首先糾正一個不少人都理解錯誤的事實:這裏的readdir不是系統調用,而是glibc的封裝函數;不過同名的readdir系統調用確實是存在的,原型如下:
/* Legacy readdir system call (distinct from the glibc readdir() wrapper):
   takes a file descriptor and a user buffer of type struct old_linux_dirent. */
int readdir(unsigned int fd, struct old_linux_dirent *dirp, unsigned int count);
glibc的readdir所調用的系統調用不是readdir而是getdents系統調用。此處說明一下爲何採用封裝getdents而不是readdir系統調用:最重要的一個理由是readdir系統調用每次只會讀入一個目錄項,而getdents會一次讀入儘可能多的目錄項至緩衝區。我先分析readdir系統調用的實現,具體的代碼如下:
/*
 * old_readdir system call: looks up the open file for fd, runs iterate_dir()
 * with fillonedir as the actor callback and the caller-supplied dirent as the
 * output slot, then releases the file reference with fdput().
 * Returns -EBADF when fd has no open file; otherwise buf.result when it is
 * non-zero, else the value returned by iterate_dir().
 * The count argument is not referenced anywhere in this body.
 */
1 SYSCALL_DEFINE3(old_readdir, unsigned int, fd, 2 struct old_linux_dirent __user *, dirent, unsigned int, count) 3 { 4 int error; 5 struct fd f = fdget(fd); 6 struct readdir_callback buf = { 7 .ctx.actor = fillonedir, 8 .dirent = dirent 9 }; 10 11 if (!f.file) 12 return -EBADF; 13 14 error = iterate_dir(f.file, &buf.ctx); 15 if (buf.result) 16 error = buf.result; 17 18 fdput(f); 19 return error; 20 }
6-9行:設置目錄項填充函數爲fillonedir,fillonedir的具體實現不分析,只需知道每次只填充一個目錄項即可(作爲替代,將會分析更加複雜的filldir函數)
14行:iterate_dir是vfs的封裝函數,該函數調用具體的文件系統的iterate函數填充目錄項
注:3.11以前的內核並不使用iterate作爲讀目錄的函數,而是使用readdir函數
總結:readdir系統調用忽略了count參數,並且每次只讀一個目錄項。
接下來分析glibc的readdir函數實現,這個過程可能比較複雜,有興趣的可以看看,首先給出readdir的實現:
/*
 * __READDIR: return the next entry of the directory stream dirp, or NULL.
 * Entries are served out of dirp->data. When dirp->offset reaches dirp->size
 * the buffer is refilled through __GETDENTS, asking for dirp->allocation
 * bytes when the dirent type has d_reclen and for a single fixed-size record
 * otherwise.
 * A __GETDENTS result <= 0 ends the call with NULL; ENOENT is treated as
 * end-of-directory, and at end-of-directory errno is restored to the value it
 * had on entry.
 * Entries whose d_ino is 0 are skipped by the enclosing do/while loop.
 * dirp->filepos is taken from d_off when that field exists, otherwise it is
 * advanced by the record length.
 * dirp->lock is held for the whole call unless NOT_IN_libc is defined.
 */
1 DIRENT_TYPE * 2 __READDIR (DIR *dirp) 3 { 4 DIRENT_TYPE *dp; 5 int saved_errno = errno; 6 7 #ifndef NOT_IN_libc 8 __libc_lock_lock (dirp->lock); 9 #endif 10 11 do 12 { 13 size_t reclen; 14 15 if (dirp->offset >= dirp->size) 16 { 17 /* We've emptied out our buffer. Refill it. */ 18 19 size_t maxread; 20 ssize_t bytes; 21 22 #ifndef _DIRENT_HAVE_D_RECLEN 23 /* Fixed-size struct; must read one at a time (see below). */ 24 maxread = sizeof *dp; 25 #else 26 maxread = dirp->allocation; 27 #endif 28 29 bytes = __GETDENTS (dirp->fd, dirp->data, maxread); 30 if (bytes <= 0) 31 { 32 /* On some systems getdents fails with ENOENT when the 33 open directory has been rmdir'd already. POSIX.1 34 requires that we treat this condition like normal EOF. */ 35 if (bytes < 0 && errno == ENOENT) 36 bytes = 0; 37 38 /* Don't modifiy errno when reaching EOF. */ 39 if (bytes == 0) 40 __set_errno (saved_errno); 41 dp = NULL; 42 break; 43 } 44 dirp->size = (size_t) bytes; 45 46 /* Reset the offset into the buffer. */ 47 dirp->offset = 0; 48 } 49 50 dp = (DIRENT_TYPE *) &dirp->data[dirp->offset]; 51 52 #ifdef _DIRENT_HAVE_D_RECLEN 53 reclen = dp->d_reclen; 54 #else 55 /* The only version of `struct dirent*' that lacks `d_reclen' 56 is fixed-size. */ 57 assert (sizeof dp->d_name > 1); 58 reclen = sizeof *dp; 59 /* The name is not terminated if it is the largest possible size. 60 Clobber the following byte to ensure proper null termination. We 61 read jst one entry at a time above so we know that byte will not 62 be used later. */ 63 dp->d_name[sizeof dp->d_name] = '\0'; 64 #endif 65 66 dirp->offset += reclen; 67 68 #ifdef _DIRENT_HAVE_D_OFF 69 dirp->filepos = dp->d_off; 70 #else 71 dirp->filepos += reclen; 72 #endif 73 74 /* Skip deleted files. */ 75 } while (dp->d_ino == 0); 76 #ifndef NOT_IN_libc 77 __libc_lock_unlock (dirp->lock); 78 #endif 79 80 return dp; 81 }
7-9行:對互斥量dirp->lock加鎖
11-75行:一個do while循環,該循環用於跳過已經刪除的目錄項(即d_ino爲0的目錄項)
15-48行:具體的讀目錄項代碼,調用getdents系統調用,儘可能多地讀入目錄項至dirp->data緩衝區
總結:代碼並不是特別複雜,自己閱讀應該可以理解。readdir函數的邏輯是:使用一個已分配的緩衝區(dirp->data),每次儘可能多地讀取目錄項至緩衝區,然後從緩衝區中逐個返回目錄項,緩衝區讀完後繼續調用getdents讀目錄項至緩衝區
接下來分析最重要的getdents系統調用,代碼如下:
/*
 * getdents system call: after an access_ok(VERIFY_WRITE, ...) check on the
 * user buffer dirent/count, lets iterate_dir() fill that buffer through the
 * filldir actor.
 * Returns -EFAULT when access_ok() fails and -EBADF when fd has no open file.
 * If at least one entry was stored (buf.previous is non-NULL), buf.ctx.pos is
 * written into the d_off of that last entry and the result is the number of
 * bytes consumed (count - buf.count), or -EFAULT if that store fails.
 * Otherwise the result is buf.error when iterate_dir() returned >= 0, else
 * the negative value returned by iterate_dir().
 */
1 SYSCALL_DEFINE3(getdents, unsigned int, fd, 2 struct linux_dirent __user *, dirent, unsigned int, count) 3 { 4 struct fd f; 5 struct linux_dirent __user * lastdirent; 6 struct getdents_callback buf = { 7 .ctx.actor = filldir, 8 .count = count, 9 .current_dir = dirent 10 }; 11 int error; 12 13 if (!access_ok(VERIFY_WRITE, dirent, count)) 14 return -EFAULT; 15 16 f = fdget(fd); 17 if (!f.file) 18 return -EBADF; 19 20 error = iterate_dir(f.file, &buf.ctx); 21 if (error >= 0) 22 error = buf.error; 23 lastdirent = buf.previous; 24 if (lastdirent) { 25 if (put_user(buf.ctx.pos, &lastdirent->d_off)) 26 error = -EFAULT; 27 else 28 error = count - buf.count; 29 } 30 fdput(f); 31 return error; 32 }
6-9行:設置填充函數爲filldir,等會分析該函數
20行:調用iterate_dir函數,該函數會調用具體的文件系統中的iterate函數,接下來作爲例子給出PFS的實現(PFS是本人設計的一個文件系統,對PFS的linux driver有興趣的可以去 https://sourceforge.net/projects/pfspfs 看看)
總結:在分析了iterate和filldir之後,再回過頭來分析getdents系統調用
iterate源碼如下:(此處採用pfs的實現)
/*
 * pfs_readdir: directory iterate callback of the PFS file system.
 * A ctx->pos of 0 is first moved to
 * PFS_DIRHASHSIZ * sizeof(int64_t) + sizeof(int64_t)
 * (presumably to skip an on-disk header area; confirm against the PFS layout).
 * The outer loop then walks the directory one block at a time until ctx->pos
 * reaches inode->i_size. When pfs_get_block_number() returns 0 or sb_bread()
 * fails, the rest of the current block is skipped by advancing ctx->pos by
 * PFS_BLOCKSIZ - off.
 * Inside a block, every entry with a non-zero d_ino is handed to dir_emit();
 * when dir_emit() returns 0 the buffer is released and the function returns
 * at once, leaving ctx->pos on the entry that was not emitted.
 * Always returns 0.
 */
1 static int 2 pfs_readdir(struct file *file, struct dir_context *ctx) 3 { 4 int64_t dno; 5 unsigned long off; 6 struct buffer_head *bh; 7 struct pfs_dir_entry *de; 8 struct inode *inode = file_inode(file); 9 10 if(ctx->pos == 0) 11 ctx->pos = PFS_DIRHASHSIZ * sizeof(int64_t) + sizeof(int64_t); 12 for(off = ctx->pos & (PFS_BLOCKSIZ - 1); ctx->pos < inode->i_size; off = ctx->pos & (PFS_BLOCKSIZ - 1)){ 13 if(!(dno = pfs_get_block_number(inode, pfs_block_number(ctx->pos), 0))) 14 goto skip; 15 if(!(bh = sb_bread(inode->i_sb, dno / PFS_STRS_PER_BLOCK))){ 16 pr_err("pfs: device %s: %s: failed to read block %lld of dir %lld\n", 17 inode->i_sb->s_id, "pfs_readdir", pfs_block_number(ctx->pos), PFS_I(inode)->i_ino); 18 goto skip; 19 } 20 do{ 21 de = (struct pfs_dir_entry *)((char *)bh->b_data + off); 22 if(de->d_ino){ 23 if(!(dir_emit(ctx, pfs_get_de_name(de), de->d_len, (int32_t)le64_to_cpu(de->d_ino), DT_UNKNOWN))){ 24 brelse(bh); 25 return 0; 26 } 27 } 28 off += pfs_get_de_size(de); 29 ctx->pos += pfs_get_de_size(de); 30 }while(off < PFS_BLOCKSIZ && ctx->pos < inode->i_size); 31 brelse(bh); 32 continue; 33 skip: 34 ctx->pos += PFS_BLOCKSIZ - off; 35 } 36 return 0; 37 }
12-30行:代碼的完整分析可能需要讀者熟悉linux內核,因此此處不給出代碼的具體分析而只給出代碼的邏輯:pfs_readdir不斷地讀目錄項,然後調用dir_emit填充目錄項,直到dir_emit調用失敗或者讀到目錄末尾(ctx->pos達到inode->i_size)。dir_emit是一個封裝函數,實現爲filldir() == 0,因此在filldir成功時dir_emit返回1,在失敗時返回0
總結:不同的文件系統的目錄iterate實現各不相同,不過大體上都差不多,都是讀目錄項,然後調用dir_emit函數填充至用戶空間
filldir函數的代碼如下:
/*
 * filldir: actor installed by getdents; appends one linux_dirent record for
 * (name, namlen, ino, d_type) to the user buffer tracked in the enclosing
 * struct getdents_callback.
 * reclen is offsetof(struct linux_dirent, d_name) + namlen + 2 rounded up to
 * a multiple of sizeof(long); the extra bytes leave room for the NUL written
 * after the name and for d_type, which is stored in the last byte of the
 * record.
 * Before the new record is written, the d_off of the previously written
 * record (if any) is set to offset.
 * Returns 0 on success after advancing buf->previous, buf->current_dir and
 * buf->count by one record. Returns -EINVAL when the record does not fit in
 * buf->count, -EOVERFLOW when ino does not fit in unsigned long, and -EFAULT
 * when a store to user space fails; buf->error is set to the same code.
 */
1 static int filldir(struct dir_context *ctx, const char *name, int namlen, 2 loff_t offset, u64 ino, unsigned int d_type) 3 { 4 struct linux_dirent __user * dirent; 5 struct getdents_callback *buf = 6 container_of(ctx, struct getdents_callback, ctx); 7 unsigned long d_ino; 8 int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2, 9 sizeof(long)); 10 11 buf->error = -EINVAL; /* only used if we fail.. */ 12 if (reclen > buf->count) 13 return -EINVAL; 14 d_ino = ino; 15 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { 16 buf->error = -EOVERFLOW; 17 return -EOVERFLOW; 18 } 19 dirent = buf->previous; 20 if (dirent) { 21 if (__put_user(offset, &dirent->d_off)) 22 goto efault; 23 } 24 dirent = buf->current_dir; 25 if (__put_user(d_ino, &dirent->d_ino)) 26 goto efault; 27 if (__put_user(reclen, &dirent->d_reclen)) 28 goto efault; 29 if (copy_to_user(dirent->d_name, name, namlen)) 30 goto efault; 31 if (__put_user(0, dirent->d_name + namlen)) 32 goto efault; 33 if (__put_user(d_type, (char __user *) dirent + reclen - 1)) 34 goto efault; 35 buf->previous = dirent; 36 dirent = (void __user *)dirent + reclen; 37 buf->current_dir = dirent; 38 buf->count -= reclen; 39 return 0; 40 efault: 41 buf->error = -EFAULT; 42 return -EFAULT; 43 }
函數解釋:filldir先把上一個已填充目錄項的d_off設置爲當前的偏移,然後填充當前的目錄項,設置buf->previous爲dirent,最後將buf->current_dir指向下一個可用的空間
總結:最後作爲一個貫穿整個過程的示例,給出telldir函數的解釋。telldir返回dirp->filepos,而dirp->filepos是在glibc的readdir函數中設置的(dirp->filepos = dp->d_off);正如filldir的20到23行所示,dp->d_off是讀下一個目錄項時的偏移。