ocfs2_file_aio_write()

這裏僅考慮 buffered IO,因此不少 direct IO 相關的處理可以直接略過。先看調用路徑:

 2)               |  vfs_write() {
 2)               |    do_sync_write() {
 2)               |      ocfs2_file_aio_write() {
 2)   0.441 us    |        ocfs2_rw_lock();
 2)               |        ocfs2_prepare_inode_for_write() {
 2)   0.286 us    |          ocfs2_inode_unlock();
 2)   1.521 us    |        }
 2)               |        generic_file_buffered_write() {
 2) + 10.056 us   |          ocfs2_inode_unlock();
 2) ! 11929.80 us |        }
 2) ! 11933.97 us |      }
 2) ! 11934.52 us |    }
 2) ! 11938.59 us |  }
 //可以看出 generic_file_buffered_write() 很耗時;另外切換到 jbd2 線程後,等待回寫完成的 filemap_fdatawait_range() 也很耗時:
 2)  ocfs2dc-7642  =>  jbd2/sd-7649
 ------------------------------------------
 2) ! 714.081 us  |  filemap_fdatawait_range();
 2) ! 1708.134 us |  filemap_fdatawait_range();
 ...
2231 static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
2232                                     const struct iovec *iov,
2233                                     unsigned long nr_segs,
2234                                     loff_t pos)
2235 {
2236         int ret, direct_io, appending, rw_level, have_alloc_sem  = 0;
2237         int can_do_direct, has_refcount = 0;
2238         ssize_t written = 0;
2239         size_t ocount;          /* original count */
2240         size_t count;           /* after file limit checks */
2241         loff_t old_size, *ppos = &iocb->ki_pos;
2242         u32 old_clusters;
2243         struct file *file = iocb->ki_filp;
2244         struct inode *inode = file->f_path.dentry->d_inode;
2245         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2246         int full_coherency = !(osb->s_mount_opt &
2247                                OCFS2_MOUNT_COHERENCY_BUFFERED);
//buffered IO 路徑下 unaligned_dio 一直等於 0(只有 direct IO 且非同步 kiocb 時纔會被重新賦值)
2248         int unaligned_dio = 0;
2249 
2250         trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry,
2251                 (unsigned long long)OCFS2_I(inode)->ip_blkno,
2252                 file->f_path.dentry->d_name.len,
2253                 file->f_path.dentry->d_name.name,
2254                 (unsigned int)nr_segs);
2255 
2256         if (iocb->ki_left == 0)
2257                 return 0;
2258 
2259         vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
2260 
2261         appending = kiocb_is_append(iocb) ? 1 : 0;
2262         direct_io = kiocb_is_direct(iocb) ? 1 : 0;
2263 
2264         mutex_lock(&inode->i_mutex);
2265 
2266         ocfs2_iocb_clear_sem_locked(iocb);
2267 
2268 relock:
2269         /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */
2270         if (direct_io) {
2271                 down_read(&inode->i_alloc_sem);
2272                 have_alloc_sem = 1;
2273                 /* communicate with ocfs2_dio_end_io */
2274                 ocfs2_iocb_set_sem_locked(iocb);
2275         }
2276 
2277         /*
2278          * Concurrent O_DIRECT writes are allowed with
2279          * mount_option "coherency=buffered".
2280          */
2281         rw_level = (!direct_io || full_coherency);
2282 //buffered IO 時 !direct_io 爲真,故 rw_level=1
2283         ret = ocfs2_rw_lock(inode, rw_level);
2284         if (ret < 0) {
2285                 mlog_errno(ret);
2286                 goto out_sems;
2287         }
2288 
2289         /*
2290          * O_DIRECT writes with "coherency=full" need to take EX cluster
2291          * inode_lock to guarantee coherency.
2292          */
2293         if (direct_io && full_coherency) {
2294                 /*
2295                  * We need to take and drop the inode lock to force
2296                  * other nodes to drop their caches.  Buffered I/O
2297                  * already does this in write_begin().
2298                  */
2299                 ret = ocfs2_inode_lock(inode, NULL, 1);
2300                 if (ret < 0) {
2301                         mlog_errno(ret);
2302                         goto out_sems;
2303                 }
2304 
2305                 ocfs2_inode_unlock(inode, 1);
2306         }
2307 
2308         can_do_direct = direct_io;
//計算文件寫入位置,並針對 setattr、refcount、direct IO 等特殊情況進行處理;
//一般情況下,只對 inode_lock 加讀鎖
2309         ret = ocfs2_prepare_inode_for_write(file, ppos,
2310                                             iocb->ki_left, appending,
2311                                             &can_do_direct, &has_refcount);
2312         if (ret < 0) {
2313                 mlog_errno(ret);
2314                 goto out;
2315         }

割...

下面這一段,我們只需要關心 generic_file_buffered_write(),之後拿出來單獨分析。

2317         if (direct_io && !is_sync_kiocb(iocb))
2318                 unaligned_dio = ocfs2_is_io_unaligned(inode, iocb->ki_left,
2319                                                       *ppos);
2320 
2321         /*
2322          * We can't complete the direct I/O as requested, fall back to
2323          * buffered I/O.
2324          */
2325         if (direct_io && !can_do_direct) {
2326                 ocfs2_rw_unlock(inode, rw_level);
2327                 up_read(&inode->i_alloc_sem);
2328 
2329                 have_alloc_sem = 0;
2330                 rw_level = -1;
2331 
2332                 direct_io = 0;
2333                 goto relock;
2334         }
2335 
2336         if (unaligned_dio) {
2337                 /*
2338                  * Wait on previous unaligned aio to complete before
2339                  * proceeding.
2340                  */
2341                 ocfs2_aiodio_wait(inode);
2342 
2343                 /* Mark the iocb as needing a decrement in ocfs2_dio_end_io */
2344                 atomic_inc(&OCFS2_I(inode)->ip_unaligned_aio);
2345                 ocfs2_iocb_set_unaligned_aio(iocb);
2346         }
2347 
2348         /*
2349          * To later detect whether a journal commit for sync writes is
2350          * necessary, we sample i_size, and cluster count here.
2351          */
2352         old_size = i_size_read(inode);
2353         old_clusters = OCFS2_I(inode)->ip_clusters;
2354 
2355         /* communicate with ocfs2_dio_end_io */
2356         ocfs2_iocb_set_rw_locked(iocb, rw_level);
2357 
2358         ret = generic_segment_checks(iov, &nr_segs, &ocount,
2359                                      VERIFY_READ);
2360         if (ret)
2361                 goto out_dio;
2362 
2363         count = ocount;
2364         ret = generic_write_checks2(iocb, ppos, &count,
2365                                    S_ISBLK(inode->i_mode));
2366         if (ret)
2367                 goto out_dio;
2368 
2369         if (direct_io) {
2370                 written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos,
2371                                                     ppos, count, ocount);
2372                 if (written < 0) {
2373                         ret = written;
2374                         goto out_dio;
2375                 }
2376         } else {
2377                 current->backing_dev_info = file->f_mapping->backing_dev_info;
2378                 written = generic_file_buffered_write(iocb, iov, nr_segs, *ppos,
2379                                                       ppos, count, 0);
2380                 current->backing_dev_info = NULL;
2381         }
2382

割...


2383 out_dio:
//不支持buffered aio啦
2384         /* buffered aio wouldn't have proper lock coverage today */
2385         BUG_ON(ret == -EIOCBQUEUED && !kiocb_is_direct(iocb));
2386 
2387         if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) ||
2388             ((file->f_flags & O_DIRECT) && !direct_io)) {
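//只有同步寫纔會進入這個分支:文件以 O_DSYNC 打開且走 buffered IO、IS_SYNC(inode) 爲真,
//或者以 O_DIRECT 打開但已回退成 buffered IO。
//注意:IS_SYNC() 檢查的是文件系統是否以 sync 方式掛載、或 inode 是否帶 S_SYNC 標誌,
//而不是 inode 的 I_SYNC(正在回寫)狀態。
//普通 buffered 寫不滿足以上任何條件,所以測試時沒有進來。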
2389                 ret = filemap_fdatawrite_range(file->f_mapping, pos,
2390                                                pos + count - 1);
2391                 if (ret < 0)
2392                         written = ret;
2393 
2394                 if (!ret) {
2395                         ret = jbd2_journal_force_commit(osb->journal->j_journal);
2396                         if (ret < 0)
2397                                 written = ret;
2398                 }
2399 
2400                 if (!ret)
2401                         ret = filemap_fdatawait_range(file->f_mapping, pos,
2402                                                       pos + count - 1);
2403         }
2404 
2405         /*
2406          * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io
2407          * function pointer which is called when o_direct io completes so that
2408          * it can unlock our rw lock.  (it's the clustered equivalent of
2409          * i_alloc_sem; protects truncate from racing with pending ios).
2410          * Unfortunately there are error cases which call end_io and others
2411          * that don't.  so we don't have to unlock the rw_lock if either an
2412          * async dio is going to do it in the future or an end_io after an
2413          * error has already done it.
2414          */
2415         if ((ret == -EIOCBQUEUED) || (!ocfs2_iocb_is_rw_locked(iocb))) {
2416                 rw_level = -1;
2417                 have_alloc_sem = 0;
2418                 unaligned_dio = 0;
2419         }
2420 
2421         if (unaligned_dio)
2422                 atomic_dec(&OCFS2_I(inode)->ip_unaligned_aio);
2423 
2424 out:
2425         if (rw_level != -1)
2426                 ocfs2_rw_unlock(inode, rw_level);
2427 
2428 out_sems:
2429         if (have_alloc_sem) {
2430                 up_read(&inode->i_alloc_sem);
2431                 ocfs2_iocb_clear_sem_locked(iocb);
2432         }
2433 
2434         mutex_unlock(&inode->i_mutex);
2435 
2436         if (written)
2437                 ret = written;
2438         return ret;
2439 }
相關文章
相關標籤/搜索