ocfs2_inode_lock_full_nested()

時間 2019-11-16

標籤 ocfs2 ocfs inode lock nested 简体版

原文原文鏈接

從ftrace來看，不出意外，最耗時間的果真是__ocfs2_cluster_lock()：

 0)               |  ocfs2_inode_lock_full_nested() {
 0)   0.000 us    |    ocfs2_wait_for_recovery();
 0) ! 12026.56 us |    __ocfs2_cluster_lock();
 0)   0.000 us    |    ocfs2_wait_for_recovery();
 0)   0.000 us    |    ocfs2_inode_lock_update();
 0) ! 12026.56 us |  }
 0)   0.000 us    |  ocfs2_inode_unlock();

一樣，簡單過一下這個函數：

//Summary (annotation): takes the inode's META lock (EX when ex is non-zero, PR otherwise),
//then calls ocfs2_inode_lock_update() and, if ret_bh is non-NULL, hands the inode
//buffer_head back through ocfs2_assign_bh(). Returns 0 on success or a negative errno;
//on failure any *ret_bh is released and the cluster lock is dropped if it was acquired.
2272 /*     
2273  * returns < 0 error if the callback will never be called, otherwise
2274  * the result of the lock will be communicated via the callback.
2275  */    
2276 int ocfs2_inode_lock_full_nested(struct inode *inode,
2277                                  struct buffer_head **ret_bh,
2278                                  int ex,
2279                                  int arg_flags,
2280                                  int subclass)
2281 {   
//In the traced call: arg_flags=0, subclass=OI_LS_NORMAL
2282         int status, level, acquired;
2283         u32 dlm_flags;
2284         struct ocfs2_lock_res *lockres = NULL;
2285         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2286         struct buffer_head *local_bh = NULL;
2287 
//NOTE(review): inode has already been dereferenced by OCFS2_SB(inode->i_sb) above,
//so this check runs after the first use of the pointer.
2288         BUG_ON(!inode);
2289 
//Judging from this log message, the inode lock is the META lock. Open question: is there
//a corresponding address_space lock?
//My guess is no: what the nodes share is the inode, while each node's page cache is independent.
2290         mlog(0, "inode %llu, take %s META lock\n",
2291              (unsigned long long)OCFS2_I(inode)->ip_blkno,
2292              ex ? "EXMODE" : "PRMODE");
2293        
2294         status = 0;
2295         acquired = 0;
2296         /* We'll allow faking a readonly metadata lock for
2297          * rodevices. */
2298         if (ocfs2_is_hard_readonly(osb)) {
2299                 if (ex)
2300                         status = -EROFS;
2301                 goto bail;
2302         }
2303 
2304         if (ocfs2_mount_local(osb))
2305                 goto local;
2306       
//Everything from here down to the local: label is cluster-only handling (a local mount
//jumps straight past it). arg_flags is 0 in the traced call, so this condition is true.
2307         if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
2308                 ocfs2_wait_for_recovery(osb);
2309 
//->ip_inode_lockres is presumably the META lock; ->ip_rw_lockres is presumably the
//read/write (i.e. data) lock.
2310         lockres = &OCFS2_I(inode)->ip_inode_lockres;
//level=EX in the traced call (ex is non-zero); PR otherwise
2311         level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2312         dlm_flags = 0;
2313         if (arg_flags & OCFS2_META_LOCK_NOQUEUE)
2314                 dlm_flags |= DLM_LKF_NOQUEUE;
2315 
//In the traced call: dlm_flags=0, arg_flags=0
2316         status = __ocfs2_cluster_lock(osb, lockres, level, dlm_flags,
2317                                       arg_flags, subclass, _RET_IP_);
2318         if (status < 0) {          
2319                 if (status != -EAGAIN && status != -EIOCBRETRY)
2320                         mlog_errno(status);
2321                 goto bail;
2322         }
2323        
2324         /* Notify the error cleanup path to drop the cluster lock. */
2325         acquired = 1;
2326 
2327         /* We wait twice because a node may have died while we were in
2328          * the lower dlm layers. The second time though, we've
2329          * committed to owning this lock so we don't allow signals to
2330          * abort the operation. */ 
2331         if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
2332                 ocfs2_wait_for_recovery(osb);
2333 
2334 local:                         
2335         /*
2336          * We only see this flag if we're being called from
2337          * ocfs2_read_locked_inode(). It means we're locking an inode
2338          * which hasn't been populated yet, so clear the refresh flag
2339          * and let the caller handle it.
2340          */                   
2341         if (inode->i_state & I_NEW) {
2342                 status = 0;  
//Not visible in the ftrace output above -- perhaps it cannot be traced?
//(lockres is still NULL here when we arrived via the local-mount path.)
2343                 if (lockres)     
2344                         ocfs2_complete_lock_res_refresh(lockres, 0);
2345                 goto bail;
2346         }
2347 
2348         /* This is fun. The caller may want a bh back, or it may
2349          * not. ocfs2_inode_lock_update definitely wants one in, but
2350          * may or may not read one, depending on what's in the
2351          * LVB. The result of all of this is that we've *only* gone to
2352          * disk if we have to, so the complexity is worthwhile. */
//With the inode locked, the cached inode metadata is discarded first and then
//ocfs2_refresh_inode_from_lvb is called to update some key inode fields
//(author's reading of ocfs2_inode_lock_update; its body is not shown here).
//Reportedly the LVB is updated via data transferred over the network.
2353         status = ocfs2_inode_lock_update(inode, &local_bh);
2354         if (status < 0) {
2355                 if (status != -ENOENT) 
2356                         mlog_errno(status);
2357                 goto bail;
2358         }               
2359                         
2360         if (ret_bh) {
2361                 status = ocfs2_assign_bh(inode, ret_bh, local_bh);
2362                 if (status < 0) {
2363                         mlog_errno(status);
2364                         goto bail;
2365                 }
2366         }
2367 
2368 bail:
2369         if (status < 0) {
2370                 if (ret_bh && (*ret_bh)) {
2371                         brelse(*ret_bh);
2372                         *ret_bh = NULL;
2373                 }
2374                 if (acquired)
2375                         ocfs2_inode_unlock(inode, ex);
2376         }
2377 
2378         if (local_bh)
2379                 brelse(local_bh);
2380 
2381         return status;
2382 }

每日一句

每一个你不满意的现在，都有一个你没有努力的曾经。