ftrace:node
 3)               |  ocfs2_readpage() {
 3)               |    ocfs2_inode_lock_with_page() {
 3)   0.126 us    |      down_read();
 3)   0.098 us    |      up_read();
 3) + 11.225 us   |      ocfs2_inode_unlock();
 3) ! 11945.43 us |    }
 3) ! 11946.01 us |  }
代碼分析:app
/*
 * ocfs2_readpage() - read handler for a single page of an OCFS2 inode.
 *
 * NOTE: this is a listing; the leading number on each line is the line
 * number in the original source file, not part of the C code.
 *
 * Flow, as visible in this listing:
 *   1. Take the inode lock through ocfs2_inode_lock_with_page(). On
 *      failure the error is logged and returned; if the error is
 *      AOP_TRUNCATED_PAGE the page is not unlocked here.
 *   2. Try to take ip_alloc_sem for reading without blocking. If that
 *      fails, unlock the page, wait for the semaphore once (take and
 *      release it), and return AOP_TRUNCATED_PAGE.
 *   3. If the page starts at or beyond i_size, zero the page, mark it
 *      up to date and return 0 (the page is unlocked at "out").
 *   4. Otherwise read the page with ocfs2_readpage_inline() or
 *      block_read_full_page() and return that helper's result. After
 *      this call the function no longer unlocks the page itself.
 *
 * The local flag "unlock" records whether this function must call
 * unlock_page() at the "out" label.
 */
274 static int ocfs2_readpage(struct file *file, struct page *page)
275 {
276     struct inode *inode = page->mapping->host;
277     struct ocfs2_inode_info *oi = OCFS2_I(inode);
278     loff_t start = (loff_t)page->index << PAGE_CACHE_SHIFT;
279     int ret, unlock = 1;
280
281     trace_ocfs2_readpage((unsigned long long)oi->ip_blkno,
282                          (page ? page->index : 0));
283     /*
         * Author's note (translated):
         * ocfs2_inode_lock_with_page() is a pain point. To avoid a lock
         * inversion it introduced a live-lock problem; I do not yet
         * understand the details of these issues.
         * When this function is called the page is already locked. It
         * tentatively requests the DLM lock; if the DLM resource is
         * currently held in an incompatible mode, it gives up the request,
         * unlocks the page, and then requests the DLM lock again. This is
         * done to be nice and let ocfs2dc obtain the page lock. If the DLM
         * lock is then taken by a writing process, downconverting the write
         * lock has to wait for data to be flushed to disk, which wastes a
         * lot of time. There must be other hidden problems as well,
         * otherwise how could so much time be wasted!
         */
284     ret = ocfs2_inode_lock_with_page(inode, NULL, 0, page);
285     if (ret != 0) {
286         if (ret == AOP_TRUNCATED_PAGE)
287             unlock = 0;
288         mlog_errno(ret);
289         goto out;
290     }
291
292     if (down_read_trylock(&oi->ip_alloc_sem) == 0) {
293         /*
294          * Unlock the page and cycle ip_alloc_sem so that we don't
295          * busyloop waiting for ip_alloc_sem to unlock
296          */
297         ret = AOP_TRUNCATED_PAGE;
298         unlock_page(page);
299         unlock = 0;
300         down_read(&oi->ip_alloc_sem);
301         up_read(&oi->ip_alloc_sem);
302         goto out_inode_unlock;
303     }
割...異步
305     /*
306      * i_size might have just been updated as we grabbed the meta lock. We
307      * might now be discovering a truncate that hit on another node.
308      * block_read_full_page->get_block freaks out if it is asked to read
309      * beyond the end of a file, so we check here. Callers
310      * (generic_file_read, vm_ops->fault) are clever enough to check i_size
311      * and notice that the page they just read isn't needed.
312      *
313      * XXX sys_readahead() seems to get that wrong?
314      */
315     if (start >= i_size_read(inode)) {
316         zero_user(page, 0, PAGE_SIZE);
317         SetPageUptodate(page);
318         ret = 0;
319         goto out_alloc;
320     }
321
322     if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
323         ret = ocfs2_readpage_inline(inode, page);
324     else
325         ret = block_read_full_page(page, ocfs2_get_block);
        /*
         * Author's note (translated):
         * I thought unlock should not be set to 0 here, because
         * block_read_full_page() may not have unlocked the page, so I
         * changed this part to set unlock to 1 whenever the page was still
         * locked. Performance improved, but reads then hit I/O errors.
         * It is probably not as simple as I thought: block_read_full_page()
         * may call submit_bh() to do the I/O, which is asynchronous, and
         * the page may only be unlocked later, once the I/O has completed.
         * No wonder reads hit I/O errors.
         */
326     unlock = 0;
327
328 out_alloc:
329     up_read(&OCFS2_I(inode)->ip_alloc_sem);
330 out_inode_unlock:
331     ocfs2_inode_unlock(inode, 0);
332 out:
333     if (unlock)
334         unlock_page(page);
335     return ret;
336 }