從 ftrace 的輸出可以看到，時間基本都消耗在 ocfs2_inode_lock_full_nested() 函數中。
 3)               |  ocfs2_write_begin() {
 3) ! 11815.92 us |    ocfs2_inode_lock_full_nested();
 1)   0.101 us    |    down_write();
 1)   0.125 us    |    up_write();
 3)   4.513 us    |    ocfs2_write_begin_nolock();
 3) ! 11822.76 us |  }
在分析 generic_perform_write() 時發現：在讀（R）、寫（W）分別從兩個節點併發競爭同一個共享文件的情景中，write_begin() 最消耗時間。這在意料之中，因爲它要請求上鎖！
併發
1895 static int ocfs2_write_begin(struct file *file, struct address_space *mapping, 1896 loff_t pos, unsigned len, unsigned flags, 1897 struct page **pagep, void **fsdata) 1898 { 1899 int ret; 1900 struct buffer_head *di_bh = NULL; 1901 struct inode *inode = mapping->host; 1902 //給inode上EX鎖,這個鎖威力很大,其餘節點讀寫都會被阻塞; //ocfs2_inode_lock是ocfs2_inode_lock_full_nested()的宏 1903 ret = ocfs2_inode_lock(inode, &di_bh, 1); 1904 if (ret) { 1905 mlog_errno(ret); 1906 return ret; 1907 } 1908 1909 /* 1910 * Take alloc sem here to prevent concurrent lookups. That way 1911 * the mapping, zeroing and tree manipulation within 1912 * ocfs2_write() will be safe against ->readpage(). This 1913 * should also serve to lock out allocation from a shared 1914 * writeable region. 1915 */ //註釋有解釋 1916 down_write(&OCFS2_I(inode)->ip_alloc_sem); 1917 //不明白爲何叫_nolock,分明是有lock的阿 1918 ret = ocfs2_write_begin_nolock(file, mapping, pos, len, flags, pagep, 1919 fsdata, di_bh, NULL); 1920 if (ret) { 1921 mlog_errno(ret); 1922 goto out_fail; 1923 } 1924 1925 brelse(di_bh); 1926 1927 return 0; 1928 1929 out_fail: 1930 up_write(&OCFS2_I(inode)->ip_alloc_sem); 1931 1932 brelse(di_bh); 1933 ocfs2_inode_unlock(inode, 1); 1934 1935 return ret; 1936 }