先看下ftrace,可惜ocfs2_wait_for_mask不能被ftrace跟蹤,也不知道爲何。不過,它直接調用了wait_for_completion,因此就用這個函數代替了;這個函數耗費的時間最多!
0) | __ocfs2_cluster_lock() { 0) | wait_for_completion() { ------------------------------------------ 0) iomaker-10882 => ocfs2dc-10793 ------------------------------------------ 0) 0.000 us | ocfs2_dlm_lock(); ------------------------------------------ 0) ocfs2dc-10793 => iomaker-10882 ------------------------------------------ 0) ! 11609.94 us | } 0) 0.000 us | ocfs2_dlm_lock(); 0) ! 443.137 us | wait_for_completion(); 0) ! 12053.08 us | } /* __ocfs2_cluster_lock */
這個函數分析起來沒那麼容易,又長又臭。分段過一遍代碼吧:
1362 static int __ocfs2_cluster_lock(struct ocfs2_super *osb, 1363 struct ocfs2_lock_res *lockres, 1364 int level, 1365 u32 lkm_flags, 1366 int arg_flags, 1367 int l_subclass, 1368 unsigned long caller_ip) 1369 { // lockres是->ip_inode_lockres, level=EX, lkm_flags=0, arg_flags=0, subclass=OI_LS_NORMAL // caller_ip=__RET_IP_,不知道爲何需要這個參數? 1370 struct ocfs2_mask_waiter mw; 1371 int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR); 1372 int ret = 0; /* gcc doesn't realize wait = 1 guarantees ret is set */ 1373 unsigned long flags; 1374 unsigned int gen; 1375 int noqueue_attempted = 0; 1376 //ocfs2_mask_waiter結構設計得挺巧妙;mask暗指標誌位掩碼,lockres->l_flags有許多標誌位,如OCFS2_LOCK_BUSY, //OCFS2_LOCK_BLOCKED,OCFS2_LOCK_PENDING,etc.,->mw_mask用來指示哪個bit,->mw_goal用來表示希望這個 //bit是0或1; waiter就意味着wait_for_completion這個位變成我們想要的值。 1377 ocfs2_init_mask_waiter(&mw); 1378 //ocfs2_inode_inode_lops->flags=LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB //因此if成立 1379 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) 1380 lkm_flags |= DLM_LKF_VALBLK;
割...
1382 again: //哪些狀況會goto到這兒? //#1493行,從ocfs2_dlm_lock中成功返回,可是BUSY標記尚未清除掉,這意味着ast還沒被調用或返回,由於全部類型的ast都去清除這個標記; //#1520行,由於args_flags=0,#1514行的if語句不成立,因此這行根本執行不到; //#1525行,1524行if語句必定成立,即ret=0 1383 wait = 0; 1384 1385 spin_lock_irqsave(&lockres->l_lock, flags); 1386 1387 if (catch_signals && signal_pending(current)) { 1388 ret = -ERESTARTSYS; 1389 goto unlock; 1390 } 1391 1392 mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING, 1393 "Cluster lock called on freeing lockres %s! flags " 1394 "0x%lx\n", lockres->l_name, lockres->l_flags); 1395 1396 /* We only compare against the currently granted level 1397 * here. If the lock is blocked waiting on a downconvert, 1398 * we'll get caught below. */ 1399 if (lockres->l_flags & OCFS2_LOCK_BUSY && 1400 level > lockres->l_level) { //BUSY表示還有dlm lock請求沒有返回,必須等着... 1401 /* is someone sitting in dlm_lock? If so, wait on 1402 * them. */ 1403 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1404 wait = 1; 1405 goto unlock; //unlock處,#1502行,不能理解!!! 總之,很快就進入等待函數了... 1406 } 1407 1408 if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) { 1409 /* 1410 * We've upconverted. If the lock now has a level we can 1411 * work with, we take it. If, however, the lock is not at the 1412 * required level, we go thru the full cycle. One way this could 1413 * happen is if a process requesting an upconvert to PR is 1414 * closely followed by another requesting upconvert to an EX. 1415 * If the process requesting EX lands here, we want it to 1416 * continue attempting to upconvert and let the process 1417 * requesting PR take the lock. 1418 * If multiple processes request upconvert to PR, the first one 1419 * here will take the lock. The others will have to go thru the 1420 * OCFS2_LOCK_BLOCKED check to ensure that there is no pending 1421 * downconvert request. 
1422 */ //這段註釋非常清楚 1423 if (level <= lockres->l_level) 1424 goto update_holders; 1425 } 1426 1427 if (lockres->l_flags & OCFS2_LOCK_BLOCKED && 1428 !ocfs2_may_continue_on_blocked_lock(lockres, level)) { //BLOCKED: blocked waiting for downconvert; //ocfs2_may_continue_on...在想要的鎖和->l_blocking兼容時返回1; 我猜是爲了避免重複等待 1429 /* is the lock is currently blocked on behalf of 1430 * another node */ 1431 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BLOCKED, 0); 1432 wait = 1; 1433 goto unlock;
割...
1436 if (level > lockres->l_level) { //申請的鎖級別要高於當前granted lock level 1437 if (noqueue_attempted > 0) { //noqueue_attempted一直等於0,因此可以無視這個if語句; 1438 ret = -EAGAIN; 1439 goto unlock; 1440 } //lkm_flags不會將DLM_LKF_NOQUEUE置位,因此也可以無視這個if語句 1441 if (lkm_flags & DLM_LKF_NOQUEUE) 1442 noqueue_attempted = 1; 1443 //->l_action用來指示ast回調時執行哪一個動作,有OCFS2_AST_ATTACH, OCFS2_AST_CONVERT, OCFS2_AST_DOWNCONVERT; 1444 if (lockres->l_action != OCFS2_AST_INVALID) 1445 mlog(ML_ERROR, "lockres %s has action %u pending\n", 1446 lockres->l_name, lockres->l_action); 1447 1448 if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { //如果OCFS2_LOCK_ATTACHED爲0,表示該鎖資源的LVB尚未初始化,也意味着這是初次對該資源加鎖; 1449 lockres->l_action = OCFS2_AST_ATTACH; 1450 lkm_flags &= ~DLM_LKF_CONVERT; 1451 } else { //不然,必定是申請鎖轉換 1452 lockres->l_action = OCFS2_AST_CONVERT; 1453 lkm_flags |= DLM_LKF_CONVERT; 1454 } 1455 1456 lockres->l_requested = level; 1457 lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 1458 gen = lockres_set_pending(lockres); 1459 spin_unlock_irqrestore(&lockres->l_lock, flags); 1460 1461 BUG_ON(level == DLM_LOCK_IV); 1462 BUG_ON(level == DLM_LOCK_NL); 1463 1464 mlog(ML_BASTS, "lockres %s, convert from %d to %d\n", 1465 lockres->l_name, lockres->l_level, level); 1467 /* call dlm_lock to upgrade lock now */ 1468 ret = ocfs2_dlm_lock(osb->cconn, 1469 level, 1470 &lockres->l_lksb, 1471 lkm_flags, 1472 lockres->l_name, 1473 OCFS2_LOCK_ID_MAX_LEN - 1); 1474 lockres_clear_pending(lockres, gen, osb); 1475 if (ret) { 1476 if (!(lkm_flags & DLM_LKF_NOQUEUE) || 1477 (ret != -EAGAIN)) { 1478 ocfs2_log_dlm_error("ocfs2_dlm_lock", 1479 ret, lockres); 1480 } 1481 ocfs2_recover_from_dlm_error(lockres, 1); 1482 goto out; 1483 } 1484 1485 mlog(0, "lock %s, successful return from ocfs2_dlm_lock\n", 1486 lockres->l_name); 1487 1488 /* At this point we've gone inside the dlm and need to 1489 * complete our work regardless. */ 1490 catch_signals = 0; 1491 1492 /* wait for busy to clear and carry on */ 1493 goto again; 1494 }
割...
1496 update_holders: 1497 /* Ok, if we get here then we're good to go. */ //能走到這一步,說明已經成功拿到了想要的鎖 1498 ocfs2_inc_holders(lockres, level); 1499 1500 ret = 0; 1501 unlock: //#1502行,不清楚要幹什麼? 1502 lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); 1503 1504 spin_unlock_irqrestore(&lockres->l_lock, flags); 1505 out: 1506 /* 1507 * This is helping work around a lock inversion between the page lock 1508 * and dlm locks. One path holds the page lock while calling aops 1509 * which block acquiring dlm locks. The voting thread holds dlm 1510 * locks while acquiring page locks while down converting data locks. 1511 * This block is helping an aop path notice the inversion and back 1512 * off to unlock its page lock before trying the dlm lock again. 1513 */ //由於args_flags=0,這個if語句不會成立,直接無視 1514 if (wait && arg_flags & OCFS2_LOCK_NONBLOCK && 1515 mw.mw_mask & (OCFS2_LOCK_BUSY|OCFS2_LOCK_BLOCKED)) { 1516 wait = 0; 1517 if (lockres_remove_mask_waiter(lockres, &mw)) 1518 ret = -EAGAIN; 1519 else 1520 goto again; 1521 } //資源被佔着,並且鎖不兼容,只能慢慢等了!!! 1522 if (wait) { 1523 ret = ocfs2_wait_for_mask(&mw); 1524 if (ret == 0) 1525 goto again; 1526 mlog_errno(ret); 1527 } 1528 ocfs2_update_lock_stats(lockres, level, &mw, ret); 1542 return ret; 1543 }