1、ceph scrub介紹
ceph通過scrub保證數據的一致性。scrub以PG爲單位:對於每個pg,ceph分析該pg下的object,產生一個類似於元數據信息摘要的數據結構(包含對象大小、屬性等),叫scrubmap;然後比較主副本與其他副本的scrubmap,來判斷是否有object丟失或者不匹配。light scrub(daily)比較object size和屬性。deep scrub(weekly)讀取數據部分並通過checksum(這裏是CRC32)比較,保證數據一致性。每次scrub只取chunk(chunk大小可以通過ceph的配置選項進行配置)數量的object進行比較,這期間被校驗對象的數據是不能被修改的,因此write請求會被block。scrub操作可以手動觸發,也會根據配置項和系統負載情況每天定時觸發。
2、代碼流程分析
一、手動觸發scrub
a、mon接收到scrub命令後進入PGMonitor::preprocess_query函數
case MSG_PGSTATS: return preprocess_pg_stats(op); case MSG_MON_COMMAND: //scrub return preprocess_command(op);
b、preprocess_command(op)函數在mon/PGMonitor.cc文件中,給主osd發送scrub指令
else if (prefix == "pg scrub" || prefix == "pg repair" || prefix == "pg deep-scrub") { ......... mon->try_send_message(new MOSDScrub(mon->monmap->fsid, pgs, //給主osd發送scrub指令 scrubop == "repair", scrubop == "deep-scrub"), mon->osdmon()->osdmap.get_inst(osd)); ss << "instructing pg " << pgid << " on osd." << osd << " to " << scrubop; r = 0;
c、主osd接收到MOSDScrub的命令後,在OSD::ms_dispatch函數中調用_dispatch(m);
do_waiters(); _dispatch(m); //scrub do_waiters();
d、在_dispatch(m)中調用handle_scrub進行處理
case MSG_OSD_SCRUB: handle_scrub(static_cast<MOSDScrub*>(m)); break;
e、在handle_scrub中調用pg->reg_next_scrub()註冊要處理的pg,初始化了ScrubJob,ScrubJob中有任務執行需要的信息。
if (pg->is_primary()) { pg->unreg_next_scrub(); pg->scrubber.must_scrub = true; pg->scrubber.must_deep_scrub = m->deep || m->repair; pg->scrubber.must_repair = m->repair; pg->reg_next_scrub();
f、在osd初始化的時候註冊了一個定時任務
tick_timer.add_event_after(cct->_conf->osd_heartbeat_interval, new C_Tick(this)); { Mutex::Locker l(tick_timer_lock); tick_timer_without_osd_lock.add_event_after(cct->_conf->osd_heartbeat_interval, new C_Tick_WithoutOSDLock(this)); }
g、該定時任務每隔osd_heartbeat_interval時間觸發一次定時器回調函數OSD::tick_without_osd_lock()
h、在回調函數中調用 sched_scrub();
if (!scrub_random_backoff()) { sched_scrub(); }
i、在OSD::sched_scrub();中主要檢查時間和系統的負載,並取得ScrubJob中的第一個任務。
if (pg->sched_scrub()) { //調用PG::sched_scrub() pg->unlock(); break;
j、在PG::sched_scrub()中完成資源的預約,然後調用bool PG::queue_scrub(),最後加入op_wq隊列。
scrub_queued = true; osd->queue_for_scrub(this); //osd是OSDService類
k、在OSDService::queue_for_scrub中加入op_wq隊列
void queue_for_scrub(PG *pg) { op_wq.queue( make_pair( pg, PGQueueable( PGScrub(pg->get_osdmap()->get_epoch()), cct->_conf->osd_scrub_cost, pg->get_scrub_priority(), ceph_clock_now(cct), entity_inst_t()))); }
l、op_wq隊列在OSDService類中定義爲引用類型,在OSDService初始化的時候op_wq被綁定到OSD::op_shardedwq
419 ShardedThreadPool::ShardedWQ < pair <PGRef, PGQueueable> > &op_wq;
在osd/OSD.cc中
216 op_wq(osd->op_shardedwq),
m、op_shardedwq隊列綁定到OSD的ShardedThreadPool線程池osd_op_tp。
1669 op_shardedwq( 1670 cct->_conf->osd_op_num_shards, 1671 this, 1672 cct->_conf->osd_op_thread_timeout, 1673 cct->_conf->osd_op_thread_suicide_timeout, 1674 &osd_op_tp),
n、最後線程池調用pg->scrub處理
pg scrub處理和副本osd處理,流程參考:https://my.oschina.net/linuxhunter/blog/681050
https://blog.csdn.net/younger_china/article/details/75450607