從bgsave命令看redis的RDB持久化機制

redis通過bgSave命令將數據持久化到磁盤上,在啓動的時候,能夠從磁盤上加載bgSave生成的RDB文件,恢復數據

save命令會阻塞,不推薦使用

RDB持久化機制簡介

redis的RDB結構大致如下

以hashtable爲例

REDIS|db_version|SELECTDB|0|REDIS_TYPE_HASH|hash_size|key1_len|key1|key1_value_len|key1_value|EOF|checksum
複製代碼
  • REDIS:放在文件開頭的標識符
  • db_version:當前RDB的版本
  • SELECTDB:標識符,接下來要讀到的是server中的數據庫下標
  • 0:表示第0個db,默認有16個
  • REDIS_TYPE_HASH:在db中存了hashTable結構
  • hash_size:hashTable中一共有多少個元素
  • key1_len:第一個key佔的字節數
  • key1:第一個key的字面值
  • key1_value_len:第一個key對應的value的字節數
  • key1_value:第一個key對應的value的值
  • EOF:沒有數據的標識符
  • checksum:RDB文件的校驗和,校驗內容的完整性

調用bgSave進行存儲

當用戶執行bgSave命令的時候,redis會fork出子進程進行處理,使得其餘命令不會被阻塞執行

Code.SLICE.source("if ((childpid = fork()) == 0) {" +
" //..." +
" retval = rdbSave(filename,rsi);" +
" if (retval == C_OK) {" +
" //..." +
" server.child_info_data.cow_size = private_dirty;" +
" sendChildInfo(CHILD_INFO_TYPE_RDB);" +
" }" +
" exitFromChild((retval == C_OK) ? 0 : 1);" +
" } else {" +
" /* Parent */" +
" //..." +
" server.rdb_save_time_start = time(NULL);" +
" server.rdb_child_pid = childpid;" +
" server.rdb_child_type = RDB_CHILD_TYPE_DISK;" +
" updateDictResizePolicy();" +
" return C_OK;" +
" }")
.interpretation("建立子進程,子進程負責作rdb相關的處理,父進程記下處理中的子進程ID,返回當前bgsave的執行,也就是說bgsave不會阻塞其它命令的執行");
   
複製代碼

在存儲數據進入RDB的時候,首先會在文件頭寫入 REDIS 字符串,拼上當前RDB的版本

Code.SLICE.source("snprintf(magic,sizeof(magic),\"REDIS%04d\",RDB_VERSION);" +
" if (rdbWriteRaw(rdb,magic,9) == -1) goto werr;")
.interpretation("首先在文件中寫下 REDIS字符串和RDB的版本");
複製代碼

緊接着遍歷redis的server中全部的數據庫,一個個的寫入數據,根據數據的類型不一樣,採用不同的TYPE來標識,而後記下對應的長度,再存入值,好比要存儲的對象的值是hashTable

Code.SLICE.source("else if (o->type == OBJ_HASH) {" +
" /* Save a hash value */" +
" if (o->encoding == OBJ_ENCODING_ZIPLIST) {" +
" size_t l = ziplistBlobLen((unsigned char*)o->ptr);" +
"" +
" if ((n = rdbSaveRawString(rdb,o->ptr,l)) == -1) return -1;" +
" nwritten += n;" +
"" +
" } else if (o->encoding == OBJ_ENCODING_HT) {" +
" dictIterator *di = dictGetIterator(o->ptr);" +
" dictEntry *de;" +
"" +
" if ((n = rdbSaveLen(rdb,dictSize((dict*)o->ptr))) == -1) {" +
" dictReleaseIterator(di);" +
" return -1;" +
" }" +
" nwritten += n;" +
"" +
" while((de = dictNext(di)) != NULL) {" +
" sds field = dictGetKey(de);" +
" sds value = dictGetVal(de);" +
"" +
" if ((n = rdbSaveRawString(rdb,(unsigned char*)field," +
" sdslen(field))) == -1)" +
" {" +
" dictReleaseIterator(di);" +
" return -1;" +
" }" +
" nwritten += n;" +
" if ((n = rdbSaveRawString(rdb,(unsigned char*)value," +
" sdslen(value))) == -1)" +
" {" +
" dictReleaseIterator(di);" +
" return -1;" +
" }" +
" nwritten += n;" +
" }" +
" dictReleaseIterator(di);" +
" } else {" +
" serverPanic(\"Unknown hash encoding\");" +
" }" +
" } ")
.interpretation("以hash的編碼方式爲例,看底層的實現")
.interpretation("1: hash的底層實現若是是ziplist,那麼拿到ziplist的長度,將ziplist轉爲字符串存儲")
.interpretation("2: hash的底層實現方式爲 hashtable,那麼一個個的遍歷key,value,將它們分別轉成String的形式再存儲");
複製代碼

當全部數據記錄完成以後,寫入EOF結束標記,最後加上校驗和,至此完成內存數據序列化,存儲到磁盤

Code.SLICE.source("if (rdbSaveType(rdb,RDB_OPCODE_EOF) == -1) goto werr;")
        .interpretation("寫入EOF標記,表明全部db的數據都已經寫入了");
Code.SLICE.source("cksum = rdb->cksum;" +
        " memrev64ifbe(&cksum);" +
        " if (rioWrite(rdb,&cksum,8) == 0) goto werr;")
        .interpretation("寫入校驗和,完整的內存數據寫入完畢");
複製代碼

啓動加載

在redis的啓動的過程中會進行加載,它實質上就是存儲的反序列化過程,首先是讀取字符串 REDIS

Code.SLICE.source("if (rioRead(rdb,buf,9) == 0) goto eoferr;" +
    " buf[9] = '\\0';" +
    " if (memcmp(buf,\"REDIS\",5) != 0)")
    .interpretation("讀取文件的前9個字節,前5個一定是REDIS字符,不然出錯");
複製代碼

接下來即可以按照序列化的規則,進行反序列化,直到讀取完成

Code.SLICE.source("while(1) {..." +
"if ((type = rdbLoadType(rdb)) == -1) goto eoferr;" +
"..." +
" else if (type == RDB_OPCODE_EOF) {" +
" /* EOF: End of file, exit the main loop. */" +
" break;" +
"..." +
"else if (type == RDB_OPCODE_RESIZEDB){...}" +
"..." +
"if ((key = rdbLoadStringObject(rdb)) == NULL) goto eoferr;" +
"if ((val = rdbLoadObject(type,rdb)) == NULL) goto eoferr;" +
"}")
.interpretation("循環讀取文件的內容,首先讀到接下來的類型")
.interpretation("1: 讀到EOF結束")
.interpretation("2: 讀取到對應的標記,就繼續讀取後面的字節,直到讀到key")
.interpretation("3: 讀取key,讀取val");
複製代碼

value以hashtable爲例,會構造出對應的結構

Code.SLICE.source("else if (rdbtype == RDB_TYPE_HASH) {" +
    " len = rdbLoadLen(rdb, NULL);" +
    "..." +
    " o = createHashObject();" +
    " /* ... */" +
    " while (o->encoding == OBJ_ENCODING_ZIPLIST && len > 0) {" +
    " len--;" +
    " /* Load raw strings */" +
    " if ((field = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL))" +
    " == NULL) return NULL;" +
    " if ((value = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL))" +
    " == NULL) return NULL;" +
    "" +
    " /* Add pair to ziplist */" +
    " o->ptr = ziplistPush(o->ptr, (unsigned char*)field," +
    " sdslen(field), ZIPLIST_TAIL);" +
    " o->ptr = ziplistPush(o->ptr, (unsigned char*)value," +
    " sdslen(value), ZIPLIST_TAIL);" +
    "" +
    " /* Convert to hash table if size threshold is exceeded */" +
    " if (sdslen(field) > server.hash_max_ziplist_value ||" +
    " sdslen(value) > server.hash_max_ziplist_value)" +
    " {" +
    " sdsfree(field);" +
    " sdsfree(value);" +
    " hashTypeConvert(o, OBJ_ENCODING_HT);" +
    " break;" +
    " }" +
    " sdsfree(field);" +
    " sdsfree(value);" +
    " }" +
    " ........"+
    " /* Load remaining fields and values into the hash table */" +
    " while (o->encoding == OBJ_ENCODING_HT && len > 0) {" +
    " len--;" +
    " /* Load encoded strings */" +
    " if ((field = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL))" +
    " == NULL) return NULL;" +
    " if ((value = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL))" +
    " == NULL) return NULL;" +
    "" +
    " /* Add pair to hash table */" +
    " ret = dictAdd((dict*)o->ptr, field, value);" +
    " if (ret == DICT_ERR) {" +
    " rdbExitReportCorruptRDB(\"Duplicate keys detected\");" +
    " }" +
    " }" +
    " }")
    .interpretation("以hashtable爲例,讀取到對應的數據長度,建立對象,根據對象的編碼方式,分別解析成ziplist或者是hashtable來存儲");
 
複製代碼

總結

  1. bgsave不會阻塞redis其它命令的運行,通過fork子進程實現;
  2. RDB序列化內存對象的機制是先設定數據的類型表示,而後記下數據量,再記下數據值的長度,再記下數據自己
  3. 啓動加載RDB文件的解析就是按照既定的保存規則進行反序列化

RDB的優點與劣勢

  • 優點:RDB是一個緊湊壓縮的二進制文件,適用於備份,全量複製的場景;它的恢復速度遠快於AOF
  • 劣勢:不適用於實時持久化,實時操作成本高;老版本的Redis服務無法兼容新版本的Redis產生的RDB文件

注意

AOF和RDB的優先級沒有在這兒說明,後續再新開一篇~

附錄

RDB啓動加載源碼
bgSave執行源碼 書籍:Redis設計與實現、Redis開發與運維

相關文章
相關標籤/搜索