本篇主要講的是 AOF 持久化,瞭解 AOF 的數據組織方式和運作機制。redis 主要在 aof.c 中實現 AOF 的操作。
redis AOF 持久化同樣藉助了 struct rio,詳細內容在《深入剖析 redis RDB 持久化策略》中有介紹。
假設 redis 內存有「name:Jhon」的鍵值對,那麼進行 AOF 持久化後,AOF 文件有如下內容:
*2      # 2個參數
$6      # 第一個參數長度爲 6
SELECT  # 第一個參數
$1      # 第二個參數長度爲 1
8       # 第二個參數
*3      # 3個參數
$3      # 第一個參數長度爲 3
SET     # 第一個參數
$4      # 第二個參數長度爲 4
name    # 第二個參數
$4      # 第三個參數長度爲 4
Jhon    # 第三個參數
所以對上面的內容進行恢復,能得到熟悉的兩條 redis 命令:SELECT 8;SET name Jhon.
可以想象的是,redis 遍歷內存數據集中的每個 key-value 對,依次寫入磁盤中;redis 啓動的時候,從 AOF 文件中讀取數據,恢復數據。
和 redis RDB 持久化運作機制不同,redis AOF 有後臺執行和邊服務邊備份兩種方式。
1)AOF 後臺執行的方式和 RDB 有類似的地方:fork 一個子進程,主進程仍進行服務,子進程執行 AOF 持久化,數據被 dump 到磁盤上。與 RDB 不同的是,後臺子進程持久化過程中,主進程會記錄期間的所有數據變動(主進程還在服務),並存儲在 server.aof_rewrite_buf_blocks 中;後臺子進程結束後,redis 會將這些更新緩存追加到 AOF 文件中,這是 RDB 持久化所不具備的。
來說說更新緩存這個東西。redis 服務器產生數據變動的時候,譬如 set name Jhon,不僅僅會修改內存數據集,也會記錄此更新(修改)操作,記錄的方式就是上面所說的數據組織方式。
更新緩存可以存儲在 server.aof_buf 中,你可以把它理解爲一個小型臨時中轉站,所有累積的更新緩存都會先放入這裏,它會在特定時機寫入文件(下面會詳述);server.aof_buf 中的數據由 propagate() 經 feedAppendOnlyFile() 添加,在涉及數據更新的地方都會調用 propagate() 以累積變動。更新緩存也可以存儲在 server.aof_rewrite_buf_blocks,這是一個元素類型爲 struct aofrwblock 的鏈表,你可以把它理解爲一個倉庫,當後臺有 AOF 子進程的時候,每一條更新記錄也會同時追加到這個鏈表中,而當 AOF 子進程結束,它會被整個寫入到文件。兩者是有關聯的。
下面是後臺執行的主要代碼:
// 啓動後臺子進程,執行 AOF 持久化操做。bgrewriteaofCommand(),startAppendOnly(),serverCron() 中會調用此函數 /* This is how rewriting of the append only file in background works: * * 1) The user calls BGREWRITEAOF * 2) Redis calls this function, that forks(): * 2a) the child rewrite the append only file in a temp file. * 2b) the parent accumulates differences in server.aof_rewrite_buf. * 3) When the child finished '2a' exists. * 4) The parent will trap the exit code, if it's OK, will append the * data accumulated into server.aof_rewrite_buf into the temp file, and * finally will rename(2) the temp file in the actual file name. * The the new file is reopened as the new append only file. Profit! */ int rewriteAppendOnlyFileBackground(void) { pid_t childpid; long long start; // 已經有正在執行備份的子進程 if (server.aof_child_pid != -1) return REDIS_ERR; start = ustime(); if ((childpid = fork()) == 0) { char tmpfile[256]; // 子進程 /* Child */ // 關閉監聽 closeListeningSockets(0); // 設置進程 title redisSetProcTitle("redis-aof-rewrite"); // 臨時文件名 snprintf(tmpfile,256,"temp-rewriteaof-bg-%d.aof", (int) getpid()); // 髒數據,其實就是子進程所消耗的內存大小 if (rewriteAppendOnlyFile(tmpfile) == REDIS_OK) { // 獲取髒數據大小 size_t private_dirty = zmalloc_get_private_dirty(); // 記錄髒數據 if (private_dirty) { redisLog(REDIS_NOTICE, "AOF rewrite: %zu MB of memory used by copy-on-write", private_dirty/(1024*1024)); } exitFromChild(0); } else { exitFromChild(1); } } else { /* Parent */ server.stat_fork_time = ustime()-start; if (childpid == -1) { redisLog(REDIS_WARNING, "Can't rewrite append only file in background: fork: %s", strerror(errno)); return REDIS_ERR; } redisLog(REDIS_NOTICE, "Background append only file rewriting started by pid %d",childpid); // AOF 已經開始執行,取消 AOF 計劃 server.aof_rewrite_scheduled = 0; // AOF 最近一次執行的起始時間 server.aof_rewrite_time_start = time(NULL); // 子進程 ID server.aof_child_pid = childpid; updateDictResizePolicy(); // 由於更新緩存都將寫入文件,要強制產生選擇數據集的指令 SELECT ,以防出現數據合併錯誤。 /* We set appendseldb to -1 in order to force the next call to 
the * feedAppendOnlyFile() to issue a SELECT command, so the differences * accumulated by the parent into server.aof_rewrite_buf will start * with a SELECT statement and it will be safe to merge. */ server.aof_selected_db = -1; replicationScriptCacheFlush(); return REDIS_OK; } return REDIS_OK; /* unreached */ } // AOF 持久化主函數。只在 rewriteAppendOnlyFileBackground() 中會調用此函數 /* Write a sequence of commands able to fully rebuild the dataset into * "filename". Used both by REWRITEAOF and BGREWRITEAOF. * * In order to minimize the number of commands needed in the rewritten * log Redis uses variadic commands when possible, such as RPUSH, SADD * and ZADD. However at max REDIS_AOF_REWRITE_ITEMS_PER_CMD items per time * are inserted using a single command. */ int rewriteAppendOnlyFile(char *filename) { dictIterator *di = NULL; dictEntry *de; rio aof; FILE *fp; char tmpfile[256]; int j; long long now = mstime(); /* Note that we have to use a different temp name here compared to the * one used by rewriteAppendOnlyFileBackground() function. 
*/ snprintf(tmpfile,256,"temp-rewriteaof-%d.aof", (int) getpid()); // 打開文件 fp = fopen(tmpfile,"w"); if (!fp) { redisLog(REDIS_WARNING, "Opening the temp file for AOF rewrite in rewriteAppendOnlyFile(): %s", strerror(errno)); return REDIS_ERR; } // 初始化 rio 結構體 rioInitWithFile(&aof,fp); // 若是設置了自動備份參數,將進行設置 if (server.aof_rewrite_incremental_fsync) rioSetAutoSync(&aof,REDIS_AOF_AUTOSYNC_BYTES); // 備份每個數據集 for (j = 0; j < server.dbnum; j++) { char selectcmd[] = "*2\r\n$6\r\nSELECT\r\n"; redisDb *db = server.db+j; dict *d = db->dict; if (dictSize(d) == 0) continue; // 獲取數據集的迭代器 di = dictGetSafeIterator(d); if (!di) { fclose(fp); return REDIS_ERR; } // 寫入 AOF 操做碼 /* SELECT the new DB */ if (rioWrite(&aof,selectcmd,sizeof(selectcmd)-1) == 0) goto werr; // 寫入數據集序號 if (rioWriteBulkLongLong(&aof,j) == 0) goto werr; // 寫入數據集中每個數據項 /* Iterate this DB writing every entry */ while((de = dictNext(di)) != NULL) { sds keystr; robj key, *o; long long expiretime; keystr = dictGetKey(de); o = dictGetVal(de); // 將 keystr 封裝在 robj 裏 initStaticStringObject(key,keystr); // 獲取過時時間 expiretime = getExpire(db,&key); // 若是已通過期,放棄存儲 /* If this key is already expired skip it */ if (expiretime != -1 && expiretime < now) continue; // 寫入鍵值對應的寫操做 /* Save the key and associated value */ if (o->type == REDIS_STRING) { /* Emit a SET command */ char cmd[]="*3\r\n$3\r\nSET\r\n"; if (rioWrite(&aof,cmd,sizeof(cmd)-1) == 0) goto werr; /* Key and value */ if (rioWriteBulkObject(&aof,&key) == 0) goto werr; if (rioWriteBulkObject(&aof,o) == 0) goto werr; } else if (o->type == REDIS_LIST) { if (rewriteListObject(&aof,&key,o) == 0) goto werr; } else if (o->type == REDIS_SET) { if (rewriteSetObject(&aof,&key,o) == 0) goto werr; } else if (o->type == REDIS_ZSET) { if (rewriteSortedSetObject(&aof,&key,o) == 0) goto werr; } else if (o->type == REDIS_HASH) { if (rewriteHashObject(&aof,&key,o) == 0) goto werr; } else { redisPanic("Unknown object type"); } // 寫入過時時間 /* Save the expire time */ if (expiretime != -1) { 
char cmd[]="*3\r\n$9\r\nPEXPIREAT\r\n"; if (rioWrite(&aof,cmd,sizeof(cmd)-1) == 0) goto werr; if (rioWriteBulkObject(&aof,&key) == 0) goto werr; if (rioWriteBulkLongLong(&aof,expiretime) == 0) goto werr; } } // 釋放迭代器 dictReleaseIterator(di); } // 寫入磁盤 /* Make sure data will not remain on the OS's output buffers */ fflush(fp); aof_fsync(fileno(fp)); fclose(fp); // 重寫文件名 /* Use RENAME to make sure the DB file is changed atomically only * if the generate DB file is ok. */ if (rename(tmpfile,filename) == -1) { redisLog(REDIS_WARNING,"Error moving temp append only file on the final destination: %s", strerror(errno)); unlink(tmpfile); return REDIS_ERR; } redisLog(REDIS_NOTICE,"SYNC append only file rewrite performed"); return REDIS_OK; werr: // 清理工做 fclose(fp); unlink(tmpfile); redisLog(REDIS_WARNING,"Write error writing append only file on disk: %s", strerror(errno)); if (di) dictReleaseIterator(di); return REDIS_ERR; } // 後臺子進程結束後,redis 更新緩存 server.aof_rewrite_buf_blocks 追加到 AOF 文件中 // 在 AOF 持久化結束後會執行這個函數, backgroundRewriteDoneHandler() 主要工做是將 server.aof_rewrite_buf_blocks,即 AOF 緩存寫入文件 /* A background append only file rewriting (BGREWRITEAOF) terminated its work. * Handle this. */ void backgroundRewriteDoneHandler(int exitcode, int bysignal) { ...... // 將 AOF 緩存 server.aof_rewrite_buf_blocks 的 AOF 寫入磁盤 if (aofRewriteBufferWrite(newfd) == -1) { redisLog(REDIS_WARNING, "Error trying to flush the parent diff to the rewritten AOF: %s", strerror(errno)); close(newfd); goto cleanup; } ...... } // 將累積的更新緩存 server.aof_rewrite_buf_blocks 同步到磁盤 /* Write the buffer (possibly composed of multiple blocks) into the specified * fd. If no short write or any other error happens -1 is returned, * otherwise the number of bytes written is returned. 
*/ ssize_t aofRewriteBufferWrite(int fd) { listNode *ln; listIter li; ssize_t count = 0; listRewind(server.aof_rewrite_buf_blocks,&li); while((ln = listNext(&li))) { aofrwblock *block = listNodeValue(ln); ssize_t nwritten; if (block->used) { nwritten = write(fd,block->buf,block->used); if (nwritten != block->used) { if (nwritten == 0) errno = EIO; return -1; } count += nwritten; } } return count; }
2)邊服務邊備份的方式,即 redis 服務器會把所有的數據變動存儲在 server.aof_buf 中,並在特定時機將更新緩存寫入預設定的文件(server.aof_filename)。特定時機有三種:進入事件循環之前、服務器定時程序 serverCron() 中,以及停止 AOF 策略的 stopAppendOnly() 中。
redis 無非是不想服務器忽然崩潰終止,致使過多的數據丟失。redis 默認是每兩秒鐘進行一次邊服務邊備份,即隔兩秒將累積的寫入文件。
redis 爲什麼取消直接在本進程進行 AOF 持久化的方法?原因可能是產生一個 AOF 文件要比 RDB 文件消耗更多的時間;如果在當前進程執行 AOF 持久化,會佔用服務進程(主進程)較多的時間,停止服務的時間也更長(?)
下面是邊服務邊備份的主要代碼:
/* Write the append only file buffer (server.aof_buf) on disk.
 *
 * Since we are required to write the AOF before replying to the client,
 * and the only way the client socket can get a write is entering the
 * event loop, we accumulate all the AOF writes in a memory
 * buffer and write it on disk using this function just before entering
 * the event loop again.
 *
 * About the 'force' argument:
 *
 * When the fsync policy is set to 'everysec' we may delay the flush if there
 * is still an fsync() going on in the background thread, since for instance
 * on Linux write(2) will be blocked by the background fsync anyway.
 * When this happens we remember that there is some aof buffer to be
 * flushed ASAP, and will try to do that in the serverCron() function.
 *
 * However if force is set to 1 we'll write regardless of the background
 * fsync.
 *
 * A failed or short write is treated as fatal: the process exits. */
void flushAppendOnlyFile(int force) {
    ssize_t nwritten;
    int sync_in_progress = 0;

    /* Nothing buffered, nothing to write. */
    if (sdslen(server.aof_buf) == 0) return;

    /* With the 'everysec' policy, check whether a background fsync job is
     * still pending. */
    if (server.aof_fsync == AOF_FSYNC_EVERYSEC)
        sync_in_progress = bioPendingJobsOfType(REDIS_BIO_AOF_FSYNC) != 0;

    /* Without 'force' the write may be postponed. */
    if (server.aof_fsync == AOF_FSYNC_EVERYSEC && !force) {
        /* With this append fsync policy we do background fsyncing.
         * If the fsync is still in progress we can try to delay
         * the write for a couple of seconds. */
        if (sync_in_progress) {
            if (server.aof_flush_postponed_start == 0) {
                /* No previous write postponing, remember that we are
                 * postponing the flush and return. */
                server.aof_flush_postponed_start = server.unixtime;
                return;
            } else if (server.unixtime - server.aof_flush_postponed_start < 2) {
                /* We were already waiting for fsync to finish, but for less
                 * than two seconds this is still ok. Postpone again. */
                return;
            }
            /* Otherwise fall through, and go write since we can't wait
             * over two seconds. */
            server.aof_delayed_fsync++;
            redisLog(REDIS_NOTICE,"Asynchronous AOF fsync is taking too long (disk is busy?). Writing the AOF buffer without waiting for fsync to complete, this may slow down Redis.");
        }
    }
    /* If you are following this code path, then we are going to write so
     * reset the postponed flush sentinel to zero. */
    server.aof_flush_postponed_start = 0;

    /* We want to perform a single write. This should be guaranteed atomic
     * at least if the filesystem we are writing is a real physical one.
     * While this will save us against the server being killed I don't think
     * there is much to do about the whole server stopping for power problems
     * or alike */
    nwritten = write(server.aof_fd,server.aof_buf,sdslen(server.aof_buf));
    if (nwritten != (signed)sdslen(server.aof_buf)) {
        /* Ooops, we are in troubles. The best thing to do for now is
         * aborting instead of giving the illusion that everything is
         * working as expected. */
        if (nwritten == -1) {
            redisLog(REDIS_WARNING,"Exiting on error writing to the append-only file: %s",strerror(errno));
        } else {
            redisLog(REDIS_WARNING,"Exiting on short write while writing to "
                                   "the append-only file: %s (nwritten=%ld, "
                                   "expected=%ld)",
                                   strerror(errno),
                                   (long)nwritten,
                                   (long)sdslen(server.aof_buf));

            /* Try to cut the partial record off again, back to the size
             * recorded before this write. */
            if (ftruncate(server.aof_fd, server.aof_current_size) == -1) {
                redisLog(REDIS_WARNING, "Could not remove short write "
                         "from the append-only file. Redis may refuse "
                         "to load the AOF the next time it starts. "
                         "ftruncate: %s", strerror(errno));
            }
        }
        exit(1);
    }
    /* Track the new size of the AOF file. */
    server.aof_current_size += nwritten;

    /* Re-use AOF buffer when it is small enough. The maximum comes from the
     * arena size of 4k minus some overhead (but is otherwise arbitrary).
     * A larger buffer is freed and replaced by an empty one. */
    if ((sdslen(server.aof_buf)+sdsavail(server.aof_buf)) < 4000) {
        sdsclear(server.aof_buf);
    } else {
        sdsfree(server.aof_buf);
        server.aof_buf = sdsempty();
    }

    /* Don't fsync if no-appendfsync-on-rewrite is set to yes and there are
     * children doing I/O in the background. */
    if (server.aof_no_fsync_on_rewrite &&
        (server.aof_child_pid != -1 || server.rdb_child_pid != -1))
            return;

    /* Perform the fsync if needed. */
    if (server.aof_fsync == AOF_FSYNC_ALWAYS) {
        /* aof_fsync is defined as fdatasync() for Linux in order to avoid
         * flushing metadata. */
        aof_fsync(server.aof_fd); /* Let's try to get this data on the disk */
        server.aof_last_fsync = server.unixtime;
    } else if ((server.aof_fsync == AOF_FSYNC_EVERYSEC &&
                server.unixtime > server.aof_last_fsync)) {
        /* Only queue a new background fsync when none is pending. */
        if (!sync_in_progress) aof_background_fsync(server.aof_fd);
        server.aof_last_fsync = server.unixtime;
    }
}
上面兩次提到了「更新緩存」,它便是 redis 累積的數據變動。
更新緩存可以存儲在 server.aof_buf 中,也可以存儲在 server.aof_rewrite_buf_blocks 鏈表中。它們的關係是:每一次數據變動記錄都會寫入 server.aof_buf 中,同時如果後臺子進程在持久化,變動記錄還會被寫入 server.aof_rewrite_buf_blocks 中。server.aof_buf 會在特定時機寫入指定文件,server.aof_rewrite_buf_blocks 會在後臺持久化結束後追加到文件。
redis 源碼中是這麼實現的:propagate()->feedAppendOnlyFile()->aofRewriteBufferAppend()
註釋:feedAppendOnlyFile() 會把更新添加到 server.aof_buf;接下來會有一個判斷,如果存在 AOF 子進程,則調用 aofRewriteBufferAppend() 將本次更新的內容(buf)同樣追加到 server.aof_rewrite_buf_blocks 鏈表。
一副能夠緩解視力疲勞的圖片——AOF 持久化運做機制:
下面是主要的代碼:
/* Propagate the specified command (in the context of the specified database id)
 * to AOF and Slaves.
 *
 * flags are an or between:
 * + REDIS_PROPAGATE_NONE (no propagation of command at all)
 * + REDIS_PROPAGATE_AOF (propagate into the AOF file if is enabled)
 * + REDIS_PROPAGATE_REPL (propagate into the replication link)
 */
void propagate(struct redisCommand *cmd, int dbid, robj **argv, int argc,
               int flags)
{
    /* AOF must not be off and the AOF flag must be set. */
    if (server.aof_state != REDIS_AOF_OFF && flags & REDIS_PROPAGATE_AOF)
        feedAppendOnlyFile(cmd,dbid,argv,argc);
    /* Feed the slaves when the replication flag is set. */
    if (flags & REDIS_PROPAGATE_REPL)
        replicationFeedSlaves(server.slaves,dbid,argv,argc);
}

/* Encode one executed command in the AOF format and record it: it is
 * appended to server.aof_buf when AOF is on and, while a rewrite child is
 * running, also to the rewrite buffer. */
void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc) {
    sds buf = sdsempty();
    robj *tmpargv[3];

    /* The DB this command was targeting is not the same as the last command
     * we appended. To issue a SELECT command is needed. */
    if (dictid != server.aof_selected_db) {
        char seldb[64];

        snprintf(seldb,sizeof(seldb),"%d",dictid);
        buf = sdscatprintf(buf,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
            (unsigned long)strlen(seldb),seldb);
        server.aof_selected_db = dictid;
    }

    if (cmd->proc == expireCommand || cmd->proc == pexpireCommand ||
        cmd->proc == expireatCommand) {
        /* Translate EXPIRE/PEXPIRE/EXPIREAT into PEXPIREAT */
        buf = catAppendOnlyExpireAtCommand(buf,cmd,argv[1],argv[2]);
    } else if (cmd->proc == setexCommand || cmd->proc == psetexCommand) {
        /* Translate SETEX/PSETEX to SET and PEXPIREAT */
        tmpargv[0] = createStringObject("SET",3);
        tmpargv[1] = argv[1];
        tmpargv[2] = argv[3];
        buf = catAppendOnlyGenericCommand(buf,3,tmpargv);
        decrRefCount(tmpargv[0]);
        buf = catAppendOnlyExpireAtCommand(buf,cmd,argv[1],argv[2]);
    } else {
        /* All the other commands don't need translation or need the
         * same translation already operated in the command vector
         * for the replication itself. */
        buf = catAppendOnlyGenericCommand(buf,argc,argv);
    }

    /* Append to the AOF buffer. This will be flushed on disk just before
     * of re-entering the event loop, so before the client will get a
     * positive reply about the operation performed. */
    if (server.aof_state == REDIS_AOF_ON)
        server.aof_buf = sdscatlen(server.aof_buf,buf,sdslen(buf));

    /* If a background append only file rewriting is in progress we want to
     * accumulate the differences between the child DB and the current one
     * in a buffer, so that when the child process will do its work we
     * can append the differences to the new append only file.
     * Note that only this command's encoding (buf) is appended to the
     * server.aof_rewrite_buf_blocks list, not the whole server.aof_buf. */
    if (server.aof_child_pid != -1)
        aofRewriteBufferAppend((unsigned char*)buf,sdslen(buf));

    sdsfree(buf);
}

/* Append data to the AOF rewrite buffer (the server.aof_rewrite_buf_blocks
 * list), allocating new blocks if needed. According to the original
 * annotation this is only called by feedAppendOnlyFile(). */
void aofRewriteBufferAppend(unsigned char *s, unsigned long len) {
    /* Start from the last block of the list, if any. */
    listNode *ln = listLast(server.aof_rewrite_buf_blocks);
    aofrwblock *block = ln ? ln->value : NULL;

    while(len) {
        /* If we already got at least an allocated block, try appending
         * at least some piece into it. */
        if (block) {
            unsigned long thislen = (block->free < len) ? block->free : len;
            if (thislen) {  /* The current block is not already full. */
                memcpy(block->buf+block->used, s, thislen);
                block->used += thislen;
                block->free -= thislen;
                s += thislen;
                len -= thislen;
            }
        }

        if (len) { /* First block to allocate, or need another block. */
            int numblocks;

            /* Create an empty block and link it at the tail. */
            block = zmalloc(sizeof(*block));
            block->free = AOF_RW_BUF_BLOCK_SIZE;
            block->used = 0;
            listAddNodeTail(server.aof_rewrite_buf_blocks,block);

            /* Log every time we cross more 10 or 100 blocks, respectively
             * as a notice or warning. */
            numblocks = listLength(server.aof_rewrite_buf_blocks);
            if (((numblocks+1) % 10) == 0) {
                int level = ((numblocks+1) % 100) == 0 ? REDIS_WARNING :
                                                         REDIS_NOTICE;
                redisLog(level,"Background AOF buffer size: %lu MB",
                    aofRewriteBufferSize()/(1024*1024));
            }
        }
    }
}
兩種數據落地的方式,就是 AOF 的兩個主線。所以,redis AOF 持久化機制有兩條主線:後臺執行和邊服務邊備份,抓住這兩點就能理解 redis AOF 了。
這裏有一個疑問,兩條主線都會涉及文件的寫:後臺執行會寫一個 AOF 文件,邊服務邊備份也會寫一個,以哪一個爲準?
後臺持久化的數據首先會被寫入「temp-rewriteaof-bg-%d.aof」,其中「%d」是 AOF 子進程 id;待 AOF 子進程結束後,「temp-rewriteaof-bg-%d.aof」會被以追加的方式打開,繼而寫入 server.aof_rewrite_buf_blocks 中的更新緩存,最後「temp-rewriteaof-bg-%d.aof」文件被重命名爲 server.aof_filename,所以之前名爲 server.aof_filename 的文件會被替換,也就是說邊服務邊備份寫入的舊文件會被替換掉。邊服務邊備份的數據會被一直寫入到 server.aof_filename 文件中。
所以,確實會產生兩個文件,可是最後都會變成 server.aof_filename 文件。
這裏還有一個疑問,既然有了後臺持久化,爲什麼還要邊服務邊備份?邊服務邊備份時間長了會產生數據冗餘甚至備份過舊的數據,而後臺持久化可以消除這些東西。看,這裏是 redis 的雙保險。
AOF 的數據恢復過程設計實在是棒極了,它模擬一個服務過程。redis 首先虛擬一個客戶端,讀取 AOF 文件恢復 redis 命令和參數;然後就像服務客戶端一樣執行命令相應的函數,從而恢復數據。這些過程主要在 loadAppendOnlyFile() 中實現。
/* Replay the append log file to restore the dataset. On success REDIS_OK is
 * returned. On non fatal error (the append only file is zero-length)
 * REDIS_ERR is returned. On fatal error an error message is logged and the
 * program exits. */
int loadAppendOnlyFile(char *filename) {
    struct redisClient *fakeClient;
    FILE *fp = fopen(filename,"r");
    struct redis_stat sb;
    int old_aof_state = server.aof_state;
    long loops = 0;

    /* An empty file is the non fatal case: nothing to replay. */
    if (fp && redis_fstat(fileno(fp),&sb) != -1 && sb.st_size == 0) {
        server.aof_current_size = 0;
        fclose(fp);
        return REDIS_ERR;
    }

    if (fp == NULL) {
        redisLog(REDIS_WARNING,"Fatal error: can't open the append log file for reading: %s",strerror(errno));
        exit(1);
    }

    /* Temporarily disable AOF, to prevent EXEC from feeding a MULTI
     * to the same file we're about to read. */
    server.aof_state = REDIS_AOF_OFF;

    /* Commands are replayed through a fake client. */
    fakeClient = createFakeClient();
    startLoading(fp);

    while(1) {
        int argc, j;
        unsigned long len;
        robj **argv;
        char buf[128];
        sds argsds;
        struct redisCommand *cmd;

        /* Serve the clients from time to time (every 1000 iterations) */
        if (!(loops++ % 1000)) {
            loadingProgress(ftello(fp));
            aeProcessEvents(server.el, AE_FILE_EVENTS|AE_DONT_WAIT);
        }

        /* Read the "*<argc>" header; a clean EOF here ends the replay. */
        if (fgets(buf,sizeof(buf),fp) == NULL) {
            if (feof(fp))
                break;
            else
                goto readerr;
        }
        if (buf[0] != '*') goto fmterr;
        argc = atoi(buf+1);
        if (argc < 1) goto fmterr;

        /* Read each argument: a "$<len>" line, <len> bytes, then CRLF. */
        argv = zmalloc(sizeof(robj*)*argc);
        for (j = 0; j < argc; j++) {
            if (fgets(buf,sizeof(buf),fp) == NULL) goto readerr;
            if (buf[0] != '$') goto fmterr;
            len = strtol(buf+1,NULL,10);
            argsds = sdsnewlen(NULL,len);
            if (len && fread(argsds,len,1,fp) == 0) goto fmterr;
            argv[j] = createObject(REDIS_STRING,argsds);
            if (fread(buf,2,1,fp) == 0) goto fmterr; /* discard CRLF */
        }

        /* Command lookup */
        cmd = lookupCommand(argv[0]->ptr);
        if (!cmd) {
            redisLog(REDIS_WARNING,"Unknown command '%s' reading the append only file", (char*)argv[0]->ptr);
            exit(1);
        }
        /* Run the command in the context of a fake client */
        fakeClient->argc = argc;
        fakeClient->argv = argv;
        cmd->proc(fakeClient);

        /* The fake client should not have a reply */
        redisAssert(fakeClient->bufpos == 0 && listLength(fakeClient->reply) == 0);
        /* The fake client should never get blocked */
        redisAssert((fakeClient->flags & REDIS_BLOCKED) == 0);

        /* Clean up. Command code may have changed argv/argc so we use the
         * argv/argc of the client instead of the local variables. */
        for (j = 0; j < fakeClient->argc; j++)
            decrRefCount(fakeClient->argv[j]);
        zfree(fakeClient->argv);
    }

    /* This point can only be reached when EOF is reached without errors.
     * If the client is in the middle of a MULTI/EXEC, log error and quit. */
    if (fakeClient->flags & REDIS_MULTI) goto readerr;

    /* Release resources and restore the AOF state saved on entry. */
    fclose(fp);
    freeFakeClient(fakeClient);
    server.aof_state = old_aof_state;
    stopLoading();
    /* Record the current AOF size as the base size for later rewrites. */
    aofUpdateCurrentSize();
    server.aof_rewrite_base_size = server.aof_current_size;
    return REDIS_OK;

readerr:
    /* Read failure or truncated file: log and exit. */
    if (feof(fp)) {
        redisLog(REDIS_WARNING,"Unexpected end of file reading the append only file");
    } else {
        redisLog(REDIS_WARNING,"Unrecoverable error reading the append only file: %s", strerror(errno));
    }
    exit(1);
fmterr:
    redisLog(REDIS_WARNING,"Bad file format reading the append only file: make a backup of your AOF file, then use ./redis-check-aof --fix <filename>");
    exit(1);
}
如果對數據比較關心,分秒必爭,可以用 AOF 持久化,而且 AOF 文件很容易進行分析。
—-
搗亂 2014-3-26