Block 結構示意圖
sstable 中 Block 的頭文件如下:
// Wraps the raw bytes of one block and creates iterators over its entries.
class Block {
 public:
  // Initialize the block with the specified contents.
  // Takes ownership of data[] and will delete[] it when done.
  Block(const char* data, size_t size);

  ~Block();

  // Returns the stored size_ value.
  size_t size() const { return size_; }

  // Creates an iterator that orders keys with "comparator".
  // NOTE(review): ownership of the returned iterator is not visible here —
  // confirm against the definition.
  Iterator* NewIterator(const Comparator* comparator);

 private:
  // Number of restart points recorded in the block.
  uint32_t NumRestarts() const;

  const char* data_;
  size_t size_;
  uint32_t restart_offset_;  // Offset in data_ of restart array

  // No copying allowed
  Block(const Block&);
  void operator=(const Block&);

  class Iter;
};
重啓點在上個章節已經介紹過了。
"「重啓點」是幹什麼的呢?簡單來說就是進行數據壓縮,減少存儲空間。我們一再強調,Block內容裏的KV記錄是按照Key大小有序的,這樣的話,相鄰的兩條記錄很可能Key部分存在重疊,比如Key i=「the car」,Key i+1=「the color」,那麼兩者存在重疊部分「the c」,爲了減少Key的存儲量,Key i+1可以只存儲和上一條Key不同的部分「olor」,兩者的共同部分從Key i中可以獲得。記錄的Key在Block內容部分就是這麼存儲的,主要目的是減少存儲開銷。「重啓點」的意思是:從這條記錄開始,不再採取只記載不同的Key部分,而是重新記錄完整的Key值,假設Key i+1是一個重啓點,那麼Key裏面會完整存儲「the color」,而不是採用簡略的「olor」方式。但是如果記錄條數比較多,隨機訪問一條記錄,需要從頭開始一直解析才行,這樣也產生很大的開銷,所以設置了多個重啓點,Block尾部就是指出哪些記錄是這些重啓點的。"
//獲取BLOCK中的重啓點數目 inline uint32_t Block::NumRestarts() const { assert(size_ >= 2*sizeof(uint32_t)); return DecodeFixed32(data_ + size_ - sizeof(uint32_t)); //重啓點在block最後8字節(uint32_t)中
}
Block 的創建和銷燬
// Builds a Block over "data" (length "size") and locates the restart array.
//
// Tail layout of a block: one uint32 offset per restart point, followed by a
// single uint32 holding the number of restart points. The trailer therefore
// occupies (1 + NumRestarts()) * sizeof(uint32_t) bytes.
//
// On malformed input size_ is set to 0 as an error marker and
// restart_offset_ is left at 0.
Block::Block(const char* data, size_t size)
    : data_(data), size_(size), restart_offset_(0) {
  if (size_ < sizeof(uint32_t)) {
    size_ = 0;  // Error marker: too small to even hold the restart count
    return;
  }

  // Bound the restart count BEFORE doing any subtraction. Subtracting first
  // and then checking for wrap-around is unreliable: the difference is
  // truncated to 32 bits when stored in restart_offset_, so a huge corrupt
  // count can alias to a plausible-looking offset.
  const size_t max_restarts_allowed =
      (size_ - sizeof(uint32_t)) / sizeof(uint32_t);
  const uint32_t num_restarts = NumRestarts();
  if (num_restarts > max_restarts_allowed) {
    size_ = 0;  // Error marker: the block is too small for that many restarts
    return;
  }

  const size_t trailer_bytes =
      (1 + static_cast<size_t>(num_restarts)) * sizeof(uint32_t);
  restart_offset_ = static_cast<uint32_t>(size_ - trailer_bytes);
}

// Releases the buffer whose ownership was taken in the constructor.
Block::~Block() { delete[] data_; }
Block 中每一個 entry 的解碼
entry 結構如上圖的 KeyValuePair
static inline const char* DecodeEntry(const char* p, const char* limit, uint32_t* shared, uint32_t* non_shared, uint32_t* value_length) { if (limit - p < 3) return NULL; //至少包含3個 共享字節 *shared = reinterpret_cast<const unsigned char*>(p)[0]; *non_shared = reinterpret_cast<const unsigned char*>(p)[1]; *value_length = reinterpret_cast<const unsigned char*>(p)[2]; if ((*shared | *non_shared | *value_length) < 128) { // Fast path: all three values are encoded in one byte each
//三個記錄的值或操做後 均沒有超過128 即最高位爲0
p += 3; } else { if ((p = GetVarint32Ptr(p, limit, shared)) == NULL) return NULL; if ((p = GetVarint32Ptr(p, limit, non_shared)) == NULL) return NULL; if ((p = GetVarint32Ptr(p, limit, value_length)) == NULL) return NULL; } if (static_cast<uint32_t>(limit - p) < (*non_shared + *value_length)) { return NULL; } return p; }
Block 使用的迭代器
class Block::Iter : public Iterator
基本數據結構
// Iterator over the entries stored in a Block.
// (Excerpt: only the data members and the comparison helper appear here.)
class Block::Iter : public Iterator {
 private:
  const Comparator* const comparator_;
  const char* const data_;       // underlying block contents
  uint32_t const restarts_;      // Offset of restart array (list of fixed32)
  uint32_t const num_restarts_;  // Number of uint32_t entries in restart array

  // current_ is offset in data_ of current entry.  >= restarts_ if !Valid
  uint32_t current_;
  uint32_t restart_index_;  // Index of restart block in which current_ falls
  std::string key_;
  Slice value_;
  Status status_;

  // Compares two keys by delegating to comparator_.
  inline int Compare(const Slice& a, const Slice& b) const {
    return comparator_->Compare(a, b);
  }
}
// Return the offset in data_ just past the end of the current entry. //下一個記錄的起點就是當前記錄的末尾偏移 //當前記錄加上記錄的長度 和 BLOCK的起點的差 就是偏移 inline uint32_t NextEntryOffset() const { return (value_.data() + value_.size()) - data_; } uint32_t GetRestartPoint(uint32_t index) { //data_ + restarts_就是記錄各個重啓點偏移的數組 //根據重啓點index 計算偏移data_ + restarts_ ,裏面就是第index個重啓點的偏移 assert(index < num_restarts_); return DecodeFixed32(data_ + restarts_ + index * sizeof(uint32_t)); } void SeekToRestartPoint(uint32_t index) { key_.clear(); restart_index_ = index; // current_ will be fixed by ParseNextKey(); //value結束就是KEY的開始 因此使用value_記錄 uint32_t offset = GetRestartPoint(index); value_ = Slice(data_ + offset, 0); }
bool ParseNextKey() { current_ = NextEntryOffset(); //獲取下一個entry的偏移 const char* p = data_ + current_; const char* limit = data_ + restarts_; // 全部BLOCK內數據不可能超過restart if (p >= limit) { // No more entries to return. Mark as invalid. current_ = restarts_; restart_index_ = num_restarts_; return false; } // Decode next entry uint32_t shared, non_shared, value_length; //解析獲取 key的共享字段長度 非共享字段長度和value的長度 p = DecodeEntry(p, limit, &shared, &non_shared, &value_length); if (p == NULL || key_.size() < shared) { CorruptionError(); return false; } else { key_.resize(shared); //key保存了其餘entry的key 可是能夠保留共享長度的字符串 key_.append(p, non_shared); //再添加非共享長度的字符串 就是當前KEY內容 value_ = Slice(p + non_shared, value_length); //value 就是略過key的偏移 //編譯restart點 確認restart點的偏移是離本身最近的 restart_index_< current_ < (restart_index_ + 1) while (restart_index_ + 1 < num_restarts_ && GetRestartPoint(restart_index_ + 1) < current_) { ++restart_index_; } return true; } } };
參考:
https://www.cnblogs.com/itdef/p/9789620.html