紅黑樹的操作:
/*
 * Insert a new entry into the rbtree.
 *
 * root:   tree root (its ->rb_node is the starting point of the descent)
 * new:    pointer to the entry to insert
 * member: name of the struct rb_node field embedded in the entry
 * compar: three-way comparator called as compar(new, existing)
 *
 * Evaluates to NULL when the entry was linked into the tree, or to the
 * already-present entry that compares equal to 'new' (in which case the
 * tree is left untouched).
 */
#define rb_insert(root, new, member, compar)				\
({									\
	struct rb_node **__link = &(root)->rb_node;			\
	struct rb_node *__up = NULL;					\
	typeof(new) __dup = NULL;					\
									\
	while (*__link != NULL) {					\
		typeof(new) __cur =					\
			rb_entry(*__link, typeof(*new), member);	\
		int __order = compar(new, __cur);			\
									\
		if (__order == 0) {					\
			/* An equal entry already exists. */		\
			__dup = __cur;					\
			break;						\
		}							\
		/* Descend one level, remembering the parent. */	\
		__up = *__link;						\
		if (__order < 0)					\
			__link = &((*__link)->rb_left);			\
		else							\
			__link = &((*__link)->rb_right);		\
	}								\
									\
	if (__dup == NULL) {						\
		/* Hook the node into the free slot, then rebalance. */	\
		rb_link_node(&((new)->member), __up, __link);		\
		rb_insert_color(&((new)->member), root);		\
	}								\
									\
	__dup;								\
})
在rbtree中查找一個節點:
/*
 * Look a key up in the rbtree.
 *
 * On an exact match the matching entry is produced.  Otherwise the result
 * is the entry with the next greater key; if the key is greater than every
 * entry in the tree, the search wraps around to the first entry.
 *
 * Evaluates to NULL only for an empty tree.
 */
#define rb_nsearch(root, key, member, compar)				\
({									\
	struct rb_node *__pos = (root)->rb_node;			\
	typeof(key) __found = NULL;					\
									\
	while (__pos != NULL) {						\
		typeof(key) __cur =					\
			rb_entry(__pos, typeof(*key), member);		\
		int __order = compar(key, __cur);			\
									\
		if (__order > 0) {					\
			/* Key is larger: only the right side helps. */	\
			__pos = __pos->rb_right;			\
			continue;					\
		}							\
		/* Exact hit, or best "next greater" seen so far. */	\
		__found = __cur;					\
		if (__order == 0)					\
			break;						\
		__pos = __pos->rb_left;					\
	}								\
	/* Nothing greater or equal: wrap around to the smallest. */	\
	if (__found == NULL && !RB_EMPTY_ROOT(root))			\
		__found = rb_entry(rb_first(root), typeof(*key), member); \
	__found;							\
})
建立vdisks:一個物理disk的記錄就是一棵紅黑子樹,若是刪除該disk,那麼對應的是刪除整棵子樹
1 static void create_vdisks(const struct disk *disk) 2 { 3 //計算disk路徑的hash值 4 uint64_t hval = sd_hash(disk->path, strlen(disk->path)); 5 const struct sd_node *n = &sys->this_node; 6 uint64_t node_hval; 7 int nr; 8 // 判斷當前使用的模式:Disk mode for cluster 9 if (is_cluster_diskmode(&sys->cinfo)) { 10 // node的hash值 11 node_hval = sd_hash(&n->nid, offsetof(typeof(n->nid), io_addr)); 13 hval = fnv_64a_64(node_hval, hval); 14 // nr即將disk按照4G大小劃分後的個數 15 nr = DIV_ROUND_UP(disk->space, WEIGHT_MIN); 16 if (0 == n->nid.port) 17 return; 18 } else 19 // 按照16M空間大小劃分磁盤空間後的個數 20 nr = vdisk_number(disk); 21 // 上述主要是獲取到vdisk的個數,接下來將vdisk信息存儲在rbtree中 22 for (int i = 0; i < nr; i++) { 23 struct vdisk *v = xmalloc(sizeof(*v)); 25 hval = sd_hash_next(hval); 26 v->hash = hval; 27 v->disk = disk; 28 if (unlikely(vdisk_insert(v))) // 在rbtree中插入vdisk,樹的根在md.vroot這樣的全局變量中 29 panic("vdisk hash collison"); 30 } 31 }
添加一個物理磁盤:
1 /* We don't need lock at init stage */
2 bool md_add_disk(const char *path, bool purge)
3 { 4 struct disk *new; 5 // 是否已經存在 6 if (path_to_disk(path)) { 7 sd_err("duplicate path %s", path); 8 return false; 9 } 10 // 建立相應的文件夾目錄 11 if (xmkdir(path, sd_def_dmode) < 0) { 12 sd_err("can't mkdir for %s, %m", path); 13 return false; 14 } 15 16 new = xmalloc(sizeof(*new)); 17 pstrcpy(new->path, PATH_MAX, path); 18 trim_last_slash(new->path); 19 new->space = init_path_space(new->path, purge); 20 if (!new->space) { 21 free(new); 22 return false; 23 } 24 // 建立相應的vdisks 25 create_vdisks(new); 26 rb_insert(&md.root, new, rb, disk_cmp); 27 md.space += new->space; 28 md.nr_disks++; 29 30 sd_info("%s, vdisk nr %d, total disk %d", new->path, vdisk_number(new),md.nr_disks); 31 return true; 32 }
與之相反的是刪除vdisk:
1 static inline void vdisk_free(struct vdisk *v) 2 { 3 // 刪除樹的結構 4 rb_erase(&v->rb, &md.vroot); 5 free(v); 6 } 7 8 static void remove_vdisks(const struct disk *disk) 9 { 10 uint64_t hval = sd_hash(disk->path, strlen(disk->path)); 11 const struct sd_node *n = &sys->this_node; 12 uint64_t node_hval; 13 int nr; 14 15 if (is_cluster_diskmode(&sys->cinfo)) { 16 node_hval = sd_hash(&n->nid, offsetof(typeof(n->nid), io_addr)); 17 hval = fnv_64a_64(node_hval, hval); 18 nr = DIV_ROUND_UP(disk->space, WEIGHT_MIN); 19 } else 20 nr = vdisk_number(disk); 21 22 for (int i = 0; i < nr; i++) { 23 struct vdisk *v; 24 25 hval = sd_hash_next(hval); 26 v = hval_to_vdisk(hval); 27 sd_assert(v->hash == hval); 28 // 逐個釋放vdisk 29 vdisk_free(v); 30 } 31 }
刪除物理disk操作:
1 static void remove_vdisks(const struct disk *disk) 2 { 3 uint64_t hval = sd_hash(disk->path, strlen(disk->path)); 4 const struct sd_node *n = &sys->this_node; 5 uint64_t node_hval; 6 int nr; 7 8 if (is_cluster_diskmode(&sys->cinfo)) { 9 node_hval = sd_hash(&n->nid, offsetof(typeof(n->nid), io_addr)); 10 hval = fnv_64a_64(node_hval, hval); 11 nr = DIV_ROUND_UP(disk->space, WEIGHT_MIN); 12 } else 13 nr = vdisk_number(disk); 14 15 for (int i = 0; i < nr; i++) { 16 struct vdisk *v; 17 18 hval = sd_hash_next(hval); 19 v = hval_to_vdisk(hval); 20 sd_assert(v->hash == hval); 21 22 vdisk_free(v); 23 } 24 } 25 26 27 static inline void md_remove_disk(struct disk *disk) 28 { 29 sd_info("%s from multi-disk array", disk->path); 30 rb_erase(&disk->rb, &md.root); 31 md.nr_disks--; 32 remove_vdisks(disk); 33 free(disk); 34 } 35 36 static inline void md_del_disk(const char *path) 37 { 38 struct disk *disk = path_to_disk(path); 39 40 if (!disk) { 41 sd_err("invalid path %s", path); 42 return; 43 } 44 md_remove_disk(disk); 45 }
更新nodes disks,這裏僅在配置爲disk_vnodes模式時纔會定義:
#ifdef HAVE_DISKVNODES
/*
 * Refresh the per-disk information advertised in sys->this_node and
 * rebuild the vdisks of every registered disk.
 *
 * Step 1 (read lock): clear sys->this_node.disks and refill it with the
 * id (hash of the path) and space of each disk in md.root.
 * Step 2 (write lock): for each disk, drop its current vdisks (unless the
 * vdisk tree was empty to begin with) and create them again.
 *
 * Does nothing when 'sys' is not set up yet.
 */
void update_node_disks(void)
{
	const struct disk *disk;
	int i = 0;
	bool rb_empty;

	if (!sys)
		return;

	memset(sys->this_node.disks, 0, sizeof(struct disk_info) * DISK_MAX);
	sd_read_lock(&md.lock);
	rb_for_each_entry(disk, &md.root, rb) {
		/*
		 * Only DISK_MAX entries are cleared above, so that is taken
		 * as the capacity of the array; never write past it even if
		 * more disks are registered.
		 */
		if (i < DISK_MAX) {
			sys->this_node.disks[i].disk_id =
				sd_hash(disk->path, strlen(disk->path));
			sys->this_node.disks[i].disk_space = disk->space;
			i++;
		}
	}
	sd_rw_unlock(&md.lock);

	sd_write_lock(&md.lock);
	/*
	 * Sample the state of the vdisk tree while holding the lock, so the
	 * decision cannot be invalidated by a concurrent plug/unplug between
	 * the check and the rebuild.  It is sampled once on purpose: after
	 * the first create_vdisks() the tree is no longer empty, but the
	 * remaining disks still have no vdisks to remove.
	 */
	rb_empty = RB_EMPTY_ROOT(&md.vroot);
	rb_for_each_entry(disk, &md.root, rb) {
		if (!rb_empty)
			remove_vdisks(disk);
		create_vdisks(disk);
	}
	sd_rw_unlock(&md.lock);
}
#else
/* Without HAVE_DISKVNODES there is no per-disk node info to update. */
void update_node_disks(void)
{
}
#endif
本地磁盤插拔操作:
1 static int do_plug_unplug(char *disks, bool plug) 2 { 3 const char *path; 4 int old_nr, new_nr, ret = SD_RES_UNKNOWN; 5 6 sd_write_lock(&md.lock); 7 old_nr = md.nr_disks; 8 path = strtok(disks, ","); 9 do { 10 if (plug) { 11 if (!md_add_disk(path, true)) 12 sd_err("failed to add %s", path); 13 } else { 14 md_del_disk(path); 15 } 16 } while ((path = strtok(NULL, ","))); 17 new_nr = md.nr_disks; 18 19 /* If no disks change, bail out */ 20 if (old_nr == new_nr) 21 goto out; 22 23 ret = SD_RES_SUCCESS; 24 out: 25 sd_rw_unlock(&md.lock); 26 27 if (ret == SD_RES_SUCCESS) { 28 if (new_nr > 0) { 29 update_node_disks(); 30 kick_recover(); 31 } else { 32 sd_warn("no disks plugged, going down"); 33 leave_cluster(); 34 sys->cinfo.status = SD_STATUS_KILLED; 35 } 36 } 37 38 return ret; 39 } 40 41 int md_plug_disks(char *disks) 42 { 43 return do_plug_unplug(disks, true); 44 } 45 46 int md_unplug_disks(char *disks) 47 { 48 return do_plug_unplug(disks, false); 49 }
oid到vdisk的映射:
1 /* If v1_hash < hval <= v2_hash, then oid is resident in v2, 在rbtree中尋找其位置*/ 2 static struct vdisk *hval_to_vdisk(uint64_t hval) 3 { 4 struct vdisk dummy = { .hash = hval }; 5 6 return rb_nsearch(&md.vroot, &dummy, rb, vdisk_cmp); 7 } 8 /* 將oid的hash值做爲入參 */ 9 static struct vdisk *oid_to_vdisk(uint64_t oid) 10 { 11 return hval_to_vdisk(sd_hash_oid(oid)); 12 }