前言 - Simple Dynamic Stringsgit
antirez 想統一 Redis,Disque,Hiredis 項目中 SDS 代碼, 所以構建了這個項目github
https://github.com/antirez/sds . 更多介紹的背景知識, 能夠閱讀 README.md. redis
sds 項目是 C 字符串數據結構在實際環境中一種取捨實現, 庫自己是非線程安全的. 下面安全
來帶你們手寫相關代碼, 透徹瞭解這個庫的意圖(antirez 註釋的很棒). 數據結構
#define SDS_MAX_PREALLOC (1024*1024)
/* Note: sdshdr5 is never used, we just access the flags byte directly. * However is here to document the layout of type 5 SDS strings. */
struct __attribute__ ((__packed__)) sdshdr5 { unsigned char flags; /* 3 lsb of type, and 5 msb of string length */
char buf[]; }; struct __attribute__ ((__packed__)) sdshdr8 { uint8_t len; /* used */ uint8_t alloc; /* excluding the header and null terminator */ unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[]; }; struct __attribute__ ((__packed__)) sdshdr16 { uint16_t len; /* used */ uint16_t alloc; /* excluding the header and null terminator */ unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[]; }; struct __attribute__ ((__packed__)) sdshdr32 { uint32_t len; /* used */ uint32_t alloc; /* excluding the header and null terminator */ unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[]; }; struct __attribute__ ((__packed__)) sdshdr64 { uint64_t len; /* used */ uint64_t alloc; /* excluding the header and null terminator */ unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[]; }; #define SDS_TYPE_5 0
#define SDS_TYPE_8 1
#define SDS_TYPE_16 2
#define SDS_TYPE_32 3
#define SDS_TYPE_64 4
#define SDS_TYPE_MASK 7
#define SDS_TYPE_BITS 3
#define SDS_TYPE_5_LEN(f) ((f)>>SDS_TYPE_BITS)
#define SDS_HDR(T, s) ((struct sdshdr##T *)((s) - (sizeof(struct sdshdr##T))))
#define SDS_HDR_VAR(T, s) struct sdshdr##T * sh = SDS_HDR(T, s)
能夠先分析 sdshdr5 sdshdr8 sdshdr16 sdshdr32 sdshdr64, 感覺做者的意圖. 函數
首先這幾個基本結構, 都有 len, alloc, flags, buf 4 個字段, 是否是. 有的同窗會問, sdshdr5 優化
中沒有 alloc 和 len 字段呀, 這個啊 sdshdr5 結構比較特殊, 其 alloc 和 len 都隱含在 flagsui
中, 三者複用一個字段. 能夠從函數宏 SDS_TYPE_5_LEN(f), 能夠看出來. 於是 sdshdr5 表達的字符this
串長度和字符串容量默認相同. 再考究一下 __attribute__ ((__packed__)) 意圖(告訴編譯器取消結spa
構在編譯過程當中的優化對齊, 按照實際佔用字節數進行對齊). 對於取消結構內存編譯對齊優化, 個人考究有
兩點, 1 節約內存, 2 內存可移植變強.
隨後多數是流水線代碼, 很是好理解. 例若有段這樣關係的代碼 sdsalloc() = sdsavail() + sdslen()
inline size_t sdslen(const sds s) { unsigned char flags = s[-1]; switch (flags & SDS_TYPE_MASK) { case SDS_TYPE_5 : return SDS_TYPE_5_LEN(flags); case SDS_TYPE_8 : return SDS_HDR(8 , s)->len; case SDS_TYPE_16: return SDS_HDR(16, s)->len; case SDS_TYPE_32: return SDS_HDR(32, s)->len; case SDS_TYPE_64: return SDS_HDR(64, s)->len; } return 0; } inline size_t sdsavail(const sds s) { unsigned char flags = s[-1]; switch (flags & SDS_TYPE_MASK) { case SDS_TYPE_8 : { SDS_HDR_VAR(8 , s); return sh->alloc - sh->len; } case SDS_TYPE_16: { SDS_HDR_VAR(16, s); return sh->alloc - sh->len; } case SDS_TYPE_32: { SDS_HDR_VAR(32, s); return sh->alloc - sh->len; } case SDS_TYPE_64: { SDS_HDR_VAR(64, s); return sh->alloc - sh->len; } default: return 0; } } /* sdsalloc() = sdsavail() + sdslen() */ inline size_t sdsalloc(const sds s) { unsigned char flags = s[-1]; switch (flags & SDS_TYPE_MASK) { case SDS_TYPE_5 : return SDS_TYPE_5_LEN(flags); case SDS_TYPE_8 : return SDS_HDR(8 , s)->alloc; case SDS_TYPE_16: return SDS_HDR(16, s)->alloc; case SDS_TYPE_32: return SDS_HDR(32, s)->alloc; case SDS_TYPE_64: return SDS_HDR(64, s)->alloc; } return 0; }
是否是一下就理解了 sdsalloc(), sdsavail(), sdslen() 是幹什麼的呢 ❤
正文 - 代碼抽樣講解
1. 重複代碼能夠修的更好
/* Helper for sdscatlonglong() doing the actual number -> string * conversion. 's' must point to a string with room for at least * SDS_LLSTR_SIZE bytes. * * The function returns the length of the null-terminated string * representation stored at 's'. */
#define SDS_LLSTR_SIZE 21
int sdsll2str(char *s, long long value) { char *p, aux; unsigned long long v; size_t l; /* Generate the string representation, this method produces * an reversed string. */ v = (value < 0) ? -value : value; p = s; do { *p++ = '0'+(v%10); v /= 10; } while(v); if (value < 0) *p++ = '-'; /* Compute length and add null term. */ l = p-s; *p = '\0'; /* Reverse the string. */ p--; while(s < p) { aux = *s; *s = *p; *p = aux; s++; p--; } return l; } /* Identical sdsll2str(), but for unsigned long long type. */
int sdsull2str(char *s, unsigned long long v) { char *p, aux; size_t l; /* Generate the string representation, this method produces * an reversed string. */ p = s; do { *p++ = '0'+(v%10); v /= 10; } while(v); /* Compute length and add null term. */ l = p-s; *p = '\0'; /* Reverse the string. */ p--; while(s < p) { aux = *s; *s = *p; *p = aux; s++; p--; } return l; }
long long or unsigned long long convert to char * 中, 功能很簡單. 函數結尾處代碼
重複很多, 是能夠重構複用的.
inline int sdsreverse(char * s, char * p) { /* Compute length and add null term. */ size_t l = p - s; *p = '\0'; p--; while (s < p) { char aux = *s; *s = *p; *p = aux; s++; p--; } return (int)l; } /* Helper for sdscatlonglong() doing the actual number -> string * conversion. 's' must point to a string with room for at least * SDS_LLSTR_SIZE bytes. * * The function returns the length of the null-terminated string * representation stored at 's'. */
#define SDS_LLSTR_SIZE 21
int sdsll2str(char * s, long long value) { char * p; unsigned long long v; /* Generate the string representation, this method produces * an reversed string. */ v = (value < 0) ? -value : value; p = s; do { *p++ = '0' + (v % 10); } while ((v /= 10)); if (value < 0) *p++ = '-'; return sdsreverse(s, p); } /* Identical sdsll2str(), but for unsigned long long type. */
int sdsull2str(char * s, unsigned long long v) { /* Generate the string representation, this method produces * an reversed string. */
char * p = s; do { *p++ = '0' + (v % 10); } while ((v /= 10)); return sdsreverse(s, p); }
是否是顯得老學究氣質凸顯了很多.
2. vsnprintf 用的太硬邦邦
/* Like sdscatprintf() but gets va_list instead of being variadic. */ sds sdscatvprintf(sds s, const char *fmt, va_list ap) { va_list cpy; char staticbuf[1024], *buf = staticbuf, *t; size_t buflen = strlen(fmt)*2; /* We try to start using a static buffer for speed. * If not possible we revert to heap allocation. */
if (buflen > sizeof(staticbuf)) { buf = s_malloc(buflen); if (buf == NULL) return NULL; } else { buflen = sizeof(staticbuf); } /* Try with buffers two times bigger every time we fail to * fit the string in the current buffer size. */
while(1) { buf[buflen-2] = '\0'; va_copy(cpy,ap); vsnprintf(buf, buflen, fmt, cpy); va_end(cpy); if (buf[buflen-2] != '\0') { if (buf != staticbuf) s_free(buf); buflen *= 2; buf = s_malloc(buflen); if (buf == NULL) return NULL; continue; } break; } /* Finally concat the obtained string to the SDS string and return it. */ t = sdscat(s, buf); if (buf != staticbuf) s_free(buf); return t; }
由 while vsnprintf 來擴容, 這實在太暴力. 更好操做能夠觀看 man vsnprintf/ 這裏能夠看個人提交
https://github.com/antirez/sds/pull/115/commits/51e2cd78b1a102055979ec9eb83766b8d2cd6927
/* Like sdscatprintf() but gets va_list instead of being variadic. */ sds sdscatvprintf(sds s, const char * fmt, va_list ap) { int size; va_list cpy; char staticbuf[1024], * buf, * t; /* Determine required size */ va_copy(cpy, ap); size = vsnprintf(NULL, 0, fmt, cpy); va_end(cpy); if (size < 0) return NULL; /* For '\0' */ size++; /* We try to start using a static buffer for speed. * If not possible we revert to heap allocation. */
if (size > sizeof(staticbuf)) { buf = s_malloc(size); if (buf == NULL) return NULL; } else { buf = staticbuf; } va_copy(cpy, ap); size = vsnprintf(buf, size, fmt, cpy); va_end(ap); if (size < 0) { if (buf != staticbuf) s_free(buf); return NULL; } /* Finally concat the obtained string to the SDS string and return it. */ t = sdscat(s, buf); if (buf != staticbuf) s_free(buf); return t; }
別問, 問就是上天.
3. sdssplitargs 難以讓人見色起意
/* Helper function for sdssplitargs() that returns non zero if 'c' * is a valid hex digit. */
int is_hex_digit(char c) { return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); } /* Helper function for sdssplitargs() that converts a hex digit into an * integer from 0 to 15 */
int hex_digit_to_int(char c) { switch(c) { case '0': return 0; case '1': return 1; case '2': return 2; case '3': return 3; case '4': return 4; case '5': return 5; case '6': return 6; case '7': return 7; case '8': return 8; case '9': return 9; case 'a': case 'A': return 10; case 'b': case 'B': return 11; case 'c': case 'C': return 12; case 'd': case 'D': return 13; case 'e': case 'E': return 14; case 'f': case 'F': return 15; default: return 0; } } /* Split a line into arguments, where every argument can be in the * following programming-language REPL-alike form: * * foo bar "newline are supported\n" and "\xff\x00otherstuff" * * The number of arguments is stored into *argc, and an array * of sds is returned. * * The caller should free the resulting array of sds strings with * sdsfreesplitres(). * * Note that sdscatrepr() is able to convert back a string into * a quoted string in the same format sdssplitargs() is able to parse. * * The function returns the allocated tokens on success, even when the * input string is empty, or NULL if the input contains unbalanced * quotes or closed quotes followed by non space characters * as in: "foo"bar or "foo' */ sds *sdssplitargs(const char *line, int *argc) { const char *p = line; char *current = NULL; char **vector = NULL; *argc = 0; while(1) { /* skip blanks */
while(*p && isspace(*p)) p++; if (*p) { /* get a token */
int inq=0; /* set to 1 if we are in "quotes" */
int insq=0; /* set to 1 if we are in 'single quotes' */
int done=0; if (current == NULL) current = sdsempty(); while(!done) { if (inq) { if (*p == '\\' && *(p+1) == 'x' && is_hex_digit(*(p+2)) && is_hex_digit(*(p+3))) { unsigned char byte; byte = (hex_digit_to_int(*(p+2))*16)+ hex_digit_to_int(*(p+3)); current = sdscatlen(current,(char*)&byte,1); p += 3; } else if (*p == '\\' && *(p+1)) { char c; p++; switch(*p) { case 'n': c = '\n'; break; case 'r': c = '\r'; break; case 't': c = '\t'; break; case 'b': c = '\b'; break; case 'a': c = '\a'; break; default: c = *p; break; } current = sdscatlen(current,&c,1); } else if (*p == '"') { /* closing quote must be followed by a space or * nothing at all. */
if (*(p+1) && !isspace(*(p+1))) goto err; done=1; } else if (!*p) { /* unterminated quotes */
goto err; } else { current = sdscatlen(current,p,1); } } else if (insq) { if (*p == '\\' && *(p+1) == '\'') { p++; current = sdscatlen(current,"'",1); } else if (*p == '\'') { /* closing quote must be followed by a space or * nothing at all. */
if (*(p+1) && !isspace(*(p+1))) goto err; done=1; } else if (!*p) { /* unterminated quotes */
goto err; } else { current = sdscatlen(current,p,1); } } else { switch(*p) { case ' ': case '\n': case '\r': case '\t': case '\0': done=1; break; case '"': inq=1; break; case '\'': insq=1; break; default: current = sdscatlen(current,p,1); break; } } if (*p) p++; } /* add the token to the vector */ vector = s_realloc(vector,((*argc)+1)*sizeof(char*)); vector[*argc] = current; (*argc)++; current = NULL; } else { /* Even on empty input string return something not NULL. */
if (vector == NULL) vector = s_malloc(sizeof(void*)); return vector; } } err: while((*argc)--) sdsfree(vector[*argc]); s_free(vector); if (current) sdsfree(current); *argc = 0; return NULL; } /* Free the result returned by sdssplitlen(), or do nothing if 'tokens' is NULL. */
void sdsfreesplitres(sds *tokens, int count) { if (!tokens) return; while(count--) sdsfree(tokens[count]); s_free(tokens); }
sdssplitargs 函數開始很差理解, 不過我這寫了個 demo 分享給你們
#include <stdio.h> #include "sds.h"
int main(int argc, char * argv[]) { int count; const char * line = " hset name \"name:filed\" \"value:field\" "; sds * tokens = sdssplitargs(line, &count); printf("line = [%s], count = [%d]\n", line, count); for (int i = 0; i < count; i++) { printf("tokens[%d] = [%s]\n", i, tokens[i]); } sdsfreesplitres(tokens, count); return 0; }
輸出
line = [ hset name "name:filed" "value:field" ], count = [4] tokens[0] = [hset] tokens[1] = [name] tokens[2] = [name:filed] tokens[3] = [value:field]
是否是瞬間開悟 -> sdssplitargs 到底要鬧那樣!
總體寫下來, 感覺是 antirez sds 實現的很四平八穩, 沒有什麼花哨的地方.
感興趣的朋友寫寫, 收穫少不了 ~
後記 - 再接再礪
錯誤是不免, 歡迎交流吹水 ~
義不容辭 - https://music.163.com/#/song?id=1363553512
古朗月行(唐·李白)
小時不識月,呼做白玉盤。
又疑瑤臺鏡,飛在青雲端。
仙人垂兩足,桂樹做團團。
白兔搗藥成,問言與誰餐。
蟾蜍蝕圓影,大明夜已殘。
羿昔落九烏,天人清且安。
陰精此淪惑,去去不足觀。
憂來其如何,悽愴摧心肝。