在實際工作中,我需要使用 redis 的客戶端去連接 redis,於是選擇了 hiredis 客戶端(公司強推)。hiredis 是 Redis 官方指定的 C 語言客戶端開發包,支持 Redis 完整的命令集、管線以及事件驅動編程。
一、情景描述
1.1 使用場景
一個 epoll 模型的服務器不斷接受外界請求,這個服務器框架給用戶預留了一個回調函數(多線程執行),回調函數是用戶自己去實現的業務邏輯,其中 redis 的使用就需要在這個回調函數內部實現。
1.2 初步實現方案
在程序啓動的時候,我就初始化 redis 的連接,獲得 hiredis 句柄,然後把 hiredis 句柄傳入到線程函數裏面,讓其做相應的業務邏輯。
1.3 結果
很不幸,單次請求都沒問題,但做壓力測試時,同時開 20 個線程訪問,程序立即出 core。
線上出 core 的堆棧如下:
02 |
#0 0x000000302af2e2ed in raise () from /lib64/tls/libc.so.6 |
03 |
#1 0x000000302af2fa3e in abort () from /lib64/tls/libc.so.6 |
04 |
#2 0x000000302af62db1 in __libc_message () from /lib64/tls/libc.so.6 |
05 |
#3 0x000000302af6888e in _int_free () from /lib64/tls/libc.so.6 |
06 |
#4 0x000000302af6a12d in _int_realloc () from /lib64/tls/libc.so.6 |
07 |
#5 0x000000302af6b39c in realloc () from /lib64/tls/libc.so.6 |
08 |
#6 0x0000000000dc2269 in sdscatlen (s=Variable "s" is not available. |
10 |
#7 0x0000000000dc1d40 in __redisAppendCommand (c=0x16fa1d0, cmd=Variable "cmd" is not available. |
12 |
#8 0x0000000000dc1d97 in redisvAppendCommand (c=0x16fa1d0, format=Variable "format" is not available. |
14 |
#9 0x0000000000dc1eed in redisvCommand (c=0x16fa1d0, format=Variable "format" is not available. |
16 |
#10 0x0000000000dc1fb6 in redisCommand (c=Variable "c" is not available. |
18 |
#11 0x0000002b1a8e6310 in Default_Handler::get_batch_redis (this=0x1ff41f0, redis_ins=0x175a7d0, dataid=6202, buf_num=12, res_num=6, key_sign=0x2bd67cb3c8, |
19 |
res_lens=0x2bd5f54208, res_buf=0x2bd5f54398 "" ) at default_handler.cpp:659 |
20 |
#12 0x0000002b1a9134df in Default_Ms_Handler::get_digest (this=0x1ff41f0) at default_ms_handler.cpp:646 |
21 |
#13 0x000000000092910c in do_proc () at gss_work.cpp:1107 |
22 |
#14 0x000000000091c91f in thread_main () at gss_net.cpp:188 |
23 |
#15 0x0000000000bc10e9 in default_native () at ubserver_app.cpp:283 |
24 |
#16 0x0000000000bbc676 in eppool_consume (pool=0x2230b90, data=0x22188f0) at eppool.cpp:649 |
25 |
#17 0x0000000000bbc4d1 in _eppool_workers (param=0x22188f0) at eppool.cpp:604 |
26 |
#18 0x000000302b80610a in start_thread () from /lib64/tls/libpthread.so.0 |
27 |
#19 0x000000302afc6003 in clone () from /lib64/tls/libc.so.6 |
28 |
#20 0x0000000000000000 in ?? () |
當時經過多次嘗試,把連接放到了每一個線程中,就不會出 core 了。
二、線下復現
由於不方便公開公司代碼,因此我寫了一段類似的代碼來復現這個 case。
2.1 代碼
代碼主要有 testredis.cpp 和 Makefile(自己指定 hiredis 目錄)。用法是 ./redis -n [num] -h [host] -p [port],其中 n 爲 host 數目,多個 host 用 "-" 進行分隔。
testredis.cpp
001 |
/*************************************************************************** |
003 |
* Copyright (c) 2014 Baidu.com, Inc. All Rights Reserved |
005 |
**************************************************************************/ |
010 |
* @file redistest.cpp |
011 |
* @author liujun05(com@baidu.com) |
012 |
* @date 2014/02/25 10:28:44 |
026 |
#define uint32 unsigned int |
029 |
#define MAX_REDIS_SERVER_CNT 10 |
030 |
#define MAX_REDIS_IPS 1024 |
032 |
typedef struct _redis_conf_t |
035 |
char redis_ips[MAX_REDIS_IPS]; |
036 |
char redis_ip_array[MAX_REDIS_SERVER_CNT][MAX_REDIS_IPS]; |
041 |
typedef struct _redis_data_t |
044 |
redisContext *rc[MAX_REDIS_SERVER_CNT]; |
052 |
printf ( "usage: ./redis -n [num] -h [host] -p [port]\n" ); |
056 |
int main_parse_option( int argc, char **argv) |
059 |
//reset 獲取參數的位置,屢次調用時這個會出現問題 |
060 |
while ((c = getopt(argc, argv, "h:p:n:" )) != -1) |
065 |
sprintf (g_cfg.redis_ips, optarg); |
068 |
g_cfg.redis_port = atoi (optarg); |
071 |
g_cfg.redis_num = atoi (optarg); |
082 |
void * test_thread1( void * data) |
084 |
redis_data* redis_ins = (redis_data*)data; |
086 |
for ( int i=0; i<redis_ins->redis_num; i++) |
088 |
reply = (redisReply *)redisCommand( redis_ins->rc[i] , "SET %s %s" , "foo" , "hello world" ); |
089 |
freeReplyObject(reply); |
095 |
g_data.redis_num = 0; |
096 |
struct timeval timeout = { 1, 500000 }; // 1.5 seconds |
099 |
char *part = strtok_r(g_cfg.redis_ips, "-" , &ptok); |
103 |
strcpy (g_cfg.redis_ip_array[num++], part); |
104 |
part = strtok_r(NULL, "-" , &ptok); |
107 |
if (num != g_cfg.redis_num || num > MAX_REDIS_SERVER_CNT) |
109 |
printf ( "ip num[%d] not equal redis_num[%d] or not vaild\n" , num, g_cfg.redis_num); |
112 |
g_data.redis_num = (num > MAX_REDIS_SERVER_CNT ) ? MAX_REDIS_SERVER_CNT : num; |
115 |
for (i=0; i<g_data.redis_num; i++) |
117 |
g_data.rc[i] = redisConnectWithTimeout( g_cfg.redis_ip_array[i], g_cfg.redis_port , timeout); |
118 |
if ( g_data.rc[i] == NULL || g_data.rc[i]->err) |
120 |
printf ( "content to redis server[%s:%u], error[%s]\n" , |
121 |
g_cfg.redis_ip_array[i], g_cfg.redis_port, g_data.rc[i]->errstr |
129 |
for ( int j=0; j<i; j++) |
131 |
if (g_data.rc[j] != NULL) |
133 |
redisFree(g_data.rc[j]); |
142 |
for ( int j=0; j<g_data.redis_num; j++) |
144 |
if (g_data.rc[j] != NULL) |
146 |
redisFree(g_data.rc[j]); |
151 |
int main( int argc, char ** argv) |
153 |
g_cfg.redis_ips[0] = '\0' ; |
154 |
g_cfg.redis_port = 6379; |
156 |
if ( 0 != main_parse_option(argc, argv) ) |
162 |
if ( 0 == g_cfg.redis_num || g_cfg.redis_num > MAX_REDIS_SERVER_CNT ) |
164 |
printf ( "the reids num[%u] is not vaild\n" , g_cfg.redis_num); |
169 |
int ret = init_data(); |
172 |
printf ( "init num fail\n" ); |
178 |
for ( int i=0; i<100; i++) |
180 |
pthread_create(&t[i], NULL, test_thread1, &g_data); |
183 |
for ( int i=0; i<100; i++) |
185 |
pthread_join(t[i], NULL); |
194 |
/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ |
Makefile
2 |
g++ -g testredis.cpp -I./hiredis -L./hiredis -lhiredis -lpthread -o redis |
2.2 編譯執行
1 |
liujun05@cq01-rdqa-dev012.cq01:~/ test /hiredis$ ./redis -n2 -h10.48.46.26-10.46.175.102 |
2 |
*** glibc detected *** double free or corruption (!prev): 0x000000000050aa80 *** |
可以看到出 core 了,堆棧如下:
02 |
#0 0x000000302af2e2ed in raise () from /lib64/tls/libc.so.6 |
03 |
#1 0x000000302af2fa3e in abort () from /lib64/tls/libc.so.6 |
04 |
#2 0x000000302af62db1 in __libc_message () from /lib64/tls/libc.so.6 |
05 |
#3 0x000000302af6888e in _int_free () from /lib64/tls/libc.so.6 |
06 |
#4 0x000000302af68bd6 in free () from /lib64/tls/libc.so.6 |
07 |
#5 0x0000000000403c75 in redisBufferWrite (c=0x50a010, done=0x571c008c) at hiredis.c:1162 |
08 |
#6 0x0000000000403d3e in redisGetReply (c=0x50a010, reply=0x571c00b8) at hiredis.c:1195 |
09 |
#7 0x0000000000403f62 in redisvCommand (c=0x50a010, format=Variable "format" is not available. |
11 |
#8 0x0000000000404006 in redisCommand (c=Variable "c" is not available. |
13 |
#9 0x00000000004013e7 in test_thread1 (data=0x509ba0) at testredis.cpp:88 |
14 |
#10 0x000000302b80610a in start_thread () from /lib64/tls/libpthread.so.0 |
15 |
#11 0x000000302afc6003 in clone () from /lib64/tls/libc.so.6 |
16 |
#12 0x0000000000000000 in ?? () |
雖然出 core 的位置不一致,但是經過查看代碼,出 core 的原因應該是一致的。
2.3 緣由分析
從堆棧 #5 可以看到是 hiredis.c 的 1162 行出的 core,打開 hiredis.c:
1 |
1160 } else if (nwritten > 0) { |
2 |
1161 if (nwritten == ( signed )sdslen(c->obuf)) { |
4 |
1163 c->obuf = sdsempty(); |
6 |
1165 c->obuf = sdsrange(c->obuf,nwritten,-1); |
可以看到,的確是在 1162 行對 c->obuf 進行了一次 free(sdsfree),導致出 core。
我們分析下調用關係,首先調用 redisCommand:
1 |
1309 void *redisCommand(redisContext *c, const char *format, ...) { |
3 |
1311 void *reply = NULL; |
4 |
1312 va_start (ap,format); |
5 |
1313 reply = redisvCommand(c,format,ap); |
然後調用 redisvCommand:
1 |
1303 void *redisvCommand(redisContext *c, const char *format, va_list ap) { |
2 |
1304 if (redisvAppendCommand(c,format,ap) != REDIS_OK) |
4 |
1306 return __redisBlockForReply(c); |
接着調用 redisvAppendCommand:
01 |
1233 int redisvAppendCommand(redisContext *c, const char *format, va_list ap) { |
05 |
1237 len = redisvFormatCommand(&cmd,format,ap); |
07 |
1239 __redisSetError(c,REDIS_ERR_OOM, "Out of memory" ); |
08 |
1240 return REDIS_ERR; |
11 |
1243 if (__redisAppendCommand(c,cmd,len) != REDIS_OK) { |
13 |
1245 return REDIS_ERR; |
這裏,我們需要關注對 __redisAppendCommand 的調用:
01 |
1220 int __redisAppendCommand(redisContext *c, char *cmd, size_t len) { |
04 |
1223 newbuf = sdscatlen(c->obuf,cmd,len); |
05 |
1224 if (newbuf == NULL) { |
06 |
1225 __redisSetError(c,REDIS_ERR_OOM, "Out of memory" ); |
07 |
1226 return REDIS_ERR; |
10 |
1229 c->obuf = newbuf; |
問題出現了。
對於任意一個線程,它傳入的 redisContext* c 都是同一個,那麼它們也共用同一個 c->obuf。很明顯,這裏的線程數據是耦合的。
當一個線程調用 sdsfree 釋放了 c->obuf,其餘任意一個線程再使用 c->obuf 都會導致出 core。這也是我所說的 hiredis 對多線程支持得不好的地方。
3. 終極解決方案
那麼,如果我一定要在多線程中通過 hiredis 客戶端調用 redis 呢?有沒有方案?答案肯定是有的,只不過性能稍差。
原先的做法是先獲得 hiredis 連接句柄,然後把句柄傳入到多個線程中,讓各線程共用。現在改爲在線程裏面連接並獲得 hiredis 句柄,然後再進行使用。當然,代價是對於每一個請求,都需要去連接 redis 服務器,加大了網絡開銷的同時還加大了 redis 的請求。
redis 是單線程異步模型,hiredis 這個客戶端看來也只支持單線程(同一個 redisContext 不能被多個線程同時使用)。希望後續有 redis 的相關程序猿來改進相應問題;在多線程中使用 hiredis 需要慎重。