redis read error on connection和Redis server went away錯誤排查

       最近每小時都會收到幾條redis報警,雖然不是什麼大問題但仍是要排查一下緣由。經過捕獲redis的兩條拋錯信息redis read error on connection和Redis server went away,咱們打開phpredis擴展的源碼找到
static zend_always_inline RedisSock *
redis_sock_get_instance(zval *id TSRMLS_DC, int no_throw)
{
    redis_object *redis;php

    if (Z_TYPE_P(id) == IS_OBJECT) {
#if (PHP_MAJOR_VERSION < 7)
        redis = (redis_object *)zend_objects_get_address(id TSRMLS_CC);
#else
        redis = (redis_object *)((char *)Z_OBJ_P(id) - XtOffsetOf(redis_object, std));
#endif
        if (redis->sock) {
            return redis->sock;
        }
    }
    // Throw an exception unless we've been requested not to
    if (!no_throw) {
        zend_throw_exception(redis_exception_ce, "Redis server went away", 0 TSRMLS_CC);
    }
    return NULL;
}redis

這個方法的判斷邏輯是 redis結構體裏的socket成員變量爲空就會拋出這個錯誤,這個方法被調用的地方不少,須要從頭看起。網絡

首先在php中初始化連接,會調用app

PHP_METHOD(Redis, connect)
{
    if (redis_connect(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0) == FAILURE) {
        RETURN_FALSE;
    } else {
        RETURN_TRUE;
    }
}less

這是connect方法,調用redis_connect並傳入所有參數。socket


PHP_REDIS_API int
redis_connect(INTERNAL_FUNCTION_PARAMETERS, int persistent)
{
    zval *object;
    char *host = NULL, *persistent_id = NULL;
    zend_long port = -1, retry_interval = 0;
    strlen_t host_len, persistent_id_len;
    double timeout = 0.0, read_timeout = 0.0;
    redis_object *redis;oop

#ifdef ZTS
    /* not sure how in threaded mode this works so disabled persistence at
     * first */
    persistent = 0;
#endifui

    if (zend_parse_method_parameters(ZEND_NUM_ARGS() TSRMLS_CC, getThis(),
                                     "Os|ldsld", &object, redis_ce, &host,
                                     &host_len, &port, &timeout, &persistent_id,
                                     &persistent_id_len, &retry_interval,
                                     &read_timeout) == FAILURE)
    {
        return FAILURE;
    } else if (!persistent) {
        persistent_id = NULL;
    }this

    if (timeout < 0L || timeout > INT_MAX) {
        zend_throw_exception(redis_exception_ce,
            "Invalid connect timeout", 0 TSRMLS_CC);
        return FAILURE;
    }spa

    if (read_timeout < 0L || read_timeout > INT_MAX) {
        zend_throw_exception(redis_exception_ce,
            "Invalid read timeout", 0 TSRMLS_CC);
        return FAILURE;
    }

    if (retry_interval < 0L || retry_interval > INT_MAX) {
        zend_throw_exception(redis_exception_ce, "Invalid retry interval",
            0 TSRMLS_CC);
        return FAILURE;
    }

    /* If it's not a unix socket, set to default */
    if(port == -1 && host_len && host[0] != '/') {
        port = 6379;
    }

#if (PHP_MAJOR_VERSION < 7)
    redis = (redis_object *)zend_objects_get_address(object TSRMLS_CC);
#else
    redis = (redis_object *)((char *)Z_OBJ_P(object) - XtOffsetOf(redis_object, std));
#endif
    /* if there is a redis sock already we have to remove it */
    if (redis->sock) {
        redis_sock_disconnect(redis->sock TSRMLS_CC);
        redis_free_socket(redis->sock);
    }

    redis->sock = redis_sock_create(host, host_len, port, timeout, read_timeout, persistent,
        persistent_id, retry_interval, 0);

    if (redis_sock_server_open(redis->sock TSRMLS_CC) < 0) {
        redis_free_socket(redis->sock);
        redis->sock = NULL;
        return FAILURE;
    }

    return SUCCESS;
}

redis建立了一個socket連接並將資源句柄存入redis->socket,下面是socket建立socket過程
/**
 * redis_sock_create
 */
PHP_REDIS_API RedisSock*
redis_sock_create(char *host, int host_len, unsigned short port,
                  double timeout, double read_timeout,
                  int persistent, char *persistent_id,
                  long retry_interval, zend_bool lazy_connect)
{
    RedisSock *redis_sock;

    redis_sock         = ecalloc(1, sizeof(RedisSock));
    redis_sock->host   = estrndup(host, host_len);
    redis_sock->stream = NULL;
    redis_sock->status = REDIS_SOCK_STATUS_DISCONNECTED;
    redis_sock->watching = 0;
    redis_sock->dbNumber = 0;
    redis_sock->retry_interval = retry_interval * 1000;
    redis_sock->persistent = persistent;
    redis_sock->lazy_connect = lazy_connect;
    redis_sock->persistent_id = NULL;

    if(persistent_id) {
        redis_sock->persistent_id = estrdup(persistent_id);
    }

    redis_sock->port    = port;
    redis_sock->timeout = timeout;
    redis_sock->read_timeout = read_timeout;

    redis_sock->serializer = REDIS_SERIALIZER_NONE;
    redis_sock->mode = ATOMIC;
    redis_sock->head = NULL;
    redis_sock->current = NULL;

    redis_sock->pipeline_cmd = NULL;
    redis_sock->pipeline_len = 0;

    redis_sock->err = NULL;
    redis_sock->err_len = 0;

    redis_sock->scan = REDIS_SCAN_NORETRY;

    redis_sock->readonly = 0;

    return redis_sock;
}

 

若是在php中使用mget會調用


/* {{{ proto array Redis::getMultiple(array keys)
 */
PHP_METHOD(Redis, getMultiple)
{
    zval *object, *z_args, *z_ele;
    HashTable *hash;
    RedisSock *redis_sock;
    smart_string cmd = {0};
    int arg_count;

    /* Make sure we have proper arguments */
    if(zend_parse_method_parameters(ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "Oa",
                                    &object, redis_ce, &z_args) == FAILURE) {
        RETURN_FALSE;
    }

    /* We'll need the socket */
    if ((redis_sock = redis_sock_get(object TSRMLS_CC, 0)) == NULL) {
        RETURN_FALSE;
    }

    /* Grab our array */
    hash = Z_ARRVAL_P(z_args);

    /* We don't need to do anything if there aren't any keys */
    if((arg_count = zend_hash_num_elements(hash)) == 0) {
        RETURN_FALSE;
    }

    /* Build our command header */
    redis_cmd_init_sstr(&cmd, arg_count, "MGET", 4);

    /* Iterate through and grab our keys */
    ZEND_HASH_FOREACH_VAL(hash, z_ele) {
        zend_string *zstr = zval_get_string(z_ele);
        redis_cmd_append_sstr_key(&cmd, zstr->val, zstr->len, redis_sock, NULL);
        zend_string_release(zstr);
    } ZEND_HASH_FOREACH_END();

    /* Kick off our command */
    REDIS_PROCESS_REQUEST(redis_sock, cmd.c, cmd.len);
    IF_ATOMIC() {
        if(redis_sock_read_multibulk_reply(INTERNAL_FUNCTION_PARAM_PASSTHRU,
                                           redis_sock, NULL, NULL) < 0) {
            RETURN_FALSE;
        }
    }
    REDIS_PROCESS_RESPONSE(redis_sock_read_multibulk_reply);
}
 

若是redis_sock_get爲空會返回false,再看下redis_sock_get


PHP_REDIS_API RedisSock *
redis_sock_get(zval *id TSRMLS_DC, int no_throw)
{
    RedisSock *redis_sock;

    if ((redis_sock = redis_sock_get_instance(id TSRMLS_CC, no_throw)) == NULL) {
        return NULL;
    }

    if (redis_sock->lazy_connect) {
        redis_sock->lazy_connect = 0;
        if (redis_sock_server_open(redis_sock TSRMLS_CC) < 0) {
            return NULL;
        }
    }

    return redis_sock;
}
 

結果發現redis_sock_get_instance就是文章開始拋錯的方法,是什麼緣由致使了redis結構體存儲的socket丟失了呢,若是剛存入socket就丟失顯然是不符合邏輯的,極可能是在傳輸過程當中丟失的,咱們看下PHP_METHOD(Redis, getMultiple)中的redis_sock_read_multibulk_reply方法:


/**
 * redis_sock_read_multibulk_reply
 */
PHP_REDIS_API int redis_sock_read_multibulk_reply(INTERNAL_FUNCTION_PARAMETERS,
                                           RedisSock *redis_sock, zval *z_tab,
                                           void *ctx)
{
    char inbuf[4096];
    int numElems;
    size_t len;

    if (redis_sock_gets(redis_sock, inbuf, sizeof(inbuf) - 1, &len TSRMLS_CC) < 0) {
        return -1;
    }

    if(inbuf[0] != '*') {
        IF_NOT_ATOMIC() {
            add_next_index_bool(z_tab, 0);
        } else {
            if (inbuf[0] == '-') {
                redis_sock_set_err(redis_sock, inbuf+1, len);
            }
            RETVAL_FALSE;
        }
        return -1;
    }
    numElems = atoi(inbuf+1);
    zval zv, *z_multi_result = &zv;
#if (PHP_MAJOR_VERSION < 7)
    MAKE_STD_ZVAL(z_multi_result);
#endif
    array_init(z_multi_result); /* pre-allocate array for multi's results. */

    redis_mbulk_reply_loop(INTERNAL_FUNCTION_PARAM_PASSTHRU, redis_sock,
        z_multi_result, numElems, UNSERIALIZE_ALL);

    IF_NOT_ATOMIC() {
        add_next_index_zval(z_tab, z_multi_result);
    } else {
        RETVAL_ZVAL(z_multi_result, 0, 1);
    }
    /*zval_copy_ctor(return_value); */
    return 0;
}

再看下redis_sock_gets方法:


PHP_REDIS_API int
redis_sock_gets(RedisSock *redis_sock, char *buf, int buf_size,
                size_t *line_size TSRMLS_DC)
{
    // Handle EOF
    if(-1 == redis_check_eof(redis_sock, 0 TSRMLS_CC)) {
        return -1;
    }

    if(php_stream_get_line(redis_sock->stream, buf, buf_size, line_size)
                           == NULL)
    {
        // Close, put our socket state into error
        REDIS_STREAM_CLOSE_MARK_FAILED(redis_sock);

        // Throw a read error exception
        zend_throw_exception(redis_exception_ce, "read error on connection",
            0 TSRMLS_CC);
        return -1;
    }

    /* We don't need \r\n */
    *line_size-=2;
    buf[*line_size]='\0';

    /* Success! */
    return 0;
}
 

若是php_stream_get_line讀取stream數據爲NUll的時候就會拋出read error on connection這個錯誤。咱們在php源碼裏找到php_stream_get_line方法:


/* If buf == NULL, the buffer will be allocated automatically and will be of an
 * appropriate length to hold the line, regardless of the line length, memory
 * permitting */
PHPAPI char *_php_stream_get_line(php_stream *stream, char *buf, size_t maxlen,
        size_t *returned_len)
{
    size_t avail = 0;
    size_t current_buf_size = 0;
    size_t total_copied = 0;
    int grow_mode = 0;
    char *bufstart = buf;

    if (buf == NULL) {
        grow_mode = 1;
    } else if (maxlen == 0) {
        return NULL;
    }

    /*
     * If the underlying stream operations block when no new data is readable,
     * we need to take extra precautions.
     *
     * If there is buffered data available, we check for a EOL. If it exists,
     * we pass the data immediately back to the caller. This saves a call
     * to the read implementation and will not block where blocking
     * is not necessary at all.
     *
     * If the stream buffer contains more data than the caller requested,
     * we can also avoid that costly step and simply return that data.
     */

    for (;;) {
        avail = stream->writepos - stream->readpos;

        if (avail > 0) {
            size_t cpysz = 0;
            char *readptr;
            const char *eol;
            int done = 0;

            readptr = (char*)stream->readbuf + stream->readpos;
            eol = php_stream_locate_eol(stream, NULL);

            if (eol) {
                cpysz = eol - readptr + 1;
                done = 1;
            } else {
                cpysz = avail;
            }

            if (grow_mode) {
                /* allow room for a NUL. If this realloc is really a realloc
                 * (ie: second time around), we get an extra byte. In most
                 * cases, with the default chunk size of 8K, we will only
                 * incur that overhead once.  When people have lines longer
                 * than 8K, we waste 1 byte per additional 8K or so.
                 * That seems acceptable to me, to avoid making this code
                 * hard to follow */
                bufstart = erealloc(bufstart, current_buf_size + cpysz + 1);
                current_buf_size += cpysz + 1;
                buf = bufstart + total_copied;
            } else {
                if (cpysz >= maxlen - 1) {
                    cpysz = maxlen - 1;
                    done = 1;
                }
            }

            memcpy(buf, readptr, cpysz);

            stream->position += cpysz;
            stream->readpos += cpysz;
            buf += cpysz;
            maxlen -= cpysz;
            total_copied += cpysz;

            if (done) {
                break;
            }
        } else if (stream->eof) {
            break;
        } else {
            /* XXX: Should be fine to always read chunk_size */
            size_t toread;

            if (grow_mode) {
                toread = stream->chunk_size;
            } else {
                toread = maxlen - 1;
                if (toread > stream->chunk_size) {
                    toread = stream->chunk_size;
                }
            }

            php_stream_fill_read_buffer(stream, toread);

            if (stream->writepos - stream->readpos == 0) {
                break;
            }
        }
    }

    if (total_copied == 0) {
        if (grow_mode) {
            assert(bufstart == NULL);
        }
        return NULL;
    }

    buf[0] = '\0';
    if (returned_len) {
        *returned_len = total_copied;
    }

    return bufstart;
}
只有bufstart=NULL的時候纔會返回NULL,bufstart=NULL說明並未在buf緩衝和stream中接收到任何數據,包括終止符,只能推測在接收過程當中網絡斷開或者其餘緣由致使的數據未收到,可是經過網絡監測未發現網絡斷開問題,最終在redis_sock_create方法中發現有redis_sock->read_timeout = read_timeout;設置,最終定位是接收數據的超時致使php_stream_get_line爲空,默認的超時時間是4秒,超時而後會調用 REDIS_STREAM_CLOSE_MARK_FAILED(redis_sock)方法,在頭文件找到

這個方法就會關閉socket連接並置空,再執行其餘命令調用redis_sock_get_instance時候,判斷soket爲空就會拋出Redis server went away的錯誤。

定位到緣由後,對php程序排查後發如今某些特定狀況下使用mget取出的數據太大,超過500M,網卡帶寬的限制下傳輸超過4秒是很正常的事情,比較好的解決方法就是調整php代碼或者在redis連接的時候設置延長read_timeout時間。咱們再看下redis_connect接收的參數

在php中redis連接的時候能夠這麼設置$redis->connect($host, $port, $timeout, $persistent_id, $read_timeout);$read_timeout單位是秒。

轉載請註明出處:https://my.oschina.net/u/554660/blog/1358156

相關文章
相關標籤/搜索