mysql優化器在統計全表掃描的代價時的方法

innodb 的彙集索引的葉子結點存放的是索引值以及數據頁的偏移量

那麼在計算全表掃描的代價時是怎麼計算的呢?

咱們知道代價爲 cpu代價 + io代價

cpu代價就是每5條記錄比對計算一個代價(這裏的記錄並非咱們的數據記錄,而是索引記錄)

又是如何取出全表的總記錄呢(即全表的總索引記錄)?

具體方法是:經過索引能拿到葉子結點的page數,page頁默認16K,那麼總容量爲 leaf page num * 16k

再計算這個索引的長度,由於索引多是由多個字段構成,所以要遍歷,假設爲 m

total_records = leaf page num * 16k / m 就是索引記錄個數了,一條聚簇索引記錄對應一條數據記錄,因此這裏是總的記錄數

仍是有問題:這個 leaf page 是數據頁,而 m 是主鍵的長度,上面的 total_records 計算出來的結果並非準確的記錄個數;按理說 m 應爲一條記錄的長度,但代碼裏用的是主鍵(聚簇索引)的最小記錄長度

那麼 cpu cost 就是 total_records/5 + 1

  io cost 就是 (double) prebuilt->table->stat_clustered_index_size (聚簇索引葉頁面數)

/******************************************************************//**
Calculate the time it takes to read a set of ranges through an index.
This enables us to optimise reads for clustered indexes.
@return    estimated time measured in disk seeks */
UNIV_INTERN
double
ha_innobase::read_time(
/*===================*/
    uint    index,    /*!< in: key number */
    uint    ranges,    /*!< in: how many ranges */
    ha_rows rows)    /*!< in: estimated number of rows in the ranges */
{
    /* Only the clustered index gets the special estimate below;
    secondary indexes fall back to the generic handler formula. */
    if (index != table->s->primary_key) {
        return(handler::read_time(index, ranges, rows));
    }

    /* For a handful of rows, assume one disk seek per row. */
    if (rows <= 2) {
        return((double) rows);
    }

    /* Assume that the read time is proportional to the scan time for
    all rows + at most one seek per range. */
    double scan_cost = scan_time();

    /* estimate_rows_upper_bound() yields an upper bound on the total
    number of rows in the table (derived from the clustered index). */
    ha_rows upper_bound = estimate_rows_upper_bound();

    if (upper_bound < rows) {
        /* The requested rows exceed the table estimate: the read is
        effectively a full scan. */
        return(scan_cost);
    }

    /* One seek per range plus the fraction of the scan we touch. */
    return(ranges + (double) rows / (double) upper_bound * scan_cost);
}

/*********************************************************************//**
Gives an UPPER BOUND to the number of rows in a table. This is used in
filesort.cc. The bound is computed as
2 * (clustered-index leaf pages * page size) / minimum record length,
i.e. the maximum number of minimum-size records the leaf pages could hold.
@return    upper bound of rows */
UNIV_INTERN
ha_rows
ha_innobase::estimate_rows_upper_bound(void)
/*======================================*/
{
    dict_index_t*    index;
    ulonglong    estimate;
    ulonglong    local_data_file_length;
    ulint        stat_n_leaf_pages;

    /* BUG FIX: DBUG_RETURN below must be paired with a DBUG_ENTER in
    the same function, otherwise the dbug call-stack tracking is
    corrupted in debug builds. */
    DBUG_ENTER("ha_innobase::estimate_rows_upper_bound");

    /* The first index of an InnoDB table is always the clustered
    index, whose leaf pages contain the full records. */
    index = dict_table_get_first_index(prebuilt->table);

    /* Approximate number of leaf pages in the clustered index tree. */
    stat_n_leaf_pages = index->stat_n_leaf_pages;

    /* Total leaf-level volume: leaf page count * page size (16K by
    default). */
    local_data_file_length = ((ulonglong) stat_n_leaf_pages) * UNIV_PAGE_SIZE;

    /* Calculate a minimum length for a clustered index record and from
    that an upper bound for the number of rows. Since we only calculate
    new statistics in row0mysql.c when a table has grown by a threshold
    factor, we must add a safety factor 2 in front of the formula below. */
    estimate = 2 * local_data_file_length /
                     dict_index_calc_min_rec_len(index);

    DBUG_RETURN((ha_rows) estimate);
}

/*********************************************************************//**
Calculates the minimum record length in an index, summing the per-column
minimum sizes plus the record-format overhead bytes.
@return minimum record length in bytes (always > 0) */
UNIV_INTERN
ulint
dict_index_calc_min_rec_len(
/*========================*/
    const dict_index_t*    index)    /*!< in: index */
{
    ulint    sum    = 0;
    ulint    i;

    /* TRUE for the compact (new-style) record format. The index may
    consist of several columns, so we iterate over all of its fields. */
    ulint    comp    = dict_table_is_comp(index->table);

    if (comp) {
        ulint nullable = 0;
        sum = REC_N_NEW_EXTRA_BYTES;
        for (i = 0; i < dict_index_get_n_fields(index); i++) {
            const dict_col_t*    col
                = dict_index_get_nth_col(index, i);
            ulint    size = dict_col_get_fixed_size(col, comp);
            sum += size;
            if (!size) {
                /* Variable-length column: its length is stored
                in 1 byte if < 128, else in 2 bytes. */
                size = col->len;
                sum += size < 128 ? 1 : 2;
            }
            if (!(col->prtype & DATA_NOT_NULL)) {
                nullable++;
            }
        }

        /* round the NULL flags up to full bytes */
        sum += UT_BITS_IN_BYTES(nullable);

        return(sum);
    }

    /* BUG FIX: the old-style (redundant) record format branch was
    missing, so control fell off the end of a non-void function
    (undefined behavior) for non-compact tables. */
    for (i = 0; i < dict_index_get_n_fields(index); i++) {
        sum += dict_col_get_fixed_size(
            dict_index_get_nth_col(index, i), comp);
    }

    /* In the redundant format each field has an offset stored in
    1 byte if the record is short, else 2 bytes per field. */
    if (sum > 127) {
        sum += 2 * dict_index_get_n_fields(index);
    } else {
        sum += dict_index_get_n_fields(index);
    }

    sum += REC_N_OLD_EXTRA_BYTES;

    return(sum);
}

 

結構體dict_index_t

/** InnoDB B-tree index */
typedef struct dict_index_struct dict_index_t;

/** Data structure for an index.  Most fields will be
initialized to 0, NULL or FALSE in dict_mem_index_create(). */
struct dict_index_struct{
    index_id_t    id;    /*!< id of the index */
    mem_heap_t*    heap;    /*!< memory heap */
    const char*    name;    /*!< index name */
    const char*    table_name;/*!< table name */
    dict_table_t*    table;    /*!< back pointer to table */
#ifndef UNIV_HOTBACKUP
    unsigned    space:32;
                /*!< space where the index tree is placed */
    unsigned    page:32;/*!< index tree root page number */
#endif /* !UNIV_HOTBACKUP */
    unsigned    type:DICT_IT_BITS;
                /*!< index type (DICT_CLUSTERED, DICT_UNIQUE,
                DICT_UNIVERSAL, DICT_IBUF, DICT_CORRUPT) */
#define MAX_KEY_LENGTH_BITS 12
    unsigned    trx_id_offset:MAX_KEY_LENGTH_BITS;
                /*!< position of the trx id column
                in a clustered index record, if the fields
                before it are known to be of a fixed size,
                0 otherwise */
#if (1<<MAX_KEY_LENGTH_BITS) < MAX_KEY_LENGTH
# error (1<<MAX_KEY_LENGTH_BITS) < MAX_KEY_LENGTH
#endif
    unsigned    n_user_defined_cols:10;
                /*!< number of columns the user defined to
                be in the index: in the internal
                representation we add more columns */
    unsigned    n_uniq:10;/*!< number of fields from the beginning
                which are enough to determine an index
                entry uniquely */
    unsigned    n_def:10;/*!< number of fields defined so far */
    unsigned    n_fields:10;/*!< number of fields in the index */
    unsigned    n_nullable:10;/*!< number of nullable fields */
    unsigned    cached:1;/*!< TRUE if the index object is in the
                dictionary cache */
    unsigned    to_be_dropped:1;
                /*!< TRUE if this index is marked to be
                dropped in ha_innobase::prepare_drop_index(),
                otherwise FALSE. Protected by
                dict_sys->mutex, dict_operation_lock and
                index->lock.*/
    dict_field_t*    fields;    /*!< array of field descriptions */
#ifndef UNIV_HOTBACKUP
    UT_LIST_NODE_T(dict_index_t)
            indexes;/*!< list of indexes of the table */
    btr_search_t*    search_info; /*!< info used in optimistic searches */
    /*----------------------*/
    /** Statistics for query optimization */
    /* @{ */
    ib_int64_t*    stat_n_diff_key_vals;
                /*!< approximate number of different
                key values for this index, for each
                n-column prefix where n <=
                dict_get_n_unique(index); we
                periodically calculate new
                estimates */
    ib_int64_t*    stat_n_non_null_key_vals;
                /*!< approximate number of non-null key values
                for this index, for each column where
                n < dict_get_n_unique(index); This
                is used when innodb_stats_method is
                "nulls_ignored". */
    ulint stat_index_size;
                /*!< approximate index size in database pages */
    ulint stat_n_leaf_pages;
                /*!< approximate number of leaf pages in the
                index tree; used by estimate_rows_upper_bound() */
    /* @} */
    rw_lock_t    lock;    /*!< read-write lock protecting the
                upper levels of the index tree */
    trx_id_t    trx_id; /*!< id of the transaction that created this
                index, or 0 if the index existed
                when InnoDB was started up */
#endif /* !UNIV_HOTBACKUP */
#ifdef UNIV_BLOB_DEBUG
    mutex_t        blobs_mutex;
                /*!< mutex protecting blobs */
    void*        blobs;    /*!< map of (page_no,heap_no,field_no)
                to first_blob_page_no; protected by
                blobs_mutex; @see btr_blob_dbg_t */
#endif /* UNIV_BLOB_DEBUG */
#ifdef UNIV_DEBUG
    ulint        magic_n;/*!< magic number */
/** Value of dict_index_struct::magic_n */
# define DICT_INDEX_MAGIC_N    76789786
#endif
};
相關文章
相關標籤/搜索