xhprof 源碼分析

XHProf 簡要概念

  • 重新封裝zend的原生方法

  • 如果要檢測CPU的話,會有5ms的延遲,因爲需要計算cpu頻率

  • 內部使用了鏈表數組

  • 源碼地址:/root/Downloads/xhprof/extension/xhprof.c

最重要的兩個結構體

/* Xhprof's global state.
 *
 * This structure is instantiated once.  Defaults for its attributes are
 * initialized in hp_init_profiler_state(); they are cleaned up / freed in
 * hp_clean_profiler_state(). */
typedef struct hp_global_t {

  /*       ----------   Global attributes:  -----------       */

  /* Indicates if xhprof is currently enabled */
  int              enabled;

  /* Indicates if xhprof was ever enabled during this request */
  int              ever_enabled;

  /* Holds all the xhprof statistics */
  zval            *stats_count;

  /* Indicates the current xhprof mode or level */
  int              profiler_level;

  /* Top of the profile stack */
  hp_entry_t      *entries;

  /* freelist of hp_entry_t chunks for reuse... */
  hp_entry_t      *entry_free_list;

  /* Callbacks for various xhprof modes (init, exit, begin-function and
   * end-function hooks) */
  hp_mode_cb       mode_cb;

  /*       ----------   Mode specific attributes:  -----------       */

  /* Global to track the time of the last sample in time and ticks */
  struct timeval   last_sample_time;
  uint64           last_sample_tsc;
  /* XHPROF_SAMPLING_INTERVAL in ticks */
  uint64           sampling_interval_tsc;

  /* This array is used to store cpu frequencies for all available logical
   * cpus.  For now, we assume the cpu frequencies will not change for power
   * saving or other reasons. If we need to worry about that in the future, we
   * can use a periodical timer to re-calculate this array every once in a
   * while (for example, every 1 or 5 seconds). */
  double *cpu_frequencies;

  /* The number of logical CPUs this machine has. */
  uint32 cpu_num;

  /* The saved cpu affinity. */
  cpu_set_t prev_mask;

  /* The cpu id current process is bound to. (default 0) */
  uint32 cur_cpu_id;

  /* XHProf flags */
  uint32 xhprof_flags;

  /* counter table indexed by hash value of function names. */
  uint8  func_hash_counters[256];

  /* Table of function names to be ignored while profiling, and their filter */
  char  **ignored_function_names;
  uint8   ignored_function_filter[XHPROF_IGNORED_FUNCTION_FILTER_SIZE];

} hp_global_t;
/* One entry of the profile stack: the bookkeeping recorded when a profiled
 * function begins.  Entries are chained through prev_hprof, each one pointing
 * at the entry that was on top of the stack before it. */
typedef struct hp_entry_t {
  char                   *name_hprof;                       /* function name */
  int                     rlvl_hprof;        /* recursion level for function */
  uint64                  tsc_start;         /* start value for TSC counter  */
  long int                mu_start_hprof;                    /* memory usage */
  long int                pmu_start_hprof;              /* peak memory usage */
  struct rusage           ru_start_hprof;             /* user/sys time start */
  struct hp_entry_t      *prev_hprof;    /* ptr to prev entry being profiled */
  uint8                   hash_code;     /* hash_code for the function name  */
} hp_entry_t;

XHProf 在php中的使用

咱們先看下XHProf的使用方法

<?php
// Turn on xhprof profiling.
// Variant that also collects CPU and memory data:
//xhprof_enable(XHPROF_FLAGS_CPU | XHPROF_FLAGS_MEMORY);
xhprof_enable();
// Run a function; this goes through zend_execute
test();
// Compile and run a string as PHP code; this goes through zend_compile_string
eval('test();');
// Stop profiling and fetch the collected data
$data = xhprof_disable();

include './xhprof_lib/utils/xhprof_lib.php';
include './xhprof_lib/utils/xhprof_runs.php';
$xhprof_runs = new XHProfRuns_Default();
$run_id = $xhprof_runs->save_run($data,'test');
// Print the link to the visualisation page directly, for easier debugging
echo "<a href='http://192.168.186.136/xhprof_html/index.php?run=".$run_id."&source=test'>test</a>";


/**
 * Sample workload for the profiler: builds the integers 0..10000 with
 * range() and walks over them without doing any work per element.
 */
function test() {
        foreach (range(0, 10000) as $value) {
                // intentionally empty
        }
}

執行結果如下:(可以直接跳過結果,看下面,但是要記住有ct、wt這兩個值)

array(7) {
  ["test==>range"]=>
  array(2) {
    ["ct"]=>
    int(2)
    ["wt"]=>
    int(4463)
  }
  ["main()==>test"]=>
  array(2) {
    ["ct"]=>
    int(1)
    ["wt"]=>
    int(3069)
  }
  ["main()==>eval::/var/www/html/index2.php(9) : eval()'d code"]=>
  array(2) {
    ["ct"]=>
    int(1)
    ["wt"]=>
    int(16)
  }
  ["eval==>test"]=>
  array(2) {
    ["ct"]=>
    int(1)
    ["wt"]=>
    int(2614)
  }
  ["main()==>eval"]=>
  array(2) {
    ["ct"]=>
    int(1)
    ["wt"]=>
    int(2617)
  }
  ["main()==>xhprof_disable"]=>
  array(2) {
    ["ct"]=>
    int(1)
    ["wt"]=>
    int(0)
  }
  ["main()"]=>
  array(2) {
    ["ct"]=>
    int(1)
    ["wt"]=>
    int(5716)
  }
}

XHProf 源碼

xhprof_enable()

首先咱們來看xhprof_enable(),這個方法定義了要接受的兩個可選參數(xhprof_flags 和 optional_array),而且將這兩個參數分別傳遞給兩個方法使用,其中最重要的是hp_begin()函數

/**
 * PHP-visible entry point: start XHProf profiling in hierarchical mode.
 *
 * Up to two optional arguments are accepted (parameter spec "|lz"):
 *   - a long carrying the XHProf flags (0 when omitted)
 *   - a zval that is handed to hp_get_ignored_functions_from_arg(), which
 *     picks up the functions to be ignored; see
 *     http://php.net/manual/zh/function.xhprof-enable.php
 *
 * When argument parsing fails nothing is enabled.  For background on
 * TSRMLS_CC see http://www.laruence.com/2008/08/03/201.html
 *
 * @param  long $flags  flags for hierarchical mode
 * @return void
 * @author kannan
 */
PHP_FUNCTION(xhprof_enable) {
  zval *options = NULL;             /* optional array arg: for future use */
  long  flags   = 0;                /* XHProf flags */

  if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC,
                            "|lz", &flags, &options) != FAILURE) {
    /* Collect the functions to be ignored from the optional argument. */
    hp_get_ignored_functions_from_arg(options);

    hp_begin(XHPROF_MODE_HIERARCHICAL, flags TSRMLS_CC);
  }
}

hp_begin()

這個方法看起來很長,可是實際上邏輯很簡單,主要是進行了一些初始化。
下面一共進行了四次replace,用來封裝zend的方法。

下面這四個重新替換封裝非常重要,具體的方法作用已經在下面的代碼註釋中寫明瞭。

  1. zend_compile_file => hp_compile_file

  2. zend_compile_string => hp_compile_string

  3. zend_execute => hp_execute

  4. zend_execute_internal => hp_execute_internal

/**
 * This function gets called once when xhprof gets enabled.
 * It replaces all the functions like zend_execute, zend_execute_internal,
 * etc that needs to be instrumented with their corresponding proxies; the
 * original function pointers are saved in the underscore-prefixed variables.
 *
 * xhprof_enable() calls it as:
 *   hp_begin(XHPROF_MODE_HIERARCHICAL, xhprof_flags TSRMLS_CC);
 *
 * @param level         profiling mode (an XHPROF_MODE_* value)
 * @param xhprof_flags  XHPROF_FLAGS_* bits selecting what gets collected
 */
static void hp_begin(long level, long xhprof_flags TSRMLS_DC) {
  /*
    Everything below runs only while xhprof is not already enabled, which is
    tracked through hp_globals.enabled.
   */
  if (!hp_globals.enabled) {
    int hp_profile_flag = 1;

    hp_globals.enabled      = 1; /* mark as enabled so that a repeated call skips this block */
    hp_globals.xhprof_flags = (uint32)xhprof_flags; /* stored as uint32 */

    /*
      Four Zend hooks are replaced below so that xhprof can wrap them
      (descriptions are taken from the linked references):

      1. zend_compile_file => hp_compile_file
      zend_compile_file compiles the script file to be run into op codes,
      which are then handed to zend_execute to produce the script's result.
      http://www.laruence.com/2008/08/14/250.html

      2. zend_compile_string => hp_compile_string
      Compiles a string of PHP code into opcodes.
      http://www.phpchina.com/portal.php?mod=view&aid=40347

      3. zend_execute => hp_execute
      zend_compile_file() zend_compile_file() is the wrapper for the lexer, parser, and code generator. It compiles a file and returns a zend_op_array.
      zend_execute() After a file is compiled, its zend_op_array is executed by zend_execute(). 
      http://php.find-info.ru/php/016/ch23lev1sec2.html

      4. zend_execute_internal => hp_execute_internal
      There is also a companion zend_execute_internal() function, which executes internal functions.

     */

    /* Replace zend_compile with our proxy: the original pointer is backed up
     * under the same name with a leading underscore, then hp_compile_file
     * takes its place */
    _zend_compile_file = zend_compile_file;
    zend_compile_file  = hp_compile_file;

    /* Replace zend_compile_string with our proxy */
    _zend_compile_string = zend_compile_string;
    zend_compile_string = hp_compile_string;

    /* Replace zend_execute with our proxy (zend_execute_ex from PHP 5.5 on) */
#if PHP_VERSION_ID < 50500
    _zend_execute = zend_execute;
    zend_execute  = hp_execute;
#else
    _zend_execute_ex = zend_execute_ex;
    zend_execute_ex  = hp_execute_ex;
#endif

    /* Replace zend_execute_internal with our proxy */
    _zend_execute_internal = zend_execute_internal;
    /* 
      XHPROF_FLAGS_NO_BUILTINS marks that builtin functions should not be
      profiled; the bitwise & below tests whether the caller's flags
      include it.

      The flags described in the PHP manual are:
      1. XHPROF_FLAGS_NO_BUILTINS (integer) skip all builtin (internal) functions.
      2. XHPROF_FLAGS_CPU (integer) add CPU data to the profiling output.
      3. XHPROF_FLAGS_MEMORY (integer) add memory data to the profiling output.
    */
    if (!(hp_globals.xhprof_flags & XHPROF_FLAGS_NO_BUILTINS)) {
      /* if NO_BUILTINS is not set (i.e. user wants to profile builtins),
       * then we intercept internal (builtin) function calls.
       */
      zend_execute_internal = hp_execute_internal;
    }

    /* Initialize with the dummy mode first Having these dummy callbacks saves
     * us from checking if any of the callbacks are NULL everywhere. */
    hp_globals.mode_cb.init_cb     = hp_mode_dummy_init_cb;
    hp_globals.mode_cb.exit_cb     = hp_mode_dummy_exit_cb;
    hp_globals.mode_cb.begin_fn_cb = hp_mode_dummy_beginfn_cb;
    hp_globals.mode_cb.end_fn_cb   = hp_mode_dummy_endfn_cb;

    /* Register the appropriate callback functions Override just a subset of
     * all the callbacks is OK. Any other level keeps the dummy callbacks. */
    switch(level) {
      /* the mode requested by xhprof_enable() */
      case XHPROF_MODE_HIERARCHICAL:
        hp_globals.mode_cb.begin_fn_cb = hp_mode_hier_beginfn_cb;
        hp_globals.mode_cb.end_fn_cb   = hp_mode_hier_endfn_cb;
        break;
      case XHPROF_MODE_SAMPLED:
        hp_globals.mode_cb.init_cb     = hp_mode_sampled_init_cb;
        hp_globals.mode_cb.begin_fn_cb = hp_mode_sampled_beginfn_cb;
        hp_globals.mode_cb.end_fn_cb   = hp_mode_sampled_endfn_cb;
        break;
    }

    /* one time initializations: stats array, cpu frequencies and affinity,
     * the mode's init_cb and the ignored-function filter */
    hp_init_profiler_state(level TSRMLS_CC);

    /* start profiling from fictitious main() */
    BEGIN_PROFILING(&hp_globals.entries, ROOT_SYMBOL, hp_profile_flag);
  }
}

hp_init_profiler_state()

/**
 * Initialize profiler state
 *
 * Called from hp_begin(); when profiling is started through xhprof_enable()
 * the level passed in is XHPROF_MODE_HIERARCHICAL.
 *
 * @param level  profiling mode, stored in hp_globals.profiler_level
 * @author kannan, veeve
 */
void hp_init_profiler_state(int level TSRMLS_DC) {
  /* Setup globals */
  if (!hp_globals.ever_enabled) {
    /* first time xhprof is enabled: remember that it has been */
    hp_globals.ever_enabled  = 1;
    /* start with an empty profile stack */
    hp_globals.entries = NULL;
  }
  /* remember the profiling mode */
  hp_globals.profiler_level  = (int) level;

  /* Init stats_count */
  if (hp_globals.stats_count) {
    /* drop the stats left over from a previous run: zval_dtor() ... */
    zval_dtor(hp_globals.stats_count);
    /* ... followed by FREE_ZVAL() on the zval itself */
    FREE_ZVAL(hp_globals.stats_count);
  }
  /* create a new zval and initialize it as an array;
   * see http://www.cunmou.com/phpbook/8.3.md */
  MAKE_STD_ZVAL(hp_globals.stats_count);
  array_init(hp_globals.stats_count);

  /* NOTE(cjiang): some fields such as cpu_frequencies take relatively longer
   * to initialize, (5 milisecond per logical cpu right now), therefore we
   * calculate them lazily: only while cpu_frequencies is still NULL. */
  if (hp_globals.cpu_frequencies == NULL) {
    get_all_cpu_frequencies();
    restore_cpu_affinity(&hp_globals.prev_mask);
  }

  /* bind to a random cpu so that we can use rdtsc instruction.
   * NOTE(review): assumes hp_globals.cpu_num is non-zero at this point,
   * otherwise the modulo divides by zero - confirm. */
  bind_to_cpu((int) (rand() % hp_globals.cpu_num));

  /* Call current mode's init cb (set up in hp_begin()) */
  hp_globals.mode_cb.init_cb(TSRMLS_C);

  /* Set up filter of functions which may be ignored during profiling */
  hp_ignored_functions_filter_init();
}

get_cpu_frequency()

在上面的方法中調用了一個get_all_cpu_frequencies(),這個方法內部調用了一個get_cpu_frequency頗有意思,由於這個方法將致使若是開啓CPU的檢測,那麼會有5ms的延遲

/**
 * This is a microbenchmark to get cpu frequency the process is running on. The
 * returned value is used to convert TSC counter values to microseconds.
 *
 * The wall clock (gettimeofday) and the cycle counter (cycle_timer) are both
 * sampled before and after a 5 millisecond sleep; the result is the number of
 * cycles counted divided by the number of microseconds elapsed.
 *
 * @return double  cycles per microsecond, or 0.0 when gettimeofday() fails or
 *                 when the measured interval is not positive (for example
 *                 because the wall clock was adjusted during the sleep).
 * @author cjiang
 */
static double get_cpu_frequency() {
  struct timeval start;
  struct timeval end;
  uint64 tsc_start;
  uint64 tsc_end;
  double elapsed_us;

  /* Wall-clock time at the start of the measurement. */
  if (gettimeofday(&start, 0)) {
    perror("gettimeofday");
    return 0.0;
  }
  tsc_start = cycle_timer();
  /* Sleep for 5 milliseconds. Compared with gettimeofday's few microseconds
   * of execution time, this should be enough. */
  usleep(5000);
  if (gettimeofday(&end, 0)) {
    perror("gettimeofday");
    return 0.0;
  }
  tsc_end = cycle_timer();

  /* gettimeofday() is not monotonic, so the interval can come out as zero or
   * negative.  Dividing by it would give an infinite or negative frequency;
   * report it with the same 0.0 value used for the other failures. */
  elapsed_us = (double) get_us_interval(&start, &end);
  if (elapsed_us <= 0.0) {
    return 0.0;
  }

  /* cycles counted divided by microseconds elapsed */
  return (double) (tsc_end - tsc_start) / elapsed_us;
}

BEGIN_PROFILING 重要!

這個就是分析的邏輯,它的要點在於生成了一個單向鏈表。

/*
 * Start profiling - called just before calling the actual function
 * 開始分析,只在正式方法調用以前要調用
 * NOTE:  PLEASE MAKE SURE TSRMLS_CC IS AVAILABLE IN THE CONTEXT
 *        OF THE FUNCTION WHERE THIS MACRO IS CALLED.
 *        TSRMLS_CC CAN BE MADE AVAILABLE VIA TSRMLS_DC IN THE
 *        CALLING FUNCTION OR BY CALLING TSRMLS_FETCH()
 *        TSRMLS_FETCH() IS RELATIVELY EXPENSIVE.
 * entries 這裏傳遞進來的是hp_entry_t的一個指向指針的地址
 * 這個地方實際上生成的是一個單鏈表,都是用prev_hprof 來進行關聯
 *
 * 這裏do while(0) 是用來封裝宏的
 * 
 */
#define BEGIN_PROFILING(entries, symbol, profile_curr)                  \
  do {                                                                  \
    /* Use a hash code to filter most of the string comparisons. */     \
    uint8 hash_code  = hp_inline_hash(symbol);                          \
    /* 判斷這個方法是不是須要忽略的方法,若是不是須要被忽略的,那麼進行分析 */     \
    profile_curr = !hp_ignore_entry(hash_code, symbol);                 \
    if (profile_curr) {                                                 \
      /* 返回一個指針(地址),開闢了一個內存空間給cur_entry,包括了hash_code、方法名稱、堆棧指針 */     \
      hp_entry_t *cur_entry = hp_fast_alloc_hprof_entry();              \
      (cur_entry)->hash_code = hash_code;                               \
      (cur_entry)->name_hprof = symbol;                                 \
      /* 這裏的*entries 指向的是指針hp_global_t.entires 堆棧的首地址  */     \
      (cur_entry)->prev_hprof = (*(entries));                           \
      /* Call the universal callback*/                                 \
      hp_mode_common_beginfn((entries), (cur_entry) TSRMLS_CC);         \
      /* Call the mode's beginfn callback 這個方法除卻cpu和mem 只是設置了tsc_Start */                            \
      hp_globals.mode_cb.begin_fn_cb((entries), (cur_entry) TSRMLS_CC); \
      /* Update entries linked list */                                  \
      (*(entries)) = (cur_entry);                                       \
    }                                                                   \
  } while (0)

咱們可以看到上面的鏈表在生成的過程中,調用了 hp_globals.mode_cb.begin_fn_cb方法。咱們這裏不考慮CPU和內存,那麼發現給每個current設置了一個tsc的起始時鐘週期。

/**
 * Begin-function callback used in XHPROF_MODE_HIERARCHICAL.
 *
 * Stamps the new entry with the current cycle counter and, when the matching
 * XHPROF_FLAGS_* bit is set in hp_globals.xhprof_flags, with the resource
 * usage and the memory figures at the start of the call.
 *
 * @param entries  profile stack (not used by this callback)
 * @param current  entry for the function that is about to run
 * @author kannan
 */
void hp_mode_hier_beginfn_cb(hp_entry_t **entries,
                             hp_entry_t  *current  TSRMLS_DC) {
  /* Starting value of the TSC counter */
  current->tsc_start = cycle_timer();

  /* CPU accounting requested: snapshot the process resource usage */
  if ((hp_globals.xhprof_flags & XHPROF_FLAGS_CPU) != 0) {
    getrusage(RUSAGE_SELF, &current->ru_start_hprof);
  }

  /* Memory accounting requested: snapshot current and peak usage */
  if ((hp_globals.xhprof_flags & XHPROF_FLAGS_MEMORY) != 0) {
    current->mu_start_hprof  = zend_memory_usage(0 TSRMLS_CC);
    current->pmu_start_hprof = zend_memory_peak_usage(0 TSRMLS_CC);
  }
}

hp_execute 代碼執行部分

每次有代碼執行的時候,都會走這個地方,這段代碼主要是在執行zend_execute的前後,分別調用了BEGIN_PROFILING和END_PROFILING

/**
 * Proxy for the Zend executor, installed by hp_begin() in place of
 * zend_execute (PHP < 5.5) or zend_execute_ex (PHP >= 5.5).
 *
 * Runs the saved original executor and, whenever a function name can be
 * obtained for the op array, brackets that run with BEGIN_PROFILING /
 * END_PROFILING.
 */
#if PHP_VERSION_ID < 50500
ZEND_DLEXPORT void hp_execute (zend_op_array *ops TSRMLS_DC) {
#else
ZEND_DLEXPORT void hp_execute_ex (zend_execute_data *execute_data TSRMLS_DC) {
  zend_op_array *ops = execute_data->op_array;
#endif
  char          *func = NULL;
  int hp_profile_flag = 1;

  func = hp_get_function_name(ops TSRMLS_CC);
  if (!func) {
    /* No name available: run the original executor without profiling */
#if PHP_VERSION_ID < 50500
    _zend_execute(ops TSRMLS_CC);
#else
    _zend_execute_ex(execute_data TSRMLS_CC);
#endif
    return;
  }

  /* Push an entry for func (unless it is ignored), run the original
   * executor, then close the entry again */
  BEGIN_PROFILING(&hp_globals.entries, func, hp_profile_flag);
#if PHP_VERSION_ID < 50500
  _zend_execute(ops TSRMLS_CC);
#else
  _zend_execute_ex(execute_data TSRMLS_CC);
#endif
  /* Only end profiling while the profile stack is non-empty.
   * NOTE(review): presumably the stack can have been emptied during the
   * call (e.g. profiling was stopped) - confirm. */
  if (hp_globals.entries) {
    END_PROFILING(&hp_globals.entries, hp_profile_flag);
  }
  /* func is released here, so hp_get_function_name() presumably returns an
   * emalloc'ed string - confirm. */
  efree(func);
}

END_PROFILING

hp_globals.mode_cb.end_fn_cb((entries) TSRMLS_CC);

這段代碼最終指向了hp_mode_hier_endfn_cb,這段代碼中主要構成了一個'==>'數據格式,而且計算了每一個方法的調用次數。

/**
 * End-function callback used in XHPROF_MODE_HIERARCHICAL.
 *
 * Lets hp_mode_shared_endfn_cb() update the common counters for the entry on
 * top of the stack, then adds the "cpu", "mu" and "pmu" deltas when the
 * matching XHPROF_FLAGS_* bits are set in hp_globals.xhprof_flags.
 *
 * @param entries  address of the pointer to the top of the profile stack
 */
void hp_mode_hier_endfn_cb(hp_entry_t **entries  TSRMLS_DC) {
  /* Entry being closed: the current top of the profile stack */
  hp_entry_t   *top = (*entries);
  char          symbol[SCRATCH_BUF_LEN];
  zval         *counts;

  /* Build the name used as the stats key, then get its stat array */
  hp_get_function_stack(top, 2, symbol, sizeof(symbol));
  counts = hp_mode_shared_endfn_cb(top, symbol  TSRMLS_CC);
  if (!counts) {
    return;
  }

  if (hp_globals.xhprof_flags & XHPROF_FLAGS_CPU) {
    struct rusage ru_end;

    /* CPU usage at the end of the call */
    getrusage(RUSAGE_SELF, &ru_end);

    /* Add user time plus system time spent since the entry began */
    hp_inc_count(counts, "cpu", (get_us_interval(&(top->ru_start_hprof.ru_utime),
                                              &(ru_end.ru_utime)) +
                              get_us_interval(&(top->ru_start_hprof.ru_stime),
                                              &(ru_end.ru_stime)))
              TSRMLS_CC);
  }

  if (hp_globals.xhprof_flags & XHPROF_FLAGS_MEMORY) {
    /* Read both memory figures before touching the counts table */
    long int mu_end  = zend_memory_usage(0 TSRMLS_CC);
    long int pmu_end = zend_memory_peak_usage(0 TSRMLS_CC);

    /* Add the memory and peak-memory deltas since the entry began */
    hp_inc_count(counts, "mu",  mu_end - top->mu_start_hprof    TSRMLS_CC);
    hp_inc_count(counts, "pmu", pmu_end - top->pmu_start_hprof  TSRMLS_CC);
  }
}

hp_mode_shared_endfn_cb

這個方法統計了調用次數和消耗時間:起始時鐘週期取自hp_entry_t鏈表中的節點,而ct、wt則累加到hp_hash_lookup()按symbol查到的統計數組中

/**
 * End-function bookkeeping shared between profiling modes: bumps the call
 * count ("ct") and the wall time ("wt") recorded for symbol.
 *
 * @param top     entry being closed; supplies the starting TSC value
 * @param symbol  name under which the stats are looked up
 * @return the stat array for symbol, or a null pointer when
 *         hp_hash_lookup() does not provide one
 */
zval * hp_mode_shared_endfn_cb(hp_entry_t *top,
                               char          *symbol  TSRMLS_DC) {
  /* Take the end TSC reading first, before any other work */
  uint64  tsc_end = cycle_timer();
  /* Stat array for this symbol */
  zval   *counts  = hp_hash_lookup(symbol TSRMLS_CC);

  if (counts == NULL) {
    return NULL;
  }

  /* One more call ... */
  hp_inc_count(counts, "ct", 1  TSRMLS_CC);

  /* ... and its duration: elapsed ticks converted to microseconds using the
   * frequency of the cpu the process is bound to */
  hp_inc_count(counts, "wt", get_us_from_tsc(tsc_end - top->tsc_start,
        hp_globals.cpu_frequencies[hp_globals.cur_cpu_id]) TSRMLS_CC);

  return counts;
}
相關文章
相關標籤/搜索