Postgresql_根據執行計劃優化SQL

時間 2019-11-19

標籤 postgresql 根據執行計劃優化 sql 欄目 Postgre SQL 简体版

原文原文鏈接

執行計劃路徑選擇

postgresql查詢規劃過程中，查詢請求的不同執行方案是通過創建不同的路徑來表達的，在生成許多符合條件的路徑以後，要從中選擇出代價最小的路徑，把它轉化爲一個計劃，傳遞給執行器執行。規劃器的核心工作就是生成多條路徑，然後從中找出最優的那一條。

代價評估

評估路徑優劣的依據是用系統表pg_statistic中的統計信息估算出來的不同路徑的代價(cost)。PostgreSQL估計計劃成本的方式：基於統計信息估計計劃中各個節點的成本。PostgreSQL會分析各個表來獲取一個統計信息樣本（這個操作通常是由autovacuum這個守護進程週期性地執行analyze來收集這些統計信息，然後保存到pg_statistic和pg_class裏面）。

用於估算代價的參數postgresql.conf

# - Planner Cost Constants -

#seq_page_cost = 1.0   # measured on an arbitrary scale 順序磁盤掃描時單個頁面的開銷
#random_page_cost = 4.0   # same scale as above  隨機磁盤訪問時單頁面的讀取開銷
#cpu_tuple_cost = 0.01   # same scale as above cpu處理每一行的開銷
#cpu_index_tuple_cost = 0.005  # same scale as above cpu處理每一個索引行的開銷
#cpu_operator_cost = 0.0025  # same scale as above cpu處理每一個運算符或者函數調用的開銷
#parallel_tuple_cost = 0.1  # same scale as above 計算並行處理的成本，如果成本高於非並行，則不會開啓並行處理。
#parallel_setup_cost = 1000.0 # same scale as above
#min_parallel_relation_size = 8MB
#effective_cache_size = 4GB # 在一次索引掃描中可用的文件系統內核緩衝區有效大小

也可以使用 show all的方式查看

路徑的選擇

--查看表信息
db_jcxxglpt=# \d t_jcxxgl_tjaj
               Table "db_jcxx.t_jcxxgl_tjaj"   Column   |             Type             | Modifiers --------------+--------------------------------+----------- c_bh         | character(32)                 | not null c_xzdm       | character varying(300)         | c_jgid       | character(32)                 | c_ajbm       | character(22)                 | ... Indexes:   "t_jcxxgl_tjaj_pkey" PRIMARY KEY, btree (c_bh)   "idx_ttjaj_cah" btree (c_ah)   "idx_ttjaj_dslrq" btree (d_slrq)  首先更新統計信息vacuum analyze t_jcxxgl_tjaj，許多時候可能由於統計信息的不許確致使了不正常的執行計劃 --執行計劃，全表掃描 db_jcxxglpt=# explain (analyze,verbose,costs,buffers,timing)select c_bh,c_xzdm,c_jgid,c_ajbm from t_jcxxgl_tjaj where d_slrq >='2018-03-18';                                                   QUERY PLAN                                               ------------------------------------------------------------------------------------------------------------ Seq Scan on db_jcxx.t_jcxxgl_tjaj (cost=0.00..9.76 rows=3 width=96) (actual time=1.031..1.055 rows=3 loops =1)   Output: c_bh, c_xzdm, c_jgid, c_ajbm   Filter: (t_jcxxgl_tjaj.d_slrq >= '2018-03-18'::date)   Rows Removed by Filter: 138   Buffers: shared hit=8 Planning time: 6.579 ms Execution time: 1.163 ms (7 rows) --執行計劃，關閉全表掃描 db_jcxxglpt=# set session enable_seqscan = off; SET db_jcxxglpt=# explain (analyze,verbose,costs,buffers,timing)select c_bh,c_xzdm,c_jgid,c_ajbm from t_jcxxgl_tjaj where d_slrq >='2018-03-18';                                                               QUERY PLAN                                                               ------------------------------------------------------------------------------------------------------------ Index Scan using idx_ttjaj_dslrq on db_jcxx.t_jcxxgl_tjaj (cost=0.14..13.90 rows=3 width=96) (actual time=0.012..0.026 rows=3 loops=1)   Output: c_bh, c_xzdm, c_jgid, c_ajbm   Index Cond: (t_jcxxgl_tjaj.d_slrq >= '2018-03-18'::date)   Buffers: shared hit=4 Planning time: 0.309 ms Execution time: 0.063 ms (6 
rows)  d_slrq上面有btree索引，可是查看執行計劃並沒有走索引，這是爲什麼呢？ 代價計算： 一個路徑的估算由三部分組成：啓動代價(startup cost)，總代價(total cost)，執行結果的排序方式(pathkeys) 代價估算公式：總代價=啓動代價+I/O代價+CPU代價（cost=S+P+W*T） S:啓動代價 P:執行時要訪問的頁面數，反映磁盤的I/O次數 T:表示在執行時所要訪問的元組數，反映了cpu開銷 W:表示磁盤I/O代價和CPU開銷之間的權重因子 統計信息：統計信息的其中一部分是每一個表和索引中項的總數，以及每一個表和索引佔用的磁盤塊數。這些信息保存在pg_class表的reltuples和relpages列中。我們可以這樣查詢相關信息：  --查看統計信息 db_jcxxglpt=# select relpages,reltuples from pg_class where relname ='t_jcxxgl_tjaj'; relpages | reltuples ----------+-----------       8 |       141 (1 row)  total_cost = 1(seq_page_cost)*8(磁盤總頁數)+0.01(cpu_tuple_cost)*141(表的總記錄數)+0.0025(cpu_operator_cost)*141(表的總記錄數)=9.7625  可以看到走索引的cost=13.90比全表掃描cost=9.76要大。在表較小的情況下，全表掃描比索引掃描更有效， index scan 至少要發生兩次I/O，一次是讀取索引塊，一次是讀取數據塊。

seq_scan源碼

/*
 * cost_seqscan
 *	  Determines and returns the cost of scanning a relation sequentially.
 *
 * 'baserel' is the relation to be scanned
 * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
 *
 * Nothing is returned by value: the results are written into 'path'
 * (path->rows, path->startup_cost and path->total_cost).
 *
 * The total is assembled as
 *	  total_cost = startup_cost + cpu_run_cost + disk_run_cost
 * where
 *	  disk_run_cost = spc_seq_page_cost * baserel->pages
 *	  cpu_run_cost  = (cpu_tuple_cost + qual per-tuple cost) * baserel->tuples
 *	                  + target-list per-tuple cost * path->rows
 */
void
cost_seqscan(Path *path, PlannerInfo *root,
			 RelOptInfo *baserel, ParamPathInfo *param_info)
{
	Cost		startup_cost = 0;
	Cost		cpu_run_cost;
	Cost		disk_run_cost;
	double		spc_seq_page_cost;
	QualCost	qpqual_cost;
	Cost		cpu_per_tuple;

	/* Should only be applied to base relations */
	Assert(baserel->relid > 0);
	Assert(baserel->rtekind == RTE_RELATION);

	/* Mark the path with the correct row estimate */
	if (param_info)
		path->rows = param_info->ppi_rows;
	else
		path->rows = baserel->rows;

	/*
	 * When sequential scans are disabled the path is still costed; it is
	 * only penalized by adding disable_cost to its startup cost.
	 */
	if (!enable_seqscan)
		startup_cost += disable_cost;

	/*
	 * fetch estimated page cost for tablespace containing table (only the
	 * sequential page cost is requested; the other output argument is NULL)
	 */
	get_tablespace_page_costs(baserel->reltablespace, NULL,&spc_seq_page_cost);

	/*
	 * disk costs: one sequential page fetch for every page of the relation
	 */
	disk_run_cost = spc_seq_page_cost * baserel->pages;

	/*
	 * CPU costs: every tuple in the relation pays cpu_tuple_cost plus the
	 * per-tuple cost of evaluating the restriction quals; the quals' one-time
	 * cost goes into the startup cost.
	 */
	get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);

	startup_cost += qpqual_cost.startup;
	cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
	cpu_run_cost = cpu_per_tuple * baserel->tuples;
	/* tlist eval costs are paid per output row, not per tuple scanned */
	startup_cost += path->pathtarget->cost.startup;
	cpu_run_cost += path->pathtarget->cost.per_tuple * path->rows;

	/* Adjust costing for parallelism, if used. */
	if (path->parallel_workers > 0)
	{
		double		parallel_divisor = get_parallel_divisor(path);

		/* The CPU cost is divided among all the workers. */
		cpu_run_cost /= parallel_divisor;

		/*
		 * It may be possible to amortize some of the I/O cost, but probably
		 * not very much, because most operating systems already do aggressive
		 * prefetching. For now, we assume that the disk run cost can't be
		 * amortized at all.
		 */

		/*
		 * In the case of a parallel plan, the row count needs to represent
		 * the number of tuples processed per worker.
		 *
		 * Note this overwrites path->rows only after the tlist cost above
		 * has already been computed from the undivided row estimate.
		 */
		path->rows = clamp_row_est(path->rows / parallel_divisor);
	}

	path->startup_cost = startup_cost;
	path->total_cost = startup_cost + cpu_run_cost + disk_run_cost;
}

一個SQL優化實例

慢SQL：
select c_ajbh, c_ah, c_cbfy, c_cbrxm, d_larq, d_jarq, n_dbjg, c_yqly from db_zxzhld.t_zhld_db dbxx join db_zxzhld.t_zhld_ajdbxx dbaj  on dbxx.c_bh = dbaj.c_dbbh where dbxx.n_valid=1 and dbxx.n_state in (1,2,3) and dbxx.c_dbztbh='1003'  and dbaj.c_zblx='1003' and dbaj.c_dbfy='0' and dbaj.c_gy = '2550'  and c_ajbh in (select distinct c_ajbh from db_zxzhld.t_zhld_zbajxx where n_dbzt = 1 and c_zblx = '1003' and c_gy = '2550' )  order by d_larq asc, c_ajbh asc limit 15 offset 0 慢sql耗時：7s 咋們先過下這個sql是幹什麼的、首先dbxx和dbaj的一個join鏈接而後dbaj.c_ajbh要包含在zbaj表裏面，作了個排序，取了15條記錄、大概就這樣。 Sql有個缺點就是我不知道查詢的字段是從那個表裏面取的、建議加上表別名.字段。 查看該sql的表的數據量： db_zxzhld.t_zhld_db ：1311 db_zxzhld.t_zhld_ajdbxx ：341296 db_zxzhld.t_zhld_zbajxx ：1027619   執行計劃： 01 Limit (cost=36328.67..36328.68 rows=1 width=107) (actual time=88957.677..88957.729 rows=15 loops=1) 02   -> Sort (cost=36328.67..36328.68 rows=1 width=107) (actual time=88957.653..88957.672 rows=15 loops=1) 03         Sort Key: dbaj.d_larq, dbaj.c_ajbh 04         Sort Method: top-N heapsort Memory: 27kB 05         -> Nested Loop Semi Join (cost=17099.76..36328.66 rows=1 width=107) (actual time=277.794..88932.662 rows=8605 loops=1) 06               Join Filter: ((dbaj.c_ajbh)::text = (t_zhld_zbajxx.c_ajbh)::text) 07               Rows Removed by Join Filter: 37018710 08               -> Nested Loop (cost=0.00..19200.59 rows=1 width=107) (actual time=199.141..601.845 rows=8605 loops=1) 09                     Join Filter: (dbxx.c_bh = dbaj.c_dbbh) 10                     Rows Removed by Join Filter: 111865 11                     -> Seq Scan on t_zhld_ajdbxx dbaj (cost=0.00..19117.70 rows=219 width=140) (actual time=198.871..266.182 rows=8605 loops=1) 12                           Filter: ((n_valid = 1) AND ((c_zblx)::text = '1003'::text) AND ((c_dbfy)::text = '0'::text) AND ((c_gy)::text = '2550'::text)) 13                           Rows Removed by Filter: 332691 14                     -> Materialize (cost=0.00..66.48 rows=5 width=33) (actual 
time=0.001..0.017 rows=14 loops=8605) 15                           -> Seq Scan on t_zhld_db dbxx (cost=0.00..66.45 rows=5 width=33) (actual time=0.044..0.722 rows=14 loops=1) 16                                 Filter: ((n_valid = 1) AND ((c_dbztbh)::text = '1003'::text) AND (n_state = ANY ('{1,2,3}'::integer[]))) 17                                 Rows Removed by Filter: 1297 18               -> Materialize (cost=17099.76..17117.46 rows=708 width=32) (actual time=0.006..4.890 rows=4303 loops=8605) 19                     -> HashAggregate (cost=17099.76..17106.84 rows=708 width=32) (actual time=44.011..54.924 rows=8605 loops=1) 20                           Group Key: t_zhld_zbajxx.c_ajbh 21                           -> Bitmap Heap Scan on t_zhld_zbajxx (cost=163.36..17097.99 rows=708 width=32) (actual time=5.218..30.278 rows=8605 loops=1) 22                                 Recheck Cond: ((n_dbzt = 1) AND ((c_zblx)::text = '1003'::text)) 23                                 Filter: ((c_gy)::text = '2550'::text) 24                                 Rows Removed by Filter: 21849 25                                 Heap Blocks: exact=960 26                                 -> Bitmap Index Scan on i_tzhldzbajxx_zblx_dbzt (cost=0.00..163.19 rows=5876 width=0) (actual time=5.011..5.011 rows=30458 loops=1) 27                                       Index Cond: ((n_dbzt = 1) AND ((c_zblx)::text = '1003'::text)) 28 Planning time: 1.258 ms 29 Execution time: 88958.029 ms 執行計劃解讀： 1：第27->21行，經過索引i_tzhldzbajxx_zblx_dbzt過濾表t_zhld_zbajxx的數據，而後根據過濾條件(c_gy)::text = '2550'::text過濾最終返回8605條數據 2：第17->15行，根據條件過濾t_zhld_db表的數據，最終返回了14條數據 3：第20->19行，對錶t_zhld_zbajxx作group by的操做 4：第13->11行，全表掃描t_zhld_ajdbxx 最終返回了8605條數據 5：第08行，根據t_zhld_ajdbxx返回的8605條結果集做爲驅動表和t_zhld_db的結果集(14條)作嵌套循環，t_zhld_db的結果集被循環了8605次。而後過濾掉了其中的111865條記錄，那麼最終將獲得（8605*14-111865） = 8605 6：第07->05行，根據第08和18行返回的結果集最終作了Nested Loop Semi Join，第18行的4303條結果集被循環了8605次,(4303*8605-37018710)=8605 7: 第04->02行，對最終的8605條記錄進行排序 8：第01行，limit最終獲取15條記錄 
 整個執行計劃中耗時最長的地方在05行Nested Loop Semi Join，actual time=277.794..88932.662， 表db_zxzhld.t_zhld_db dbxx和db_zxzhld.t_zhld_ajdbxx均是全表掃描

具體優化步驟

查看索引頁並沒有索引，建立c_ajbh，c_dbbh等邏輯外鍵的索引
drop index  if exists I_T_ZHLD_AJDBXX_AJBH;
create index I_T_ZHLD_AJDBXX_AJBH on T_ZHLD_AJDBXX (c_ajbh); commit; drop index if exists I_T_ZHLD_AJDBXX_DBBH; create index I_T_ZHLD_AJDBXX_DBBH on T_ZHLD_AJDBXX (c_dbbh); commit; 建立d_larq,c_ajbh的排序索引： drop index if exists I_T_ZHLD_AJDBXX_m6; create index I_T_ZHLD_AJDBXX_m6 on T_ZHLD_AJDBXX (c_zblx,c_dbfy,c_gy,d_larq asc,c_ajbh asc); commit; drop index   if exists I_T_ZHLD_ZBAJXX_h3 ; create index I_T_ZHLD_ZBAJXX_h3 on db_zxzhld.t_zhld_zbajxx (n_dbzt,c_zblx,c_gy,c_gy); commit;  建立索引後執行計劃有了改變，原來的dbaj表和dbxx表先作nestedloop變成了zbaj和dbaj表先作了nestedloop join,總的cost也從36328.68降到了12802.87， Limit (cost=12802.87..12802.87 rows=1 width=107) (actual time=4263.598..4263.648 rows=15 loops=1) -> Sort (cost=12802.87..12802.87 rows=1 width=107) (actual time=4263.592..4263.609 rows=15 loops=1)       Sort Key: dbaj.d_larq, dbaj.c_ajbh       Sort Method: top-N heapsort Memory: 27kB       -> Nested Loop (cost=2516.05..12802.86 rows=1 width=107) (actual time=74.240..4239.723 rows=8605 loops=1)             Join Filter: (dbaj.c_dbbh = dbxx.c_bh)             Rows Removed by Join Filter: 111865             -> Nested Loop (cost=2516.05..12736.34 rows=1 width=140) (actual time=74.083..327.974 rows=8605 loops=1)                   -> HashAggregate (cost=2515.62..2522.76 rows=714 width=32) (actual time=74.025..90.185 rows=8605 loops=1)                         Group Key: ("ANY_subquery".c_ajbh)::text                         -> Subquery Scan on "ANY_subquery" (cost=2499.56..2513.84 rows=714 width=32) (actual time=28.782..59.823 rows=8605 loops=1)                               -> HashAggregate (cost=2499.56..2506.70 rows=714 width=32) (actual time=28.778..39.968 rows=8605 loops=1)                                     Group Key: zbaj.c_ajbh                                     -> Index Scan using i_t_zhld_zbajxx_h3 on t_zhld_zbajxx zbaj (cost=0.42..2497.77 rows=715 width=32) (actual time=0.062..15.104 rows=8605 loops=1)                                           Index Cond: ((n_dbzt = 1) AND 
((c_zblx)::text = '1003'::text) AND ((c_gy)::text = '2550'::text))                   -> Index Scan using i_t_zhld_ajdbxx_ajbh on t_zhld_ajdbxx dbaj (cost=0.42..14.29 rows=1 width=140) (actual time=0.015..0.021 rows=1 loops=8605)                         Index Cond: ((c_ajbh)::text = ("ANY_subquery".c_ajbh)::text)                         Filter: (((c_zblx)::text = '1003'::text) AND ((c_dbfy)::text = '0'::text) AND ((c_gy)::text = '2550'::text))                         Rows Removed by Filter: 1             -> Seq Scan on t_zhld_db dbxx (cost=0.00..66.45 rows=5 width=33) (actual time=0.015..0.430 rows=14 loops=8605)                   Filter: ((n_valid = 1) AND ((c_dbztbh)::text = '1003'::text) AND (n_state = ANY ('{1,2,3}'::integer[])))                   Rows Removed by Filter: 1298 Planning time: 1.075 ms Execution time: 4263.803 ms

執行的時間還是要4s左右，仍然不滿足需求，並且沒有使用上I_T_ZHLD_AJDBXX_m6這個索引。

等價改寫SQL(1)

等價改寫：將排序條件加入db_zxzhld.t_zhld_ajdbxx的子查詢，讓其先排序，再和t_zhld_db表連接。（注意：SQL並不保證子查詢中的order by在外層join之後仍被保留，此處結果有序依賴於規劃器選擇的執行計劃；若要嚴格保證順序，應在最外層也加上order by。）
修改後sql：
Select dbaj.c_ajbh, dbaj.c_ah, dbaj.c_cbfy, dbaj.c_cbrxm, dbaj.d_larq, dbaj.d_jarq, dbaj.n_dbjg, dbaj.c_yqly from (select * from db_zxzhld.t_zhld_db where n_valid=1 and n_state in (1,2,3) and c_dbztbh='1003' )dbxx join (select * from db_zxzhld.t_zhld_ajdbxx where n_valid=1 and c_zblx='1003' and c_dbfy='0' and c_gy = '2550' and c_ajbh in (select distinct c_ajbh from db_zxzhld.t_zhld_zbajxx where n_dbzt = 1 and c_zblx = '1003' and c_gy = '2550' ) order by d_larq asc, c_ajbh asc)dbaj on dbxx.c_bh = dbaj.c_dbbh limit 15 offset 0 再次查看執行計劃： Limit (cost=3223.92..3231.97 rows=1 width=107) (actual time=127.291..127.536 rows=15 loops=1) -> Nested Loop (cost=3223.92..3231.97 rows=1 width=107) (actual time=127.285..127.496 rows=15 loops=1)       -> Sort (cost=3223.64..3223.65 rows=1 width=140) (actual time=127.210..127.225 rows=15 loops=1)             Sort Key: t_zhld_ajdbxx.d_larq, t_zhld_ajdbxx.c_ajbh             Sort Method: quicksort Memory: 2618kB             -> Hash Semi Join (cost=2523.19..3223.63 rows=1 width=140) (actual time=55.913..107.265 rows=8605 loops=1)                   Hash Cond: ((t_zhld_ajdbxx.c_ajbh)::text = (t_zhld_zbajxx.c_ajbh)::text)                   -> Index Scan using i_t_zhld_ajdbxx_m6 on t_zhld_ajdbxx (cost=0.42..700.28 rows=219 width=140) (actual time=0.065..22.005 rows=8605 loops=1)                         Index Cond: (((c_zblx)::text = '1003'::text) AND ((c_dbfy)::text = '0'::text) AND ((c_gy)::text = '2550'::text))                   -> Hash (cost=2513.84..2513.84 rows=714 width=32) (actual time=55.802..55.802 rows=8605 loops=1)                         Buckets: 16384 (originally 1024) Batches: 1 (originally 1) Memory Usage: 675kB                         -> HashAggregate (cost=2499.56..2506.70 rows=714 width=32) (actual time=30.530..43.275 rows=8605 loops=1)                               Group Key: t_zhld_zbajxx.c_ajbh                               -> Index Scan using i_t_zhld_zbajxx_h3 on t_zhld_zbajxx (cost=0.42..2497.77 rows=715 width=32) 
(actual time=0.043..15.552 rows=8605 loops=1)                                     Index Cond: ((n_dbzt = 1) AND ((c_zblx)::text = '1003'::text) AND ((c_gy)::text = '2550'::text))       -> Index Scan using t_zhld_db_pkey on t_zhld_db (cost=0.28..8.30 rows=1 width=33) (actual time=0.009..0.011 rows=1 loops=15)             Index Cond: (c_bh = t_zhld_ajdbxx.c_dbbh)             Filter: (((c_dbztbh)::text = '1003'::text) AND (n_state = ANY ('{1,2,3}'::integer[]))) Planning time: 1.154 ms Execution time: 127.734 ms 這一次能夠看出，ajdbxx和zbajxx表作了hash semi join 消除了nestedloop,cost降到了3231.97。而且使用上了i_t_zhld_ajdbxx_m6

子查詢中in的結果集有近一萬條數據（執行計劃中爲8605條），嘗試使用exists等價改寫in，看是否能有更好的結果

等價改寫SQL(2)

等價改寫：將in替換爲exists：
select c_ajbh, c_ah, c_cbfy, c_cbrxm, d_larq, d_jarq, n_dbjg, c_yqly
from (select c_bh from db_zxzhld.t_zhld_db where n_state in (1,2,3) and c_dbztbh='1003' )dbxx join (select c_ajbh, c_ah, c_cbfy, c_cbrxm, d_larq, d_jarq, n_dbjg, c_yqly,c_dbbh from db_zxzhld.t_zhld_ajdbxx   ajdbxx where c_zblx='1003' and c_dbfy='0' and c_gy = '2550' and exists (select distinct c_ajbh from db_zxzhld.t_zhld_zbajxx zbajxx where ajdbxx.c_ajbh = zbajxx.c_ajbh and n_dbzt = 1 and c_zblx = '1003' and c_gy = '2550' ) order by d_larq asc, c_ajbh asc)dbaj on dbxx.c_bh = dbaj.c_dbbh limit 15 offset 0 再次查看執行計劃： Limit (cost=1.12..2547.17 rows=1 width=107) (actual time=0.140..0.727 rows=15 loops=1) -> Nested Loop (cost=1.12..2547.17 rows=1 width=107) (actual time=0.136..0.689 rows=15 loops=1)       -> Nested Loop Semi Join (cost=0.85..2538.84 rows=1 width=140) (actual time=0.115..0.493 rows=15 loops=1)             -> Index Scan using i_t_zhld_ajdbxx_m6 on t_zhld_ajdbxx t2 (cost=0.42..700.28 rows=219 width=140) (actual time=0.076..0.127 rows=15 loops=1)                   Index Cond: (((c_zblx)::text = '1003'::text) AND ((c_dbfy)::text = '0'::text) AND ((c_gy)::text = '2550'::text))             -> Index Scan using i_t_zhld_zbajxx_c_ajbh on t_zhld_zbajxx t3 (cost=0.42..8.40 rows=1 width=32) (actual time=0.019..0.019 rows=1 loops=15)                   Index Cond: ((c_ajbh)::text = (t2.c_ajbh)::text)                   Filter: (((c_zblx)::text = '1003'::text) AND ((c_gy)::text = '2550'::text) AND (n_dbzt = 1))       -> Index Scan using t_zhld_db_pkey on t_zhld_db (cost=0.28..8.30 rows=1 width=33) (actual time=0.007..0.008 rows=1 loops=15)             Index Cond: (c_bh = t2.c_dbbh)             Filter: (((c_dbztbh)::text = '1003'::text) AND (n_state = ANY ('{1,2,3}'::integer[]))) Planning time: 1.268 ms Execution time: 0.859 ms  可以看出使用exists效果更好，最終cost 2547.17 (1).少了對t_zhld_ajdbxx表的排序操作：Sort Key: t_zhld_ajdbxx.d_larq, t_zhld_ajdbxx.c_ajbh。(這一步是因爲使用了索引i_t_zhld_ajdbxx_m6中的排序) (2).少了t_zhld_zbajxx表的分組操作：Group Key: t_zhld_zbajxx.c_ajbh。  第(2)點爲什麼這個查詢消除了t_zhld_zbajxx表的group by操作呢？ 
原因是exists替換了distinct的功能，一旦滿足條件則馬上返回。因此使用exists的時候子查詢可以直接去掉distinct。  優化無止境、、、