使用 https://github.com/taowen/es-monitor 能夠用 SQL 進行 elasticsearch 的查詢。下鑽未必一定是 GROUP BY,每經過一層 GROUP BY,桶的數量就會增長一層。還有一種下鑽是用 filter 來下鑽,這種下鑽方式會使下一層 match 的文檔數量變少,可是桶的數量不變。
SQL
$ cat << EOF | ./es_query.py http://127.0.0.1:9200 WITH all_symbols AS (SELECT MAX(market_cap) AS max_all_times FROM symbol); WITH year_2000 AS (SELECT MAX(market_cap) AS max_at_2000 FROM all_symbols WHERE ipo_year=2000) EOF
{"max_at_2000": 20310000000.0, "max_all_times": 522690000000.0}
能夠看到桶的數量仍是同一個,只是增加了一個 max_at_2000 結果字段。
Elasticsearch
{ "aggs": { "year_2000": { "filter": { "term": { "ipo_year": 2000 } }, "aggs": { "max_at_2000": { "max": { "field": "market_cap" } } } }, "max_all_times": { "max": { "field": "market_cap" } } }, "size": 0 }
{ "hits": { "hits": [], "total": 6714, "max_score": 0.0 }, "_shards": { "successful": 1, "failed": 0, "total": 1 }, "took": 3, "aggregations": { "year_2000": { "max_at_2000": { "value": 20310000000.0 }, "doc_count": 58 }, "max_all_times": { "value": 522690000000.0 } }, "timed_out": false }
Profile
[ { "query": [ { "query_type": "TermQuery", "lucene": "ipo_year:`P", "time": "0.3492430000ms", "breakdown": { "score": 0, "create_weight": 220149, "next_doc": 0, "match": 0, "build_scorer": 95037, "advance": 34057 } }, { "query_type": "MatchAllDocsQuery", "lucene": "*:*", "time": "0.1883710000ms", "breakdown": { "score": 0, "create_weight": 3980, "next_doc": 169730, "match": 0, "build_scorer": 14661, "advance": 0 } } ], "rewrite_time": 3158, "collector": [ { "name": "MultiCollector", "reason": "search_multi", "time": "8.789594000ms", "children": [ { "name": "TotalHitCountCollector", "reason": "search_count", "time": "0.2768050000ms" }, { "name": "BucketCollector: [[year_2000, max_all_times]]", "reason": "aggregation", "time": "7.667765000ms" } ] } ] } ]
從 Profile 的結果來看,實際上是一次性做了兩個查詢,一個是所有文檔,一個是 ipo_year=2000 的文檔,而後統一聚合。
SQL
$ cat << EOF | ./es_query.py http://127.0.0.1:9200 WITH all_symbols AS (SELECT MAX(market_cap) AS max_all_times FROM symbol); WITH year_2000 AS (SELECT MAX(market_cap) AS max_at_2000 FROM all_symbols WHERE ipo_year=2000); WITH year_2001 AS (SELECT MAX(market_cap) AS max_at_2001 FROM all_symbols WHERE ipo_year=2001) EOF
這個寫法其實和 CASE WHEN 很相似,可是其表達能力更強大,更靈活。
$ cat << EOF | ./es_query.py http://127.0.0.1:9200 SELECT per_ipo_year, MAX(market_cap) AS max_all_times FROM symbol GROUP BY CASE WHEN ipo_year=2000 THEN 'year_2000' WHEN ipo_year=2001 THEN 'year_2001' END AS per_ipo_year EOF
上面 WITH 寫法的 SQL 查詢結果是:
{"max_at_2000": 20310000000.0, "max_all_times": 522690000000.0, "max_at_2001": 8762940000.0}
Elasticsearch
{ "aggs": { "year_2001": { "filter": { "term": { "ipo_year": 2001 } }, "aggs": { "max_at_2001": { "max": { "field": "market_cap" } } } }, "year_2000": { "filter": { "term": { "ipo_year": 2000 } }, "aggs": { "max_at_2000": { "max": { "field": "market_cap" } } } }, "max_all_times": { "max": { "field": "market_cap" } } }, "size": 0 }
{ "hits": { "hits": [], "total": 6714, "max_score": 0.0 }, "_shards": { "successful": 1, "failed": 0, "total": 1 }, "took": 2, "aggregations": { "year_2001": { "max_at_2001": { "value": 8762940000.0 }, "doc_count": 38 }, "year_2000": { "max_at_2000": { "value": 20310000000.0 }, "doc_count": 58 }, "max_all_times": { "value": 522690000000.0 } }, "timed_out": false }
Profile
[ { "query": [ { "query_type": "TermQuery", "lucene": "ipo_year:`Q", "time": "0.2518270000ms", "breakdown": { "score": 0, "create_weight": 186032, "next_doc": 0, "match": 0, "build_scorer": 48664, "advance": 17131 } }, { "query_type": "TermQuery", "lucene": "ipo_year:`P", "time": "0.1200760000ms", "breakdown": { "score": 0, "create_weight": 77254, "next_doc": 0, "match": 0, "build_scorer": 25184, "advance": 17638 } }, { "query_type": "MatchAllDocsQuery", "lucene": "*:*", "time": "0.1968800000ms", "breakdown": { "score": 0, "create_weight": 3573, "next_doc": 180136, "match": 0, "build_scorer": 13171, "advance": 0 } } ], "rewrite_time": 4250, "collector": [ { "name": "MultiCollector", "reason": "search_multi", "time": "2.459413000ms", "children": [ { "name": "TotalHitCountCollector", "reason": "search_count", "time": "0.2160950000ms" }, { "name": "BucketCollector: [[year_2001, year_2000, max_all_times]]", "reason": "aggregation", "time": "1.455703000ms" } ] } ] } ]
SQL
$ cat << EOF | ./es_query.py http://127.0.0.1:9200 WITH SELECT MAX(market_cap) AS max_all_times FROM symbol AS all_symbols; WITH SELECT MAX(market_cap) AS max_at_2000 FROM all_symbols WHERE ipo_year=2000 AS year_2000; WITH SELECT MAX(market_cap) AS max_at_2001_finance FROM year_2000 WHERE sector='Finance' AS year_2000_finance EOF
{"max_at_2000": 20310000000.0, "max_all_times": 522690000000.0, "max_at_2001_finance": 985668354.0}
Elasticsearch
{ "aggs": { "year_2000": { "filter": { "term": { "ipo_year": 2000 } }, "aggs": { "max_at_2000": { "max": { "field": "market_cap" } }, "year_2000_finance": { "filter": { "term": { "sector": "Finance" } }, "aggs": { "max_at_2001_finance": { "max": { "field": "market_cap" } } } } } }, "max_all_times": { "max": { "field": "market_cap" } } }, "size": 0 }
{ "hits": { "hits": [], "total": 6714, "max_score": 0.0 }, "_shards": { "successful": 1, "failed": 0, "total": 1 }, "took": 2, "aggregations": { "year_2000": { "max_at_2000": { "value": 20310000000.0 }, "year_2000_finance": { "max_at_2001_finance": { "value": 985668354.0 }, "doc_count": 2 }, "doc_count": 58 }, "max_all_times": { "value": 522690000000.0 } }, "timed_out": false }
Profile
[ { "query": [ { "query_type": "TermQuery", "lucene": "ipo_year:`P", "time": "0.1897790000ms", "breakdown": { "score": 0, "create_weight": 145762, "next_doc": 0, "match": 0, "build_scorer": 26216, "advance": 17801 } }, { "query_type": "TermQuery", "lucene": "sector:Finance", "time": "0.2380290000ms", "breakdown": { "score": 0, "create_weight": 57770, "next_doc": 0, "match": 0, "build_scorer": 55497, "advance": 124762 } }, { "query_type": "MatchAllDocsQuery", "lucene": "*:*", "time": "0.1965630000ms", "breakdown": { "score": 0, "create_weight": 3500, "next_doc": 178347, "match": 0, "build_scorer": 14716, "advance": 0 } } ], "rewrite_time": 4190, "collector": [ { "name": "MultiCollector", "reason": "search_multi", "time": "2.466917000ms", "children": [ { "name": "TotalHitCountCollector", "reason": "search_count", "time": "0.2712430000ms" }, { "name": "BucketCollector: [[year_2000, max_all_times]]", "reason": "aggregation", "time": "1.370663000ms" } ] } ] } ]
有了 GROUP BY 下鑽和 FILTER 下鑽,不少複雜的查詢能夠一條就查詢出來。而這種邊下鑽邊聚合指標的查詢能力甚至是傳統 SQL 都不具備的。並且稍微訓練一下,就會非常習慣這種下鑽的思維方式,寫查詢也會很自然。