【09】把 Elasticsearch 當數據庫使：HAVING與Pipeline Aggregation

時間 2019-11-07

標籤 elasticsearch 數據庫 having pipeline aggregation 欄目日誌分析简体版

原文鏈接

使用 https://github.com/taowen/es-monitor 能夠用 SQL 進行 elasticsearch 的查詢。Elasticsearch 2.0引入的一個重大特性是支持了Pipeline Aggregation。在有這個特性以前，elasticsearch聚合以後能夠做的計算僅僅是對TermsAggregation的結果做一個排序，並取個TOP N。除此以外什麼計算都做不了。而SQL裏一個重要的特性是HAVING子句，用其過濾咱們不關心的桶，以減少結果的數量。今天咱們就來看看如何用Pipeline Aggregation實現HAVING。

HAVING ipo_count > 200

SQL

$ cat << EOF | ./es_query.py http://127.0.0.1:9200
    SELECT ipo_year, COUNT(*) AS ipo_count FROM symbol 
    GROUP BY ipo_year HAVING ipo_count > 200
EOF

{"ipo_count": 390, "ipo_year": 2014}
{"ipo_count": 334, "ipo_year": 2015}
{"ipo_count": 253, "ipo_year": 2013}

Elasticsearch

{
  "aggs": {
    "ipo_year": {
      "terms": {
        "field": "ipo_year", 
        "size": 0
      }, 
      "aggs": {
        "having": {
          "bucket_selector": {
            "buckets_path": {
              "ipo_count": "_count"
            }, 
            "script": {
              "lang": "expression", 
              "inline": " ipo_count > 200"
            }
          }
        }
      }
    }
  }, 
  "size": 0
}

這裏bucket_selector使用的語法和前面GROUP BY ipo_year % 5的語法是相似的。不同之處在於，以前的script是從document裏取值，而這裏的script是從當前bucket裏取值。

{
  "hits": {
    "hits": [], 
    "total": 6714, 
    "max_score": 0.0
  }, 
  "_shards": {
    "successful": 1, 
    "failed": 0, 
    "total": 1
  }, 
  "took": 3, 
  "aggregations": {
    "ipo_year": {
      "buckets": [
        {
          "key": 2014, 
          "doc_count": 390
        }, 
        {
          "key": 2015, 
          "doc_count": 334
        }, 
        {
          "key": 2013, 
          "doc_count": 253
        }
      ], 
      "sum_other_doc_count": 0, 
      "doc_count_error_upper_bound": 0
    }
  }, 
  "timed_out": false
}

Profile

[
  {
    "query": [
      {
        "query_type": "MatchAllDocsQuery",
        "lucene": "*:*",
        "time": "0.9405890000ms",
        "breakdown": {
          "score": 0,
          "create_weight": 17443,
          "next_doc": 753268,
          "match": 0,
          "build_scorer": 169878,
          "advance": 0
        }
      }
    ],
    "rewrite_time": 7177,
    "collector": [
      {
        "name": "MultiCollector",
        "reason": "search_multi",
        "time": "6.110736000ms",
        "children": [
          {
            "name": "TotalHitCountCollector",
            "reason": "search_count",
            "time": "0.8071620000ms"
          },
          {
            "name": "LongTermsAggregator: [ipo_year]",
            "reason": "aggregation",
            "time": "2.416942000ms"
          }
        ]
      }
    ]
  }
]

Having看來是Lucene計算完以後在Elasticsearch的內存裏自己過濾的，因此沒有體現在Profile的結果裏。

HAVING ipo_count > 100 AND max_last_sale <= 10000

SQL

$ cat << EOF | ./es_query.py http://127.0.0.1:9200
    SELECT ipo_year, COUNT(*) AS ipo_count, MAX(last_sale) AS max_last_sale FROM symbol 
    GROUP BY ipo_year HAVING ipo_count > 100 AND max_last_sale <= 10000
EOF

{"max_last_sale": 6178.0, "ipo_count": 390, "ipo_year": 2014}

Elasticsearch

{
  "aggs": {
    "ipo_year": {
      "terms": {
        "field": "ipo_year", 
        "size": 0
      }, 
      "aggs": {
        "max_last_sale": {
          "max": {
            "field": "last_sale"
          }
        }, 
        "having": {
          "bucket_selector": {
            "buckets_path": {
              "max_last_sale": "max_last_sale", 
              "ipo_count": "_count"
            }, 
            "script": {
              "lang": "expression", 
              "inline": " ipo_count > 100 && max_last_sale <= 10000"
            }
          }
        }
      }
    }
  }, 
  "size": 0
}

{
  "hits": {
    "hits": [], 
    "total": 6714, 
    "max_score": 0.0
  }, 
  "_shards": {
    "successful": 1, 
    "failed": 0, 
    "total": 1
  }, 
  "took": 3, 
  "aggregations": {
    "ipo_year": {
      "buckets": [
        {
          "max_last_sale": {
            "value": 6178.0
          }, 
          "key": 2014, 
          "doc_count": 390
        }
      ], 
      "sum_other_doc_count": 0, 
      "doc_count_error_upper_bound": 0
    }
  }, 
  "timed_out": false
}

Profile

[
  {
    "query": [
      {
        "query_type": "MatchAllDocsQuery",
        "lucene": "*:*",
        "time": "0.2386400000ms",
        "breakdown": {
          "score": 0,
          "create_weight": 7620,
          "next_doc": 204982,
          "match": 0,
          "build_scorer": 26038,
          "advance": 0
        }
      }
    ],
    "rewrite_time": 2379,
    "collector": [
      {
        "name": "MultiCollector",
        "reason": "search_multi",
        "time": "1.955767000ms",
        "children": [
          {
            "name": "TotalHitCountCollector",
            "reason": "search_count",
            "time": "0.2170820000ms"
          },
          {
            "name": "LongTermsAggregator: [ipo_year]",
            "reason": "aggregation",
            "time": "0.9893530000ms"
          }
        ]
      }
    ]
  }
]