filebeat: 6.2.3
mysql: 5.6.38
{ "_index": "mysql-slow-2018.04.17", "_type": "doc", "_id": "AWLRiDqYhjFMCbqrK5ez", "_version": 1, "_score": null, "_source": { "@timestamp": "2018-04-17T02:56:22.823Z", "offset": 100619865, "beat": { "hostname": "test-db1", "name": "test-db1", "version": "6.2.3" }, "prospector": { "type": "log" }, "source": "/var/log/mysql_3306/mysql-slow.log", "fileset": { "module": "mysql", "name": "slowlog" }, "message": "# User@Host: test_db[test_table] @ [10.10.10.10] Id: 1874266\n# Query_time: 2.088465 Lock_time: 0.000086 Rows_sent: 67 Rows_examined: 18862\nSET timestamp=1523933781;\nselect id, dct, mh, topcolor, bit_count(dct^1144174128272565460) as dist from image_feature where topcolor=\"278522176103c518c774fe2a73b20569\" and created_at<\"2018-04-17 10:54:16\" and id not in (120251270,120251181,120251202,120251209,120251221,120251229,120251240,120251252,120251259,120251270,120251278) having dist<=20 order by dist;", "error": { "message": "Provided Grok expressions do not match field value: [# User@Host: test_db[test_table] @ [10.10.10.10] Id: 1874266\\n# Query_time: 2.088465 Lock_time: 0.000086 Rows_sent: 67 Rows_examined: 18862\\nSET timestamp=1523933781;\\nselect id, dct, mh, topcolor, bit_count(dct^1144174128272565460) as dist from image_feature where topcolor=\\\"278522176103c518c774fe2a73b20569\\\" and created_at<\\\"2018-04-17 10:54:16\\\" and id not in (120251270,120251181,120251202,120251209,120251221,120251229,120251240,120251252,120251259,120251270,120251278) having dist<=20 order by dist;]" } }, "fields": { "@timestamp": [ 1523933782823 ] }, "highlight": { "beat.name": [ "@kibana-highlighted-field@test-db1@/kibana-highlighted-field@" ] }, "sort": [ 1523933782823 ] }
{ "_index": "mysql-slow-2018.04.17", "_type": "doc", "_id": "AWLRb2nl6-SuKroP98i-", "_version": 1, "_score": null, "_source": { "@timestamp": "2018-04-17T02:29:21.535Z", "offset": 100614853, "beat": { "hostname": "test-db1", "name": "test-db1", "version": "6.2.3" }, "prospector": { "type": "log" }, "source": "/var/log/mysql_3306/mysql-slow.log", "message": "# Time: 180417 10:29:18", "fileset": { "module": "mysql", "name": "slowlog" }, "error": { "message": "Provided Grok expressions do not match field value: [# Time: 180417 10:29:18]" } }, "fields": { "@timestamp": [ 1523932161535 ] }, "highlight": { "error.message": [ "Provided Grok expressions do not match field value: [# @kibana-highlighted-field@Time@/kibana-highlighted-field@: 180417 10:29:18]" ] }, "sort": [ 1523932161535 ] }
All of the information above can be found by querying in Kibana's Discover view.
From the JSON above we can clearly extract two pieces of information: the multi-line slow query entry itself fails the module's grok pattern, and the standalone `# Time:` header line fails it as well. Hence two changes:
Before:

```yaml
exclude_lines: ['^[\/\w\.]+, Version: .* started with:.*'] # Exclude the header
```

After:

```yaml
exclude_lines: ['^[\/\w\.]+, Version: .* started with:.*','^# Time.*'] # Exclude the header
```
Modify module/mysql/slowlog/ingest/pipeline.json:
修改以前: "patterns":[ "^# User@Host: %{USER:mysql.slowlog.user}(\\[[^\\]]+\\])? @ %{HOSTNAME:mysql.slowlog.host} \\[(%{IP:mysql.slowlog.ip})?\\](\\s*Id:\\s* %{NUMBER:mysql.slowlog.id})?\n# Query_time: %{NUMBER:mysql.slowlog.query_time.sec}\\s* Lock_time: %{NUMBER:mysql.slowlog.lock_time.sec}\\s* Rows_sent: %{NUMBER:mysql.slowlog.rows_sent}\\s* Rows_examined: %{NUMBER:mysql.slowlog.rows_examined}\n(SET timestamp=%{NUMBER:mysql.slowlog.timestamp};\n)?%{GREEDYMULTILINE:mysql.slowlog.query}" ] 修改以後: "patterns":[ "^# User@Host: %{USER:mysql.slowlog.user}(\\[[^\\]]+\\])? @ %{HOSTNAME:mysql.slowlog.host} \\[(IP:mysql.slowlog.ip)?\\](\\s*Id:\\s* %{NUMBER:mysql.slowlog.id})?\n# Query_time: %{NUMBER:mysql.slowlog.query_time.sec}\\s* Lock_time: %{NUMBER:mysql.slowlog.lock_time.sec}\\s* Rows_sent: %{NUMBER:mysql.slowlog.rows_sent}\\s* Rows_examined: %{NUMBER:mysql.slowlog.rows_examined}\n(SET timestamp=%{NUMBER:mysql.slowlog.timestamp};\n)?%{GREEDYMULTILINE:mysql.slowlog.query}" ],
```sh
wget https://artifacts.elastic.co/downloads/beats/filebeat/filebeat-6.2.3-x86_64.rpm
sudo yum install filebeat-6.2.3-x86_64.rpm
```
```yaml
setup.template.settings:
  index.number_of_shards: 5
output.elasticsearch.hosts: ["localhost:9200"]
output.elasticsearch.index: "mysql-slow-%{+yyyy.MM.dd}"
setup.template.name: "mysql-slow"
setup.template.pattern: "mysql-slow*"
```
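Once filebeat is later started with this configuration it registers the `mysql-slow` index template itself. A quick way to confirm that (again assuming elasticsearch on localhost:9200):

```sh
curl -s 'localhost:9200/_template/mysql-slow?pretty'
```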
4. Enable the MySQL module
```sh
filebeat modules list
filebeat modules enable mysql
filebeat modules list
```
5. Set the MySQL slow log path in modules.d/mysql.yml
var.paths: ["/var/log/mysql_3306/mysql-slow.log"]
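For reference, `var.paths` sits under the `slowlog` fileset; a minimal modules.d/mysql.yml for this setup (assuming the `error` fileset is not wanted here) looks roughly like:

```yaml
- module: mysql
  error:
    enabled: false
  slowlog:
    enabled: true
    var.paths: ["/var/log/mysql_3306/mysql-slow.log"]
```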
Bug report filed:
https://github.com/elastic/beats/issues/6882
Although the filebeat 6.2.3 author notes the MySQL module was tested against MySQL 5.5 through MySQL 5.7, when I ran it against MySQL 5.7.19 slow logs, filebeat was still unable to parse the entries, because the slow log format in 5.7.19 has changed relative to MySQL 5.6. module/mysql/slowlog/ingest/pipeline.json was updated for this, but the odd part is that while running slow query tests, some of the entries showing up in Kibana carried the `# Time` line and some did not.
MySQL 5.6.38 slow log format:

```
# Time: 180308 16:01:16
# User@Host: test[zabbix] @ [10.10.10.10] Id: 934
# Query_time: 1.760919 Lock_time: 0.000052 Rows_sent: 2 Rows_examined: 5141130
SET timestamp=1520496076;
select passed, count(*) count from inscount where user='gu1417' and created_at>='2018-03-08' and team=1 group by passed;
```
MySQL 5.7.19 slow log format:

```
# Time: 2018-04-17T17:59:40.019149+08:00
# User@Host: root[root] @ localhost [] Id: 576311
# Query_time: 1.726471 Lock_time: 0.000111 Rows_sent: 0 Rows_examined: 896862
SET timestamp=1523959180;
SELECT source, reason, COUNT(id) AS report_count FROM report WHERE type = 'circle_article' AND deleted_at IS NULL GROUP BY source, reason HAVING report_count > 9 ORDER BY report_count DESC;
```
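To reproduce slow log entries on demand while testing, an easy trick (assuming the slow query log is already enabled on the server) is to run a query that is guaranteed to exceed `long_query_time`:

```sql
SET SESSION long_query_time = 1;  -- log anything in this session slower than 1s
SELECT SLEEP(2);                  -- sleeps for 2s, so it always lands in the slow log
```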
The odd part is this: when I execute slow SQL very frequently, the filebeat log clearly shows the shipped message carrying the "# Time..." line, but when slow SQL is executed only occasionally, the shipped message contains no "# Time...".
~~After watching the filebeat log for a while and combining that with what I saw in the source code, my personal inclination is that filebeat probes the slow log's inode at intervals and only then processes and ships the changes to elasticsearch. If only a single slow log entry appears within a probe interval, filebeat parses it correctly, but with multiple entries the parsing breaks. I do not know how to design a concrete test to verify this; if anyone reading this has an approach, I would be glad to discuss it.~~
In the experiments above I had overlooked one important point while testing slow SQL: I always executed the slow SQL locally, never on a remote_server. Once I tested slow SQL from a remote server and watched the slow log, a new situation appeared:
Slow SQL executed locally:
```
# Time: 180418 17:29:36
# User@Host: root[root] @ localhost [] Id: 165675344
# Query_time: 2.012857 Lock_time: 0.000039 Rows_sent: 1 Rows_examined: 7138643
SET timestamp=1524043776;
SELECT count(id) as total FROM user WHERE deleted_at=0;
```
Slow SQL executed on the remote_server:
```
# Time: 180418 17:33:26
# User@Host: remix[remix] @ [10.10.10.10] Id: 165674369
# Query_time: 5.317408 Lock_time: 0.000118 Rows_sent: 653 Rows_examined: 3569801
use remix_liveroom;
SET timestamp=1524044006;
SELECT count(id) as total FROM user WHERE deleted_at=0;
```
The slow logs above are both from MySQL 5.6. You can see very clearly that the two differ in the `# User@Host` line, so the patterns in filebeat's pipeline.json would still need to be changed.
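The key difference: locally the entry reads `root[root] @ localhost []` (hostname present, brackets empty), remotely it reads `remix[remix] @ [10.10.10.10]` (no hostname, IP inside the brackets). Any single pattern therefore has to make both the hostname and the IP optional, which is exactly what this fragment of the logstash grok shown further below does:

```
(?:(?<query_host>\S*) )?\[(?:%{IP:query_ip})?\]
```

The outer `(?: ... )?` makes the hostname group optional, and the inner `(?:%{IP:query_ip})?` allows the brackets to be empty.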
In the end, after many attempts, I still could not solve the problem. Since I had not yet found a UI that can validate filebeat's flavor of grok online, and filebeat only throws a grok-mismatch error at the moment it pushes the message to elasticsearch, I was stuck endlessly debugging grok syntax. So my colleagues and I agreed to route all logs collected by filebeat into logstash and do the parsing and aggregation there.
Configuration of filebeat.yml:
```yaml
filebeat.prospectors:
- type: log
  enabled: true
  paths:
    - /var/log/mysql/mysql-slow.log
  multiline.pattern: "^# User@Host:"
  multiline.negate: true
  multiline.match: after

output.logstash:
  hosts: ["10.10.10.10:5044"]

logging.level: debug
logging.to_files: true
logging.files:
  path: /var/log/filebeat
  name: filebeat
  keepfiles: 1
  permissions: 0644
```
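With `negate: true` and `match: after`, every line that does not match `^# User@Host:` is appended to the preceding matching line, so one event spans from the User@Host header to the end of the SQL statement. To watch the multiline assembly while tuning these settings, filebeat can be run in the foreground with the publish debug selector (adjust the binary path to your install):

```sh
filebeat -e -c /etc/filebeat/filebeat.yml -d "publish"
```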
Configuration of logstash:
```
input {
  beats {
    port => 5044
  }
  #tcp {
  #  port => 1928
  #}
}

filter {
  # Parse the multiline slow query event assembled by filebeat.
  grok {
    match => [
      "message",
      "(?m)^# User@Host: %{USER:query_user}\[[^\]]+\] @ (?:(?<query_host>\S*) )?\[(?:%{IP:query_ip})?\](?:\s*Id: %{NUMBER:id:int})?\s+# Query_time: %{NUMBER:query_time:float}\s+Lock_time: %{NUMBER:lock_time:float}\s+Rows_sent: %{NUMBER:rows_sent:int}\s+Rows_examined: %{NUMBER:rows_examined:int}\s*(?:use %{DATA:database};\s*)?SET timestamp=%{NUMBER:timestamp};\s*(?<query>(?<action>\w+)\s+.*)"
    ]
  }
  # Events containing a "# Time: " header are tagged and dropped;
  # the event time is taken from "SET timestamp=...;" instead.
  grok {
    match => { "message" => "# Time: " }
    add_tag => [ "drop" ]
    tag_on_failure => []
  }
  if "drop" in [tags] {
    drop {}
  }
  # The grok above captures the epoch seconds into the "timestamp" field.
  date {
    match => [ "timestamp", "UNIX" ]
    target => "@timestamp"
    timezone => "Asia/Chongqing"
  }
  # Build a daily suffix for the index name from the event time.
  ruby {
    code => "event.set('[@metadata][today]', Time.at(event.get('@timestamp').to_i).localtime.strftime('%Y.%m.%d'))"
  }
  mutate {
    remove_field => [ "message" ]
  }
}

output {
  #stdout { codec => rubydebug }
  elasticsearch {
    hosts => ["10.10.10.10:9200"]
    index => "mysql-slow-%{[@metadata][today]}"
    document_type => "mysql-slow"
    template_overwrite => true
  }
}
```
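Before restarting logstash it is worth syntax-checking the pipeline; the config path below is an assumption, adjust it to wherever the file actually lives:

```sh
/usr/share/logstash/bin/logstash -f /etc/logstash/conf.d/mysql-slow.conf --config.test_and_exit
```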
I first aggregate the slow log into multiline events locally and only then ship them to logstash, where the second round of parsing and aggregation happens. The grok pattern in the configuration above matches the slow log formats of MySQL 5.1, 5.6, and 5.7.
Last week I needed to bring the MySQL slow logs collected by filebeat into our ELK platform. The plan was to use filebeat as the collecting client, send the data to logstash for parsing and aggregation, and then push the aggregated data on to elasticsearch. For this I created a new logstash node, and its configuration set the parameter template_overwrite => true. As a result, when the node started it created a default template, logstash-* (an index is matched against templates by name pattern, e.g. an index mysql-slow-* matches a template mysql-*). The qba logs were supposed to match the template named logstash-qba-*, but because qba's index names are logstash-qba-*, they were matched by logstash-* first, and the field mappings of the default logstash-* template could not be reconciled with the qba log fields, so the logs failed to be written.

When the qba logs cannot be written to ELK, they are retried until they are written successfully; during that time the data piles up in queues and is persisted to disk. And because we run an elastic cluster, incoming logs are routed first and only matched against templates at write time, so the load and memory of the whole cluster went into alert.
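When debugging this kind of template collision, it helps to list every registered template and see which index patterns overlap, then inspect the offending default template (names and host below assume the stock logstash setup):

```sh
# List all templates and their index patterns (key name varies by ES version)
curl -s 'localhost:9200/_template?pretty' | grep -E '"(template|index_patterns)"'

# Inspect the default template that logstash created
curl -s 'localhost:9200/_template/logstash?pretty'
```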