[root@ ~]# tar xf jdk-8u71-linux-x64.tar.gz
[root@ ~]# mv jdk1.8.0_71 /usr/local/java_1.8.0
[root@ ~]# ln -s /usr/local/java_1.8.0/bin/java /usr/bin/java
[root@ ~]# vim /etc/profile
# append the following lines:
export JAVA_HOME=/usr/local/java_1.8.0
export CLASSPATH=$CLASSPATH:$JAVA_HOME/lib:$JAVA_HOME/jre/lib
export PATH=$PATH:$JAVA_HOME/bin
[root@ ~]# source /etc/profile
直接使用rpm包安裝:logstash-5.1.1.rpm
[root@ ~]# rpm -ivh logstash-5.1.1.rpm
logstash安裝後的所在目錄爲:/usr/share/logstash/,這裏爲了方便統一的管理,作規範的配置
# Unify the config directory: /etc/logstash already contains the config files
# after installing, yet logstash looks under /usr/share/logstash/config at
# startup — link the two so there is a single source of truth.
[root@ ~]# ln -s /etc/logstash /usr/share/logstash/config
[root@ ~]# ln -s /usr/share/logstash/bin/* /usr/local/bin/
# Tune JVM heap usage
[root@ ~]# vim /etc/logstash/jvm.options
-Xms128m
-Xmx256m
# Basic logstash settings
[root@ ~]# vim /etc/logstash/logstash.yml
pipeline:
  workers: 4
  batch:
    size: 125
    delay: 5
path.config: /etc/logstash/conf.d
path.logs: /data/logs/logstash
http.port: 9600
http.host: "192.168.31.140"
作個簡單的測試
[root@ ~]# logstash -e 'input{stdin{}}output{stdout{codec=>rubydebug}}' Sending Logstash's logs to /data/logs/logstash which is now configured via log4j2.properties The stdin plugin is now waiting for input: hello world { "@timestamp" => 2017-06-21T06:55:00.471Z, "@version" => "1", "host" => "baseos-1", "message" => "hello world", "tags" => [] }
以上的例子從標準輸入中獲取數據,再從標準輸出中輸出。
在輸出的結果中,"@timestamp"標記事件發生的時間點,"host"標記事件發生的主機,"tags"標記事件的某個方面的屬性(這是一個數組,能夠有多個屬性),"type"標記事件的惟一類型。
Es 統一採用UTC時間存成長整型數據,會比北京時間晚八個小時。
數組能夠是單個或者多個字符串值。
path => [ "/var/log/messages", "/var/log/*.log" ] path => "/data/mysql/mysql.log"
若是指定了屢次,追加數組。此實例path數組包含三個字符串元素。
布爾值必須是true或者false。true和false不能有引號。
ssl_enable => true
指定字節單位。支持的單位有SI (k M G T P E Z Y) 和 Binary (Ki Mi Gi Ti Pi Ei Zi Yi)。Binary單位基於1024,SI單位基於1000。不區分大小寫和忽略值與單位之間的空格。若是沒有指定單位,默認是byte。
my_bytes => "1113" # 1113 bytes my_bytes => "10MiB" # 10485760 bytes my_bytes => "100kib" # 102400 bytes my_bytes => "180 mb" # 180000000 bytes
logstash編碼名稱用來表示數據編碼。用於input和output段。便於數據的處理。若是input和output使用合適的編碼,就無需單獨的filter對數據進行處理。
codec => "json"
鍵值對,注意多個鍵值對用空格分隔,而不是逗號。
match => { "field1" => "value1" "field2" => "value2" ... }
必須是有效的數值,浮點數或者整數。
port => 33
一個單獨的字符串。
my_password => "password"
一個表明有效的操做系統路徑。
my_path => "/tmp/logstash"
name => "Hello world" name => 'It\'s a beautiful day'
Logstash必需要有input和output。
這裏只介紹 stdin、file 和beats 插件
一個標準輸入配置實例:
input {
  stdin {
    add_field => {"key" => "value"}   # attach a static field to every event
    codec => "plain"
    tags => ["add"]
    type => "stdin"
  }
}
輸出結果:
The stdin plugin is now waiting for input: hello { "@timestamp" => 2017-06-21T07:34:57.899Z, "@version" => "1", "host" => "baseos-1", "message" => "hello", "type" => "stdin", "key" => "value", "tags" => [ [0] "add" ] }
參數:
文件讀取插件主要用來抓取文件的變化信息,將變化信息封裝成Event進程處理或者傳遞。
Logstash使用一個叫FileWatch的 Ruby Gem 庫來監聽文件變化。這個庫支持glob展開文件路徑(只支持絕對路徑,不會自動遞歸目錄),並且會記錄一個隱藏的數據文件來跟蹤被監聽的日誌文件的當前讀取位置(默認這個文件叫.sincedb)。該數據庫文件記錄了每一個被監聽文件的inode、major number、minor number 和 pos。
一個讀取文件配置實例:
input {
  file {
    # Watch every nginx log, but skip the pid file and the error log.
    path => ["/data/logs/nginx/*.log"]
    exclude => ["/data/logs/nginx/nginx.pid","/data/logs/nginx/error.log"]
    discover_interval => 5
    # Track read offsets in a dedicated sincedb file.
    sincedb_path => "/data/database/logstash/.sincedb_nginx_log"
    sincedb_write_interval => 10
    type => "nginx_log"
    start_position => "beginning"
  }
}
參數:
詳細參考:https://www.elastic.co/guide/en/logstash/5.0/plugins-inputs-file.html
[root@ ~]# vim /etc/logstash/conf.d/nginx_access.conf input { file { path => ["/data/logs/nginx/*.log"] exclude => ["/data/logs/nginx/nginx.pid","/data/logs/nginx/error.log"] discover_interval => 5 sincedb_path => "/data/database/logstash/.sincedb_nginx_log" sincedb_write_interval => 10 type => "nginx_log" start_position => "beginning" } } output { stdout { codec => rubydebug } }
測試效果:
[root@baseos-1_192.168.31.140 ~]# logstash -f /etc/logstash/conf.d/nginx_access.conf Sending Logstash's logs to /data/logs/logstash which is now configured via log4j2.properties { "path" => "/data/logs/nginx/logstash.wangshenjin.com_access.log", "@timestamp" => 2017-06-21T07:10:36.270Z, "@version" => "1", "host" => "baseos-1", "message" => "192.168.31.140 - - [21/Jun/2017:15:10:36 +0800] \"GET / HTTP/1.1\" 200 33 \"-\" \"curl/7.19.7 (x86_64-redhat-linux-gnu) libcurl/7.19.7 NSS/3.27.1 zlib/1.2.3 libidn/1.18 libssh2/1.4.2\" \"-\"", "type" => "nginx_log", "tags" => [] }
Beats插件用於創建監聽服務,接收Filebeat或者其餘beat發送的Events
filebeat 是基於原先 logstash-forwarder 的源碼改造出來的。換句話說:filebeat 就是新版的 logstash-forwarder,也會是 Elastic Stack 在 shipper 端的第一選擇。
filebeat 配置:
filebeat:
  prospectors:
    - paths:
        - /usr/local/nginx/logs/*.com.log
      input_type: log
      document_type: nginx-access
      tail_files: true
output:
  logstash:
    hosts: ["192.168.31.140:5044"]
shipper:
  tags: ["nginx_log"]
logstash 配置:
input {
  beats {
    # Listen for events shipped by Filebeat.
    port => 5044
  }
}
output {
  stdout { codec => rubydebug }
}
效果:
[root@ conf.d]# logstash -f filebeat.conf Sending Logstash's logs to /data/logs/logstash which is now configured via log4j2.properties { "@timestamp" => 2017-06-21T10:49:12.596Z, "offset" => 605, "@version" => "1", "input_type" => "log", "beat" => { "hostname" => "salt-master", "name" => "salt-master", "version" => "5.1.1" }, "host" => "salt-master", "source" => "/usr/local/nginx/logs/access.log", "message" => "192.168.31.1 - - [24/Feb/2017:17:00:59 +0800] \"GET / HTTP/1.1\" 301 184 \"-\" \"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36\"", "type" => "nginx-access", "tags" => [ [0] "beats_input_codec_plain_applied" ] }
參數:
詳細參考:https://www.elastic.co/guide/en/logstash/5.0/plugins-inputs-beats.html
這裏採用 easyrsa 生成logstash證書。
若是logstash使用IP訪問,證書裏的subjectAltName字段須要包含logstash的IP。easyrsa簽署證書時,能夠採用--subject-alt-name來添加:
./easyrsa gen-req logstash_server nopass                                      # generate the certificate signing request
./easyrsa --subject-alt-name="IP:192.168.31.140" sign server logstash_server  # sign the certificate
Logstash使用grok模塊對任意文本解析並結構化輸出,Logstash默認帶有120種匹配模式。
grok的語法格式爲 %{SYNTAX:SEMANTIC},前面是grok-patterns中定義的變量,後面能夠自定義變量的名稱,若是有雙引號""或者中括號[],須要加 \ 進行轉義。
SYNTAX 是要匹配的模式,例如3.14匹配 NUMBER 模式,127.0.0.1 匹配 IP 模式。
SEMANTIC 是匹配到的文本片斷的標識,例如 「3.14」 能夠是一個時間的持續時間,因此能夠簡單地叫作"duration" ,字符串"55.3.244.1"能夠被標識爲「client」。
因此,grok過濾器表達式能夠寫成: %{NUMBER:duration} %{IP:client}
默認狀況下,全部的SEMANTIC是以字符串的方式保存,若是想要轉換一個SEMANTIC的數據類型,例如轉換一個字符串爲整形,能夠寫成以下的方式: %{NUMBER:num:int}
一個例子,示例日誌以下所示:
55.3.244.1 GET /index.html 15824 0.043
filter 配置以下所示:
filter {
  grok {
    # Split the sample line into client / method / request / bytes / duration.
    match => { "message" => "%{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}" }
  }
}
效果以下所示:
55.3.244.1 GET /index.html 15824 0.043 { "duration" => "0.043", "request" => "/index.html", "@timestamp" => 2017-06-22T01:49:27.773Z, "method" => "GET", "bytes" => "15824", "@version" => "1", "host" => "baseos-1", "client" => "55.3.244.1", "message" => "55.3.244.1 GET /index.html 15824 0.043", "tags" => [] }
nginx日誌格式:
'$remote_addr - $remote_user [$time_local] "$request" ' '$status $body_bytes_sent "$http_referer" ' '"$http_user_agent" "$http_x_forwarded_for"';
filter 配置以下所示:
filter {
  if [type] == "nginx-access-log" {
    grok {
      # NOTE: the original pattern had "%{INT :body_bytes_sent}" — the space
      # inside the %{...} reference is invalid grok syntax and makes the
      # pattern fail to compile; it must be "%{INT:body_bytes_sent}".
      match => { "message" => "%{IPORHOST:remote_addr} - %{USERNAME:remote_user} \[%{HTTPDATE:[@metadata][timestamp]}\] \"%{DATA:request}\" %{INT:status} %{INT:body_bytes_sent} \"%{DATA:http_referer}\" \"%{DATA:http_user_agent}\" \"%{USERNAME:http_x_forwarded_for}\"" }
    }
  }
}
效果以下所示:
Sending Logstash's logs to /data/logs/logstash which is now configured via log4j2.properties { "remote_addr" => "192.168.31.130", "request" => "GET / HTTP/1.1", "body_bytes_sent" => "33", "message" => "192.168.31.130 - - [22/Jun/2017:13:50:33 +0800] \"GET / HTTP/1.1\" 200 33 \"-\" \"curl/7.19.7 (x86_64-redhat-linux-gnu) libcurl/7.19.7 NSS/3.14.0.0 zlib/1.2.3 libidn/1.18 libssh2/1.4.2\" \"-\"", "type" => "nginx-access-log", "tags" => [], "http_user_agent" => "curl/7.19.7 (x86_64-redhat-linux-gnu) libcurl/7.19.7 NSS/3.14.0.0 zlib/1.2.3 libidn/1.18 libssh2/1.4.2", "remote_user" => "-", "path" => "/data/logs/nginx/logstash.wangshenjin.com_access.log", "@timestamp" => 2017-06-22T05:50:34.860Z, "http_referer" => "-", "@version" => "1", "host" => "baseos-1", "http_x_forwarded_for" => "-", "status" => "200" }
message是每段讀進來的日誌,IPORHOST、USERNAME、HTTPDATE等都是patterns/grok-patterns中定義好的正則格式名稱,對照日誌進行編寫。
logstash 默認自帶120種正則格式,參考:https://github.com/logstash-plugins/logstash-patterns-core/tree/master/patterns
不少時候,日誌格式都是自定義的,這時候咱們須要根據實際狀況自定義正則。
這裏以 remote_addr、request爲例子,定義三個正則:
IPADDR [0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3} REQUESTPATH (?:/[\\A-Za-z0-9$.+!*'(){},~:;=@#% \[\]_<>^\-&?]*)+ REQUESTPRO ([^"]*)
使用上面的自定義正則:
IPADDR [0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3} REQUESTPRO ([^"]*) REQUESTPATH (?:/[\\A-Za-z0-9$.+!*'(){},~:;=@#% \[\]_<>^\-&?]*)+ NGXACCESSLOG %{IPADDR:client_ip} - (%{USERNAME:user}|-) \[%{HTTPDATE:log_timestamp}\] \"%{WORD:request_mathod} %{REQUESTPATH:request_path} %{REQUESTPRO:request_protocol}\" %{NUMBER:http_status} %{NUMBER:body_bytes_sent} (%{GREEDYDATA:http_referer}|-) \"%{DATA:http_user_agent}\" \"%{USERNAME:http_x_forwarded_for}\"
logstash 的配置:
filter {
  grok {
    # Load the custom NGXACCESSLOG pattern from this path.
    patterns_dir => "/etc/logstash/conf.d/patterns/mypattern"
    match => { "message" => "%{NGXACCESSLOG}" }
  }
}
output {
  stdout { codec => rubydebug }
}
效果以下所示:
{ "log_timestamp" => "11/Oct/2017:19:32:22 +0800", "body_bytes_sent" => "13", "message" => "192.168.31.1 - - [11/Oct/2017:19:32:22 +0800] \"GET /test/t/ HTTP/1.1\" 200 13 \"-\" \"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36\" \"-\"", "type" => "logstash", "request_mathod" => "GET", "http_user_agent" => "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36", "path" => "/data/logs/nginx/logstash.wangshenjin.com_access.log", "@timestamp" => 2017-10-30T04:08:08.412Z, "http_referer" => "\"-\"", "@version" => "1", "host" => "baseos-1", "http_x_forwarded_for" => "-", "request_path" => "/test/t/", "client_ip" => "192.168.31.1", "http_status" => "200", "user" => "-", "request_protocol" => "HTTP/1.1" }
正則調試:https://grokdebug.herokuapp.com
# 新建一個目錄,統一存放自定義grok正則 [root@ conf.d]# mkdir patterns [root@ conf.d]# vim patterns/nginx_access NGINXACCESS %{IPORHOST:remote_addr} - %{USERNAME:remote_user} \[%{HTTPDATE:log_timestamp}\] \"%{DATA:request}\" %{INT:status} %{INT:body_bytes_sent} \"%{DATA:http_referer}\" \"%{DATA:http_user_agent}\" \"%{USERNAME:http_x_forwarded_for}\" [root@ conf.d]# vim nginx_access.conf **** filter { if [type] == "nginx-access-log" { grok { patterns_dir => "/etc/logstash/conf.d/patterns" //設置自定義正則路徑 match => { "message" => "%{NGINXACCESS}" } } } } ****
格式化日期
date {
  # log_timestamp is captured by %{HTTPDATE} (e.g. "22/Jun/2017:13:50:33 +0800"),
  # so the match pattern must be "dd/MMM/yyyy:HH:mm:ss Z" — the original
  # "YYYY-MM-dd HH:mm:ss" can never parse that value and the date filter
  # would tag every event with _dateparsefailure.
  match => [ "log_timestamp", "dd/MMM/yyyy:HH:mm:ss Z" ]
}
藉助GeoIP數據庫來實現顯示請求來源的地理位置,GeoIP 庫能夠根據 IP 地址提供對應的地域信息,包括國別,省市,經緯度等。
獲取GeoIP數據庫
wget http://geolite.maxmind.com/download/geoip/database/GeoLite2-City.mmdb.gz gzip -d GeoLite2-City.mmdb.gz
配置:
filter { *** geoip { source => "client_ip" fields => ["city_name" , "country_name" ,"continent_code" , "continent_name" ] database => "/etc/logstash/GeoLite2-City.mmdb" } }
output配置以下:
output {
  redis {
    host => "127.0.0.1"
    port => 6000
    password => "8a6715"
    data_type => "channel"             # publish events via Redis PUB/SUB
    key => "logstash-%{+yyyy.MM.dd}"   # one channel per day
  }
}
redis效果:
127.0.0.1:6000> subscribe logstash-2017.06.22 Reading messages... (press Ctrl-C to quit) 1) "subscribe" 2) "logstash-2017.06.22" 3) (integer) 1 1) "message" 2) "logstash-2017.06.22" 3) "{\"remote_addr\":\"192.168.31.130\",\"request\":\"GET / HTTP/1.1\",\"body_bytes_sent\":\"33\",\"message\":\"192.168.31.130 - - [22/Jun/2017:15:51:22 +0800] \\\"GET / HTTP/1.1\\\" 200 33 \\\"-\\\" \\\"curl/7.19.7 (x86_64-redhat-linux-gnu) libcurl/7.19.7 NSS/3.14.0.0 zlib/1.2.3 libidn/1.18 libssh2/1.4.2\\\" \\\"-\\\"\",\"type\":\"nginx-access-log\",\"tags\":[],\"http_user_agent\":\"curl/7.19.7 (x86_64-redhat-linux-gnu) libcurl/7.19.7 NSS/3.14.0.0 zlib/1.2.3 libidn/1.18 libssh2/1.4.2\",\"remote_user\":\"-\",\"path\":\"/data/logs/nginx/logstash.wangshenjin.com_access.log\",\"@timestamp\":\"2017-06-22T07:51:23.484Z\",\"http_referer\":\"-\",\"@version\":\"1\",\"host\":\"baseos-1\",\"http_x_forwarded_for\":\"-\",\"status\":\"200\"}"
參數:
output配置以下:
output {
  elasticsearch {
    hosts => ["192.168.1.147:9200","192.168.1.151:9200"]
    index => "nginx-access-log-%{+YYYY.MM.dd}"   # daily index pattern
  }
}
參數:
timeout:Set the timeout for network operations and requests sent Elasticsearch. If a timeout occurs,the request will be retried.
詳細參考:https://www.elastic.co/guide/en/logstash/5.0/plugins-outputs-elasticsearch.html