maven引用
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>transport</artifactId>
<version>6.6.0</version>
</dependency>
複製代碼
maven引用
<dependency>
<groupId>io.searchbox</groupId>
<artifactId>jest</artifactId>
<version>5.3.3</version>
</dependency>
複製代碼
composer引用
{
"require": {
"elasticsearch/elasticsearch": "~6.0"
}
}
複製代碼
curl -XPUT 'localhost:9200/_index'
複製代碼
curl -XPUT 'localhost:9200/_index/_mapping/_type?pretty'
{
"_type": {
"properties": {
"field1": {
"type": "text"
},
"field2": {
"type": "text"
},
"field3": {
"type": "text"
},
"field4": {
"type": "long"
}
}
}
}
複製代碼
curl -XDELETE 'localhost:9200/_index'
複製代碼
curl -XDELETE 'localhost:9200/_index1,_index2' 或 curl -XDELETE 'localhost:9200/_index*'
複製代碼
curl -XPOST 'localhost:9200/_index/_type{/_id}'
{
"field1": "XXXXXXXX",
"field2": "XXXXXXXX",
"field3": "XXXXXXXX",
"field4": "1529396883"
}
複製代碼
curl -XPUT 'localhost:9200/_index/_type/_id'
{
"field1": "XXXXXXXX",
"field2": "XXXXXXXX",
"field3": "XXXXXXXX",
"field4": "1529396883"
}
複製代碼
curl -XDELETE 'localhost:9200/_index/_type/_id'
複製代碼
curl -XGET 'localhost:9200/_index/_type/_id'
複製代碼
{
"_index" : "_index",
"_type" : "_type",
"_id" : "_id",
"_version" : 1,
"found" : true,
"_source" : {
"field1": "XXXXXXXX",
"field2": "XXXXXXXX",
"field3": "XXXXXXXX",
"field4": "1529396883"
}
}
複製代碼
curl -XGET 'localhost:9200/_index/_type/_id?_source=field1,field2'
複製代碼
{
"_index" : "_index",
"_type" : "_type",
"_id" : "_id",
"_version" : 1,
"found" : true,
"_source" : {
"field1": "XXXXXXXX",
"field2": "XXXXXXXX"
}
}
複製代碼
curl -XGET 'localhost:9200/_index/_type/_id/_source'
複製代碼
{
"field1": "XXXXXXXX",
"field2": "XXXXXXXX",
"field3": "XXXXXXXX",
"field4": "1529396883"
}
複製代碼
curl -XPOST 'localhost:9200/_mget'
{
"docs": [
{
"_index": "_index",
"_type": "_type",
"_id": "_id"
},
{
"_index": "_index",
"_type": "_type",
"_id": "_id"
}
]
}
複製代碼
curl -XPOST 'localhost:9200/_bulk?pretty' -H 'Content-Type: application/x-ndjson' -d'
{ "delete": { "_index": "website", "_type": "blog", "_id": "123" }}
{ "create": { "_index": "website", "_type": "blog", "_id": "123" }}
{ "title": "My first blog post" }
{ "index": { "_index": "website", "_type": "blog" }}
{ "title": "My second blog post" }
{ "update": { "_index": "website", "_type": "blog", "_id": "123", "retry_on_conflict" : 3} }
{ "doc" : {"title" : "My updated blog post"} }
'
複製代碼
{
"took": 4,
"errors": false,
"items": [
{ "create": {
"_index": "website",
"_type": "blog",
"_id": "123",
"_version": 3,
"status": 201
}}
]
}
複製代碼
{ action: { metadata }}\n
{ request body }\n
複製代碼
某個子請求的失敗不會對其他子請求的成功與否造成影響。如果其中任何子請求失敗,最頂層的 errors 標誌會被設置爲 true,並且在相應的請求項中報告錯誤明細
{
"took": 3,
"errors": true,
"items": [
{ "create": {
"_index": "website",
"_type": "blog",
"_id": "123",
"status": 409,
"error": "DocumentAlreadyExistsException [[website][4] [blog][123]: document already exists]"
}}
]
}
複製代碼
bulk會把將要處理的數據載入內存中,因此數據量是有限制的,最佳的數據量不是一個確定的數值,它取決於硬件、文檔大小和複雜性、索引和搜索的負載; 一般建議是1000-5000個文檔,大小建議是5-15MB,默認不能超過100MB(由http.max_content_length控制),可以在es的配置文件(即$ES_HOME下的config下的elasticsearch.yml)中修改。
curl -XGET 'localhost:9200/_index/_type/_search' -H 'Content-Type: application/json' -d'
{
  "query": {
    "bool": {
      "must": { "match": { "field": "value" } },
      "must_not": { "match": { "field": "value" } },
      "should": { "match": { "field": "value" } },
      "filter": { "range": { "field": { "gt": num } } }
    }
  },
  "from": 0,
  "size": 10,
  "sort": { "field": { "order": "desc" } }
}
'
複製代碼
結果查看 hits->total 的值
{
"took": 10,// 請求毫秒數
"timed_out": false,// 是否超時
"_shards": {// 分片信息
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 200,// 匹配到的文檔總數
"max_score": 14.509778,
"hits": [// 查詢結果,默認10條
······
]
}
}
複製代碼
curl -XGET 'localhost:9200/_index/_type/_search' -H 'Content-Type: application/json' -d'
{
  "query": {
    "bool": {
      "must": [
        { "match": { "posiName": "questionGender" } },
        { "match": { "pageName": "questionDetail" } },
        { "match": { "modleName": "questionAnswer" } }
      ]
    }
  },
  "aggs": {
    "distinct": {
      "cardinality": { "field": "modleId" }
    }
  }
}
'
複製代碼
結果查看 aggregations->distinct->value 的值
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 200,
"max_score": 14.509778,
"hits": [
······
]
},
"aggregations": {
"distinct": {
"value": 3// 去重結果
}
}
}
複製代碼
curl -XGET 'localhost:9200/_index/_type/_search' -H 'Content-Type: application/json' -d'
{
  "query": {
    "bool": {
      "must": [
        { "match": { "posiName": "questionGender" } },
        { "match": { "pageName": "questionDetail" } },
        { "match": { "modleName": "questionAnswer" } }
      ]
    }
  },
  "collapse": {
    "field": "modleId"
  }
}
'
複製代碼
結果查看 hits->hits 的值
{
"took": 12,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 200,
"max_score": 14.509778,
"hits": [
{
"_index": "mxsp_events",
"_type": "events",
"_id": "aPVv6mQBkQR_Xrrgricj",
"_score": 14.509778,
"_source": {
"modleId": 2,
"posiName": "questionGender",
"pageName": "questionDetail",
"modleName": "questionAnswer",
"userId": 1540563,
"createdAt": 1532941929
},
"fields": {
"modleId": [
2
]
}
},
{
"_index": "mxsp_events",
"_type": "events",
"_id": "dgIP9GQBkQR_XrrgQF6S",
"_score": 14.509778,
"_source": {
"modleId": 1,
"posiName": "questionGender",
"pageName": "questionDetail",
"modleName": "questionAnswer",
"userId": 3,
"createdAt": 1533103385
},
"fields": {
"modleId": [
1
]
}
},
{
"_index": "mxsp_events",
"_type": "events",
"_id": "nMyw2WQBkQR_XrrgsDQ6",
"_score": 14.312874,
"_source": {
"modleId": "0",
"posiName": "questionGender",
"pageName": "questionDetail",
"modleName": "questionAnswer",
"userId": "19",
"createdAt": "1529396883"
},
"fields": {
"modleId": [
0
]
}
}
]
}
}
複製代碼
curl -XGET 'localhost:9200/_index/_type/_search' -H 'Content-Type: application/json' -d'
{
  "query": {
    "bool": {
      "must": [
        { "match": { "posiName": "questionGender" } },
        { "match": { "pageName": "questionDetail" } },
        { "match": { "modleName": "questionAnswer" } }
      ]
    }
  },
  "aggs": {
    "group_by": {
      "terms": { "field": "modleId" }
    }
  }
}
'
複製代碼
結果查看 aggregations->group_by->buckets 的值
{
"took": 7,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 200,
"max_score": 14.509778,
"hits": [
······
]
},
"aggregations": {
"group_by": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [// 分組結果
{
"key": 2,
"doc_count": 116
},
{
"key": 1,
"doc_count": 83
},
{
"key": 0,
"doc_count": 1
}
]
}
}
}
複製代碼
bin/logstash -f logstash.conf
-f:通過這個參數可以指定Logstash的配置文件,根據配置文件配置logstash
bin/logstash -e 'input { stdin { } } output { stdout {} }' 或 bin/logstash -e ""
-e:後面跟着字符串,該字符串可以被當作logstash的配置(如果是""則默認使用stdin作爲輸入,stdout作爲輸出)
bin/logstash -f logstash.conf -t
-t:檢查配置文件是否正確
# 日誌導入
input {
}
# 日誌篩選匹配處理
filter {
}
# 日誌匹配輸出
output {
}
複製代碼
https://www.elastic.co/guide/en/logstash/current/input-plugins.html
input{
file{
# 要導入的文件的位置,可使用*,例如/var/log/nginx/*.log
path=>"/var/lib/mysql/slow.log"
# 要排除的文件(配合path => "/var/log/*"使用)
exclude=>"*.gz"
# 從文件開始的位置開始讀,end表示從結尾開始讀
start_position=>"beginning"
}
}
複製代碼
input{
redis{
# redis地址
host=>"127.0.0.1"
# redis端口號
port=>6379
# 使用redis的數據庫,默認爲0號
db=>0
# redis的密碼,默認不使用
password=>"XXX"
# 鏈接超時的時間
timeout=>5
# 操做類型,必填項(list、channel和pattern_channel三種;list是BLPOP,channel是SUBSCRIBE,pattern_channel是PSUBSCRIBE)
data_type=>"list"
# 監聽的鍵值,必填項
key=>"logstash-test-list"
# EVAL命令返回的事件數目,表示一次請求返回N條日誌信息
batch_count=>1
# 啓用線程數量
threads=>1
}
}
複製代碼
https://www.elastic.co/guide/en/logstash/current/filter-plugins.html
正則匹配內容
# SYNTAX表明匹配值的類型,如NUMBER、WORD;SEMANTIC表示存儲該值的一個變量名稱
基礎語法:%{SYNTAX:SEMANTIC}
# field_name表示存儲該值的一個變量名稱;後面跟上正則表達式;如:(?<queue_id>[0-9A-F]{10,11})
自定義語法:(?<field_name>the pattern here)
# 示例
filter {
grok {
match => {
"message" => "%{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}"
}
}
}
# 輸入
58.23.56.101 GET /index.html 15824 0.043
# 匹配結果
{
"@version" => "1",
"method" => "GET",
"message" => "58.23.56.101 GET /index.html 15824 0.043",
"duration" => "0.043",
"request" => "/index.html",
"client" => "58.23.56.101",
"bytes" => "15824",
"host" => "linchendeMac-mini.local",
"@timestamp" => 2019-03-06T06:24:21.333Z
}
複製代碼
基於分隔符原理解析數據,相比於 grok 速度更快、消耗更少的CPU資源;dissect插件有一定侷限性:主要適用於每行格式類似且分隔符明確簡單的場景;dissect語法比較簡單,由一系列字段(field)和分隔符(delimiter)組成
基礎語法:%{字段名稱} 表示一個字段;各個 %{} 之間的字符是分隔符
# 示例
input{
stdin{}
}
filter{
dissect {
mapping => { "message" => "%{ip} [%{time} %{+time}] %{method} %{request} %{bytes} %{duration}" }
}
}
output{
stdout{}
}
# 輸入
55.3.244.1 [07/Sep/2017:17:24:53 +0800] GET /index.html 15824 0.043
# 匹配結果
{
"bytes" => "15824",
"time" => "07/Sep/2017:17:24:53 +0800",
"duration" => "0.043",
"@timestamp" => 2019-03-06T09:15:28.822Z,
"ip" => "55.3.244.1",
"message" => "55.3.244.1 [07/Sep/2017:17:24:53 +0800] GET /index.html 15824 0.043",
"@version" => "1",
"host" => "linchendeMac-mini.local",
"method" => "GET",
"request" => "/index.html"
}
複製代碼
date插件會將 @timestamp 字段的值保存爲指定字段對應的時間值,不使用則爲當前時間
# 示例
filter {
grok {
match => {
"message" => "%{IP:client} \[%{HTTPDATE:time}\] %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}"
}
}
date{
match=>["time","dd/MMM/yyyy:HH:mm:ss Z"]
}
}
# 輸入
55.3.244.1 [07/Sep/2017:17:24:53 +0800] GET /index.html 15824 0.043
# 匹配結果
{
"bytes" => "15824",
"time" => "07/Sep/2017:17:24:53 +0800",
"client" => "55.3.244.1",
"request" => "/index.html",
"@version" => "1",
"duration" => "0.043",
"method" => "GET",
"host" => "linchendeMac-mini.local",
"message" => "55.3.244.1 [07/Sep/2017:17:24:53 +0800] GET /index.html 15824 0.043",
"@timestamp" => 2017-09-07T09:24:53.000Z
}
複製代碼
根據ip地址提供對應的地域信息,比如經緯度、城市名等,方便進行地理數據分析
# 示例
filter {
grok {
match => {
"message" => "%{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}"
}
}
geoip {
# IP地址數據庫文件的路徑
database => "/usr/local/Cellar/logstash-6.6.0/config/GeoLite2-City.mmdb"
# 含有ip地址的字段名稱
source => "client"
# 指定須要的字段
# fields => ["country_name", "region_name", "city_name"]
}
}
# 輸入
58.23.56.101 GET /index.html 15824 0.043
# 匹配結果
{
"method" => "GET",
"bytes" => "15824",
"request" => "/index.html",
"duration" => "0.043",
"geoip" => {
"continent_code" => "AS",
"location" => {
"lat" => 24.4798,
"lon" => 118.0819
},
"region_name" => "Fujian",
"ip" => "58.23.56.101",
"city_name" => "Xiamen",
"latitude" => 24.4798,
"country_code3" => "CN",
"longitude" => 118.0819,
"region_code" => "FJ",
"timezone" => "Asia/Shanghai",
"country_name" => "China",
"country_code2" => "CN"
},
"host" => "linchendeMac-mini.local",
"@timestamp" => 2019-03-06T06:13:00.118Z,
"message" => "58.23.56.101 GET /index.html 15824 0.043",
"@version" => "1",
"client" => "58.23.56.101"
}
複製代碼
filter {
grok {
match => { "message" => ["(?<RemoteIP>(\d+\.\d+\.\d+\.\d+)) - %{DATA:[nginx][access][user_name]} \[%{HTTPDATE:[nginx][access][time]}\] \"%{WORD:[nginx][access][method]} %{DATA:[nginx][access][url]} HTTP/%{NUMBER:[nginx][access][http_version]}\" %{NUMBER:[nginx][access][response_code]} %{NUMBER:[nginx][access][body_sent][bytes]} \"%{DATA:[nginx][access][referrer]}\" \"%{DATA:[nginx][access][agent]}\""] }
}
}
複製代碼
https://www.elastic.co/guide/en/logstash/current/output-plugins.html
output{
elasticsearch{
# elasticsearch地址:端口
hosts=>["127.0.0.1:9200"]
# 導出到index的名稱,可使用時間變量
index=>"logstash-slow-%{+YYYY.MM.dd}"
# 導出到type的名稱,默認爲doc
document_type=>"log"
# elasticsearch帳號密碼,無安全認證不須要這兩個參數
user=>"admin"
password=>"xxxxxx"
# 模板文件路徑
template=>"/opt/logstash-conf/es-template.json"
# 模板命名
template_name=>"logstash"
# 是否用上面指定的模板覆蓋ES中已存在的同名模板(默認false:不覆蓋;是否自動管理模板由manage_template控制)
template_overwrite=>false
}
}
複製代碼
output{
redis{
# redis的地址和端口,會覆蓋全局端口
host=>["127.0.0.1:6379"]
# 全局端口,默認6379,若是host已指定,本條失效
port=>6379
# 使用redis的數據庫,默認爲0號
db=>0
# redis的密碼,默認不使用
password=>"xxx"
# 操作類型(list和channel兩種;list是RPUSH,channel是PUBLISH)
data_type=>"list"
# key的名稱
key=>"xxx"
# 失敗重連的間隔,默認爲1s
reconnect_interval=>1
# 鏈接超時的時間
timeout=>5
# 批量處理(僅用於data_type=list的模式)
# 是否批量處理(默認false:1條rpush命令只存儲1條數據;true:批量處理,1條rpush會發送batch_events條數據或發送batch_timeout秒(取決於哪個先到達))
batch=>true
# 批量處理時一次rpush的最大數據量
batch_events=>50
# 批量處理時一次rpush最多消耗多少時間
batch_timeout=>5
# 擁塞保護(僅用於data_type=list的模式,redis防止內存溢出)
# 每多長時間(單位爲秒,0爲每次都檢查)進行一次擁塞檢查
congestion_interval=>1
# list中最多能夠存在多少個item數據(默認爲0,表示禁用擁塞檢測;達到congestion_threshold的數量會阻塞直到有其餘消費者消費list中的數據)
congestion_threshold=>0
}
}
複製代碼