我們在使用 Elasticsearch 做搜索引擎的時候,有可能會遇到跨 domain 查詢的場景。比如做一個學生課程管理系統,搜一個學生的名字,想知道該學生的選課情況。
當然,解決問題的方法有很多:我可以先搜學生,然後去 db 查找學生關聯的選課,就可以查到所有的課程。但有時候數據量不是很大,並且我的索引只有課程這一個維度,這時就需要使用嵌套類型來解決這類問題。本文使用 es 和 Kibana 來操作實例;因為是基於中文的實例,還使用到了 ik 分詞器,具體可以參考 ik 分詞器的官方文檔。
Elasticsearch 支持對象類型的存儲,我們可以把一個對象數組存到某個 document 的字段內。比如把一個課程作為一個 document,那麼這個課程可以建立一個 student 字段,存儲該課程下的學生 object 數組。
在 Elasticsearch 中,新建一個如下的 class_test 索引,其中 student 作為一個 object 數組類型。
PUT /class_test
{
"mappings":{
"class_test": {
"properties": {
"id": {
"type": "keyword"
},
"name": {
"analyzer": "ik_max_word",
"type": "text"
},
"type":{
"type":"keyword"
},
"student":{
"properties": {
"name":{
"analyzer": "ik_max_word",
"type": "text"
},
"id":{
"type":"keyword"
}
}
}
}
}
},
"settings":{
"index": {
"refresh_interval": "1s",
"number_of_shards": 5,
"max_result_window": "10000000",
"mapper": {
"dynamic": "false"
},
"number_of_replicas": 0
}
}
}
往 class_test 放入以下數據,現在索引裏面一共有兩條數據:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 2,
"max_score" : 1.0,
"hits" : [
{
"_index" : "class_test",
"_type" : "class_test",
"_id" : "ijfJ5GoBJeNZPNCWykLR",
"_score" : 1.0,
"_source" : {
"id" : "1",
"name" : "數學課",
"student" : [
{
"id" : "1",
"name" : "張三"
},
{
"id" : "2",
"name" : "李四"
}
]
}
},
{
"_index" : "class_test",
"_type" : "class_test",
"_id" : "Q9NxGGsBa-TqHCWqAaM4",
"_score" : 1.0,
"_source" : {
"id" : "2",
"name" : "語文",
"student" : [
{
"id" : "3",
"name" : "傑克"
},
{
"id" : "4",
"name" : "瑪麗"
}
]
}
}
]
}
}
接下來,我們可以使用查詢語句對索引進行查詢。當我們查詢 id 爲 1 的學生參加的課程的時候,可以查到數學課。
GET /class_test/class_test/_search
{
"query": {
"bool": {
"must": [
{
"match": {
"student.id": "1"
}
}
]
}
}
}
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 1,
"max_score" : 0.2876821,
"hits" : [
{
"_index" : "class_test",
"_type" : "class_test",
"_id" : "ijfJ5GoBJeNZPNCWykLR",
"_score" : 0.2876821,
"_source" : {
"id" : "1",
"name" : "數學課",
"student" : [
{
"id" : "1",
"name" : "張三"
},
{
"id" : "2",
"name" : "李四"
}
]
}
}
]
}
}
當我們查名字叫張三的學生參加的課程的時候,也能查到數學課。
GET /class_test/class_test/_search
{
"query": {
"bool": {
"must": [
{
"match": {
"student.name": "張三"
}
}
]
}
}
}
{
"took" : 4,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 1,
"max_score" : 0.5753642,
"hits" : [
{
"_index" : "class_test",
"_type" : "class_test",
"_id" : "ijfJ5GoBJeNZPNCWykLR",
"_score" : 0.5753642,
"_source" : {
"id" : "1",
"name" : "數學課",
"student" : [
{
"id" : "1",
"name" : "張三"
},
{
"id" : "2",
"name" : "李四"
}
]
}
}
]
}
}
但是當我們查詢 id 爲 1 並且名字叫李四的學生參加的課程時:
GET /class_test/class_test/_search
{
"query": {
"bool": {
"must": [
{
"match": {
"student.name": "李四"
}
},
{
"match": {
"student.id": "1"
}
}
]
}
}
}
{
"took" : 6,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 1,
"max_score" : 0.8630463,
"hits" : [
{
"_index" : "class_test",
"_type" : "class_test",
"_id" : "ijfJ5GoBJeNZPNCWykLR",
"_score" : 0.8630463,
"_source" : {
"id" : "1",
"name" : "數學課",
"student" : [
{
"id" : "1",
"name" : "張三"
},
{
"id" : "2",
"name" : "李四"
}
]
}
}
]
}
}
我們發現,出來的結果也是數學課,這就有點奇怪:因為並沒有一個 id 爲 1 並且名字是李四的學生,那就不應該查到這門課。這是怎麼回事?原來在 es 內部,object 數組類型會被打平,簡單來講,我們輸入的數組實際存儲的形式是:
"student.id": ["1", "2"],
"student.name": ["張三", "李四"]
所以倒排索引的建立,也是按照這種打平的邏輯:同一個學生的 id 和 name 之間的對應關係丟失了,於是「id 爲 1」和「名字爲李四」這兩個條件可以分別命中同一門課下的不同學生。這個時候我們可以藉助 Elasticsearch 內的嵌套類型來解決問題。
和前面的 class_test 索引類似,我們需要建一個測試索引,名字爲 class,不同的是 student 多了 type 字段,值爲 "type":"nested"。
PUT /class
{
"mappings":{
"class": {
"properties": {
"id": {
"type": "keyword"
},
"name": {
"analyzer": "ik_max_word",
"type": "text"
},
"type":{
"type":"keyword"
},
"student":{
"type":"nested",
"properties": {
"name":{
"analyzer": "ik_max_word",
"type": "text"
},
"id":{
"type":"keyword"
}
}
}
}
}
},
"settings":{
"index": {
"refresh_interval": "1s",
"number_of_shards": 5,
"max_result_window": "10000000",
"mapper": {
"dynamic": "false"
},
"number_of_replicas": 0
}
}
}
我們導入相同的數據,然後用 nested 查詢搜索 id 爲 1 並且名字爲李四的學生的課程,這個時候我們看到搜索結果爲空:
GET /class/class/_search
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "student",
"query": {
"bool": {"must": [
{
"match": {
"student.name": "李四"
}
},
{
"match": {
"student.id": "1"
}
}
]}
}
}
}
]
}
}
}
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 0,
"max_score" : null,
"hits" : [ ]
}
}
其實解決這種跨 domain 的搜索還有一些其他方式。嵌套類型其實是非常消耗 Elasticsearch 性能的,我們可以選擇將需要搜索的字段的值打平存成一個字段,或者對學生單獨建立一個索引,然後去學生-班級映射關係表查詢班級。這一塊後面有機會再做介紹。