Elasticsearch-90-父子关系类型的数据建模与搜索聚合

parent child建模

之前用的nested object的建模方式,有个不好的地方,就是采取类似冗余数据的方式,将多个数据都放在了一起,维护成本比较高

parent child建模方式,采用的是类似数据库三范式的建模,多个实体都分割开来,每个实体之间都通过一些关联的方式,进行了父子关系的关联,各种数据都不需要放在一起,父doc和子doc在更新的时候,都不会影响对方

要点

父子关系元数据映射,是用来保证查询时候的高性能,但是有一个限制,就是父子数据必须存在于同一个shard中

数据存在同一个shard中,而且还有映射其关联关系的元数据,那么搜索父子关系数据的时候,不用跨分片,一个分片本地自己搞定,性能自然就高

建模案例

背景:以研发中心员工管理为案例,一个公司下有多个研发中心,一个研发中心下有多个员工

首先需要手动创建索引,设置mapping

PUT /company
{
  "mappings": {
    "rd_center":{},
    "employee":{
      "_parent": {
        "type": "rd_center"
      }
    }
  }
}

就是创建了一个company的索引,然后创建了两个type,一个是研发中心rd_center,一个是员工employee,员工里面设置了一个_parent,指向了rd_center,这样就建立了研发中心和员工的父子关系

父子关系建模的核心,就是多个type之间有父子关系的话,通过设置_parent指定父type

索引建好之后,添加几个研发中心的数据

POST /company/rd_center/_bulk
{ "index": { "_id": "1" }}
{ "name": "北京研发总部", "city": "北京", "country": "中国" }
{ "index": { "_id": "2" }}
{ "name": "上海研发中心", "city": "上海", "country": "中国" }
{ "index": { "_id": "3" }}
{ "name": "硅谷人工智能实验室", "city": "硅谷", "country": "美国" }

然后,再添加子type的数据

PUT /company/employee/1?parent=1 
{
  "name":  "张三",
  "birthday":   "1970-10-24",
  "hobby": "爬山"
}

这里在添加的时候跟了一个parent的参数,参数值是父doc的id

传了这个值以后就不会根据id是1的这个employee doc去路由了,而是根据id是1的这个父doc的路由规则去路由

parent-child关系,就确保了说,父doc和子doc都是保存在一个shard上的.内部原理还是doc routing,employee和rd_center的数据,都会用parent id作为routing,这样就会到一个shard

_bulk批量添加几条数据进去

POST /company/employee/_bulk
{ "index": { "_id": 2, "parent": "1" }}
{ "name": "李四", "birthday": "1982-05-16", "hobby": "游泳" }
{ "index": { "_id": 3, "parent": "2" }}
{ "name": "王二", "birthday": "1979-04-01", "hobby": "爬山" }
{ "index": { "_id": 4, "parent": "3" }}
{ "name": "赵五", "birthday": "1987-05-11", "hobby": "骑马" }

搜索聚合案例

建立好父子关系的数据模型之后,就要基于这个模型进行各种搜索和聚合了

搜索一

需求: 搜索1980年以后出生的员工的研发中心

GET /company/rd_center/_search
{
  "query": {
    "has_child": {
      "type": "employee",
      "query": {
        "range": {
          "birthday": {
            "gte": "1980-01-01"
          }
        }
      }
    }
  }
}

返回值:

{
  "took": 2,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 2,
    "max_score": 1,
    "hits": [
      {
        "_index": "company",
        "_type": "rd_center",
        "_id": "1",
        "_score": 1,
        "_source": {
          "name": "北京研发总部",
          "city": "北京",
          "country": "中国"
        }
      },
      {
        "_index": "company",
        "_type": "rd_center",
        "_id": "3",
        "_score": 1,
        "_source": {
          "name": "硅谷人工智能实验室",
          "city": "硅谷",
          "country": "美国"
        }
      }
    ]
  }
}

就是搜索的父级类型,然后用has_child,下面设置child的type名称,然后查询就ok了

搜索二

需求: 搜索有姓名叫张三的员工的研发中心

GET /company/rd_center/_search
{
  "query": {
    "has_child": {
      "type": "employee",
      "query": {
        "match": {
          "name": "张三"
        }
      }
    }
  }
}

返回值:

{
  "took": 2,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 1,
    "max_score": 1,
    "hits": [
      {
        "_index": "company",
        "_type": "rd_center",
        "_id": "1",
        "_score": 1,
        "_source": {
          "name": "北京研发总部",
          "city": "北京",
          "country": "中国"
        }
      }
    ]
  }
}

搜索三

需求: 搜索至少两个员工以上的研发中心

GET /company/rd_center/_search
{
  "query": {
    "has_child": {
      "type": "employee",
      "min_children": 2, 
      "query": {
        "match_all": {}
      }
    }
  }
}

返回值:

{
  "took": 4,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 1,
    "max_score": 1,
    "hits": [
      {
        "_index": "company",
        "_type": "rd_center",
        "_id": "1",
        "_score": 1,
        "_source": {
          "name": "北京研发总部",
          "city": "北京",
          "country": "中国"
        }
      }
    ]
  }
}

搜索四

需求: 搜索在中国的研发中心的员工

GET /company/employee/_search
{
  "query": {
    "has_parent": {
      "parent_type": "rd_center",
      "query": {
        "match": {
          "country.keyword": "中国"
        }
      }
    }
  }
}

返回值:

{
  "took": 6,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 3,
    "max_score": 1,
    "hits": [
      {
        "_index": "company",
        "_type": "employee",
        "_id": "3",
        "_score": 1,
        "_routing": "2",
        "_parent": "2",
        "_source": {
          "name": "王二",
          "birthday": "1979-04-01",
          "hobby": "爬山"
        }
      },
      {
        "_index": "company",
        "_type": "employee",
        "_id": "1",
        "_score": 1,
        "_routing": "1",
        "_parent": "1",
        "_source": {
          "name": "张三",
          "birthday": "1970-10-24",
          "hobby": "爬山"
        }
      },
      {
        "_index": "company",
        "_type": "employee",
        "_id": "2",
        "_score": 1,
        "_routing": "1",
        "_parent": "1",
        "_source": {
          "name": "李四",
          "birthday": "1982-05-16",
          "hobby": "游泳"
        }
      }
    ]
  }
}

聚合

需求: 对每个国家的员工的兴趣爱好进行统计.

先对国家划分bucket,然后再进行兴趣爱好划分bucket

GET /company/rd_center/_search
{
  "size": 0,
  "aggs": {
    "group_by_country": {
      "terms": {
        "field": "country.keyword"
      },
      "aggs": {
        "group_by_child_employee": {
          "children": {
            "type": "employee"
          },
          "aggs": {
            "group_by_hobby": {
              "terms": {
                "field": "hobby.keyword"
              }
            }
          }
        }
      }
    }
  }
}

返回值:

{
  "took": 12,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 3,
    "max_score": 0,
    "hits": []
  },
  "aggregations": {
    "group_by_country": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "中国",
          "doc_count": 2,
          "group_by_child_employee": {
            "doc_count": 3,
            "group_by_hobby": {
              "doc_count_error_upper_bound": 0,
              "sum_other_doc_count": 0,
              "buckets": [
                {
                  "key": "爬山",
                  "doc_count": 2
                },
                {
                  "key": "游泳",
                  "doc_count": 1
                }
              ]
            }
          }
        },
        {
          "key": "美国",
          "doc_count": 1,
          "group_by_child_employee": {
            "doc_count": 1,
            "group_by_hobby": {
              "doc_count_error_upper_bound": 0,
              "sum_other_doc_count": 0,
              "buckets": [
                {
                  "key": "骑马",
                  "doc_count": 1
                }
              ]
            }
          }
        }
      ]
    }
  }
}

parent child建模

要点

建模案例

搜索 聚合案例

搜索一

搜索二

搜索三

搜索四

聚合

搜索聚合案例