#ruby-on-rails #elasticsearch #elasticsearch-percolate
Вопрос:
Смотрите обновление ниже.
Я работаю над обновлением нашего ES с 5.6 до 6.8 в приложении Ruby on Rails (Rails: 5.2.6, Ruby: 2.5.8). Большое изменение, которое я внес, заключалось в объединении многотипных сопоставлений (типы: topic, post) в индексе тем в один тип сопоставления (topic). Наряду с этим я заменил старые отношения родитель/потомок новым полем соединения (parent_mapping) и полем пользовательского типа, чтобы имитировать старое поведение (как предложено в документации ES).
Вот текущее сопоставление индекса тем с пользовательским типом и полем соединения (join):
{
"topics" : {
"mappings" : {
"topic" : {
"properties" : {
"answered_post_id" : {
"type" : "integer"
},
"author_host" : {
"type" : "keyword"
},
"author_name" : {
"type" : "keyword"
},
"awaiting_moderation" : {
"type" : "boolean"
},
"boosted" : {
"type" : "boolean"
},
"by_employee" : {
"type" : "boolean"
},
"content_updated_at" : {
"type" : "date"
},
"created_at" : {
"type" : "date"
},
"deleted" : {
"type" : "integer"
},
"deletion_reason_id" : {
"type" : "integer"
},
"engagement_count" : {
"type" : "integer"
},
"first_post_text" : {
"type" : "text",
"fields" : {
"letters" : {
"type" : "text",
"analyzer" : "letters"
},
"standard" : {
"type" : "text",
"analyzer" : "standard_no_html",
"search_analyzer" : "standard"
},
"synonym" : {
"type" : "text",
"analyzer" : "synonym_snowball"
}
},
"analyzer" : "snowball"
},
"forum_id" : {
"type" : "integer"
},
"forum_tags" : {
"type" : "integer"
},
"forum_type" : {
"type" : "keyword"
},
"from_vendor_page" : {
"type" : "boolean"
},
"has_poll" : {
"type" : "boolean"
},
"hotness" : {
"type" : "float"
},
"id" : {
"type" : "integer"
},
"is_best_answer" : {
"type" : "boolean"
},
"is_denied_forum" : {
"type" : "boolean"
},
"is_helpful_post" : {
"type" : "boolean"
},
"last_editor_id" : {
"type" : "integer"
},
"last_post_author" : {
"type" : "keyword"
},
"last_post_created_at" : {
"type" : "date"
},
"last_post_text" : {
"type" : "text",
"fields" : {
"letters" : {
"type" : "text",
"analyzer" : "letters"
},
"standard" : {
"type" : "text",
"analyzer" : "standard_no_html",
"search_analyzer" : "standard"
},
"synonym" : {
"type" : "text",
"analyzer" : "synonym_snowball"
}
},
"analyzer" : "snowball"
},
"last_post_user_id" : {
"type" : "integer"
},
"locked" : {
"type" : "boolean"
},
"muted" : {
"type" : "boolean"
},
"needs_answer" : {
"type" : "boolean"
},
"non_it" : {
"type" : "boolean"
},
"not_a_vendor" : {
"type" : "boolean"
},
"parent_id" : {
"type" : "integer"
},
"parent_mapping" : {
"type" : "join",
"eager_global_ordinals" : true,
"relations" : {
"topic" : "post"
}
},
"percolator_query" : {
"type" : "percolator"
},
"post_counter" : {
"type" : "integer"
},
"post_method" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"post_votes_count" : {
"type" : "integer"
},
"primary_text" : {
"type" : "text",
"fields" : {
"autocomplete" : {
"type" : "text",
"analyzer" : "autocomplete",
"search_analyzer" : "standard"
},
"english" : {
"type" : "text",
"analyzer" : "folded_english"
}
},
"analyzer" : "standard"
},
"private" : {
"type" : "boolean"
},
"ranking" : {
"type" : "integer"
},
"resource_type" : {
"type" : "text"
},
"root_post" : {
"type" : "boolean"
},
"root_post_id" : {
"type" : "integer"
},
"searchable" : {
"type" : "boolean"
},
"secondary_text" : {
"type" : "text",
"fields" : {
"english" : {
"type" : "text",
"analyzer" : "folded_english"
}
},
"analyzer" : "standard"
},
"spotlight" : {
"type" : "boolean"
},
"stripped_text" : {
"type" : "text",
"fields" : {
"letters" : {
"type" : "text",
"analyzer" : "letters"
},
"standard" : {
"type" : "text",
"analyzer" : "standard_no_html",
"search_analyzer" : "standard"
},
"synonym" : {
"type" : "text",
"analyzer" : "synonym_snowball"
}
},
"analyzer" : "snowball"
},
"subject" : {
"type" : "text",
"fields" : {
"autocomplete" : {
"type" : "text",
"analyzer" : "autocomplete",
"search_analyzer" : "standard"
},
"letters" : {
"type" : "text",
"analyzer" : "letters"
},
"standard" : {
"type" : "text",
"analyzer" : "standard_no_html",
"search_analyzer" : "standard"
},
"synonym" : {
"type" : "text",
"analyzer" : "synonym_snowball"
}
},
"analyzer" : "snowball"
},
"text" : {
"type" : "text",
"fields" : {
"letters" : {
"type" : "text",
"analyzer" : "letters"
},
"standard" : {
"type" : "text",
"analyzer" : "standard_no_html",
"search_analyzer" : "standard"
},
"synonym" : {
"type" : "text",
"analyzer" : "synonym_snowball"
}
},
"analyzer" : "snowball"
},
"topic_id" : {
"type" : "integer"
},
"type" : {
"type" : "keyword"
},
"unanswered" : {
"type" : "boolean"
},
"updated_at" : {
"type" : "date"
},
"user_id" : {
"type" : "integer"
},
"version" : {
"type" : "integer"
}
}
}
}
}
}
В этом индексе есть поле перколятора, используемое для оповещений. Проблема, с которой я сталкиваюсь, заключается в том, что когда документ индексируется в индексе тем (пользовательский тип: post, parent_mapping: {name: post, parent: parent_id}), также выполняется перколяция с помощью метода .search гема elasticsearch-api (версия 6.0.3) с полезной нагрузкой, которая выглядит примерно так:
{
index: topics,
body: {
query: {
percolate: {
field: 'percolator_query',
index: 'topics',
type: 'topic',
id: 123,
}
}
}
}
At this point, there’s a document in the index which holds the query to match against when a new doc is indexed that looks something like this:
{
"_index" : "topics",
"_type" : "topic",
"_id" : "Alert:49",
"_score" : 1.0,
"_source" : {
"percolator_query" : {
"bool" : {
"should" : [
{
"query_string" : {
"boost" : 2,
"fields" : [
"stripped_text.standard"
],
"query" : "capybara"
}
},
{
"query_string" : {
"analyzer" : "standard",
"fields" : [
"stripped_text"
],
"query" : "capybara"
}
},
{
"bool" : {
"minimum_should_match" : 1,
"must" : [
{
"term" : {
"root_post" : true
}
}
],
"should" : [
{
"query_string" : {
"boost" : 2,
"fields" : [
"subject.standard"
],
"query" : "capybara"
}
},
{
"query_string" : {
"analyzer" : "standard",
"fields" : [
"subject"
],
"query" : "capybara"
}
}
]
}
}
],
"filter" : {
"bool" : {
"must" : [
{
"term" : {
"private" : false
}
},
{
"term" : {
"deleted" : 0
}
}
],
"must_not" : [
{
"term" : {
"awaiting_moderation" : true
}
},
{
"term" : {
"user_id" : 52342
}
}
]
}
}
}
}
}
}
So when I call the es.client.search { ... percolate: { field: ... } method, I get the following error: {"type":"illegal_argument_exception","reason":"[routing] is missing for join field [parent_mapping]"}.
Here’s the full, somewhat unformatted error thrown by ES:
Elasticsearch::Transport::Transport::Errors::BadRequest: [400] {
"error":{
"root_cause":[
{
"type":"mapper_parsing_exception",
"reason":"failed to parse"
}
],
"type":"search_phase_execution_exception",
"reason":"all shards failed",
"phase":"query",
"grouped":true,
"failed_shards":[
{
"shard":0,
"index":"2021-08-24-ae9a89e1ab242572_topics",
"node":"uHaQHJi0QcCSocaTGwH44w",
"reason":{
"type":"query_shard_exception",
"reason":"failed to create query: {\n \"percolate\" : {\n \"document_type\" : null,\n \"field\" : \"percolator_query\",\n \"documents\" : [\n {\n \"id\" : 1054172433,\n \"created_at\" : \"2021-08-24T18:42:23Z\",\n \"updated_at\" : \"2021-08-24T18:44:56Z\",\n \"deleted\" : 0,\n \"author_host\" : null,\n \"subject\" : \"The topic of discussion 1\",\n \"text\" : \"The topic of discussion 1\",\n \"post_method\" : \"web\",\n \"post_votes_count\" : 0,\n \"moderation_status\" : null,\n \"content_updated_at\" : null,\n \"version\" : 1,\n \"muted\" : false,\n \"forum_type\" : \"GroupForum\",\n \"forum_id\" : 1073075633,\n \"is_denied_forum\" : false,\n \"private\" : false,\n \"type\" : \"post\",\n \"awaiting_moderation\" : false,\n \"root_post\" : true,\n \"author_name\" : \"user1\",\n \"not_a_vendor\" : true,\n \"stripped_text\" : \"The topic of discussion 1\",\n \"is_helpful_post\" : false,\n \"is_best_answer\" : false,\n \"parent_mapping\" : {\n \"name\" : \"post\",\n \"parent\" : 1017419981\n },\n \"topic_id\" : 1017419981,\n \"parent_id\" : null,\n \"user_id\" : 1059399168,\n \"last_editor_id\" : null,\n \"deletion_reason_id\" : null\n }\n ],\n \"boost\" : 1.0\n }\n}",
"index_uuid":"viXlvCPzRGOdjIS8y6NBHg",
"index":"2021-08-24-ae9a89e1ab242572_topics",
"caused_by":{
"type":"mapper_parsing_exception",
"reason":"failed to parse",
"caused_by":{
"type":"illegal_argument_exception",
"reason":"[routing] is missing for join field [parent_mapping]"
}
}
}
}
],
"caused_by":{
"type":"mapper_parsing_exception",
"reason":"failed to parse",
"caused_by":{
"type":"illegal_argument_exception",
"reason":"[routing] is missing for join field [parent_mapping]"
}
}
},
"status":400
}
I’ve tried adding the routing key with the parent doc id, as well as something like body: ... { document: {name: 'post', parent: parent_id} inside the payload to the .search call, and I still keep getting the same error. I’m running through the code and reading up on percolator changes, but I’m running out of ideas. This part of the codebase is new to me and ES is fairly new to me as well, so I’m sure there’s something I’m missing. I haven’t been able to find a good example of how percolators can work with indexes that use the join field to create a parent/child relationship, so a link to that, if it exists, would help for sure.
Thanks in advance for any suggestions or help. I’m happy to provide more information and context if needed.
UPDATE:
I was able to narrow down the problem in a much simpler example. I also got past the routing issue by adding "document": { "type": "post" } when I do a percolate search. The reason for the "routing is missing" message was that the parent documents in the index do not have a ‘_routing’ field, as these were not routed anywhere when created (only child documents are routed to be in the same shard as their parent doc). Now the issue I’m having is that the percolate query is not finding the document I’m expecting it to find. Below is the current setup I’m using to recreate the issue directly on ES 6.8.
# create index with percolator and join field
PUT /perc-index?include_type_name=true
{
"mappings": {
"perc" : {
"properties": {
"type": { "type": "keyword" },
"message": { "type": "text" },
"id": { "type": "integer" },
"percolator_query": { "type": "percolator" },
"parent_mapping": {
"type": "join",
"relations": {
"perc": "perc_child"
}
}
}
}
}
}
PUT /perc-index/perc/alert:1
{
"percolator_query": {
"bool": {
"filter": [
{ "match": {
"message": { "query": "capybara" }
}
}
]
}
}
}
# index parent document
PUT /perc-index/perc/1?refresh=true
{
"id": 1,
"type": "perc",
"message": "perc message",
"parent_mapping": "perc"
}
# index child document
PUT /perc-index/perc/80?routing=1
{
"id": 80,
"type": "perc_child",
"message": "perc child capybara",
"parent_mapping": {
"name": "perc_child",
"parent": 1
}
}
GET /perc-index/_search
{
"query": {
"match_all": {}
}
}
#send a percolate through the search api
GET /perc-index/_search
{
"query": {
"percolate": {
"field": "percolator_query",
"type": "perc",
"routing": "1",
"id": "80",
"document": {
"type": "perc_child"
}
}
}
}
Итак, в приведенном выше примере я создаю новый индекс, а затем сохраняю запрос с идентификатором alert:1. После этого я создаю родительский и дочерний документы (используя поле соединения (join) и пользовательский тип для имитации старого многотипного сопоставления). Затем я выполняю поиск с помощью запроса percolate и ожидаю, что он вернет дочерний документ с идентификатором 80, но не получаю никаких результатов.
Поэтому на данный момент я не знаю, чего мне не хватает или что я делаю неправильно, или же в ES 6.8 есть реальная ошибка, связанная с этой настройкой.