This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new e1a1a04c2f [Enhancement](Doe) Be query es use fe generate dsl. (#11840) e1a1a04c2f is described below commit e1a1a04c2fc9a6d7eb10e7fe8cbe04cd870f849c Author: Stalary <stal...@163.com> AuthorDate: Thu Aug 18 10:31:17 2022 +0800 [Enhancement](Doe) Be query es use fe generate dsl. (#11840) --- be/src/exec/es/es_scan_reader.h | 1 + be/src/exec/es/es_scroll_query.cpp | 16 +++++++++---- be/src/exec/es_http_scan_node.cpp | 5 ++++ be/src/vec/exec/ves_http_scan_node.cpp | 6 +++++ .../docs/ecosystem/external-table/doris-on-es.md | 2 ++ .../docs/ecosystem/external-table/doris-on-es.md | 2 ++ .../java/org/apache/doris/catalog/EsTable.java | 2 +- .../main/java/org/apache/doris/common/Config.java | 6 +++++ .../doris/external/elasticsearch/EsUtil.java | 27 ++++++++++++++++++---- .../java/org/apache/doris/planner/EsScanNode.java | 17 ++++++++------ .../doris/external/elasticsearch/EsUtilTest.java | 15 ++++++++---- 11 files changed, 78 insertions(+), 21 deletions(-) diff --git a/be/src/exec/es/es_scan_reader.h b/be/src/exec/es/es_scan_reader.h index f162dc3bca..79042838af 100644 --- a/be/src/exec/es/es_scan_reader.h +++ b/be/src/exec/es/es_scan_reader.h @@ -41,6 +41,7 @@ public: static constexpr const char* KEY_TERMINATE_AFTER = "limit"; static constexpr const char* KEY_DOC_VALUES_MODE = "doc_values_mode"; static constexpr const char* KEY_HTTP_SSL_ENABLED = "http_ssl_enabled"; + static constexpr const char* KEY_QUERY_DSL = "query_dsl"; ESScanReader(const std::string& target, const std::map<std::string, std::string>& props, bool doc_value_mode); ~ESScanReader(); diff --git a/be/src/exec/es/es_scroll_query.cpp b/be/src/exec/es/es_scroll_query.cpp index 8123139a6c..cefa935a6c 100644 --- a/be/src/exec/es/es_scroll_query.cpp +++ b/be/src/exec/es/es_scroll_query.cpp @@ -68,10 +68,18 @@ std::string ESScrollQueryBuilder::build(const std::map<std::string, std::string> // generate the filter clause rapidjson::Document 
scratch_document; rapidjson::Value query_node(rapidjson::kObjectType); - query_node.SetObject(); - BooleanQueryBuilder::to_query(predicates, &scratch_document, &query_node); - // note: add `query` for this value.... - es_query_dsl.AddMember("query", query_node, allocator); + // use fe generate dsl, it must be placed outside the if, otherwise it will cause problems in AddMember + rapidjson::Document fe_query_dsl; + if (properties.find(ESScanReader::KEY_QUERY_DSL) != properties.end()) { + auto query_dsl = properties.at(ESScanReader::KEY_QUERY_DSL); + es_query_dsl.AddMember("query", fe_query_dsl.Parse(query_dsl.c_str(), query_dsl.length()), + allocator); + } else { + query_node.SetObject(); + BooleanQueryBuilder::to_query(predicates, &scratch_document, &query_node); + // note: add `query` for this value.... + es_query_dsl.AddMember("query", query_node, allocator); + } bool pure_docvalue = true; // Doris FE already has checked docvalue-scan optimization diff --git a/be/src/exec/es_http_scan_node.cpp b/be/src/exec/es_http_scan_node.cpp index ec23253a5b..680f18c75f 100644 --- a/be/src/exec/es_http_scan_node.cpp +++ b/be/src/exec/es_http_scan_node.cpp @@ -125,6 +125,11 @@ Status EsHttpScanNode::open(RuntimeState* state) { SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); RETURN_IF_CANCELLED(state); + if (_properties.find(ESScanReader::KEY_QUERY_DSL) != _properties.end()) { + RETURN_IF_ERROR(start_scanners()); + return Status::OK(); + } + // if conjunct is constant, compute direct and set eos = true for (int conj_idx = 0; conj_idx < _conjunct_ctxs.size(); ++conj_idx) { if (_conjunct_ctxs[conj_idx]->root()->is_constant()) { diff --git a/be/src/vec/exec/ves_http_scan_node.cpp b/be/src/vec/exec/ves_http_scan_node.cpp index 3a9a1ef673..b05ca0d8e7 100644 --- a/be/src/vec/exec/ves_http_scan_node.cpp +++ b/be/src/vec/exec/ves_http_scan_node.cpp @@ -124,6 +124,12 @@ Status VEsHttpScanNode::open(RuntimeState* state) { SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); RETURN_IF_CANCELLED(state); 
+ // FE uses enable_new_es_dsl to control whether it generates the DSL, for easy rollback. After the code is stable, the BE generation logic can be deleted + if (_properties.find(ESScanReader::KEY_QUERY_DSL) != _properties.end()) { + RETURN_IF_ERROR(start_scanners()); + return Status::OK(); + } + // if conjunct is constant, compute direct and set eos = true for (int conj_idx = 0; conj_idx < _conjunct_ctxs.size(); ++conj_idx) { if (_conjunct_ctxs[conj_idx]->root()->is_constant()) { diff --git a/docs/en/docs/ecosystem/external-table/doris-on-es.md b/docs/en/docs/ecosystem/external-table/doris-on-es.md index 44cff17f11..77dc7beb92 100644 --- a/docs/en/docs/ecosystem/external-table/doris-on-es.md +++ b/docs/en/docs/ecosystem/external-table/doris-on-es.md @@ -155,6 +155,8 @@ Parameter | Description An important ability of `Doris On ES` is the push-down of filter conditions: The filtering conditions are pushed to ES, so that only the data that really meets the conditions will be returned, which can significantly improve query performance and reduce CPU, memory, and IO utilization of Doris and ES +`enable_new_es_dsl` controls whether to use the new DSL generation logic. Subsequent bug fixes and iterations will be developed only in the new DSL logic. It defaults to `true` and can be changed in `fe.conf` + The following operators (Operators) will be optimized to the following ES Query: | SQL syntax | ES 5.x+ syntax | diff --git a/docs/zh-CN/docs/ecosystem/external-table/doris-on-es.md b/docs/zh-CN/docs/ecosystem/external-table/doris-on-es.md index c170f185b5..99cdffcac4 100644 --- a/docs/zh-CN/docs/ecosystem/external-table/doris-on-es.md +++ b/docs/zh-CN/docs/ecosystem/external-table/doris-on-es.md @@ -152,6 +152,8 @@ PROPERTIES ( ##### 过滤条件下推 `Doris On ES`一个重要的功能就是过滤条件的下推: 过滤条件下推给ES,这样只有真正满足条件的数据才会被返回,能够显著的提高查询性能和降低Doris和Elasticsearch的CPU、memory、IO使用量 +`enable_new_es_dsl`代表是否使用新版dsl生成逻辑, 后续 bug 修复和迭代都在新版dsl开发, 默认为`true`, 可在`fe.conf`中进行修改 + 下面的操作符(Operators)会被优化成如下ES Query: | SQL syntax | ES 5.x+ syntax 
| diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/EsTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/EsTable.java index e7a1123aa1..80cc1dff02 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/EsTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/EsTable.java @@ -66,7 +66,7 @@ public class EsTable extends Table { public static final String MAX_DOCVALUE_FIELDS = "max_docvalue_fields"; public static final String NODES_DISCOVERY = "nodes_discovery"; public static final String HTTP_SSL_ENABLED = "http_ssl_enabled"; - public static final String ES_DSL = "es_dsl"; + public static final String QUERY_DSL = "query_dsl"; private static final Logger LOG = LogManager.getLogger(EsTable.class); // Solr doc_values vs stored_fields performance-smackdown indicate: diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/Config.java b/fe/fe-core/src/main/java/org/apache/doris/common/Config.java index b1f2eff13a..f8243f2a39 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/Config.java @@ -1738,4 +1738,10 @@ public class Config extends ConfigBase { */ @ConfField(mutable = true, masterOnly = true) public static boolean enable_array_type = false; + + /** + * Use new fe generate es dsl. 
+ */ + @ConfField(mutable = true) + public static boolean enable_new_es_dsl = true; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/external/elasticsearch/EsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/external/elasticsearch/EsUtil.java index 692b129e77..33dd1ce97d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/external/elasticsearch/EsUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/external/elasticsearch/EsUtil.java @@ -234,6 +234,13 @@ public class EsUtil { } } + private static Expr exprWithoutCast(Expr expr) { + if (expr instanceof CastExpr) { + return exprWithoutCast(expr.getChild(0)); + } + return expr; + } + public static QueryBuilder toEsDsl(Expr expr) { return toEsDsl(expr, new ArrayList<>()); } @@ -250,12 +257,22 @@ public class EsUtil { return toCompoundEsDsl(expr, notPushDownList); } TExprOpcode opCode = expr.getOpcode(); - // Cast can not pushdown - if (expr.getChild(0) instanceof CastExpr || expr.getChild(1) instanceof CastExpr) { - notPushDownList.add(expr); - return null; + String column; + Expr leftExpr = expr.getChild(0); + // Type transformed cast can not pushdown + if (leftExpr instanceof CastExpr) { + Expr withoutCastExpr = exprWithoutCast(leftExpr); + // pushdown col(float) >= 3 + if (withoutCastExpr.getType().equals(leftExpr.getType()) || (withoutCastExpr.getType().isFloatingPointType() + && leftExpr.getType().isFloatingPointType())) { + column = ((SlotRef) withoutCastExpr).getColumnName(); + } else { + notPushDownList.add(expr); + return null; + } + } else { + column = ((SlotRef) leftExpr).getColumnName(); } - String column = ((SlotRef) expr.getChild(0)).getColumnName(); if (expr instanceof BinaryPredicate) { Object value = toDorisLiteral(expr.getChild(1)); switch (opCode) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/EsScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/EsScanNode.java index db0f98a65c..f68bb42aae 100644 --- 
a/fe/fe-core/src/main/java/org/apache/doris/planner/EsScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/EsScanNode.java @@ -28,6 +28,7 @@ import org.apache.doris.catalog.PartitionItem; import org.apache.doris.catalog.RangePartitionInfo; import org.apache.doris.catalog.external.EsExternalTable; import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.Config; import org.apache.doris.common.UserException; import org.apache.doris.external.elasticsearch.EsShardPartitions; import org.apache.doris.external.elasticsearch.EsShardRouting; @@ -105,6 +106,7 @@ public class EsScanNode extends ScanNode { computeColumnFilter(); assignBackends(); computeStats(analyzer); + buildQuery(); } @Override @@ -162,7 +164,6 @@ public class EsScanNode extends ScanNode { @SneakyThrows @Override protected void toThrift(TPlanNode msg) { - buildQuery(); msg.node_type = TPlanNodeType.ES_HTTP_SCAN_NODE; Map<String, String> properties = Maps.newHashMap(); if (table.getUserName() != null) { @@ -173,15 +174,17 @@ public class EsScanNode extends ScanNode { } properties.put(EsTable.HTTP_SSL_ENABLED, String.valueOf(table.isHttpSslEnabled())); TEsScanNode esScanNode = new TEsScanNode(desc.getId().asInt()); - esScanNode.setProperties(properties); if (table.isEnableDocValueScan()) { esScanNode.setDocvalueContext(table.docValueContext()); properties.put(EsTable.DOC_VALUES_MODE, String.valueOf(useDocValueScan(desc, table.docValueContext()))); } - properties.put(EsTable.ES_DSL, queryBuilder.toJson()); + if (Config.enable_new_es_dsl) { + properties.put(EsTable.QUERY_DSL, queryBuilder.toJson()); + } if (table.isEnableKeywordSniff() && table.fieldsContext().size() > 0) { esScanNode.setFieldsContext(table.fieldsContext()); } + esScanNode.setProperties(properties); msg.es_scan_node = esScanNode; } @@ -339,11 +342,8 @@ public class EsScanNode extends ScanNode { if (!conjuncts.isEmpty()) { output.append(prefix).append("LOCAL_PREDICATES: 
").append(getExplainString(conjuncts)).append("\n"); - buildQuery(); - output.append(prefix).append("REMOTE_PREDICATES: ").append(queryBuilder.toJson()).append("\n"); - } else { - output.append(prefix).append("REMOTE_PREDICATES: ").append("{\"match_all\": {}}").append("\n"); } + output.append(prefix).append("REMOTE_PREDICATES: ").append(queryBuilder.toJson()).append("\n"); String indexName = table.getIndexName(); String typeName = table.getMappingType(); output.append(prefix).append(String.format("ES index/type: %s/%s", indexName, typeName)).append("\n"); @@ -369,6 +369,9 @@ public class EsScanNode extends ScanNode { } else { queryBuilder = boolQueryBuilder; } + if (Config.enable_new_es_dsl) { + conjuncts.removeIf(expr -> !notPushDownList.contains(expr)); + } } } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/external/elasticsearch/EsUtilTest.java b/fe/fe-core/src/test/java/org/apache/doris/external/elasticsearch/EsUtilTest.java index 435dc7c32e..9a00b58b4d 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/external/elasticsearch/EsUtilTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/external/elasticsearch/EsUtilTest.java @@ -30,10 +30,10 @@ import org.apache.doris.analysis.IsNullPredicate; import org.apache.doris.analysis.LikePredicate; import org.apache.doris.analysis.SlotRef; import org.apache.doris.analysis.StringLiteral; -import org.apache.doris.analysis.TypeDef; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.EsTable; import org.apache.doris.catalog.PrimitiveType; +import org.apache.doris.catalog.Type; import org.apache.doris.common.ExceptionChecker; import mockit.Expectations; @@ -256,10 +256,9 @@ public class EsUtilTest extends EsTestCase { @Test public void testCastConvertEsDsl() { - SlotRef k1 = new SlotRef(null, "k1"); FloatLiteral floatLiteral = new FloatLiteral(3.14); - CastExpr castExpr = new CastExpr(TypeDef.create(PrimitiveType.INT), floatLiteral); - BinaryPredicate castPredicate = new 
BinaryPredicate(Operator.EQ, k1, castExpr); + CastExpr castExpr = new CastExpr(Type.INT, floatLiteral); + BinaryPredicate castPredicate = new BinaryPredicate(Operator.EQ, castExpr, new IntLiteral(3)); List<Expr> notPushDownList = new ArrayList<>(); Assertions.assertNull(EsUtil.toEsDsl(castPredicate, notPushDownList)); Assertions.assertEquals(1, notPushDownList.size()); @@ -271,6 +270,14 @@ public class EsUtilTest extends EsTestCase { eqPredicate); EsUtil.toEsDsl(compoundPredicate, notPushDownList); Assertions.assertEquals(3, notPushDownList.size()); + + SlotRef k3 = new SlotRef(null, "k3"); + k3.setType(Type.FLOAT); + CastExpr castDoubleExpr = new CastExpr(Type.DOUBLE, k3); + BinaryPredicate castDoublePredicate = new BinaryPredicate(Operator.GE, castDoubleExpr, + new FloatLiteral(3.0, Type.DOUBLE)); + EsUtil.toEsDsl(castDoublePredicate, notPushDownList); + Assertions.assertEquals(3, notPushDownList.size()); } @Test --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org