morningman commented on a change in pull request #4006: URL: https://github.com/apache/incubator-doris/pull/4006#discussion_r449891254
########## File path: fe/src/main/java/org/apache/doris/load/DeleteHandler.java ########## @@ -516,25 +537,22 @@ private void checkDeleteV2(OlapTable table, Partition partition, List<Predicate> } Map<Long, List<Column>> indexIdToSchema = table.getIndexIdToSchema(); for (MaterializedIndex index : partition.getMaterializedIndices(MaterializedIndex.IndexExtState.VISIBLE)) { + if (table.getBaseIndexId() == index.getId()) { + continue; Review comment: Why skipping the base index? If there is no rollup index, the entire check will be skipped. ########## File path: be/src/olap/delete_handler.cpp ########## @@ -66,9 +67,22 @@ OLAPStatus DeleteConditionHandler::generate_delete_predicate( // 存储删除条件 for (const TCondition& condition : conditions) { - string condition_str = construct_sub_predicates(condition); - del_pred->add_sub_predicates(condition_str); - LOG(INFO) << "store one sub-delete condition. condition=" << condition_str; + if (condition.condition_values.size() > 1) { + InPredicatePB* in_pred = del_pred->add_in_predicates(); + in_pred->set_column_name(condition.column_name); + bool is_not_in = condition.condition_op == "!*="; + in_pred->set_is_not_in(is_not_in); + for (const auto& condition_value : condition.condition_values) { + in_pred->add_values(condition_value); + } + string condition_str; + json2pb::ProtoMessageToJson(*in_pred, &condition_str); + LOG(INFO) << "store one sub-delete condition. condition=" << condition_str; Review comment: This log may be very long. the number of conditions and column name is enough. ########## File path: fe/src/main/java/org/apache/doris/qe/SessionVariable.java ########## @@ -248,6 +249,8 @@ private int maxScanKeyNum = -1; @VariableMgr.VarAttr(name = MAX_PUSHDOWN_CONDITIONS_PER_COLUMN) private int maxPushdownConditionsPerColumn = -1; + @VariableMgr.VarAttr(name = MAX_ALLOWED_IN_ELEMENT_NUM_OF_DELETE) + private int maxAllowedInElementNumOfDelete = 1024; Review comment: Is it better be a FE's config instead of a session variable? I think this should be a system-level restriction to prevent users from writing too many conditions, resulting in a reduction in the efficiency of reading and writing the underlying data in storage system. ########## File path: be/src/olap/olap_cond.cpp ########## @@ -57,77 +57,49 @@ using doris::ColumnStatistics; namespace doris { static CondOp parse_op_type(const string& op) { - if (op.size() > 2) { + if (op.size() > 3) { Review comment: Could we defined this magic number somewhere? ########## File path: be/src/olap/olap_cond.cpp ########## @@ -378,20 +347,31 @@ int Cond::del_eval(const std::pair<WrapperField*, WrapperField*>& stat) const { return ret; } case OP_IN: { - FieldSet::const_iterator it = operand_set.begin(); - for (; it != operand_set.end(); ++it) { - if ((*it)->cmp(stat.first) >= 0 - && (*it)->cmp(stat.second) <= 0) { - if (stat.first->cmp(stat.second) == 0) { - ret = DEL_SATISFIED; - } else { - ret = DEL_PARTIAL_SATISFIED; - } - break; + if (stat.first->cmp(stat.second) == 0) { + if (operand_set.find(stat.first) != operand_set.end()) { + ret = DEL_SATISFIED; + } else { + ret = DEL_NOT_SATISFIED; + } + } else { + if ((min_value_field->cmp(stat.first) >= 0 && min_value_field->cmp(stat.second) <= 0) || Review comment: Should this be: `min_value_field->cmp(stat.second) <= 0 && max_value_field->cmp(stat.first) >= 0` ? ########## File path: be/src/olap/olap_cond.h ########## @@ -46,7 +46,8 @@ enum CondOp { OP_GE = 5, // greater or equal OP_IN = 6, // IN OP_IS = 7, // is null or not null - OP_NULL = 8 // invalid OP + OP_NULL = 8, // invalid OP + OP_NOT_IN = 9 // not in Review comment: Normally, the invalid OP should be at the beginning(-1) or at last of the enum. I am not sure if we persist the order of CondOp in storage. If not, I think its better to move the `OP_NULL` to the beginning and set it as -1. ########## File path: be/src/olap/olap_cond.cpp ########## @@ -262,28 +242,17 @@ bool Cond::eval(const std::pair<WrapperField*, WrapperField*>& statistic) const return operand_field->cmp(statistic.second) <= 0; } case OP_IN: { - FieldSet::const_iterator it = operand_set.begin(); - for (; it != operand_set.end(); ++it) { - if ((*it)->cmp(statistic.first) >= 0 - && (*it)->cmp(statistic.second) <= 0) { - return true; - } - } - break; + return (min_value_field->cmp(statistic.first) >= 0 && min_value_field->cmp(statistic.second) <= 0) || Review comment: Should this be: `min_value_field->cmp(stat.second) <= 0 && max_value_field->cmp(stat.first) >= 0` ? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org