This is an automated email from the ASF dual-hosted git repository. panxiaolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new a9563c6da81 [Feature](partition) Support OLAP table null partition (#31827) a9563c6da81 is described below commit a9563c6da8130f86fbad086da157d97d2567a9cb Author: zclllyybb <zhaochan...@selectdb.com> AuthorDate: Sun Mar 24 14:19:28 2024 +0800 [Feature](partition) Support OLAP table null partition (#31827) For auto partition, support nullable partition columns. For auto list partition, support creating a real null partition for null values. For auto range partition, a null value will raise an error for now, but maybe we can improve this in the future. --- be/src/exec/tablet_info.cpp | 253 +++++++++++---------- be/src/exec/tablet_info.h | 6 +- be/src/olap/utils.h | 17 +- be/src/vec/columns/column_const.cpp | 2 +- be/src/vec/columns/column_const.h | 85 ++++--- be/src/vec/columns/column_nullable.h | 2 +- be/src/vec/common/assert_cast.h | 13 +- be/src/vec/sink/vrow_distribution.cpp | 60 +++-- be/src/vec/sink/vrow_distribution.h | 5 +- fe/fe-core/src/main/cup/sql_parser.cup | 12 + .../org/apache/doris/analysis/DateLiteral.java | 10 +- .../org/apache/doris/analysis/LiteralExpr.java | 1 + .../java/org/apache/doris/analysis/MaxLiteral.java | 1 + .../org/apache/doris/analysis/PartitionDesc.java | 4 - .../apache/doris/analysis/PartitionExprUtil.java | 77 ++++--- .../org/apache/doris/analysis/PartitionValue.java | 6 +- .../org/apache/doris/catalog/PartitionKey.java | 3 +- .../doris/nereids/parser/PartitionTableInfo.java | 4 - .../rules/OneRangePartitionEvaluator.java | 2 +- .../rules/expression/rules/PartitionPruner.java | 9 +- .../expressions/literal/DateTimeV2Literal.java | 5 + .../org/apache/doris/planner/OlapTableSink.java | 9 +- .../apache/doris/service/FrontendServiceImpl.java | 23 +- .../doris/service/FrontendServiceImplTest.java | 14 +- gensrc/thrift/Exprs.thrift | 5 + gensrc/thrift/FrontendService.thrift | 2 +- .../auto_partition/test_auto_list_partition.out | 52 +++++ .../test_auto_partition_behavior.out | 
9 + .../auto_partition/test_auto_range_partition.out | 4 + .../auto_partition/test_auto_list_partition.groovy | 50 ++-- .../test_auto_partition_behavior.groovy | 93 +++----- .../auto_partition/test_auto_partition_load.groovy | 38 ++-- .../test_auto_range_partition.groovy | 59 +++-- .../test_dynamic_partition_with_rename.groovy | 17 +- .../test_multi_column_partition.groovy | 1 + .../partition_p0/test_datev2_partition.groovy | 26 +-- 36 files changed, 567 insertions(+), 412 deletions(-) diff --git a/be/src/exec/tablet_info.cpp b/be/src/exec/tablet_info.cpp index 061cc7b6681..ab8c3d562e7 100644 --- a/be/src/exec/tablet_info.cpp +++ b/be/src/exec/tablet_info.cpp @@ -32,6 +32,7 @@ #include <tuple> #include "common/exception.h" +#include "common/logging.h" #include "common/status.h" #include "olap/tablet_schema.h" #include "runtime/define_primitive_type.h" @@ -44,7 +45,6 @@ #include "util/string_parser.hpp" #include "util/string_util.h" #include "vec/columns/column.h" -#include "vec/columns/column_nullable.h" // NOLINTNEXTLINE(unused-includes) #include "vec/exprs/vexpr_context.h" #include "vec/exprs/vliteral.h" @@ -55,19 +55,19 @@ namespace doris { void OlapTableIndexSchema::to_protobuf(POlapTableIndexSchema* pindex) const { pindex->set_id(index_id); pindex->set_schema_hash(schema_hash); - for (auto slot : slots) { + for (auto* slot : slots) { pindex->add_columns(slot->col_name()); } - for (auto column : columns) { + for (auto* column : columns) { column->to_schema_pb(pindex->add_columns_desc()); } - for (auto index : indexes) { + for (auto* index : indexes) { index->to_schema_pb(pindex->add_indexes_desc()); } } -bool VOlapTablePartKeyComparator::operator()(const BlockRowWithIndicator lhs, - const BlockRowWithIndicator rhs) const { +bool VOlapTablePartKeyComparator::operator()(const BlockRowWithIndicator& lhs, + const BlockRowWithIndicator& rhs) const { vectorized::Block* l_block = std::get<0>(lhs); vectorized::Block* r_block = std::get<0>(rhs); int32_t l_row = 
std::get<1>(lhs); @@ -100,15 +100,6 @@ bool VOlapTablePartKeyComparator::operator()(const BlockRowWithIndicator lhs, for (int i = 0; i < _slot_locs.size(); i++) { vectorized::ColumnPtr l_col = l_block->get_by_position((*l_index)[i]).column; vectorized::ColumnPtr r_col = r_block->get_by_position((*r_index)[i]).column; - //TODO: when we support any function for transform, maybe the best way is refactor all doris' functions to its essential nullable mode. - if (auto* nullable = - vectorized::check_and_get_column<vectorized::ColumnNullable>(l_col)) { - l_col = nullable->get_nested_column_ptr(); - } - if (auto* nullable = - vectorized::check_and_get_column<vectorized::ColumnNullable>(r_col)) { - r_col = nullable->get_nested_column_ptr(); - } auto res = l_col->compare_at(l_row, r_row, *r_col, -1); if (res != 0) { @@ -131,14 +122,14 @@ Status OlapTableSchemaParam::init(const POlapTableSchemaParam& pschema) { _auto_increment_column = pschema.auto_increment_column(); } - for (auto& col : pschema.partial_update_input_columns()) { + for (const auto& col : pschema.partial_update_input_columns()) { _partial_update_input_columns.insert(col); } std::unordered_map<std::pair<std::string, FieldType>, SlotDescriptor*> slots_map; _tuple_desc = _obj_pool.add(new TupleDescriptor(pschema.tuple_desc())); - for (auto& p_slot_desc : pschema.slot_descs()) { - auto slot_desc = _obj_pool.add(new SlotDescriptor(p_slot_desc)); + for (const auto& p_slot_desc : pschema.slot_descs()) { + auto* slot_desc = _obj_pool.add(new SlotDescriptor(p_slot_desc)); _tuple_desc->add_slot(slot_desc); string data_type; EnumToString(TPrimitiveType, to_thrift(slot_desc->col_type()), data_type); @@ -147,13 +138,13 @@ Status OlapTableSchemaParam::init(const POlapTableSchemaParam& pschema) { slot_desc); } - for (auto& p_index : pschema.indexes()) { - auto index = _obj_pool.add(new OlapTableIndexSchema()); + for (const auto& p_index : pschema.indexes()) { + auto* index = _obj_pool.add(new OlapTableIndexSchema()); 
index->index_id = p_index.id(); index->schema_hash = p_index.schema_hash(); - for (auto& pcolumn_desc : p_index.columns_desc()) { + for (const auto& pcolumn_desc : p_index.columns_desc()) { if (!_is_partial_update || - _partial_update_input_columns.count(pcolumn_desc.name()) > 0) { + _partial_update_input_columns.contains(pcolumn_desc.name())) { auto it = slots_map.find(std::make_pair( to_lower(pcolumn_desc.name()), TabletColumn::get_field_type_by_string(pcolumn_desc.type()))); @@ -167,7 +158,7 @@ Status OlapTableSchemaParam::init(const POlapTableSchemaParam& pschema) { tc->init_from_pb(pcolumn_desc); index->columns.emplace_back(tc); } - for (auto& pindex_desc : p_index.indexes_desc()) { + for (const auto& pindex_desc : p_index.indexes_desc()) { TabletIndex* ti = _obj_pool.add(new TabletIndex()); ti->init_from_pb(pindex_desc); index->indexes.emplace_back(ti); @@ -194,26 +185,26 @@ Status OlapTableSchemaParam::init(const TOlapTableSchemaParam& tschema) { _auto_increment_column = tschema.auto_increment_column; } - for (auto& tcolumn : tschema.partial_update_input_columns) { + for (const auto& tcolumn : tschema.partial_update_input_columns) { _partial_update_input_columns.insert(tcolumn); } std::unordered_map<std::pair<std::string, PrimitiveType>, SlotDescriptor*> slots_map; _tuple_desc = _obj_pool.add(new TupleDescriptor(tschema.tuple_desc)); - for (auto& t_slot_desc : tschema.slot_descs) { - auto slot_desc = _obj_pool.add(new SlotDescriptor(t_slot_desc)); + for (const auto& t_slot_desc : tschema.slot_descs) { + auto* slot_desc = _obj_pool.add(new SlotDescriptor(t_slot_desc)); _tuple_desc->add_slot(slot_desc); slots_map.emplace(std::make_pair(to_lower(slot_desc->col_name()), slot_desc->col_type()), slot_desc); } - for (auto& t_index : tschema.indexes) { + for (const auto& t_index : tschema.indexes) { std::unordered_map<std::string, int32_t> index_slots_map; - auto index = _obj_pool.add(new OlapTableIndexSchema()); + auto* index = _obj_pool.add(new 
OlapTableIndexSchema()); index->index_id = t_index.id; index->schema_hash = t_index.schema_hash; - for (auto& tcolumn_desc : t_index.columns_desc) { + for (const auto& tcolumn_desc : t_index.columns_desc) { if (!_is_partial_update || - _partial_update_input_columns.count(tcolumn_desc.column_name) > 0) { + _partial_update_input_columns.contains(tcolumn_desc.column_name)) { auto it = slots_map.find( std::make_pair(to_lower(tcolumn_desc.column_name), thrift_to_type(tcolumn_desc.column_type.type))); @@ -230,7 +221,7 @@ Status OlapTableSchemaParam::init(const TOlapTableSchemaParam& tschema) { index->columns.emplace_back(tc); } if (t_index.__isset.indexes_desc) { - for (auto& tindex_desc : t_index.indexes_desc) { + for (const auto& tindex_desc : t_index.indexes_desc) { std::vector<int32_t> column_unique_ids(tindex_desc.columns.size()); for (size_t i = 0; i < tindex_desc.columns.size(); i++) { auto it = index_slots_map.find(to_lower(tindex_desc.columns[i])); @@ -268,10 +259,10 @@ void OlapTableSchemaParam::to_protobuf(POlapTableSchemaParam* pschema) const { *pschema->add_partial_update_input_columns() = col; } _tuple_desc->to_protobuf(pschema->mutable_tuple_desc()); - for (auto slot : _tuple_desc->slots()) { + for (auto* slot : _tuple_desc->slots()) { slot->to_protobuf(pschema->add_slot_descs()); } - for (auto index : _indexes) { + for (auto* index : _indexes) { index->to_protobuf(pschema->add_indexes()); } } @@ -289,11 +280,6 @@ VOlapTablePartitionParam::VOlapTablePartitionParam(std::shared_ptr<OlapTableSche _slots(_schema->tuple_desc()->slots()), _mem_tracker(std::make_unique<MemTracker>("OlapTablePartitionParam")), _part_type(t_param.partition_type) { - for (auto slot : _slots) { - _partition_block.insert( - {slot->get_empty_mutable_column(), slot->get_data_type_ptr(), slot->col_name()}); - } - if (t_param.__isset.enable_automatic_partition && t_param.enable_automatic_partition) { _is_auto_partition = true; auto size = t_param.partition_function_exprs.size(); @@ -319,6 
+305,38 @@ VOlapTablePartitionParam::VOlapTablePartitionParam(std::shared_ptr<OlapTableSche DCHECK(t_param.__isset.overwrite_group_id); _overwrite_group_id = t_param.overwrite_group_id; } + + if (_is_auto_partition) { + // the nullable mode depends on partition_exprs. not column slots. so use them. + DCHECK(_partition_function.size() <= _slots.size()) + << _partition_function.size() << ", " << _slots.size(); + + // suppose (k0, [k1], [k2]), so get [k1, 0], [k2, 1] + std::map<std::string, int> partition_slots_map; // name to idx in part_exprs + for (size_t i = 0; i < t_param.partition_columns.size(); i++) { + partition_slots_map.emplace(t_param.partition_columns[i], i); + } + + // here we rely on the same order and number of the _part_funcs and _slots in the prefix + // _part_block contains all slots of table. + for (auto* slot : _slots) { + // try to replace with partition expr. + if (auto it = partition_slots_map.find(slot->col_name()); + it != partition_slots_map.end()) { // it's a partition column slot + auto& expr_type = _partition_function[it->second]->data_type(); + _partition_block.insert({expr_type->create_column(), expr_type, slot->col_name()}); + } else { + _partition_block.insert({slot->get_empty_mutable_column(), + slot->get_data_type_ptr(), slot->col_name()}); + } + } + } else { + // we insert all. but not all will be used. 
it will controlled by _partition_slot_locs + for (auto* slot : _slots) { + _partition_block.insert({slot->get_empty_mutable_column(), slot->get_data_type_ptr(), + slot->col_name()}); + } + } } VOlapTablePartitionParam::~VOlapTablePartitionParam() { @@ -327,7 +345,7 @@ VOlapTablePartitionParam::~VOlapTablePartitionParam() { Status VOlapTablePartitionParam::init() { std::vector<std::string> slot_column_names; - for (auto slot_desc : _schema->tuple_desc()->slots()) { + for (auto* slot_desc : _schema->tuple_desc()->slots()) { slot_column_names.emplace_back(slot_desc->col_name()); } @@ -342,6 +360,7 @@ Status VOlapTablePartitionParam::init() { return Status::OK(); }; + // here we find the partition columns. others maybe non-partition columns/special columns. if (_t_param.__isset.partition_columns) { for (auto& part_col : _t_param.partition_columns) { RETURN_IF_ERROR(find_slot_locs(part_col, _partition_slot_locs, "partition")); @@ -382,82 +401,16 @@ Status VOlapTablePartitionParam::init() { bool VOlapTablePartitionParam::_part_contains(VOlapTablePartition* part, BlockRowWithIndicator key) const { - // start_key.second == -1 means only single partition VOlapTablePartKeyComparator comparator(_partition_slot_locs, _transformed_slot_locs); - return part->start_key.second == -1 || - !comparator(key, std::tuple {part->start_key.first, part->start_key.second, false}); -} - -Status VOlapTablePartitionParam::_create_partition_keys(const std::vector<TExprNode>& t_exprs, - BlockRow* part_key) { - for (int i = 0; i < t_exprs.size(); i++) { - RETURN_IF_ERROR(_create_partition_key(t_exprs[i], part_key, _partition_slot_locs[i])); - } - return Status::OK(); + // we have used upper_bound to find to ensure key < part.right and this part is closest(right - key is min) + // now we only have to check (key >= part.left). 
the comparator(a,b) means a < b, so we use anti + return part->start_key.second == -1 /* spj: start_key.second == -1 means only single partition*/ + || !comparator(key, std::tuple {part->start_key.first, part->start_key.second, false}); } -Status VOlapTablePartitionParam::generate_partition_from(const TOlapTablePartition& t_part, - VOlapTablePartition*& part_result) { - DCHECK(part_result == nullptr); - // here we set the default value of partition bounds first! if it doesn't have some key, it will be -1. - part_result = _obj_pool.add(new VOlapTablePartition(&_partition_block)); - part_result->id = t_part.id; - part_result->is_mutable = t_part.is_mutable; - // only load_to_single_tablet = true will set load_tablet_idx - if (t_part.__isset.load_tablet_idx) { - part_result->load_tablet_idx = t_part.load_tablet_idx; - } - - if (!_is_in_partition) { - if (t_part.__isset.start_keys) { - RETURN_IF_ERROR(_create_partition_keys(t_part.start_keys, &part_result->start_key)); - } - - if (t_part.__isset.end_keys) { - RETURN_IF_ERROR(_create_partition_keys(t_part.end_keys, &part_result->end_key)); - } - } else { - for (const auto& keys : t_part.in_keys) { - RETURN_IF_ERROR(_create_partition_keys( - keys, &part_result->in_keys.emplace_back(&_partition_block, -1))); - } - if (t_part.__isset.is_default_partition && t_part.is_default_partition && - _default_partition == nullptr) { - _default_partition = part_result; - } - } - - part_result->num_buckets = t_part.num_buckets; - auto num_indexes = _schema->indexes().size(); - if (t_part.indexes.size() != num_indexes) { - return Status::InternalError( - "number of partition's index is not equal with schema's" - ", num_part_indexes={}, num_schema_indexes={}", - t_part.indexes.size(), num_indexes); - } - part_result->indexes = t_part.indexes; - std::sort(part_result->indexes.begin(), part_result->indexes.end(), - [](const OlapTableIndexTablets& lhs, const OlapTableIndexTablets& rhs) { - return lhs.index_id < rhs.index_id; - }); - // 
check index - for (int j = 0; j < num_indexes; ++j) { - if (part_result->indexes[j].index_id != _schema->indexes()[j]->index_id) { - std::stringstream ss; - ss << "partition's index is not equal with schema's" - << ", part_index=" << part_result->indexes[j].index_id - << ", schema_index=" << _schema->indexes()[j]->index_id; - return Status::InternalError( - "partition's index is not equal with schema's" - ", part_index={}, schema_index={}", - part_result->indexes[j].index_id, _schema->indexes()[j]->index_id); - } - } - return Status::OK(); -} - -Status VOlapTablePartitionParam::_create_partition_key(const TExprNode& t_expr, BlockRow* part_key, - uint16_t pos) { +// insert value into _partition_block's column +// NOLINTBEGIN(readability-function-size) +static Status _create_partition_key(const TExprNode& t_expr, BlockRow* part_key, uint16_t pos) { auto column = std::move(*part_key->first->get_by_position(pos).column).mutate(); //TODO: use assert_cast before insert_data switch (t_expr.node_type) { @@ -519,9 +472,9 @@ Status VOlapTablePartitionParam::_create_partition_key(const TExprNode& t_expr, } case TExprNodeType::LARGE_INT_LITERAL: { StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS; - __int128 value = StringParser::string_to_int<__int128>( - t_expr.large_int_literal.value.c_str(), t_expr.large_int_literal.value.size(), - &parse_result); + auto value = StringParser::string_to_int<__int128>(t_expr.large_int_literal.value.c_str(), + t_expr.large_int_literal.value.size(), + &parse_result); if (parse_result != StringParser::PARSE_SUCCESS) { value = MAX_INT128; } @@ -551,16 +504,80 @@ Status VOlapTablePartitionParam::_create_partition_key(const TExprNode& t_expr, part_key->second = column->size() - 1; return Status::OK(); } +// NOLINTEND(readability-function-size) + +Status VOlapTablePartitionParam::_create_partition_keys(const std::vector<TExprNode>& t_exprs, + BlockRow* part_key) { + for (int i = 0; i < t_exprs.size(); i++) { + 
RETURN_IF_ERROR(_create_partition_key(t_exprs[i], part_key, _partition_slot_locs[i])); + } + return Status::OK(); +} + +Status VOlapTablePartitionParam::generate_partition_from(const TOlapTablePartition& t_part, + VOlapTablePartition*& part_result) { + DCHECK(part_result == nullptr); + // here we set the default value of partition bounds first! if it doesn't have some key, it will be -1. + part_result = _obj_pool.add(new VOlapTablePartition(&_partition_block)); + part_result->id = t_part.id; + part_result->is_mutable = t_part.is_mutable; + // only load_to_single_tablet = true will set load_tablet_idx + if (t_part.__isset.load_tablet_idx) { + part_result->load_tablet_idx = t_part.load_tablet_idx; + } + + if (_is_in_partition) { + for (const auto& keys : t_part.in_keys) { + RETURN_IF_ERROR(_create_partition_keys( + keys, &part_result->in_keys.emplace_back(&_partition_block, -1))); + } + if (t_part.__isset.is_default_partition && t_part.is_default_partition && + _default_partition == nullptr) { + _default_partition = part_result; + } + } else { // range + if (t_part.__isset.start_keys) { + RETURN_IF_ERROR(_create_partition_keys(t_part.start_keys, &part_result->start_key)); + } + // we generate the right bound but not insert into partition map + if (t_part.__isset.end_keys) { + RETURN_IF_ERROR(_create_partition_keys(t_part.end_keys, &part_result->end_key)); + } + } + + part_result->num_buckets = t_part.num_buckets; + auto num_indexes = _schema->indexes().size(); + if (t_part.indexes.size() != num_indexes) { + return Status::InternalError( + "number of partition's index is not equal with schema's" + ", num_part_indexes={}, num_schema_indexes={}", + t_part.indexes.size(), num_indexes); + } + part_result->indexes = t_part.indexes; + std::sort(part_result->indexes.begin(), part_result->indexes.end(), + [](const OlapTableIndexTablets& lhs, const OlapTableIndexTablets& rhs) { + return lhs.index_id < rhs.index_id; + }); + // check index + for (int j = 0; j < num_indexes; ++j) 
{ + if (part_result->indexes[j].index_id != _schema->indexes()[j]->index_id) { + return Status::InternalError( + "partition's index is not equal with schema's" + ", part_index={}, schema_index={}", + part_result->indexes[j].index_id, _schema->indexes()[j]->index_id); + } + } + return Status::OK(); +} Status VOlapTablePartitionParam::add_partitions( const std::vector<TOlapTablePartition>& partitions) { for (const auto& t_part : partitions) { - auto part = _obj_pool.add(new VOlapTablePartition(&_partition_block)); + auto* part = _obj_pool.add(new VOlapTablePartition(&_partition_block)); part->id = t_part.id; part->is_mutable = t_part.is_mutable; - DCHECK(t_part.__isset.start_keys == t_part.__isset.end_keys && - t_part.__isset.start_keys != t_part.__isset.in_keys); + // we dont pass right keys when it's MAX_VALUE. so there's possibility we only have start_key but not end_key // range partition if (t_part.__isset.start_keys) { RETURN_IF_ERROR(_create_partition_keys(t_part.start_keys, &part->start_key)); diff --git a/be/src/exec/tablet_info.h b/be/src/exec/tablet_info.h index 9c3a1b6db44..092d9a18c7b 100644 --- a/be/src/exec/tablet_info.h +++ b/be/src/exec/tablet_info.h @@ -146,7 +146,7 @@ public: // return true if lhs < rhs // 'row' is -1 mean maximal boundary - bool operator()(const BlockRowWithIndicator lhs, const BlockRowWithIndicator rhs) const; + bool operator()(const BlockRowWithIndicator& lhs, const BlockRowWithIndicator& rhs) const; private: const std::vector<uint16_t>& _slot_locs; @@ -168,7 +168,6 @@ public: int64_t version() const { return _t_param.version; } // return true if we found this block_row in partition - //TODO: use virtual function to refactor it ALWAYS_INLINE bool find_partition(vectorized::Block* block, int row, VOlapTablePartition*& partition) const { auto it = _is_in_partition ? 
_partitions_map->find(std::tuple {block, row, true}) @@ -275,8 +274,6 @@ public: private: Status _create_partition_keys(const std::vector<TExprNode>& t_exprs, BlockRow* part_key); - Status _create_partition_key(const TExprNode& t_expr, BlockRow* part_key, uint16_t pos); - // check if this partition contain this key bool _part_contains(VOlapTablePartition* part, BlockRowWithIndicator key) const; @@ -295,6 +292,7 @@ private: std::vector<VOlapTablePartition*> _partitions; // For all partition value rows saved in this map, indicator is false. whenever we use a value to find in it, the param is true. // so that we can distinguish which column index to use (origin slots or transformed slots). + // For range partition we ONLY SAVE RIGHT ENDS. when we find a part's RIGHT by a value, check if part's left cover it then. std::unique_ptr< std::map<BlockRowWithIndicator, VOlapTablePartition*, VOlapTablePartKeyComparator>> _partitions_map; diff --git a/be/src/olap/utils.h b/be/src/olap/utils.h index d078d260f09..4539aef288f 100644 --- a/be/src/olap/utils.h +++ b/be/src/olap/utils.h @@ -261,15 +261,14 @@ constexpr bool is_numeric_type(const FieldType& field_type) { } // Util used to get string name of thrift enum item -#define EnumToString(enum_type, index, out) \ - do { \ - std::map<int, const char*>::const_iterator it = \ - _##enum_type##_VALUES_TO_NAMES.find(index); \ - if (it == _##enum_type##_VALUES_TO_NAMES.end()) { \ - out = "NULL"; \ - } else { \ - out = it->second; \ - } \ +#define EnumToString(enum_type, index, out) \ + do { \ + auto it = _##enum_type##_VALUES_TO_NAMES.find(index); \ + if (it == _##enum_type##_VALUES_TO_NAMES.end()) { \ + out = "NULL"; \ + } else { \ + out = it->second; \ + } \ } while (0) struct RowLocation { diff --git a/be/src/vec/columns/column_const.cpp b/be/src/vec/columns/column_const.cpp index e06e53b4289..f9c72b20bc1 100644 --- a/be/src/vec/columns/column_const.cpp +++ b/be/src/vec/columns/column_const.cpp @@ -172,7 +172,7 @@ ColumnPtr 
ColumnConst::index(const IColumn& indexes, size_t limit) const { } std::pair<ColumnPtr, size_t> check_column_const_set_readability(const IColumn& column, - const size_t row_num) noexcept { + size_t row_num) noexcept { std::pair<ColumnPtr, size_t> result; if (is_column_const(column)) { result.first = static_cast<const ColumnConst&>(column).get_data_column_ptr(); diff --git a/be/src/vec/columns/column_const.h b/be/src/vec/columns/column_const.h index 746cb00fd5d..c4ac651c280 100644 --- a/be/src/vec/columns/column_const.h +++ b/be/src/vec/columns/column_const.h @@ -21,11 +21,10 @@ #pragma once #include <glog/logging.h> -#include <stdint.h> #include <sys/types.h> -#include <concepts> #include <cstddef> +#include <cstdint> #include <functional> #include <initializer_list> #include <ostream> @@ -48,14 +47,43 @@ class SipHash; -namespace doris { -namespace vectorized { +namespace doris::vectorized { + class Arena; class Block; -} // namespace vectorized -} // namespace doris -namespace doris::vectorized { +/* + * @return first : pointer to column itself if it's not ColumnConst, else to column's data column. + * second : zero if column is ColumnConst, else itself. +*/ +std::pair<ColumnPtr, size_t> check_column_const_set_readability(const IColumn& column, + size_t row_num) noexcept; + +/* + * @warning use this function sometimes cause performance problem in GCC. +*/ +template <typename T> + requires std::is_integral_v<T> +T index_check_const(T arg, bool constancy) noexcept { + return constancy ? 0 : arg; +} + +/* + * @return first : data_column_ptr for ColumnConst, itself otherwise. + * second : whether it's ColumnConst. +*/ +std::pair<const ColumnPtr&, bool> unpack_if_const(const ColumnPtr&) noexcept; + +/* + * For the functions that some columns of arguments are almost but not completely always const, we use this function to preprocessing its parameter columns + * (which are not data columns). 
When we have two or more columns which only provide parameter, use this to deal with corner case. So you can specialize you + * implementations for all const or all parameters const, without considering some of parameters are const. + + * Do the transformation only for the columns whose arg_indexes in parameters. +*/ +void default_preprocess_parameter_columns(ColumnPtr* columns, const bool* col_const, + const std::initializer_list<size_t>& parameters, + Block& block, const ColumnNumbers& arg_indexes) noexcept; /** ColumnConst contains another column with single element, * but looks like a column with arbitrary amount of same elements. @@ -140,7 +168,7 @@ public: } const char* deserialize_and_insert_from_arena(const char* pos) override { - auto res = data->deserialize_and_insert_from_arena(pos); + const auto* res = data->deserialize_and_insert_from_arena(pos); data->pop_back(1); ++s; return res; @@ -202,8 +230,9 @@ public: int compare_at(size_t, size_t, const IColumn& rhs, int nan_direction_hint) const override { auto rhs_const_column = assert_cast<const ColumnConst&>(rhs); - auto* this_nullable = check_and_get_column<ColumnNullable>(data.get()); - auto* rhs_nullable = check_and_get_column<ColumnNullable>(rhs_const_column.data.get()); + const auto* this_nullable = check_and_get_column<ColumnNullable>(data.get()); + const auto* rhs_nullable = + check_and_get_column<ColumnNullable>(rhs_const_column.data.get()); if (this_nullable && rhs_nullable) { return data->compare_at(0, 0, *rhs_const_column.data, nan_direction_hint); } else if (this_nullable) { @@ -228,8 +257,9 @@ public: void for_each_subcolumn(ColumnCallback callback) override { callback(data); } bool structure_equals(const IColumn& rhs) const override { - if (auto rhs_concrete = typeid_cast<const ColumnConst*>(&rhs)) + if (const auto* rhs_concrete = typeid_cast<const ColumnConst*>(&rhs)) { return data->structure_equals(*rhs_concrete->data); + } return false; } @@ -264,37 +294,4 @@ public: LOG(FATAL) << "should 
not call the method in column const"; } }; - -/* - * @return first : pointer to column itself if it's not ColumnConst, else to column's data column. - * second : zero if column is ColumnConst, else itself. -*/ -std::pair<ColumnPtr, size_t> check_column_const_set_readability(const IColumn& column, - const size_t row_num) noexcept; - -/* - * @warning use this function sometimes cause performance problem in GCC. -*/ -template <typename T> - requires std::is_integral_v<T> -T index_check_const(T arg, bool constancy) noexcept { - return constancy ? 0 : arg; -} - -/* - * @return first : data_column_ptr for ColumnConst, itself otherwise. - * second : whether it's ColumnConst. -*/ -std::pair<const ColumnPtr&, bool> unpack_if_const(const ColumnPtr&) noexcept; - -/* - * For the functions that some columns of arguments are almost but not completely always const, we use this function to preprocessing its parameter columns - * (which are not data columns). When we have two or more columns which only provide parameter, use this to deal with corner case. So you can specialize you - * implementations for all const or all parameters const, without considering some of parameters are const. - - * Do the transformation only for the columns whose arg_indexes in parameters. 
-*/ -void default_preprocess_parameter_columns(ColumnPtr* columns, const bool* col_const, - const std::initializer_list<size_t>& parameters, - Block& block, const ColumnNumbers& arg_indexes) noexcept; } // namespace doris::vectorized diff --git a/be/src/vec/columns/column_nullable.h b/be/src/vec/columns/column_nullable.h index 8dc4e54073a..f097432cf40 100644 --- a/be/src/vec/columns/column_nullable.h +++ b/be/src/vec/columns/column_nullable.h @@ -87,7 +87,7 @@ public: std::string get_name() const override { return "Nullable(" + nested_column->get_name() + ")"; } MutableColumnPtr clone_resized(size_t size) const override; size_t size() const override { return assert_cast<const ColumnUInt8&>(*null_map).size(); } - bool is_null_at(size_t n) const override { + PURE bool is_null_at(size_t n) const override { return assert_cast<const ColumnUInt8&>(*null_map).get_data()[n] != 0; } bool is_default_at(size_t n) const override { return is_null_at(n); } diff --git a/be/src/vec/common/assert_cast.h b/be/src/vec/common/assert_cast.h index 879f27ed8d8..6d8765befa2 100644 --- a/be/src/vec/common/assert_cast.h +++ b/be/src/vec/common/assert_cast.h @@ -20,13 +20,10 @@ #pragma once -#include <string> #include <type_traits> -#include <typeindex> #include <typeinfo> #include "common/logging.h" -#include "fmt/format.h" #include "vec/common/demangle.h" /** Perform static_cast in release build. @@ -34,11 +31,13 @@ * The exact match of the type is checked. That is, cast to the ancestor will be unsuccessful. 
*/ template <typename To, typename From> -To assert_cast(From&& from) { +PURE To assert_cast(From&& from) { #ifndef NDEBUG try { if constexpr (std::is_pointer_v<To>) { - if (typeid(*from) == typeid(std::remove_pointer_t<To>)) return static_cast<To>(from); + if (typeid(*from) == typeid(std::remove_pointer_t<To>)) { + return static_cast<To>(from); + } if constexpr (std::is_pointer_v<std::remove_reference_t<From>>) { if (auto ptr = dynamic_cast<To>(from); ptr != nullptr) { return ptr; @@ -48,7 +47,9 @@ To assert_cast(From&& from) { demangle(typeid(To).name())); } } else { - if (typeid(from) == typeid(To)) return static_cast<To>(from); + if (typeid(from) == typeid(To)) { + return static_cast<To>(from); + } } } catch (const std::exception& e) { LOG(FATAL) << "assert cast err:" << e.what(); diff --git a/be/src/vec/sink/vrow_distribution.cpp b/be/src/vec/sink/vrow_distribution.cpp index d98131613a1..8ec8814293b 100644 --- a/be/src/vec/sink/vrow_distribution.cpp +++ b/be/src/vec/sink/vrow_distribution.cpp @@ -23,6 +23,7 @@ #include <cstdint> #include <memory> +#include <sstream> #include "common/logging.h" #include "common/status.h" @@ -35,6 +36,7 @@ #include "vec/columns/column_const.h" #include "vec/columns/column_nullable.h" #include "vec/columns/column_vector.h" +#include "vec/common/assert_cast.h" #include "vec/data_types/data_type.h" #include "vec/sink/writer/vtablet_writer.h" @@ -45,17 +47,24 @@ VRowDistribution::_get_partition_function() { return {_vpartition->get_part_func_ctx(), _vpartition->get_partition_function()}; } -Status VRowDistribution::_save_missing_values(std::vector<std::vector<std::string>>& col_strs, - int col_size, Block* block, - std::vector<int64_t> filter) { +Status VRowDistribution::_save_missing_values( + std::vector<std::vector<std::string>>& col_strs, // non-const ref for move + int col_size, Block* block, std::vector<int64_t> filter, + const std::vector<const NullMap*>& col_null_maps) { // de-duplication for new partitions but save all 
rows. _batching_block->add_rows(block, filter); - std::vector<TStringLiteral> cur_row_values; + std::vector<TNullableStringLiteral> cur_row_values; for (int row = 0; row < col_strs[0].size(); ++row) { cur_row_values.clear(); for (int col = 0; col < col_size; ++col) { - TStringLiteral node; - node.value = std::move(col_strs[col][row]); + TNullableStringLiteral node; + const auto* null_map = col_null_maps[col]; // null map for this col + node.__set_is_null((null_map && (*null_map)[filter[row]]) + ? true + : node.is_null); // if not, dont change(default false) + if (!node.is_null) { + node.__set_value(col_strs[col][row]); + } cur_row_values.push_back(node); } //For duplicate cur_values, they will be filtered in FE @@ -299,7 +308,6 @@ Status VRowDistribution::_generate_rows_distribution_for_auto_partition( auto num_rows = block->rows(); std::vector<uint16_t> partition_keys = _vpartition->get_partition_keys(); - //TODO: use loop to create missing_vals for multi column. auto partition_col = block->get_by_position(partition_keys[0]); _missing_map.clear(); _missing_map.reserve(partition_col.column->size()); @@ -319,29 +327,34 @@ Status VRowDistribution::_generate_rows_distribution_for_auto_partition( if (!_missing_map.empty()) { // for missing partition keys, calc the missing partition and save in _partitions_need_create - auto [part_ctxs, part_funcs] = _get_partition_function(); - auto funcs_size = part_funcs.size(); + auto [part_ctxs, part_exprs] = _get_partition_function(); + auto part_col_num = part_exprs.size(); + // the two vectors are in column-first-order std::vector<std::vector<std::string>> col_strs; - col_strs.resize(funcs_size); - - for (int i = 0; i < funcs_size; ++i) { - auto return_type = part_funcs[i]->data_type(); - // expose the data column - vectorized::ColumnPtr range_left_col = - block->get_by_position(partition_cols_idx[i]).column; - if (const auto* nullable = - check_and_get_column<vectorized::ColumnNullable>(*range_left_col)) { - range_left_col = 
nullable->get_nested_column_ptr(); - return_type = assert_cast<const vectorized::DataTypeNullable*>(return_type.get()) - ->get_nested_type(); + std::vector<const NullMap*> col_null_maps; + col_strs.resize(part_col_num); + col_null_maps.reserve(part_col_num); + + for (int i = 0; i < part_col_num; ++i) { + auto return_type = part_exprs[i]->data_type(); + // expose the data column. the return type would be nullable + const auto& [range_left_col, col_const] = + unpack_if_const(block->get_by_position(partition_cols_idx[i]).column); + if (range_left_col->is_nullable()) { + col_null_maps.push_back(&(assert_cast<const ColumnNullable*>(range_left_col.get()) + ->get_null_map_data())); + } else { + col_null_maps.push_back(nullptr); } for (auto row : _missing_map) { - col_strs[i].push_back(return_type->to_string(*range_left_col, row)); + col_strs[i].push_back( + return_type->to_string(*range_left_col, index_check_const(row, col_const))); } } // calc the end value and save them. in the end of sending, we will create partitions for them and deal them. - RETURN_IF_ERROR(_save_missing_values(col_strs, funcs_size, block, _missing_map)); + RETURN_IF_ERROR( + _save_missing_values(col_strs, part_col_num, block, _missing_map, col_null_maps)); size_t new_bt_rows = _batching_block->rows(); size_t new_bt_bytes = _batching_block->bytes(); @@ -426,6 +439,7 @@ Status VRowDistribution::generate_rows_distribution( auto func_size = part_funcs.size(); for (int i = 0; i < func_size; ++i) { int result_idx = -1; + // we just calc left range here. leave right to FE to avoid dup calc. 
RETURN_IF_ERROR(part_funcs[i]->execute(part_ctxs[i].get(), block.get(), &result_idx)); VLOG_DEBUG << "Partition-calculated block:" << block->dump_data(); partition_cols_idx.push_back(result_idx); diff --git a/be/src/vec/sink/vrow_distribution.h b/be/src/vec/sink/vrow_distribution.h index 19a6538cc12..5267b488400 100644 --- a/be/src/vec/sink/vrow_distribution.h +++ b/be/src/vec/sink/vrow_distribution.h @@ -137,7 +137,8 @@ private: std::pair<vectorized::VExprContextSPtrs, vectorized::VExprSPtrs> _get_partition_function(); Status _save_missing_values(std::vector<std::vector<std::string>>& col_strs, int col_size, - Block* block, std::vector<int64_t> filter); + Block* block, std::vector<int64_t> filter, + const std::vector<const NullMap*>& col_null_maps); void _get_tablet_ids(vectorized::Block* block, int32_t index_idx, std::vector<int64_t>& tablet_ids); @@ -173,7 +174,7 @@ private: int _batch_size = 0; // for auto partitions - std::vector<std::vector<TStringLiteral>> _partitions_need_create; + std::vector<std::vector<TNullableStringLiteral>> _partitions_need_create; public: std::unique_ptr<MutableBlock> _batching_block; diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup index a2b4b78dea9..7a8873f4b10 100644 --- a/fe/fe-core/src/main/cup/sql_parser.cup +++ b/fe/fe-core/src/main/cup/sql_parser.cup @@ -3428,6 +3428,10 @@ partition_value_list ::= {: RESULT = l; :} + | KW_NULL + {: + RESULT = Lists.newArrayList(new PartitionValue("", true)); + :} ; /* List<PartitionValue> */ @@ -3450,6 +3454,10 @@ partition_key_item_list ::= l.add(new PartitionValue(item)); RESULT = l; :} + | KW_NULL + {: + RESULT = Lists.newArrayList(new PartitionValue("", true)); + :} ; partition_key_list ::= @@ -3485,6 +3493,10 @@ partition_key_list ::= {: RESULT = Lists.newArrayList(PartitionValue.MAX_VALUE); :} + | KW_NULL + {: + RESULT = Lists.newArrayList(new PartitionValue("", true)); + :} ; fixed_partition_key_desc ::= diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java index 28ed98df0cb..97922cee126 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java @@ -26,7 +26,6 @@ import org.apache.doris.catalog.Type; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.InvalidFormatException; import org.apache.doris.nereids.util.DateUtils; -import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.SessionVariable; import org.apache.doris.thrift.TDateLiteral; import org.apache.doris.thrift.TExprNode; @@ -794,12 +793,9 @@ public class DateLiteral extends LiteralExpr { try { checkValueValid(); } catch (AnalysisException e) { - if (ConnectContext.get() != null) { - ConnectContext.get().getState().reset(); - } - // If date value is invalid, set this to null - msg.node_type = TExprNodeType.NULL_LITERAL; - msg.setIsNullable(true); + // we must check before here. when we think we are ready to send thrift msg, + // the invalid value is not acceptable. we can't properly deal with it. 
+ LOG.warn("meet invalid value when plan to translate " + toString() + " to thrift node"); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/LiteralExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/LiteralExpr.java index 0814235f0a3..38b8d13dd8a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/LiteralExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/LiteralExpr.java @@ -332,6 +332,7 @@ public abstract class LiteralExpr extends Expr implements Comparable<LiteralExpr @Override public boolean isNullable() { + // TODO: use base class's isNullLiteral() to replace this return this instanceof NullLiteral; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/MaxLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/MaxLiteral.java index b0d54eb5032..57032f1683e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/MaxLiteral.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/MaxLiteral.java @@ -50,6 +50,7 @@ public final class MaxLiteral extends LiteralExpr { @Override protected void toThrift(TExprNode msg) { + // TODO: complete this type } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionDesc.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionDesc.java index 39569d0b3e5..9cb0a5e36a6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionDesc.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionDesc.java @@ -199,10 +199,6 @@ public class PartitionDesc { throw new AnalysisException("Complex type column can't be partition column: " + columnDef.getType().toString()); } - // prohibit to create auto partition with null column anyhow - if (this.isAutoCreatePartitions && columnDef.isAllowNull()) { - throw new AnalysisException("The auto partition column must be NOT NULL"); - } if (!ConnectContext.get().getSessionVariable().isAllowPartitionColumnNullable() && columnDef.isAllowNull()) { 
throw new AnalysisException( diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionExprUtil.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionExprUtil.java index 9323b5d2558..2869097555b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionExprUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionExprUtil.java @@ -23,7 +23,7 @@ import org.apache.doris.catalog.PartitionInfo; import org.apache.doris.catalog.PartitionType; import org.apache.doris.catalog.Type; import org.apache.doris.common.AnalysisException; -import org.apache.doris.thrift.TStringLiteral; +import org.apache.doris.thrift.TNullableStringLiteral; import com.google.common.collect.Maps; import org.apache.logging.log4j.LogManager; @@ -114,7 +114,7 @@ public class PartitionExprUtil { } public static Map<String, AddPartitionClause> getAddPartitionClauseFromPartitionValues(OlapTable olapTable, - ArrayList<List<TStringLiteral>> partitionValues, PartitionInfo partitionInfo) + ArrayList<List<TNullableStringLiteral>> partitionValues, PartitionInfo partitionInfo) throws AnalysisException { Map<String, AddPartitionClause> result = Maps.newHashMap(); ArrayList<Expr> partitionExprs = partitionInfo.getPartitionExprs(); @@ -124,15 +124,23 @@ public class PartitionExprUtil { FunctionIntervalInfo intervalInfo = getFunctionIntervalInfo(partitionExprs, partitionType); Set<String> filterPartitionValues = new HashSet<String>(); - for (List<TStringLiteral> partitionValueList : partitionValues) { + for (List<TNullableStringLiteral> partitionValueList : partitionValues) { PartitionKeyDesc partitionKeyDesc = null; String partitionName = "p"; ArrayList<String> curPartitionValues = new ArrayList<>(); - for (TStringLiteral tStringLiteral : partitionValueList) { - curPartitionValues.add(tStringLiteral.value); + for (TNullableStringLiteral tStringLiteral : partitionValueList) { + if (tStringLiteral.is_null) { + if (partitionType == PartitionType.RANGE) { + 
throw new AnalysisException("Can't create partition for NULL Range"); + } + curPartitionValues.add(null); + } else { + curPartitionValues.add(tStringLiteral.value); + } } + // Concatenate each string with its length. X means null String filterStr = curPartitionValues.stream() - .map(s -> s + s.length()) // Concatenate each string with its length + .map(s -> (s == null) ? "X" : (s + s.length())) .reduce("", (s1, s2) -> s1 + s2); if (filterPartitionValues.contains(filterStr)) { continue; @@ -151,11 +159,14 @@ public class PartitionExprUtil { List<List<PartitionValue>> listValues = new ArrayList<>(); List<PartitionValue> inValues = new ArrayList<>(); for (String value : curPartitionValues) { - inValues.add(new PartitionValue(value)); + if (value == null) { + inValues.add(new PartitionValue("", true)); + } else { + inValues.add(new PartitionValue(value)); + } } listValues.add(inValues); - partitionKeyDesc = PartitionKeyDesc.createIn( - listValues); + partitionKeyDesc = PartitionKeyDesc.createIn(listValues); partitionName += getFormatPartitionValue(filterStr); if (hasStringType) { if (partitionName.length() > 50) { @@ -179,35 +190,43 @@ public class PartitionExprUtil { return result; } - public static PartitionKeyDesc createPartitionKeyDescWithRange(DateLiteral beginDateTime, + private static PartitionKeyDesc createPartitionKeyDescWithRange(DateLiteral beginDateTime, DateLiteral endDateTime, Type partitionColumnType) throws AnalysisException { - String beginTime; - String endTime; - // maybe need check the range in FE also, like getAddPartitionClause. 
+ PartitionValue lowerValue = getPartitionFromDate(partitionColumnType, beginDateTime); + PartitionValue upperValue = getPartitionFromDate(partitionColumnType, endDateTime); + return PartitionKeyDesc.createFixed( + Collections.singletonList(lowerValue), + Collections.singletonList(upperValue)); + } + + private static PartitionValue getPartitionFromDate(Type partitionColumnType, DateLiteral dateLiteral) + throws AnalysisException { + // check out of range. + try { + // if lower than range, parse will error. so if hits here, the only possiblility + // is rounding to beyond the limit + dateLiteral.checkValueValid(); + } catch (AnalysisException e) { + return PartitionValue.MAX_VALUE; + } + + String timeString; if (partitionColumnType.isDate() || partitionColumnType.isDateV2()) { - beginTime = String.format(DATE_FORMATTER, beginDateTime.getYear(), beginDateTime.getMonth(), - beginDateTime.getDay()); - endTime = String.format(DATE_FORMATTER, endDateTime.getYear(), endDateTime.getMonth(), - endDateTime.getDay()); + timeString = String.format(DATE_FORMATTER, dateLiteral.getYear(), dateLiteral.getMonth(), + dateLiteral.getDay()); } else if (partitionColumnType.isDatetime() || partitionColumnType.isDatetimeV2()) { - beginTime = String.format(DATETIME_FORMATTER, - beginDateTime.getYear(), beginDateTime.getMonth(), beginDateTime.getDay(), - beginDateTime.getHour(), beginDateTime.getMinute(), beginDateTime.getSecond()); - endTime = String.format(DATETIME_FORMATTER, - endDateTime.getYear(), endDateTime.getMonth(), endDateTime.getDay(), - endDateTime.getHour(), endDateTime.getMinute(), endDateTime.getSecond()); + timeString = String.format(DATETIME_FORMATTER, + dateLiteral.getYear(), dateLiteral.getMonth(), dateLiteral.getDay(), + dateLiteral.getHour(), dateLiteral.getMinute(), dateLiteral.getSecond()); } else { throw new AnalysisException( "not support range partition with column type : " + partitionColumnType.toString()); } - PartitionValue lowerValue = new 
PartitionValue(beginTime); - PartitionValue upperValue = new PartitionValue(endTime); - return PartitionKeyDesc.createFixed( - Collections.singletonList(lowerValue), - Collections.singletonList(upperValue)); + + return new PartitionValue(timeString); } - public static String getFormatPartitionValue(String value) { + private static String getFormatPartitionValue(String value) { StringBuilder sb = new StringBuilder(); // When the value is negative if (value.length() > 0 && value.charAt(0) == '-') { diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionValue.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionValue.java index b20a9035869..62aa8d18e98 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionValue.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionValue.java @@ -46,11 +46,13 @@ public class PartitionValue { } public LiteralExpr getValue(Type type) throws AnalysisException { + if (isNullPartition) { + return new NullLiteral(); + } if (isMax()) { return LiteralExpr.createInfinity(type, true); - } else { - return LiteralExpr.create(value, type); } + return LiteralExpr.create(value, type); } public boolean isMax() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/PartitionKey.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/PartitionKey.java index 0a7c8268450..b227afdc142 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/PartitionKey.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/PartitionKey.java @@ -95,7 +95,8 @@ public class PartitionKey implements Comparable<PartitionKey>, Writable { for (i = 0; i < keys.size(); ++i) { Type keyType = columns.get(i).getType(); // If column type is datatime and key type is date, we should convert date to datetime. - if (keyType.isDatetime() || keyType.isDatetimeV2()) { + // if it's max value, no need to parse. 
+ if (!keys.get(i).isMax() && (keyType.isDatetime() || keyType.isDatetimeV2())) { Literal dateTimeLiteral = getDateTimeLiteral(keys.get(i).getStringValue(), keyType); partitionKey.keys.add(dateTimeLiteral.toLegacyLiteral()); } else { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/PartitionTableInfo.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/PartitionTableInfo.java index 33223a39ecc..2b07b53fdbc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/PartitionTableInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/PartitionTableInfo.java @@ -135,10 +135,6 @@ public class PartitionTableInfo { throw new AnalysisException("Complex type column can't be partition column: " + column.getType().toString()); } - // prohibit to create auto partition with null column anyhow - if (this.isAutoPartition && column.isNullable()) { - throw new AnalysisException("The auto partition column must be NOT NULL"); - } if (!ctx.getSessionVariable().isAllowPartitionColumnNullable() && column.isNullable()) { throw new AnalysisException( "The partition column must be NOT NULL with allow_partition_column_nullable OFF"); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/OneRangePartitionEvaluator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/OneRangePartitionEvaluator.java index d800900e134..deccd6cc1a3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/OneRangePartitionEvaluator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/OneRangePartitionEvaluator.java @@ -433,7 +433,7 @@ public class OneRangePartitionEvaluator boolean hasNewChildren = false; for (Expression child : expr.children()) { EvaluateRangeResult childResult = child.accept(this, context); - if (childResult.result != child) { + if (!childResult.result.equals(child)) { hasNewChildren = true; } 
childrenResults.add(childResult); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/PartitionPruner.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/PartitionPruner.java index b8440777872..9d6f420b47d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/PartitionPruner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/PartitionPruner.java @@ -98,7 +98,7 @@ public class PartitionPruner extends DefaultExpressionRewriter<Void> { public List<Long> prune() { Builder<Long> scanPartitionIds = ImmutableList.builder(); for (OnePartitionEvaluator partition : partitions) { - if (!canPrune(partition)) { + if (!canBePrunedOut(partition)) { scanPartitionIds.add(partition.getPartitionId()); } } @@ -143,14 +143,19 @@ public class PartitionPruner extends DefaultExpressionRewriter<Void> { } } - private boolean canPrune(OnePartitionEvaluator evaluator) { + /** + * return true if partition is not qualified. that is, can be pruned out. + */ + private boolean canBePrunedOut(OnePartitionEvaluator evaluator) { List<Map<Slot, PartitionSlotInput>> onePartitionInputs = evaluator.getOnePartitionInputs(); for (Map<Slot, PartitionSlotInput> currentInputs : onePartitionInputs) { + // evaluate wether there's possible for this partition to accept this predicate Expression result = evaluator.evaluateWithDefaultPartition(partitionPredicate, currentInputs); if (!result.equals(BooleanLiteral.FALSE) && !(result instanceof NullLiteral)) { return false; } } + // only have false result: Can be pruned out. 
have other exprs: CanNot be pruned out return true; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DateTimeV2Literal.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DateTimeV2Literal.java index 061666c681f..53cfe1e1835 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DateTimeV2Literal.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DateTimeV2Literal.java @@ -18,6 +18,7 @@ package org.apache.doris.nereids.trees.expressions.literal; import org.apache.doris.analysis.LiteralExpr; +import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.exceptions.UnboundException; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; @@ -75,6 +76,10 @@ public class DateTimeV2Literal extends DateTimeLiteral { this.second = localDateTime.getSecond(); this.microSecond -= 1000000; } + if (checkRange() || checkDate()) { + // may fallback to legacy planner. make sure the behaviour of rounding is same. 
+ throw new AnalysisException("datetime literal [" + toString() + "] is out of range"); + } } public String getFullMicroSecondValue() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java index 789b94ee952..a4a3471d984 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java @@ -477,16 +477,19 @@ public class OlapTableSink extends DataSink { return partitionParam; } - public static void setPartitionKeys(TOlapTablePartition tPartition, PartitionItem partitionItem, int partColNum) { + public static void setPartitionKeys(TOlapTablePartition tPartition, PartitionItem partitionItem, int partColNum) + throws UserException { if (partitionItem instanceof RangePartitionItem) { Range<PartitionKey> range = partitionItem.getItems(); - // set start keys + // set start keys. min value is a REAL value. should be legal. if (range.hasLowerBound() && !range.lowerEndpoint().isMinValue()) { for (int i = 0; i < partColNum; i++) { tPartition.addToStartKeys(range.lowerEndpoint().getKeys().get(i).treeToThrift().getNodes().get(0)); } } - // set end keys + // TODO: support real MaxLiteral in thrift. + // now we dont send it to BE. if BE meet it, treat it as default value. 
+ // see VOlapTablePartition's ctor in tablet_info.h if (range.hasUpperBound() && !range.upperEndpoint().isMaxValue()) { for (int i = 0; i < partColNum; i++) { tPartition.addToEndKeys(range.upperEndpoint().getKeys().get(i).treeToThrift().getNodes().get(0)); diff --git a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java index dc058156b60..df51b868359 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java +++ b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java @@ -190,6 +190,7 @@ import org.apache.doris.thrift.TMasterResult; import org.apache.doris.thrift.TMySqlLoadAcquireTokenResult; import org.apache.doris.thrift.TNetworkAddress; import org.apache.doris.thrift.TNodeInfo; +import org.apache.doris.thrift.TNullableStringLiteral; import org.apache.doris.thrift.TOlapTableIndexTablets; import org.apache.doris.thrift.TOlapTablePartition; import org.apache.doris.thrift.TPipelineFragmentParams; @@ -225,7 +226,6 @@ import org.apache.doris.thrift.TStatusCode; import org.apache.doris.thrift.TStreamLoadMultiTablePutResult; import org.apache.doris.thrift.TStreamLoadPutRequest; import org.apache.doris.thrift.TStreamLoadPutResult; -import org.apache.doris.thrift.TStringLiteral; import org.apache.doris.thrift.TTableIndexQueryStats; import org.apache.doris.thrift.TTableMetadataNameIds; import org.apache.doris.thrift.TTableQueryStats; @@ -3443,7 +3443,7 @@ public class FrontendServiceImpl implements FrontendService.Iface { OlapTable olapTable = (OlapTable) table; PartitionInfo partitionInfo = olapTable.getPartitionInfo(); - ArrayList<List<TStringLiteral>> partitionValues = new ArrayList<>(); + ArrayList<List<TNullableStringLiteral>> partitionValues = new ArrayList<>(); for (int i = 0; i < request.partitionValues.size(); i++) { if (partitionInfo.getType() == PartitionType.RANGE && request.partitionValues.get(i).size() != 1) { 
errorStatus.setErrorMsgs( @@ -3502,8 +3502,14 @@ public class FrontendServiceImpl implements FrontendService.Iface { TOlapTablePartition tPartition = new TOlapTablePartition(); tPartition.setId(partition.getId()); int partColNum = partitionInfo.getPartitionColumns().size(); - // set partition keys - OlapTableSink.setPartitionKeys(tPartition, partitionInfo.getItem(partition.getId()), partColNum); + try { + OlapTableSink.setPartitionKeys(tPartition, partitionInfo.getItem(partition.getId()), partColNum); + } catch (UserException ex) { + errorStatus.setErrorMsgs(Lists.newArrayList(ex.getMessage())); + result.setStatus(errorStatus); + LOG.warn("send create partition error status: {}", result); + return result; + } for (MaterializedIndex index : partition.getMaterializedIndices(MaterializedIndex.IndexExtState.ALL)) { tPartition.addToIndexes(new TOlapTableIndexTablets(index.getId(), Lists.newArrayList( index.getTablets().stream().map(Tablet::getId).collect(Collectors.toList())))); @@ -3661,7 +3667,14 @@ public class FrontendServiceImpl implements FrontendService.Iface { // set partition keys int partColNum = partitionInfo.getPartitionColumns().size(); - OlapTableSink.setPartitionKeys(tPartition, partitionInfo.getItem(partition.getId()), partColNum); + try { + OlapTableSink.setPartitionKeys(tPartition, partitionInfo.getItem(partition.getId()), partColNum); + } catch (UserException ex) { + errorStatus.setErrorMsgs(Lists.newArrayList(ex.getMessage())); + result.setStatus(errorStatus); + LOG.warn("send replace partition error status: {}", result); + return result; + } for (MaterializedIndex index : partition.getMaterializedIndices(MaterializedIndex.IndexExtState.ALL)) { tPartition.addToIndexes(new TOlapTableIndexTablets(index.getId(), Lists.newArrayList( index.getTablets().stream().map(Tablet::getId).collect(Collectors.toList())))); diff --git a/fe/fe-core/src/test/java/org/apache/doris/service/FrontendServiceImplTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/service/FrontendServiceImplTest.java index 318926167c9..56891782b65 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/service/FrontendServiceImplTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/service/FrontendServiceImplTest.java @@ -37,9 +37,9 @@ import org.apache.doris.thrift.TGetDbsParams; import org.apache.doris.thrift.TGetDbsResult; import org.apache.doris.thrift.TMetadataTableRequestParams; import org.apache.doris.thrift.TMetadataType; +import org.apache.doris.thrift.TNullableStringLiteral; import org.apache.doris.thrift.TSchemaTableName; import org.apache.doris.thrift.TStatusCode; -import org.apache.doris.thrift.TStringLiteral; import org.apache.doris.utframe.UtFrameUtils; import mockit.Mocked; @@ -109,10 +109,10 @@ public class FrontendServiceImplTest { Database db = Env.getCurrentInternalCatalog().getDbOrAnalysisException("test"); OlapTable table = (OlapTable) db.getTableOrAnalysisException("partition_range"); - List<List<TStringLiteral>> partitionValues = new ArrayList<>(); - List<TStringLiteral> values = new ArrayList<>(); + List<List<TNullableStringLiteral>> partitionValues = new ArrayList<>(); + List<TNullableStringLiteral> values = new ArrayList<>(); - TStringLiteral start = new TStringLiteral(); + TNullableStringLiteral start = new TNullableStringLiteral(); start.setValue("2023-08-07 00:00:00"); values.add(start); @@ -148,10 +148,10 @@ public class FrontendServiceImplTest { Database db = Env.getCurrentInternalCatalog().getDbOrAnalysisException("test"); OlapTable table = (OlapTable) db.getTableOrAnalysisException("partition_list"); - List<List<TStringLiteral>> partitionValues = new ArrayList<>(); - List<TStringLiteral> values = new ArrayList<>(); + List<List<TNullableStringLiteral>> partitionValues = new ArrayList<>(); + List<TNullableStringLiteral> values = new ArrayList<>(); - TStringLiteral start = new TStringLiteral(); + TNullableStringLiteral start = new TNullableStringLiteral(); 
start.setValue("BEIJING"); values.add(start); diff --git a/gensrc/thrift/Exprs.thrift b/gensrc/thrift/Exprs.thrift index 9894c1539e8..6b7abc2ad93 100644 --- a/gensrc/thrift/Exprs.thrift +++ b/gensrc/thrift/Exprs.thrift @@ -191,6 +191,11 @@ struct TStringLiteral { 1: required string value; } +struct TNullableStringLiteral { + 1: optional string value; + 2: optional bool is_null = false; +} + struct TJsonLiteral { 1: required string value; } diff --git a/gensrc/thrift/FrontendService.thrift b/gensrc/thrift/FrontendService.thrift index b1dcb4defd7..7251afd7624 100644 --- a/gensrc/thrift/FrontendService.thrift +++ b/gensrc/thrift/FrontendService.thrift @@ -1271,7 +1271,7 @@ struct TCreatePartitionRequest { 2: optional i64 db_id 3: optional i64 table_id // for each partition column's partition values. [missing_rows, partition_keys]->Left bound(for range) or Point(for list) - 4: optional list<list<Exprs.TStringLiteral>> partitionValues + 4: optional list<list<Exprs.TNullableStringLiteral>> partitionValues // be_endpoint = <ip>:<heartbeat_port> to distinguish a particular BE 5: optional string be_endpoint } diff --git a/regression-test/data/partition_p0/auto_partition/test_auto_list_partition.out b/regression-test/data/partition_p0/auto_partition/test_auto_list_partition.out index 53eeb493257..dd39f5eafab 100644 --- a/regression-test/data/partition_p0/auto_partition/test_auto_list_partition.out +++ b/regression-test/data/partition_p0/auto_partition/test_auto_list_partition.out @@ -1,5 +1,8 @@ -- This file is automatically generated. 
You should know what you did if you want to edit this -- !sql1 -- +\N +\N + Abc Beijing Beijing @@ -7,6 +10,45 @@ XXX xxx -- !sql2 -- +\N +\N +\N + +Abc +Abc +Beijing +Beijing +Beijing +Beijing +XXX +XXX +new +xxx +xxx + +-- !sql_null0 -- + +Abc +Abc +Beijing +Beijing +Beijing +Beijing +XXX +XXX +new +xxx +xxx + +-- !sql_null1 -- +\N +\N +\N + +-- !sql_null2 -- + + +-- !sql_null3 -- Abc Abc Beijing @@ -43,3 +85,13 @@ x x y +-- !sql_multi_col1 -- +\N \N \N +\N \N \N +-3 \N qwe +-3 \N qwe +-3 -3 qwe +-1 -1 vdf +1 1 asd +2 2 xxx + diff --git a/regression-test/data/partition_p0/auto_partition/test_auto_partition_behavior.out b/regression-test/data/partition_p0/auto_partition/test_auto_partition_behavior.out index 5fe25cce3d8..40ae8a97a89 100644 --- a/regression-test/data/partition_p0/auto_partition/test_auto_partition_behavior.out +++ b/regression-test/data/partition_p0/auto_partition/test_auto_partition_behavior.out @@ -108,3 +108,12 @@ Yyy -- !sql_overwrite2 -- Xxx +-- !sql_non_order1 -- +1 2020-12-12T00:00 + +-- !sql_non_order2 -- +2 2023-12-12T00:00 + +-- !sql_non_order3 -- +3 2013-12-12T00:00 + diff --git a/regression-test/data/partition_p0/auto_partition/test_auto_range_partition.out b/regression-test/data/partition_p0/auto_partition/test_auto_range_partition.out index 5cab9d69b1d..93c6d86bf9c 100644 --- a/regression-test/data/partition_p0/auto_partition/test_auto_range_partition.out +++ b/regression-test/data/partition_p0/auto_partition/test_auto_range_partition.out @@ -104,3 +104,7 @@ 2122-12-19T22:22:22.222 2122-12-20T22:22:22.222 +-- !right_bound -- +9999-12-31T23:59:59 +9999-12-31T23:59:59.999999 + diff --git a/regression-test/suites/partition_p0/auto_partition/test_auto_list_partition.groovy b/regression-test/suites/partition_p0/auto_partition/test_auto_list_partition.groovy index af70ca35a87..7868f1ffb9a 100644 --- a/regression-test/suites/partition_p0/auto_partition/test_auto_list_partition.groovy +++ 
b/regression-test/suites/partition_p0/auto_partition/test_auto_list_partition.groovy @@ -20,7 +20,7 @@ suite("test_auto_list_partition") { sql "drop table if exists list_table1" sql """ CREATE TABLE `list_table1` ( - `str` varchar not null + `str` varchar ) ENGINE=OLAP DUPLICATE KEY(`str`) COMMENT 'OLAP' @@ -32,20 +32,25 @@ suite("test_auto_list_partition") { "replication_allocation" = "tag.location.default: 1" ); """ - sql """ insert into list_table1 values ("Beijing"), ("XXX"), ("xxx"), ("Beijing"), ("Abc") """ + sql """ insert into list_table1 values ("Beijing"), ("XXX"), ("xxx"), ("Beijing"), ("Abc"), (null) """ + sql """ insert into list_table1 values (null), ("") """ // not same partition qt_sql1 """ select * from list_table1 order by `str` """ def result11 = sql "show partitions from list_table1" - assertEquals(result11.size(), 4) - sql """ insert into list_table1 values ("Beijing"), ("XXX"), ("xxx"), ("Beijing"), ("Abc"), ("new") """ + assertEquals(result11.size(), 6) + sql """ insert into list_table1 values ("Beijing"), ("XXX"), ("xxx"), ("Beijing"), ("Abc"), ("new"), (null) """ qt_sql2 """ select * from list_table1 order by `str` """ def result12 = sql "show partitions from list_table1" - assertEquals(result12.size(), 5) + assertEquals(result12.size(), 7) + qt_sql_null0 " select * from list_table1 where str is not null order by str;" // should have empty string + qt_sql_null1 " select * from list_table1 where str is null order by str;" + qt_sql_null2 """ select * from list_table1 where str = "" order by str; """ + qt_sql_null3 """ select * from list_table1 where str != "" order by str; """ // char sql "drop table if exists list_table2" sql """ CREATE TABLE `list_table2` ( - `ch` char not null + `ch` char ) ENGINE=OLAP DUPLICATE KEY(`ch`) COMMENT 'OLAP' @@ -72,7 +77,7 @@ suite("test_auto_list_partition") { sql """ CREATE TABLE `${tblName3}` ( `k1` INT, - `k2` VARCHAR(50) not null, + `k2` VARCHAR(50), `k3` DATETIMEV2(6) ) ENGINE=OLAP DUPLICATE KEY(`k1`) @@ 
-85,10 +90,10 @@ suite("test_auto_list_partition") { "replication_allocation" = "tag.location.default: 1" ); """ - sql """ insert into ${tblName3} values (1, 'ABC', '2000-01-01 12:12:12.123456'), (2, 'AAA', '2000-01-01'), (3, 'aaa', '2000-01-01'), (3, 'AaA', '2000-01-01') """ + sql """ insert into ${tblName3} values (1, 'ABC', '2000-01-01 12:12:12.123456'), (2, 'AAA', '2000-01-01'), (3, 'aaa', '2000-01-01'), (3, 'AaA', '2000-01-01'), (4, null, null) """ def result3 = sql "show partitions from ${tblName3}" logger.info("${result3}") - assertEquals(result3.size(), 4) + assertEquals(result3.size(), 5) // int sql "drop table if exists list_table4" @@ -178,7 +183,7 @@ suite("test_auto_list_partition") { sql "drop table if exists test_bigint" sql """ CREATE TABLE test_bigint ( - k bigint not null + k bigint ) AUTO PARTITION BY LIST (`k`) ( @@ -197,7 +202,7 @@ suite("test_auto_list_partition") { sql "drop table if exists test_smallint" sql """ CREATE TABLE test_smallint ( - k smallint not null + k smallint ) AUTO PARTITION BY LIST (`k`) ( @@ -253,9 +258,9 @@ suite("test_auto_list_partition") { sql "drop table if exists test_list_many_column2" sql """ CREATE TABLE test_list_many_column2 ( - id int not null, - k largeint not null, - str varchar not null + id int, + k largeint, + str varchar ) AUTO PARTITION BY LIST (`id`, `k`, `str`) ( @@ -267,9 +272,24 @@ suite("test_auto_list_partition") { """ sql """ insert into test_list_many_column2 values (1,1,"asd"), (-1,-1,"vdf");""" sql """ insert into test_list_many_column2 values (2,2,"xxx"), (-3,-3,"qwe");""" + sql """ insert into test_list_many_column2 values (null,null,null), (-3,null,"qwe");""" + sql """ insert into test_list_many_column2 values (null,null,null), (-3,null,"qwe");""" + qt_sql_multi_col1 "select * from test_list_many_column2 order by id,k,str" result12 = sql "show partitions from test_list_many_column2" logger.info("${result12}") - assertEquals(result12.size(), 4) + assertEquals(result12.size(), 6) + explain { + 
sql "select * from test_list_many_column2 where id is null" + contains "partitions=1/6 (pXXX)" + } + explain { + sql "select * from test_list_many_column2 where id is null and k is not null" + contains "VEMPTYSET" + } + explain { + sql "select * from test_list_many_column2 where k is not null" + contains "partitions=4/6" + } sql "drop table if exists stream_load_list_test_table_string_key" sql """ diff --git a/regression-test/suites/partition_p0/auto_partition/test_auto_partition_behavior.groovy b/regression-test/suites/partition_p0/auto_partition/test_auto_partition_behavior.groovy index 951f656f55f..5c2b5c6f79c 100644 --- a/regression-test/suites/partition_p0/auto_partition/test_auto_partition_behavior.groovy +++ b/regression-test/suites/partition_p0/auto_partition/test_auto_partition_behavior.groovy @@ -186,66 +186,43 @@ suite("test_auto_partition_behavior") { sql """ insert overwrite table rewrite partition(p1) values ("Xxx") """ qt_sql_overwrite2 """ select * from rewrite """ // Xxx - // prohibit NULLABLE auto partition column - // legacy - sql " set experimental_enable_nereids_planner=false " - test { - sql "drop table if exists test_null1" - sql """ - create table test_null1( - k0 datetime(6) null - ) - auto partition by range date_trunc(k0, 'hour') - ( - ) - DISTRIBUTED BY HASH(`k0`) BUCKETS 2 - properties("replication_num" = "1"); - """ - exception "The auto partition column must be NOT NULL" - } - test { - sql "drop table if exists test_null2" - sql """ - create table test_null2( - k0 int null - ) - auto partition by list (k0) - ( - ) - DISTRIBUTED BY HASH(`k0`) BUCKETS 2 - properties("replication_num" = "1"); - """ - exception "The auto partition column must be NOT NULL" - } - // nereids - sql " set experimental_enable_nereids_planner=true " - test { - sql "drop table if exists test_null1" - sql """ - create table test_null1( - k0 datetime(6) null - ) - auto partition by range date_trunc(k0, 'hour') - ( - ) - DISTRIBUTED BY HASH(`k0`) BUCKETS 2 - 
properties("replication_num" = "1"); + sql " drop table if exists non_order; " + sql """ + CREATE TABLE `non_order` ( + `k0` int not null, + `k1` datetime(6) not null + ) + AUTO PARTITION BY RANGE date_trunc(`k1`, 'year') + ( + ) + DISTRIBUTED BY HASH(`k0`) BUCKETS 10 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); """ - exception "The auto partition column must be NOT NULL" - } + // insert + sql """ insert into non_order values (1, '2020-12-12'); """ + sql """ insert into non_order values (2, '2023-12-12'); """ + sql """ insert into non_order values (3, '2013-12-12'); """ + qt_sql_non_order1 """ select * from non_order where k1 = '2020-12-12'; """ + qt_sql_non_order2 """ select * from non_order where k1 = '2023-12-12'; """ + qt_sql_non_order3 """ select * from non_order where k1 = '2013-12-12'; """ + + // range partition can't auto create null partition + sql "drop table if exists invalid_null_range" + sql """ + create table invalid_null_range( + k0 datetime(6) null + ) + auto partition by range date_trunc(k0, 'hour') + ( + ) + DISTRIBUTED BY HASH(`k0`) BUCKETS 2 + properties("replication_num" = "1"); + """ test { - sql "drop table if exists test_null2" - sql """ - create table test_null2( - k0 int null - ) - auto partition by list (k0) - ( - ) - DISTRIBUTED BY HASH(`k0`) BUCKETS 2 - properties("replication_num" = "1"); - """ - exception "The auto partition column must be NOT NULL" + sql " insert into invalid_null_range values (null); " + exception "Can't create partition for NULL Range" } // PROHIBIT different timeunit of interval when use both auto & dynamic partition diff --git a/regression-test/suites/partition_p0/auto_partition/test_auto_partition_load.groovy b/regression-test/suites/partition_p0/auto_partition/test_auto_partition_load.groovy index 81b440e0f80..aa1a1b1f5d3 100644 --- a/regression-test/suites/partition_p0/auto_partition/test_auto_partition_load.groovy +++ 
b/regression-test/suites/partition_p0/auto_partition/test_auto_partition_load.groovy @@ -16,10 +16,9 @@ // under the License. suite("test_auto_partition_load") { - def tblName1 = "load_table1" - sql "drop table if exists ${tblName1}" + sql "drop table if exists load_table1" sql """ - CREATE TABLE `${tblName1}` ( + CREATE TABLE `load_table1` ( `k1` INT, `k2` DATETIME NOT NULL, `k3` DATETIMEV2(6) @@ -35,26 +34,25 @@ suite("test_auto_partition_load") { ); """ streamLoad { - table "${tblName1}" + table "load_table1" set 'column_separator', ',' file "auto_partition_stream_load1.csv" time 20000 } - sql """ insert into ${tblName1} values (11, '2007-12-12 12:12:12.123', '2001-11-14 12:12:12.123456') """ - sql """ insert into ${tblName1} values (12, '2008-12-12 12:12:12.123', '2001-11-14 12:12:12.123456') """ - sql """ insert into ${tblName1} values (13, '2003-12-12 12:12:12.123', '2001-11-14 12:12:12.123456') """ - sql """ insert into ${tblName1} values (14, '2002-12-12 12:12:12.123', '2001-11-14 12:12:12.123456') """ + sql """ insert into load_table1 values (11, '2007-12-12 12:12:12.123', '2001-11-14 12:12:12.123456') """ + sql """ insert into load_table1 values (12, '2008-12-12 12:12:12.123', '2001-11-14 12:12:12.123456') """ + sql """ insert into load_table1 values (13, '2003-12-12 12:12:12.123', '2001-11-14 12:12:12.123456') """ + sql """ insert into load_table1 values (14, '2002-12-12 12:12:12.123', '2001-11-14 12:12:12.123456') """ - qt_select1 "select * from ${tblName1} order by k1" - result1 = sql "show partitions from ${tblName1}" + qt_select1 "select * from load_table1 order by k1" + result1 = sql "show partitions from load_table1" logger.info("${result1}") assertEquals(result1.size(), 8) - def tblName2 = "load_table2" - sql "drop table if exists ${tblName2}" + sql "drop table if exists load_table2" sql """ - CREATE TABLE `${tblName2}` ( + CREATE TABLE `load_table2` ( `k1` INT, `k2` VARCHAR(50) not null, `k3` DATETIMEV2(6) @@ -70,18 +68,18 @@ 
suite("test_auto_partition_load") { ); """ streamLoad { - table "${tblName2}" + table "load_table2" set 'column_separator', ',' file "auto_partition_stream_load2.csv" time 20000 } - sql """ insert into ${tblName2} values (11, '11', '2123-11-14 12:12:12.123456') """ - sql """ insert into ${tblName2} values (12, 'Chengdu', '2123-11-14 12:12:12.123456') """ - sql """ insert into ${tblName2} values (13, '11', '2123-11-14 12:12:12.123456') """ - sql """ insert into ${tblName2} values (14, '12', '2123-11-14 12:12:12.123456') """ + sql """ insert into load_table2 values (11, '11', '2123-11-14 12:12:12.123456') """ + sql """ insert into load_table2 values (12, 'Chengdu', '2123-11-14 12:12:12.123456') """ + sql """ insert into load_table2 values (13, '11', '2123-11-14 12:12:12.123456') """ + sql """ insert into load_table2 values (14, '12', '2123-11-14 12:12:12.123456') """ - qt_select2 "select * from ${tblName2} order by k1" - result2 = sql "show partitions from ${tblName2}" + qt_select2 "select * from load_table2 order by k1" + result2 = sql "show partitions from load_table2" logger.info("${result2}") assertEquals(result2.size(), 11) } diff --git a/regression-test/suites/partition_p0/auto_partition/test_auto_range_partition.groovy b/regression-test/suites/partition_p0/auto_partition/test_auto_range_partition.groovy index 33574990bc0..9273851e72c 100644 --- a/regression-test/suites/partition_p0/auto_partition/test_auto_range_partition.groovy +++ b/regression-test/suites/partition_p0/auto_partition/test_auto_range_partition.groovy @@ -16,10 +16,9 @@ // under the License. 
suite("test_auto_range_partition") { - def tblName1 = "range_table1" - sql "drop table if exists ${tblName1}" + sql "drop table if exists range_table1" sql """ - CREATE TABLE `${tblName1}` ( + CREATE TABLE `range_table1` ( `TIME_STAMP` datetimev2 NOT NULL COMMENT '采集日期' ) ENGINE=OLAP DUPLICATE KEY(`TIME_STAMP`) @@ -32,12 +31,12 @@ suite("test_auto_range_partition") { "replication_allocation" = "tag.location.default: 1" ); """ - sql """ insert into ${tblName1} values ('2022-12-14'), ('2022-12-15'), ('2022-12-16'), ('2022-12-17'), ('2022-12-18'), ('2022-12-19'), ('2022-12-20') """ - sql """ insert into ${tblName1} values ('2122-12-14'), ('2122-12-15'), ('2122-12-16'), ('2122-12-17'), ('2122-12-18'), ('2122-12-19'), ('2122-12-20') """ + sql """ insert into range_table1 values ('2022-12-14'), ('2022-12-15'), ('2022-12-16'), ('2022-12-17'), ('2022-12-18'), ('2022-12-19'), ('2022-12-20') """ + sql """ insert into range_table1 values ('2122-12-14'), ('2122-12-15'), ('2122-12-16'), ('2122-12-17'), ('2122-12-18'), ('2122-12-19'), ('2122-12-20') """ - qt_select00 """ select * from ${tblName1} order by TIME_STAMP """ - qt_select01 """ select * from ${tblName1} WHERE TIME_STAMP = '2022-12-15' order by TIME_STAMP """ - qt_select02 """ select * from ${tblName1} WHERE TIME_STAMP > '2022-12-15' order by TIME_STAMP """ + qt_select00 """ select * from range_table1 order by TIME_STAMP """ + qt_select01 """ select * from range_table1 WHERE TIME_STAMP = '2022-12-15' order by TIME_STAMP """ + qt_select02 """ select * from range_table1 WHERE TIME_STAMP > '2022-12-15' order by TIME_STAMP """ def tblDate = "range_table_date" sql "drop table if exists ${tblDate}" @@ -62,10 +61,9 @@ suite("test_auto_range_partition") { qt_date2 """ select * from ${tblDate} WHERE TIME_STAMP = '2022-12-15' order by TIME_STAMP """ qt_date3 """ select * from ${tblDate} WHERE TIME_STAMP > '2022-12-15' order by TIME_STAMP """ - def tblName2 = "range_table2" - sql "drop table if exists ${tblName2}" + sql "drop 
table if exists range_table2" sql """ - CREATE TABLE `${tblName2}` ( + CREATE TABLE `range_table2` ( `TIME_STAMP` datetimev2(3) NOT NULL COMMENT '采集日期' ) ENGINE=OLAP DUPLICATE KEY(`TIME_STAMP`) @@ -78,19 +76,18 @@ suite("test_auto_range_partition") { "replication_allocation" = "tag.location.default: 1" ); """ - sql """ insert into ${tblName2} values ('2022-12-14 22:22:22.222'), ('2022-12-15 22:22:22.222'), ('2022-12-16 22:22:22.222'), ('2022-12-17 22:22:22.222'), ('2022-12-18 22:22:22.222'), ('2022-12-19 22:22:22.222'), ('2022-12-20 22:22:22.222') """ - sql """ insert into ${tblName2} values ('2122-12-14 22:22:22.222'), ('2122-12-15 22:22:22.222'), ('2122-12-16 22:22:22.222'), ('2122-12-17 22:22:22.222'), ('2122-12-18 22:22:22.222'), ('2122-12-19 22:22:22.222'), ('2122-12-20 22:22:22.222') """ - sql """ insert into ${tblName2} values ('2022-11-14 22:22:22.222'), ('2022-11-15 22:22:22.222'), ('2022-11-16 22:22:22.222'), ('2022-11-17 22:22:22.222'), ('2022-11-18 22:22:22.222'), ('2022-11-19 22:22:22.222'), ('2022-11-20 22:22:22.222') """ + sql """ insert into range_table2 values ('2022-12-14 22:22:22.222'), ('2022-12-15 22:22:22.222'), ('2022-12-16 22:22:22.222'), ('2022-12-17 22:22:22.222'), ('2022-12-18 22:22:22.222'), ('2022-12-19 22:22:22.222'), ('2022-12-20 22:22:22.222') """ + sql """ insert into range_table2 values ('2122-12-14 22:22:22.222'), ('2122-12-15 22:22:22.222'), ('2122-12-16 22:22:22.222'), ('2122-12-17 22:22:22.222'), ('2122-12-18 22:22:22.222'), ('2122-12-19 22:22:22.222'), ('2122-12-20 22:22:22.222') """ + sql """ insert into range_table2 values ('2022-11-14 22:22:22.222'), ('2022-11-15 22:22:22.222'), ('2022-11-16 22:22:22.222'), ('2022-11-17 22:22:22.222'), ('2022-11-18 22:22:22.222'), ('2022-11-19 22:22:22.222'), ('2022-11-20 22:22:22.222') """ - qt_select10 """ select * from ${tblName2} order by TIME_STAMP """ - qt_select11 """ select * from ${tblName2} WHERE TIME_STAMP = '2022-12-15 22:22:22.222' order by TIME_STAMP """ - qt_select12 """ 
select * from ${tblName2} WHERE TIME_STAMP > '2022-12-15 22:22:22.222' order by TIME_STAMP """ + qt_select10 """ select * from range_table2 order by TIME_STAMP """ + qt_select11 """ select * from range_table2 WHERE TIME_STAMP = '2022-12-15 22:22:22.222' order by TIME_STAMP """ + qt_select12 """ select * from range_table2 WHERE TIME_STAMP > '2022-12-15 22:22:22.222' order by TIME_STAMP """ - def tblName3 = "range_table3" - sql "drop table if exists ${tblName3}" + sql "drop table if exists range_table3" sql """ - CREATE TABLE `${tblName3}` ( + CREATE TABLE `range_table3` ( `k1` INT, `k2` DATETIMEV2(3) NOT NULL, `k3` DATETIMEV2(6) @@ -105,8 +102,26 @@ suite("test_auto_range_partition") { "replication_allocation" = "tag.location.default: 1" ); """ - sql """ insert into ${tblName3} values (1, '1990-01-01', '2000-01-01 12:12:12.123456'), (2, '1991-02-01', '2000-01-01'), (3, '1991-01-01', '2000-01-01'), (3, '1991-01-01', '2000-01-01') """ - result1 = sql "show partitions from ${tblName3}" + sql """ insert into range_table3 values (1, '1990-01-01', '2000-01-01 12:12:12.123456'), (2, '1991-02-01', '2000-01-01'), (3, '1991-01-01', '2000-01-01'), (3, '1991-01-01', '2000-01-01') """ + result1 = sql "show partitions from range_table3" logger.info("${result1}") assertEquals(result1.size(), 3) + + sql "drop table if exists right_bound" + sql """ + create table right_bound( + k0 datetime(6) null + ) + auto partition by range date_trunc(k0, 'second') + ( + ) + DISTRIBUTED BY HASH(`k0`) BUCKETS auto + properties("replication_num" = "1"); + """ + sql " insert into right_bound values ('9999-12-31 23:59:59'); " + sql " insert into right_bound values ('9999-12-31 23:59:59.999999'); " + qt_right_bound " select * from right_bound order by k0; " + result2 = sql "show partitions from right_bound" + logger.info("${result2}") + assertEquals(result2.size(), 1) } diff --git a/regression-test/suites/partition_p0/dynamic_partition/test_dynamic_partition_with_rename.groovy 
b/regression-test/suites/partition_p0/dynamic_partition/test_dynamic_partition_with_rename.groovy index 6c9d245bef3..5fd190effc5 100644 --- a/regression-test/suites/partition_p0/dynamic_partition/test_dynamic_partition_with_rename.groovy +++ b/regression-test/suites/partition_p0/dynamic_partition/test_dynamic_partition_with_rename.groovy @@ -16,10 +16,9 @@ // under the License. suite("test_dynamic_partition_with_rename") { - def tbl = "test_dynamic_partition_with_rename" - sql "drop table if exists ${tbl}" + sql "drop table if exists test_dynamic_partition_with_rename" sql """ - CREATE TABLE IF NOT EXISTS ${tbl} + CREATE TABLE IF NOT EXISTS test_dynamic_partition_with_rename ( k1 date NOT NULL, k2 varchar(20) NOT NULL, k3 int sum NOT NULL ) AGGREGATE KEY(k1,k2) PARTITION BY RANGE(k1) ( ) @@ -34,21 +33,21 @@ suite("test_dynamic_partition_with_rename") { "dynamic_partition.create_history_partition"="true", "dynamic_partition.replication_allocation" = "tag.location.default: 1") """ - def result = sql "show partitions from ${tbl}" + def result = sql "show partitions from test_dynamic_partition_with_rename" assertEquals(7, result.size()) // rename distributed column, then try to add too more dynamic partition - sql "alter table ${tbl} rename column k1 renamed_k1" + sql "alter table test_dynamic_partition_with_rename rename column k1 renamed_k1" sql """ ADMIN SET FRONTEND CONFIG ('dynamic_partition_check_interval_seconds' = '1') """ - sql """ alter table ${tbl} set('dynamic_partition.end'='5') """ - result = sql "show partitions from ${tbl}" + sql """ alter table test_dynamic_partition_with_rename set('dynamic_partition.end'='5') """ + result = sql "show partitions from test_dynamic_partition_with_rename" for (def retry = 0; retry < 120; retry++) { // at most wait 120s if (result.size() == 9) { break; } logger.info("wait dynamic partition scheduler, sleep 1s") sleep(1000); // sleep 1s - result = sql "show partitions from ${tbl}" + result = sql "show partitions from 
test_dynamic_partition_with_rename" } assertEquals(9, result.size()) for (def line = 0; line < result.size(); line++) { @@ -56,5 +55,5 @@ suite("test_dynamic_partition_with_rename") { assertEquals("renamed_k1", result.get(line).get(7)) } - sql "drop table ${tbl}" + sql "drop table test_dynamic_partition_with_rename" } diff --git a/regression-test/suites/partition_p0/multi_partition/test_multi_column_partition.groovy b/regression-test/suites/partition_p0/multi_partition/test_multi_column_partition.groovy index 3740e6ff684..4808665520b 100644 --- a/regression-test/suites/partition_p0/multi_partition/test_multi_column_partition.groovy +++ b/regression-test/suites/partition_p0/multi_partition/test_multi_column_partition.groovy @@ -78,6 +78,7 @@ suite("test_multi_partition_key", "p0") { // partition columns are int & datetime + sql " drop table if exists err_1 " sql """ CREATE TABLE err_1 ( k1 TINYINT NOT NULL, diff --git a/regression-test/suites/partition_p0/test_datev2_partition.groovy b/regression-test/suites/partition_p0/test_datev2_partition.groovy index 63852bb4e2e..54db28a18c7 100644 --- a/regression-test/suites/partition_p0/test_datev2_partition.groovy +++ b/regression-test/suites/partition_p0/test_datev2_partition.groovy @@ -16,10 +16,9 @@ // under the License. 
suite("test_datev2_partition") { - def tblName1 = "test_datev2_partition1" - sql "drop table if exists ${tblName1}" + sql "drop table if exists test_datev2_partition1" sql """ - CREATE TABLE `${tblName1}` ( + CREATE TABLE `test_datev2_partition1` ( `TIME_STAMP` datev2 NOT NULL COMMENT '采集日期' ) ENGINE=OLAP DUPLICATE KEY(`TIME_STAMP`) @@ -38,16 +37,15 @@ suite("test_datev2_partition") { "replication_allocation" = "tag.location.default: 1" ); """ - sql """ insert into ${tblName1} values ('2022-12-14'), ('2022-12-15'), ('2022-12-16'), ('2022-12-17'), ('2022-12-18'), ('2022-12-19'), ('2022-12-20') """ + sql """ insert into test_datev2_partition1 values ('2022-12-14'), ('2022-12-15'), ('2022-12-16'), ('2022-12-17'), ('2022-12-18'), ('2022-12-19'), ('2022-12-20') """ - qt_select """ select * from ${tblName1} order by TIME_STAMP """ - qt_select """ select * from ${tblName1} WHERE TIME_STAMP = '2022-12-15' order by TIME_STAMP """ - qt_select """ select * from ${tblName1} WHERE TIME_STAMP > '2022-12-15' order by TIME_STAMP """ + qt_select """ select * from test_datev2_partition1 order by TIME_STAMP """ + qt_select """ select * from test_datev2_partition1 WHERE TIME_STAMP = '2022-12-15' order by TIME_STAMP """ + qt_select """ select * from test_datev2_partition1 WHERE TIME_STAMP > '2022-12-15' order by TIME_STAMP """ - def tblName2 = "test_datev2_partition2" - sql "drop table if exists ${tblName2}" + sql "drop table if exists test_datev2_partition2" sql """ - CREATE TABLE `${tblName2}` ( + CREATE TABLE `test_datev2_partition2` ( `TIME_STAMP` datetimev2(3) NOT NULL COMMENT '采集日期' ) ENGINE=OLAP DUPLICATE KEY(`TIME_STAMP`) @@ -66,9 +64,9 @@ suite("test_datev2_partition") { "replication_allocation" = "tag.location.default: 1" ); """ - sql """ insert into ${tblName2} values ('2022-12-14 22:22:22.222'), ('2022-12-15 22:22:22.222'), ('2022-12-16 22:22:22.222'), ('2022-12-17 22:22:22.222'), ('2022-12-18 22:22:22.222'), ('2022-12-19 22:22:22.222'), ('2022-12-20 22:22:22.222') """ + 
sql """ insert into test_datev2_partition2 values ('2022-12-14 22:22:22.222'), ('2022-12-15 22:22:22.222'), ('2022-12-16 22:22:22.222'), ('2022-12-17 22:22:22.222'), ('2022-12-18 22:22:22.222'), ('2022-12-19 22:22:22.222'), ('2022-12-20 22:22:22.222') """ - qt_select """ select * from ${tblName2} order by TIME_STAMP """ - qt_select """ select * from ${tblName2} WHERE TIME_STAMP = '2022-12-15 22:22:22.222' order by TIME_STAMP """ - qt_select """ select * from ${tblName2} WHERE TIME_STAMP > '2022-12-15 22:22:22.222' order by TIME_STAMP """ + qt_select """ select * from test_datev2_partition2 order by TIME_STAMP """ + qt_select """ select * from test_datev2_partition2 WHERE TIME_STAMP = '2022-12-15 22:22:22.222' order by TIME_STAMP """ + qt_select """ select * from test_datev2_partition2 WHERE TIME_STAMP > '2022-12-15 22:22:22.222' order by TIME_STAMP """ } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org