This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new aa1796299ff branch-4.0: [fix](be) Handle legacy DecimalV2 segments
with missing precision/frac (#63569) (#63581)
aa1796299ff is described below
commit aa1796299ff2713ae2407ac0afd158bc1598a010
Author: TengJianPing <[email protected]>
AuthorDate: Thu May 28 09:57:34 2026 +0800
branch-4.0: [fix](be) Handle legacy DecimalV2 segments with missing
precision/frac (#63569) (#63581)
Pick: #63569
Problem Summary: After upgrading from 3.1.4 to 4.0.4, queries on
DecimalV2 columns fail with:
[INTERNAL_ERROR]meet invalid precision: real_precision=0,
max_decimal_precision=27, min_decimal_precision=1
Root cause: Segments written by Doris < 2.1.0 (before #26572) do not
persist precision/frac in ColumnMetaPB; they default to 0 when read
back. Since #35222, DataTypeFactory passes those raw values as
original_precision/original_scale into DataTypeDecimalV2, which calls
check_type_precision(0) and throws. 3.1.4 was unaffected because the old
code hardcoded (27, 9).
Fix: In _create_primitive_data_type for OLAP_FIELD_TYPE_DECIMAL, when
precision is not positive (legacy segment), pass UINT32_MAX to
DataTypeDecimalV2 to signal that the original precision/scale are
unknown. DecimalScaleInfo<TYPE_DECIMALV2> already treats UINT32_MAX as
'unknown' and falls back to the in-memory (27, 9) representation in
get_original_precision()/get_original_scale().
Fix "meet invalid precision: real_precision=0" when querying DecimalV2
columns on segments written by Doris versions older than 2.1.0.
- Test: Unit Test
- Added DataTypeDecimalTest.create_decimalv2_from_legacy_tablet_column
covering: legacy TabletColumn (unset precision/frac), modern
TabletColumn with decimal(26,6), and legacy segment_v2::ColumnMetaPB
with default-0 precision/frac.
- Behavior changed: No
- Does this need documentation: No
---
be/src/vec/data_types/data_type_factory.cpp | 19 ++-
be/test/vec/data_types/data_type_decimal_test.cpp | 176 ++++++++++++++++++++++
2 files changed, 193 insertions(+), 2 deletions(-)
diff --git a/be/src/vec/data_types/data_type_factory.cpp
b/be/src/vec/data_types/data_type_factory.cpp
index 5849781a2e5..e6bfadd32a4 100644
--- a/be/src/vec/data_types/data_type_factory.cpp
+++ b/be/src/vec/data_types/data_type_factory.cpp
@@ -385,8 +385,23 @@ DataTypePtr DataTypeFactory::create_data_type(const
segment_v2::ColumnMetaPB& pc
nested = std::make_shared<DataTypeStruct>(dataTypes, names);
} else {
// TODO add precision and frac
- nested =
_create_primitive_data_type(static_cast<FieldType>(pcolumn.type()),
- pcolumn.precision(),
pcolumn.frac(), -1);
+ auto meta_precision = pcolumn.precision();
+ auto meta_scale = pcolumn.frac();
+ if (pcolumn.type() ==
static_cast<int>(FieldType::OLAP_FIELD_TYPE_DECIMAL)) {
+ // Segments written by Doris < 2.1.0 (before #26572) do not persist
+ // precision/frac in ColumnMetaPB, so they default to 0 when read
back.
+ // Pass UINT32_MAX to DataTypeDecimalV2 to signal that the original
+ // precision/scale are unknown; otherwise check_type_precision(0)
throws
+ // "meet invalid precision: real_precision=0".
+ UInt32 orig_precision =
+ meta_precision > 0 ? static_cast<UInt32>(meta_precision) :
UINT32_MAX;
+ UInt32 orig_scale = meta_precision > 0 ?
static_cast<UInt32>(meta_scale) : UINT32_MAX;
+ nested =
_create_primitive_data_type(static_cast<FieldType>(pcolumn.type()),
+ orig_precision, orig_scale,
-1);
+ } else {
+ nested =
_create_primitive_data_type(static_cast<FieldType>(pcolumn.type()),
+ meta_precision, meta_scale,
-1);
+ }
}
if (pcolumn.is_nullable() && nested) {
diff --git a/be/test/vec/data_types/data_type_decimal_test.cpp
b/be/test/vec/data_types/data_type_decimal_test.cpp
index 4d96c3a2260..c462d0414e3 100644
--- a/be/test/vec/data_types/data_type_decimal_test.cpp
+++ b/be/test/vec/data_types/data_type_decimal_test.cpp
@@ -29,6 +29,7 @@
#include <type_traits>
#include "common/exception.h"
+#include "gen_cpp/segment_v2.pb.h"
#include "testutil/test_util.h"
#include "vec/columns/column.h"
#include "vec/common/assert_cast.h"
@@ -37,8 +38,12 @@
#include "vec/data_types/common_data_type_serder_test.h"
#include "vec/data_types/common_data_type_test.h"
#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_agg_state.h"
+#include "vec/data_types/data_type_array.h"
#include "vec/data_types/data_type_factory.hpp"
+#include "vec/data_types/data_type_map.h"
#include "vec/data_types/data_type_number.h"
+#include "vec/data_types/data_type_struct.h"
namespace doris::vectorized {
static std::string test_data_dir;
@@ -474,6 +479,177 @@ TEST_F(DataTypeDecimalTest, ser_deser) {
test_func(dt_decimal256_3, *column_decimal256_3, USE_CONST_SERDE);
test_func(dt_decimal256_3, *column_decimal256_3, AGGREGATION_2_1_VERSION);
}
+// Regression for legacy DecimalV2 segments written by Doris < 2.1.0 (before
+// #26572): ColumnMetaPB.precision/frac are absent and default to 0 when read
+// back. DataTypeFactory must not pass 0 as the original precision/scale to
+// DataTypeDecimalV2, otherwise check_type_precision throws
+// "meet invalid precision: real_precision=0".
+TEST_F(DataTypeDecimalTest, create_decimalv2_from_legacy_tablet_column) {
+ // Case 1: legacy segment — precision/frac missing (default 0)
+ {
+ TabletColumn col;
+ col.set_type(FieldType::OLAP_FIELD_TYPE_DECIMAL);
+ // intentionally do not set precision/frac to mimic old segments
+ DataTypePtr dt;
+ EXPECT_NO_THROW(dt = DataTypeFactory::instance().create_data_type(col,
false));
+ ASSERT_NE(dt, nullptr);
+ EXPECT_EQ(dt->get_primitive_type(), TYPE_DECIMALV2);
+ const auto* decv2 = assert_cast<const DataTypeDecimalV2*>(dt.get());
+ EXPECT_EQ(decv2->get_precision(), BeConsts::MAX_DECIMALV2_PRECISION);
+ EXPECT_EQ(decv2->get_scale(), BeConsts::MAX_DECIMALV2_SCALE);
+ // When original precision/scale are unknown, get_original_* should
fall
+ // back to the in-memory (27, 9) representation.
+ EXPECT_EQ(decv2->get_original_precision(),
BeConsts::MAX_DECIMALV2_PRECISION);
+ EXPECT_EQ(decv2->get_original_scale(), BeConsts::MAX_DECIMALV2_SCALE);
+ }
+ // Case 2: new segment — precision/frac are persisted (e.g. decimal(26,6))
+ {
+ TabletColumn col;
+ col.set_type(FieldType::OLAP_FIELD_TYPE_DECIMAL);
+ col.set_precision(26);
+ col.set_frac(6);
+ DataTypePtr dt;
+ EXPECT_NO_THROW(dt = DataTypeFactory::instance().create_data_type(col,
false));
+ ASSERT_NE(dt, nullptr);
+ EXPECT_EQ(dt->get_primitive_type(), TYPE_DECIMALV2);
+ const auto* decv2 = assert_cast<const DataTypeDecimalV2*>(dt.get());
+ EXPECT_EQ(decv2->get_precision(), BeConsts::MAX_DECIMALV2_PRECISION);
+ EXPECT_EQ(decv2->get_scale(), BeConsts::MAX_DECIMALV2_SCALE);
+ EXPECT_EQ(decv2->get_original_precision(), 26U);
+ EXPECT_EQ(decv2->get_original_scale(), 6U);
+ }
+ // Case 3: same regression via the segment-read path that consumes
+ // ColumnMetaPB directly (this is the exact code path that broke after
+ // PR #35222 when reading old DecimalV2 segments).
+ {
+ segment_v2::ColumnMetaPB meta;
+ meta.set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_DECIMAL));
+ // precision/frac intentionally unset -> default 0
+ DataTypePtr dt;
+ EXPECT_NO_THROW(dt =
DataTypeFactory::instance().create_data_type(meta));
+ ASSERT_NE(dt, nullptr);
+ EXPECT_EQ(dt->get_primitive_type(), TYPE_DECIMALV2);
+ const auto* decv2 = assert_cast<const DataTypeDecimalV2*>(dt.get());
+ EXPECT_EQ(decv2->get_precision(), BeConsts::MAX_DECIMALV2_PRECISION);
+ EXPECT_EQ(decv2->get_scale(), BeConsts::MAX_DECIMALV2_SCALE);
+ EXPECT_EQ(decv2->get_original_precision(),
BeConsts::MAX_DECIMALV2_PRECISION);
+ EXPECT_EQ(decv2->get_original_scale(), BeConsts::MAX_DECIMALV2_SCALE);
+ }
+}
+
+// Regression for complex types wrapping legacy DecimalV2 children whose
+// segment ColumnMetaPB.precision/frac default to 0. The fix in
+// DataTypeFactory::create_data_type(segment_v2::ColumnMetaPB&) must
+// propagate through Array / Map / Struct / AggState recursively.
+TEST_F(DataTypeDecimalTest, create_complex_types_with_legacy_decimalv2) {
+ auto make_legacy_decv2_meta = []() {
+ segment_v2::ColumnMetaPB child;
+ child.set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_DECIMAL));
+ // precision/frac intentionally unset -> default 0
+ return child;
+ };
+ auto unwrap_nullable = [](const DataTypePtr& dt) -> DataTypePtr {
+ if (dt && dt->is_nullable()) {
+ return assert_cast<const
DataTypeNullable*>(dt.get())->get_nested_type();
+ }
+ return dt;
+ };
+
+ // Array<DecimalV2>
+ {
+ segment_v2::ColumnMetaPB meta;
+ meta.set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_ARRAY));
+ *meta.add_children_columns() = make_legacy_decv2_meta();
+ DataTypePtr dt;
+ EXPECT_NO_THROW(dt =
DataTypeFactory::instance().create_data_type(meta));
+ ASSERT_NE(dt, nullptr);
+ EXPECT_EQ(dt->get_primitive_type(), TYPE_ARRAY);
+ const auto* arr = assert_cast<const DataTypeArray*>(dt.get());
+ auto elem = unwrap_nullable(arr->get_nested_type());
+ EXPECT_EQ(elem->get_primitive_type(), TYPE_DECIMALV2);
+ const auto* decv2 = assert_cast<const DataTypeDecimalV2*>(elem.get());
+ EXPECT_EQ(decv2->get_original_precision(),
BeConsts::MAX_DECIMALV2_PRECISION);
+ EXPECT_EQ(decv2->get_original_scale(), BeConsts::MAX_DECIMALV2_SCALE);
+ }
+
+ // Map<Int, DecimalV2>
+ {
+ segment_v2::ColumnMetaPB meta;
+ meta.set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_MAP));
+ auto* k = meta.add_children_columns();
+ k->set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_INT));
+ *meta.add_children_columns() = make_legacy_decv2_meta();
+ DataTypePtr dt;
+ EXPECT_NO_THROW(dt =
DataTypeFactory::instance().create_data_type(meta));
+ ASSERT_NE(dt, nullptr);
+ EXPECT_EQ(dt->get_primitive_type(), TYPE_MAP);
+ const auto* m = assert_cast<const DataTypeMap*>(dt.get());
+ EXPECT_EQ(unwrap_nullable(m->get_key_type())->get_primitive_type(),
TYPE_INT);
+ auto v = unwrap_nullable(m->get_value_type());
+ EXPECT_EQ(v->get_primitive_type(), TYPE_DECIMALV2);
+ const auto* decv2 = assert_cast<const DataTypeDecimalV2*>(v.get());
+ EXPECT_EQ(decv2->get_original_precision(),
BeConsts::MAX_DECIMALV2_PRECISION);
+ }
+
+ // Struct<DecimalV2, Int>
+ {
+ segment_v2::ColumnMetaPB meta;
+ meta.set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_STRUCT));
+ *meta.add_children_columns() = make_legacy_decv2_meta();
+ auto* second = meta.add_children_columns();
+ second->set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_INT));
+ DataTypePtr dt;
+ EXPECT_NO_THROW(dt =
DataTypeFactory::instance().create_data_type(meta));
+ ASSERT_NE(dt, nullptr);
+ EXPECT_EQ(dt->get_primitive_type(), TYPE_STRUCT);
+ const auto* s = assert_cast<const DataTypeStruct*>(dt.get());
+ ASSERT_EQ(s->get_elements().size(), 2u);
+ EXPECT_EQ(unwrap_nullable(s->get_element(0))->get_primitive_type(),
TYPE_DECIMALV2);
+ EXPECT_EQ(unwrap_nullable(s->get_element(1))->get_primitive_type(),
TYPE_INT);
+ }
+
+ // Nested: Array<Map<Int, DecimalV2>>
+ {
+ segment_v2::ColumnMetaPB meta;
+ meta.set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_ARRAY));
+ auto* map_child = meta.add_children_columns();
+ map_child->set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_MAP));
+ auto* k = map_child->add_children_columns();
+ k->set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_INT));
+ *map_child->add_children_columns() = make_legacy_decv2_meta();
+ DataTypePtr dt;
+ EXPECT_NO_THROW(dt =
DataTypeFactory::instance().create_data_type(meta));
+ ASSERT_NE(dt, nullptr);
+ EXPECT_EQ(dt->get_primitive_type(), TYPE_ARRAY);
+ auto inner =
+ unwrap_nullable(assert_cast<const
DataTypeArray*>(dt.get())->get_nested_type());
+ EXPECT_EQ(inner->get_primitive_type(), TYPE_MAP);
+ auto v = unwrap_nullable(assert_cast<const
DataTypeMap*>(inner.get())->get_value_type());
+ EXPECT_EQ(v->get_primitive_type(), TYPE_DECIMALV2);
+ }
+
+ // AggState (count) with legacy DecimalV2 sub-type
+ {
+ segment_v2::ColumnMetaPB meta;
+ meta.set_type(static_cast<int>(FieldType::OLAP_FIELD_TYPE_AGG_STATE));
+ meta.set_function_name("count");
+ meta.set_result_is_nullable(false);
+ meta.set_be_exec_version(BeExecVersionManager::get_newest_version());
+ *meta.add_children_columns() = make_legacy_decv2_meta();
+ DataTypePtr dt;
+ EXPECT_NO_THROW(dt =
DataTypeFactory::instance().create_data_type(meta));
+ ASSERT_NE(dt, nullptr);
+ EXPECT_EQ(dt->get_primitive_type(), TYPE_AGG_STATE);
+ const auto* agg = assert_cast<const DataTypeAggState*>(dt.get());
+ ASSERT_EQ(agg->get_sub_types().size(), 1u);
+ auto sub = unwrap_nullable(agg->get_sub_types()[0]);
+ EXPECT_EQ(sub->get_primitive_type(), TYPE_DECIMALV2);
+ const auto* decv2 = assert_cast<const DataTypeDecimalV2*>(sub.get());
+ EXPECT_EQ(decv2->get_original_precision(),
BeConsts::MAX_DECIMALV2_PRECISION);
+ EXPECT_EQ(decv2->get_original_scale(), BeConsts::MAX_DECIMALV2_SCALE);
+ }
+}
+
TEST_F(DataTypeDecimalTest, to_pb_column_meta) {
auto test_func = [](auto dt, PGenericType_TypeId expected_type) {
auto col_meta = std::make_shared<PColumnMeta>();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]