morningman commented on code in PR #45881: URL: https://github.com/apache/doris/pull/45881#discussion_r1901774931
########## be/src/vec/exec/format/column_type_convert.h: ########## @@ -234,9 +239,14 @@ struct SafeCastString<TYPE_TINYINT> { PrimitiveTypeTraits<TYPE_TINYINT>::ColumnType::value_type* value) { int32 cast_to_int = 0; bool can_cast = safe_strto32(startptr, buffer_size, &cast_to_int); - *value = cast_to_int; - return can_cast && cast_to_int <= std::numeric_limits<int8>::max() && - cast_to_int >= std::numeric_limits<int8>::min(); + if (can_cast && cast_to_int <= std::numeric_limits<int8>::max() && + cast_to_int >= std::numeric_limits<int8>::min()) { + // has checked the cast_to_int is in the range of int8 + *value = cast_set<int8_t>(cast_to_int); Review Comment: Is this a bug? Looks like it does not equal to previous logic ########## be/src/vec/exec/format/column_type_convert.h: ########## @@ -247,9 +257,14 @@ struct SafeCastString<TYPE_SMALLINT> { PrimitiveTypeTraits<TYPE_SMALLINT>::ColumnType::value_type* value) { int32 cast_to_int = 0; bool can_cast = safe_strto32(startptr, buffer_size, &cast_to_int); - *value = cast_to_int; - return can_cast && cast_to_int <= std::numeric_limits<int16>::max() && - cast_to_int >= std::numeric_limits<int16>::min(); + if (can_cast && cast_to_int <= std::numeric_limits<int16>::max() && Review Comment: ditto ########## be/src/vec/exec/format/column_type_convert.h: ########## @@ -389,14 +404,15 @@ class CastStringConverter : public ColumnTypeConverter { bool can_cast = false; if constexpr (is_decimal_type_const<DstPrimitiveType>()) { can_cast = SafeCastDecimalString<DstPrimitiveType>::safe_cast_string( - string_value.data, string_value.size, &value, + string_value.data, cast_set<int>(string_value.size), &value, Review Comment: how to make sure it can cast to `int`? ########## be/src/vec/exec/format/json/new_json_reader.h: ########## @@ -230,10 +222,10 @@ class NewJsonReader : public GenericReader { bool _skip_first_line; std::string _line_delimiter; - int _line_delimiter_length; + size_t _line_delimiter_length; - int _next_row; - int _total_rows; + uint32_t _next_row; Review Comment: it is stranger that `_next_row` is using `uint32`, but `_total_rows` is using `size_t`? ########## be/src/vec/exec/format/table/transactional_hive_reader.h: ########## @@ -57,15 +55,15 @@ class TransactionalHiveReader : public TableFormatReader { public: struct AcidRowID { int64_t original_transaction; - int32_t bucket; + int64_t bucket; int64_t row_id; struct Hash { size_t operator()(const AcidRowID& transactional_row_id) const { size_t hash_value = 0; hash_value ^= std::hash<int64_t> {}(transactional_row_id.original_transaction) + 0x9e3779b9 + (hash_value << 6) + (hash_value >> 2); - hash_value ^= std::hash<int32_t> {}(transactional_row_id.bucket) + 0x9e3779b9 + + hash_value ^= std::hash<int64_t> {}(transactional_row_id.bucket) + 0x9e3779b9 + Review Comment: Why changing to int64? Does it affect the hash result? ########## be/src/vec/exec/format/orc/vorc_reader.cpp: ########## @@ -2081,19 +2080,17 @@ void OrcReader::_fill_batch_vec(std::vector<orc::ColumnVectorBatch*>& result, void OrcReader::_build_delete_row_filter(const Block* block, size_t rows) { // transactional hive orc delete row if (_delete_rows != nullptr) { - _delete_rows_filter_ptr.reset(new IColumn::Filter(rows, 1)); + _delete_rows_filter_ptr = std::make_unique<IColumn::Filter>(rows, 1); auto* __restrict _pos_delete_filter_data = _delete_rows_filter_ptr->data(); - const ColumnInt64& original_transaction_column = - assert_cast<const ColumnInt64&>(*remove_nullable( - block->get_by_name(TransactionalHive::ORIGINAL_TRANSACTION_LOWER_CASE) - .column)); - const ColumnInt32& bucket_id_column = assert_cast<const ColumnInt32&>( + const auto& original_transaction_column = assert_cast<const ColumnInt64&>(*remove_nullable( + block->get_by_name(TransactionalHive::ORIGINAL_TRANSACTION_LOWER_CASE).column)); + const auto& bucket_id_column = assert_cast<const ColumnInt32&>( *remove_nullable(block->get_by_name(TransactionalHive::BUCKET_LOWER_CASE).column)); - const ColumnInt64& row_id_column = assert_cast<const ColumnInt64&>( + const auto& row_id_column = assert_cast<const ColumnInt64&>( *remove_nullable(block->get_by_name(TransactionalHive::ROW_ID_LOWER_CASE).column)); for (int i = 0; i < rows; ++i) { Int64 original_transaction = original_transaction_column.get_int(i); - Int32 bucket_id = bucket_id_column.get_int(i); + Int64 bucket_id = bucket_id_column.get_int(i); Review Comment: Why using `Int64`, instead of `int64_t`? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org