EmmyMiao87 commented on a change in pull request #3739: URL: https://github.com/apache/incubator-doris/pull/3739#discussion_r439912521
########## File path: be/src/olap/schema_change.cpp ########## @@ -194,6 +188,105 @@ ColumnMapping* RowBlockChanger::get_mutable_column_mapping(size_t column_index) break; \ } + +bool to_bitmap(RowCursor* read_helper, RowCursor* write_helper, const TabletColumn& ref_column, + int field_idx, int ref_field_idx, MemPool* mem_pool) { + write_helper->set_not_null(ref_field_idx); + BitmapValue bitmap; + if (!read_helper->is_null(ref_field_idx)) { + uint64_t origin_value; + char *src = read_helper->cell_ptr(ref_field_idx); + switch (ref_column.type()) { + case OLAP_FIELD_TYPE_TINYINT: + if (*(int8_t *) src < 0) { + LOG(WARNING) << "The input: " << *(int8_t *) src Review comment: > How about return Status with this error message? > And do not print log here, it may lead to lots of log, and this error info should return to user. +1 ########## File path: be/src/olap/schema_change.cpp ########## @@ -194,6 +188,105 @@ ColumnMapping* RowBlockChanger::get_mutable_column_mapping(size_t column_index) break; \ } + +bool to_bitmap(RowCursor* read_helper, RowCursor* write_helper, const TabletColumn& ref_column, + int field_idx, int ref_field_idx, MemPool* mem_pool) { + write_helper->set_not_null(ref_field_idx); + BitmapValue bitmap; + if (!read_helper->is_null(ref_field_idx)) { + uint64_t origin_value; + char *src = read_helper->cell_ptr(ref_field_idx); + switch (ref_column.type()) { + case OLAP_FIELD_TYPE_TINYINT: + if (*(int8_t *) src < 0) { + LOG(WARNING) << "The input: " << *(int8_t *) src + << " is not valid, to_bitmap only support bigint value from 0 to 18446744073709551615 currently"; + return false; + } + origin_value = *(int8_t *) src; + break; + case OLAP_FIELD_TYPE_UNSIGNED_TINYINT: + origin_value = *(uint8_t *) src; + break; + case OLAP_FIELD_TYPE_SMALLINT: + if (*(int16_t *) src < 0) { + LOG(WARNING) << "The input: " << *(int16_t *) src + << " is not valid, to_bitmap only support bigint value from 0 to 18446744073709551615 currently"; + return false; + } + origin_value = 
*(int16_t *) src; + break; + case OLAP_FIELD_TYPE_UNSIGNED_SMALLINT: + origin_value = *(uint16_t *) src; + break; + case OLAP_FIELD_TYPE_INT: + if (*(int32_t *) src < 0) { + LOG(WARNING) << "The input: " << *(int32_t *) src + << " is not valid, to_bitmap only support bigint value from 0 to 18446744073709551615 currently"; + return false; + } + origin_value = *(int32_t *) src; + break; + case OLAP_FIELD_TYPE_UNSIGNED_INT: + origin_value = *(uint32_t *) src; + break; + case OLAP_FIELD_TYPE_BIGINT: + if (*(int64_t *) src < 0) { + LOG(WARNING) << "The input: " << *(int64_t *) src + << " is not valid, to_bitmap only support bigint value from 0 to 18446744073709551615 currently"; + return false; + } + origin_value = *(int64_t *) src; + break; + case OLAP_FIELD_TYPE_UNSIGNED_BIGINT: + origin_value = *(uint64_t *) src; + break; + default: + LOG(WARNING) << "the column type which was altered from was unsupported." + << " from_type=" + << ref_column.type(); + return false; + } + bitmap.add(origin_value); + } + char *buf = reinterpret_cast<char *>(mem_pool->allocate(bitmap.getSizeInBytes())); + Slice dst(buf, bitmap.getSizeInBytes()); + bitmap.write(dst.data); + write_helper->set_field_content(field_idx, reinterpret_cast<char *>(&dst), mem_pool); + return true; +} + +bool hll_hash(RowCursor* read_helper, RowCursor* write_helper, const TabletColumn& ref_column, + int field_idx, int ref_field_idx, MemPool* mem_pool) { + write_helper->set_not_null(field_idx); + HyperLogLog hll; + if (!read_helper->is_null(ref_field_idx)) { Review comment: If the read_field is null, the hll could be null also. Same as bitmap. 
########## File path: be/src/olap/schema_change.cpp ########## @@ -266,6 +359,33 @@ bool RowBlockChanger::change_row_block( int32_t ref_column = _schema_mapping[i].ref_column; if (_schema_mapping[i].ref_column >= 0) { + if (!_schema_mapping[i].materialized_function.empty()) { + bool (*_do_materialized_transform) (RowCursor*, RowCursor*, const TabletColumn&, int, int, MemPool* ); + if (_schema_mapping[i].materialized_function == "to_bitmap") { + _do_materialized_transform = to_bitmap; + } else if (_schema_mapping[i].materialized_function == "hll_hash") { + _do_materialized_transform = hll_hash; + } else if (_schema_mapping[i].materialized_function == "count") { + _do_materialized_transform = count; Review comment: How about count(*)? ########## File path: be/src/olap/schema_change.cpp ########## @@ -194,6 +188,105 @@ ColumnMapping* RowBlockChanger::get_mutable_column_mapping(size_t column_index) break; \ } + +bool to_bitmap(RowCursor* read_helper, RowCursor* write_helper, const TabletColumn& ref_column, + int field_idx, int ref_field_idx, MemPool* mem_pool) { + write_helper->set_not_null(ref_field_idx); + BitmapValue bitmap; + if (!read_helper->is_null(ref_field_idx)) { + uint64_t origin_value; + char *src = read_helper->cell_ptr(ref_field_idx); + switch (ref_column.type()) { + case OLAP_FIELD_TYPE_TINYINT: + if (*(int8_t *) src < 0) { + LOG(WARNING) << "The input: " << *(int8_t *) src + << " is not valid, to_bitmap only support bigint value from 0 to 18446744073709551615 currently"; + return false; + } + origin_value = *(int8_t *) src; + break; + case OLAP_FIELD_TYPE_UNSIGNED_TINYINT: + origin_value = *(uint8_t *) src; + break; + case OLAP_FIELD_TYPE_SMALLINT: + if (*(int16_t *) src < 0) { + LOG(WARNING) << "The input: " << *(int16_t *) src + << " is not valid, to_bitmap only support bigint value from 0 to 18446744073709551615 currently"; + return false; + } + origin_value = *(int16_t *) src; + break; + case OLAP_FIELD_TYPE_UNSIGNED_SMALLINT: + origin_value = 
*(uint16_t *) src; + break; + case OLAP_FIELD_TYPE_INT: + if (*(int32_t *) src < 0) { + LOG(WARNING) << "The input: " << *(int32_t *) src + << " is not valid, to_bitmap only support bigint value from 0 to 18446744073709551615 currently"; + return false; + } + origin_value = *(int32_t *) src; + break; + case OLAP_FIELD_TYPE_UNSIGNED_INT: + origin_value = *(uint32_t *) src; + break; + case OLAP_FIELD_TYPE_BIGINT: + if (*(int64_t *) src < 0) { + LOG(WARNING) << "The input: " << *(int64_t *) src + << " is not valid, to_bitmap only support bigint value from 0 to 18446744073709551615 currently"; + return false; + } + origin_value = *(int64_t *) src; + break; + case OLAP_FIELD_TYPE_UNSIGNED_BIGINT: + origin_value = *(uint64_t *) src; + break; + default: + LOG(WARNING) << "the column type which was altered from was unsupported." + << " from_type=" + << ref_column.type(); + return false; + } + bitmap.add(origin_value); + } + char *buf = reinterpret_cast<char *>(mem_pool->allocate(bitmap.getSizeInBytes())); + Slice dst(buf, bitmap.getSizeInBytes()); + bitmap.write(dst.data); + write_helper->set_field_content(field_idx, reinterpret_cast<char *>(&dst), mem_pool); + return true; +} + +bool hll_hash(RowCursor* read_helper, RowCursor* write_helper, const TabletColumn& ref_column, + int field_idx, int ref_field_idx, MemPool* mem_pool) { + write_helper->set_not_null(field_idx); + HyperLogLog hll; + if (!read_helper->is_null(ref_field_idx)) { + Slice src; + if (ref_column.type() != OLAP_FIELD_TYPE_VARCHAR) { + src.data = read_helper->cell_ptr(ref_field_idx); + src.size = ref_column.length(); + } else { + src = *reinterpret_cast<Slice *>(read_helper->cell_ptr(ref_field_idx)); + } + uint64_t hash_value = HashUtil::murmur_hash64A(src.data, src.size, HashUtil::MURMUR_SEED); + hll.update(hash_value); + } + std::string buf; + buf.resize(hll.max_serialized_size()); + buf.resize(hll.serialize((uint8_t *) buf.c_str())); + Slice dst(buf); + write_helper->set_field_content(field_idx, 
reinterpret_cast<char *>(&dst), mem_pool); + return true; +} + +bool count(RowCursor* read_helper, RowCursor* write_helper, const TabletColumn& ref_column, Review comment: How do you distinguish between count(k1) and count(*)? ########## File path: be/src/olap/schema_change.cpp ########## @@ -194,6 +188,105 @@ ColumnMapping* RowBlockChanger::get_mutable_column_mapping(size_t column_index) break; \ } + +bool to_bitmap(RowCursor* read_helper, RowCursor* write_helper, const TabletColumn& ref_column, + int field_idx, int ref_field_idx, MemPool* mem_pool) { + write_helper->set_not_null(ref_field_idx); + BitmapValue bitmap; + if (!read_helper->is_null(ref_field_idx)) { + uint64_t origin_value; + char *src = read_helper->cell_ptr(ref_field_idx); + switch (ref_column.type()) { + case OLAP_FIELD_TYPE_TINYINT: + if (*(int8_t *) src < 0) { + LOG(WARNING) << "The input: " << *(int8_t *) src + << " is not valid, to_bitmap only support bigint value from 0 to 18446744073709551615 currently"; + return false; + } + origin_value = *(int8_t *) src; + break; + case OLAP_FIELD_TYPE_UNSIGNED_TINYINT: + origin_value = *(uint8_t *) src; + break; + case OLAP_FIELD_TYPE_SMALLINT: + if (*(int16_t *) src < 0) { + LOG(WARNING) << "The input: " << *(int16_t *) src + << " is not valid, to_bitmap only support bigint value from 0 to 18446744073709551615 currently"; + return false; + } + origin_value = *(int16_t *) src; + break; + case OLAP_FIELD_TYPE_UNSIGNED_SMALLINT: + origin_value = *(uint16_t *) src; + break; + case OLAP_FIELD_TYPE_INT: + if (*(int32_t *) src < 0) { + LOG(WARNING) << "The input: " << *(int32_t *) src + << " is not valid, to_bitmap only support bigint value from 0 to 18446744073709551615 currently"; + return false; + } + origin_value = *(int32_t *) src; + break; + case OLAP_FIELD_TYPE_UNSIGNED_INT: + origin_value = *(uint32_t *) src; + break; + case OLAP_FIELD_TYPE_BIGINT: + if (*(int64_t *) src < 0) { + LOG(WARNING) << "The input: " << *(int64_t *) src + << " is not valid, to_bitmap
only support bigint value from 0 to 18446744073709551615 currently"; + return false; + } + origin_value = *(int64_t *) src; + break; + case OLAP_FIELD_TYPE_UNSIGNED_BIGINT: + origin_value = *(uint64_t *) src; + break; + default: + LOG(WARNING) << "the column type which was altered from was unsupported." + << " from_type=" + << ref_column.type(); + return false; + } + bitmap.add(origin_value); + } + char *buf = reinterpret_cast<char *>(mem_pool->allocate(bitmap.getSizeInBytes())); + Slice dst(buf, bitmap.getSizeInBytes()); + bitmap.write(dst.data); + write_helper->set_field_content(field_idx, reinterpret_cast<char *>(&dst), mem_pool); + return true; +} + +bool hll_hash(RowCursor* read_helper, RowCursor* write_helper, const TabletColumn& ref_column, + int field_idx, int ref_field_idx, MemPool* mem_pool) { + write_helper->set_not_null(field_idx); + HyperLogLog hll; + if (!read_helper->is_null(ref_field_idx)) { + Slice src; + if (ref_column.type() != OLAP_FIELD_TYPE_VARCHAR) { + src.data = read_helper->cell_ptr(ref_field_idx); + src.size = ref_column.length(); + } else { + src = *reinterpret_cast<Slice *>(read_helper->cell_ptr(ref_field_idx)); + } + uint64_t hash_value = HashUtil::murmur_hash64A(src.data, src.size, HashUtil::MURMUR_SEED); + hll.update(hash_value); + } + std::string buf; + buf.resize(hll.max_serialized_size()); + buf.resize(hll.serialize((uint8_t *) buf.c_str())); + Slice dst(buf); + write_helper->set_field_content(field_idx, reinterpret_cast<char *>(&dst), mem_pool); + return true; +} + +bool count(RowCursor* read_helper, RowCursor* write_helper, const TabletColumn& ref_column, + int field_idx, int ref_field_idx, MemPool* mem_pool) { + write_helper->set_not_null(field_idx); + int64_t count = read_helper->is_null(field_idx) ? 0 : 1; Review comment: The value of count can never be negative, so why don't you use `uint`? ---------------------------------------------------------------- This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org