HangyuanLiu commented on a change in pull request #3739: URL: https://github.com/apache/incubator-doris/pull/3739#discussion_r439233209
########## File path: be/src/olap/schema_change.cpp ########## @@ -194,6 +188,105 @@ ColumnMapping* RowBlockChanger::get_mutable_column_mapping(size_t column_index) break; \ } + +bool to_bitmap(RowCursor* read_helper, RowCursor* write_helper, const TabletColumn& ref_column, + int field_idx, int ref_field_idx, MemPool* mem_pool) { + write_helper->set_not_null(ref_field_idx); + BitmapValue bitmap; + if (!read_helper->is_null(ref_field_idx)) { + uint64_t origin_value; + char *src = read_helper->cell_ptr(ref_field_idx); + switch (ref_column.type()) { + case OLAP_FIELD_TYPE_TINYINT: + if (*(int8_t *) src < 0) { + LOG(WARNING) << "The input: " << *(int8_t *) src + << " is not valid, to_bitmap only support bigint value from 0 to 18446744073709551615 currently"; + return false; + } + origin_value = *(int8_t *) src; + break; + case OLAP_FIELD_TYPE_UNSIGNED_TINYINT: + origin_value = *(uint8_t *) src; + break; + case OLAP_FIELD_TYPE_SMALLINT: + if (*(int16_t *) src < 0) { + LOG(WARNING) << "The input: " << *(int16_t *) src + << " is not valid, to_bitmap only support bigint value from 0 to 18446744073709551615 currently"; + return false; + } + origin_value = *(int16_t *) src; + break; + case OLAP_FIELD_TYPE_UNSIGNED_SMALLINT: + origin_value = *(uint16_t *) src; + break; + case OLAP_FIELD_TYPE_INT: + if (*(int32_t *) src < 0) { + LOG(WARNING) << "The input: " << *(int32_t *) src + << " is not valid, to_bitmap only support bigint value from 0 to 18446744073709551615 currently"; + return false; + } + origin_value = *(int32_t *) src; + break; + case OLAP_FIELD_TYPE_UNSIGNED_INT: + origin_value = *(uint32_t *) src; + break; + case OLAP_FIELD_TYPE_BIGINT: + if (*(int64_t *) src < 0) { + LOG(WARNING) << "The input: " << *(int64_t *) src + << " is not valid, to_bitmap only support bigint value from 0 to 18446744073709551615 currently"; + return false; + } + origin_value = *(int64_t *) src; + break; + case OLAP_FIELD_TYPE_UNSIGNED_BIGINT: + origin_value = *(uint64_t *) src; + break; + 
default: + LOG(WARNING) << "the column type which was altered from was unsupported." + << " from_type=" + << ref_column.type(); + return false; + } + bitmap.add(origin_value); + } + char *buf = reinterpret_cast<char *>(mem_pool->allocate(bitmap.getSizeInBytes())); + Slice dst(buf, bitmap.getSizeInBytes()); + bitmap.write(dst.data); + write_helper->set_field_content(field_idx, reinterpret_cast<char *>(&dst), mem_pool); + return true; +} + +bool hll_hash(RowCursor* read_helper, RowCursor* write_helper, const TabletColumn& ref_column, + int field_idx, int ref_field_idx, MemPool* mem_pool) { + write_helper->set_not_null(field_idx); + HyperLogLog hll; + if (!read_helper->is_null(ref_field_idx)) { + Slice src; + if (ref_column.type() != OLAP_FIELD_TYPE_VARCHAR) { + src.data = read_helper->cell_ptr(ref_field_idx); + src.size = ref_column.length(); + } else { + src = *reinterpret_cast<Slice *>(read_helper->cell_ptr(ref_field_idx)); + } + uint64_t hash_value = HashUtil::murmur_hash64A(src.data, src.size, HashUtil::MURMUR_SEED); + hll.update(hash_value); + } + std::string buf; + buf.resize(hll.max_serialized_size()); + buf.resize(hll.serialize((uint8_t *) buf.c_str())); + Slice dst(buf); + write_helper->set_field_content(field_idx, reinterpret_cast<char *>(&dst), mem_pool); + return true; +} + +bool count(RowCursor* read_helper, RowCursor* write_helper, const TabletColumn& ref_column, + int field_idx, int ref_field_idx, MemPool* mem_pool) { + write_helper->set_not_null(field_idx); + int64_t count = read_helper->is_null(field_idx) ? 0 : 1; Review comment: I think the type of count should be int64_t, because the result type of count is BIGINT. So we should keep set_field_content at 8 bytes. 
########## File path: be/src/olap/schema_change.cpp ########## @@ -194,6 +188,105 @@ ColumnMapping* RowBlockChanger::get_mutable_column_mapping(size_t column_index) break; \ } + +bool to_bitmap(RowCursor* read_helper, RowCursor* write_helper, const TabletColumn& ref_column, + int field_idx, int ref_field_idx, MemPool* mem_pool) { + write_helper->set_not_null(ref_field_idx); + BitmapValue bitmap; + if (!read_helper->is_null(ref_field_idx)) { + uint64_t origin_value; + char *src = read_helper->cell_ptr(ref_field_idx); + switch (ref_column.type()) { + case OLAP_FIELD_TYPE_TINYINT: + if (*(int8_t *) src < 0) { + LOG(WARNING) << "The input: " << *(int8_t *) src + << " is not valid, to_bitmap only support bigint value from 0 to 18446744073709551615 currently"; + return false; + } + origin_value = *(int8_t *) src; + break; + case OLAP_FIELD_TYPE_UNSIGNED_TINYINT: + origin_value = *(uint8_t *) src; + break; + case OLAP_FIELD_TYPE_SMALLINT: + if (*(int16_t *) src < 0) { + LOG(WARNING) << "The input: " << *(int16_t *) src + << " is not valid, to_bitmap only support bigint value from 0 to 18446744073709551615 currently"; + return false; + } + origin_value = *(int16_t *) src; + break; + case OLAP_FIELD_TYPE_UNSIGNED_SMALLINT: + origin_value = *(uint16_t *) src; + break; + case OLAP_FIELD_TYPE_INT: + if (*(int32_t *) src < 0) { + LOG(WARNING) << "The input: " << *(int32_t *) src + << " is not valid, to_bitmap only support bigint value from 0 to 18446744073709551615 currently"; + return false; + } + origin_value = *(int32_t *) src; + break; + case OLAP_FIELD_TYPE_UNSIGNED_INT: + origin_value = *(uint32_t *) src; + break; + case OLAP_FIELD_TYPE_BIGINT: + if (*(int64_t *) src < 0) { + LOG(WARNING) << "The input: " << *(int64_t *) src + << " is not valid, to_bitmap only support bigint value from 0 to 18446744073709551615 currently"; + return false; + } + origin_value = *(int64_t *) src; + break; + case OLAP_FIELD_TYPE_UNSIGNED_BIGINT: + origin_value = *(uint64_t *) src; + break; + 
default: + LOG(WARNING) << "the column type which was altered from was unsupported." + << " from_type=" + << ref_column.type(); + return false; + } + bitmap.add(origin_value); + } + char *buf = reinterpret_cast<char *>(mem_pool->allocate(bitmap.getSizeInBytes())); + Slice dst(buf, bitmap.getSizeInBytes()); + bitmap.write(dst.data); + write_helper->set_field_content(field_idx, reinterpret_cast<char *>(&dst), mem_pool); + return true; +} + +bool hll_hash(RowCursor* read_helper, RowCursor* write_helper, const TabletColumn& ref_column, + int field_idx, int ref_field_idx, MemPool* mem_pool) { + write_helper->set_not_null(field_idx); + HyperLogLog hll; + if (!read_helper->is_null(ref_field_idx)) { + Slice src; + if (ref_column.type() != OLAP_FIELD_TYPE_VARCHAR) { + src.data = read_helper->cell_ptr(ref_field_idx); + src.size = ref_column.length(); + } else { + src = *reinterpret_cast<Slice *>(read_helper->cell_ptr(ref_field_idx)); + } + uint64_t hash_value = HashUtil::murmur_hash64A(src.data, src.size, HashUtil::MURMUR_SEED); + hll.update(hash_value); + } + std::string buf; + buf.resize(hll.max_serialized_size()); + buf.resize(hll.serialize((uint8_t *) buf.c_str())); + Slice dst(buf); + write_helper->set_field_content(field_idx, reinterpret_cast<char *>(&dst), mem_pool); + return true; +} + +bool count(RowCursor* read_helper, RowCursor* write_helper, const TabletColumn& ref_column, + int field_idx, int ref_field_idx, MemPool* mem_pool) { + write_helper->set_not_null(field_idx); + int64_t count = read_helper->is_null(field_idx) ? 0 : 1; Review comment: I think the type of count should be int64_t, because the result type of count is BIGINT. So we should keep set_field_content at 8 bytes. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org