HappenLee commented on code in PR #26915:
URL: https://github.com/apache/doris/pull/26915#discussion_r1392322161
##########
be/src/vec/sink/vrow_distribution.cpp:
##########
@@ -36,22 +38,38 @@ VRowDistribution::_get_partition_function() {
return {_vpartition->get_part_func_ctx(),
_vpartition->get_partition_function()};
}
-void VRowDistribution::_save_missing_values(vectorized::ColumnPtr col,
- vectorized::DataTypePtr
value_type) {
- _partitions_need_create.clear();
- std::set<std::string> deduper;
- // de-duplication
- for (auto row : _missing_map) {
- deduper.emplace(value_type->to_string(*col, row));
+Status VRowDistribution::_save_missing_values(vectorized::ColumnPtr col,
+ vectorized::DataTypePtr
value_type, Block* block,
+ std::vector<int64_t> filter) {
+ // de-duplication for new partitions but save all rows.
+ for (auto row : filter) {
+ _batching_block->add_row(block, row);
+ auto val_str = value_type->to_string(*col, row);
+ if (!_deduper.contains(val_str)) {
+ _deduper.emplace(val_str);
+ TStringLiteral node;
+ node.value = val_str;
+ _partitions_need_create.emplace_back(std::vector {node}); // only
1 partition column now
+ }
}
- for (auto& value : deduper) {
- TStringLiteral node;
- node.value = value;
- _partitions_need_create.emplace_back(std::vector {node}); // only 1
partition column now
+
+ // to avoid too large mem use
+ if (_batching_rows > _batch_size) {
+ _deal_batched = true;
}
+
+ return Status::OK();
+}
+
+void VRowDistribution::clear_batchings() {
+ _partitions_need_create.clear();
+ _deduper.clear();
+ _batching_block->reset_column_data(); // the columns was moved. add them
again
Review Comment:
Why not reuse the memory?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]