zhannngchen commented on code in PR #12716: URL: https://github.com/apache/doris/pull/12716#discussion_r977151695
########## be/src/runtime/tablets_channel.cpp: ########## @@ -196,77 +196,94 @@ void TabletsChannel::_close_wait(DeltaWriter* writer, template <typename TabletWriterAddResult> Status TabletsChannel::reduce_mem_usage(int64_t mem_limit, TabletWriterAddResult* response) { - std::lock_guard<std::mutex> l(_lock); - if (_state == kFinished) { - // TabletsChannel is closed without LoadChannel's lock, - // therefore it's possible for reduce_mem_usage() to be called right after close() - return _close_status; - } - - // Sort the DeltaWriters by mem consumption in descend order. - std::vector<DeltaWriter*> writers; - for (auto& it : _tablet_writers) { - it.second->save_memtable_consumption_snapshot(); - writers.push_back(it.second); - } - std::sort(writers.begin(), writers.end(), [](const DeltaWriter* lhs, const DeltaWriter* rhs) { - return lhs->get_memtable_consumption_snapshot() > rhs->get_memtable_consumption_snapshot(); - }); + std::vector<DeltaWriter*> writers_to_flush; + { + std::lock_guard<std::mutex> l(_lock); + if (_state == kFinished) { + // TabletsChannel is closed without LoadChannel's lock, + // therefore it's possible for reduce_mem_usage() to be called right after close() + return _close_status; + } - // Decide which writes should be flushed to reduce mem consumption. - // The main idea is to flush at least one third of the mem_limit. - // This is mainly to solve the following scenarios. - // Suppose there are N tablets in this TabletsChannel, and the mem limit is M. - // If the data is evenly distributed, when each tablet memory accumulates to M/N, - // the reduce memory operation will be triggered. - // At this time, the value of M/N may be much smaller than the value of `write_buffer_size`. - // If we flush all the tablets at this time, each tablet will generate a lot of small files. - // So here we only flush part of the tablet, and the next time the reduce memory operation is triggered, - // the tablet that has not been flushed before will accumulate more data, thereby reducing the number of flushes. - - int64_t mem_to_flushed = mem_limit / 3; - int counter = 0; - int64_t sum = 0; - for (auto writer : writers) { - if (writer->memtable_consumption() <= 0) { - break; + // Sort the DeltaWriters by mem consumption in descend order. + std::vector<DeltaWriter*> writers; + for (auto& it : _tablet_writers) { + it.second->save_memtable_consumption_snapshot(); + writers.push_back(it.second); } - ++counter; - sum += writer->memtable_consumption(); - if (sum > mem_to_flushed) { - break; + std::sort(writers.begin(), writers.end(), + [](const DeltaWriter* lhs, const DeltaWriter* rhs) { + return lhs->get_memtable_consumption_snapshot() > + rhs->get_memtable_consumption_snapshot(); + }); + + // Decide which writes should be flushed to reduce mem consumption. + // The main idea is to flush at least one third of the mem_limit. + // This is mainly to solve the following scenarios. + // Suppose there are N tablets in this TabletsChannel, and the mem limit is M. + // If the data is evenly distributed, when each tablet memory accumulates to M/N, + // the reduce memory operation will be triggered. + // At this time, the value of M/N may be much smaller than the value of `write_buffer_size`. + // If we flush all the tablets at this time, each tablet will generate a lot of small files. + // So here we only flush part of the tablet, and the next time the reduce memory operation is triggered, + // the tablet that has not been flushed before will accumulate more data, thereby reducing the number of flushes. + + int64_t mem_to_flushed = mem_limit / 3; Review Comment: Offline discussed with xinyiZzz, some conclusion: 1. change the mem_limit / 3 to 'tabletsChannel's mem comsumption' /3, which can avoid too many small segments 2. change the default value of soft limit from 80% to 50%, which can trigger the mem table flush more frequently, and still friendly to large load than old policy. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org