xinyiZzz commented on code in PR #12716:
URL: https://github.com/apache/doris/pull/12716#discussion_r976311390
##########
be/src/runtime/tablets_channel.cpp:
##########
@@ -196,77 +196,94 @@ void TabletsChannel::_close_wait(DeltaWriter* writer,
template <typename TabletWriterAddResult>
Status TabletsChannel::reduce_mem_usage(int64_t mem_limit,
                                        TabletWriterAddResult* response) {
-    std::lock_guard<std::mutex> l(_lock);
-    if (_state == kFinished) {
-        // TabletsChannel is closed without LoadChannel's lock,
-        // therefore it's possible for reduce_mem_usage() to be called right after close()
-        return _close_status;
-    }
-
-    // Sort the DeltaWriters by mem consumption in descend order.
-    std::vector<DeltaWriter*> writers;
-    for (auto& it : _tablet_writers) {
-        it.second->save_memtable_consumption_snapshot();
-        writers.push_back(it.second);
-    }
-    std::sort(writers.begin(), writers.end(), [](const DeltaWriter* lhs, const DeltaWriter* rhs) {
-        return lhs->get_memtable_consumption_snapshot() > rhs->get_memtable_consumption_snapshot();
-    });
+    std::vector<DeltaWriter*> writers_to_flush;
+    {
+        std::lock_guard<std::mutex> l(_lock);
+        if (_state == kFinished) {
+            // TabletsChannel is closed without LoadChannel's lock,
+            // therefore it's possible for reduce_mem_usage() to be called right after close()
+            return _close_status;
+        }
-    // Decide which writes should be flushed to reduce mem consumption.
-    // The main idea is to flush at least one third of the mem_limit.
-    // This is mainly to solve the following scenarios.
-    // Suppose there are N tablets in this TabletsChannel, and the mem limit is M.
-    // If the data is evenly distributed, when each tablet memory accumulates to M/N,
-    // the reduce memory operation will be triggered.
-    // At this time, the value of M/N may be much smaller than the value of `write_buffer_size`.
-    // If we flush all the tablets at this time, each tablet will generate a lot of small files.
-    // So here we only flush part of the tablet, and the next time the reduce memory operation is triggered,
-    // the tablet that has not been flushed before will accumulate more data, thereby reducing the number of flushes.
-
-    int64_t mem_to_flushed = mem_limit / 3;
-    int counter = 0;
-    int64_t sum = 0;
-    for (auto writer : writers) {
-        if (writer->memtable_consumption() <= 0) {
-            break;
+        // Sort the DeltaWriters by mem consumption in descend order.
+        std::vector<DeltaWriter*> writers;
+        for (auto& it : _tablet_writers) {
+            it.second->save_memtable_consumption_snapshot();
+            writers.push_back(it.second);
        }
-        ++counter;
-        sum += writer->memtable_consumption();
-        if (sum > mem_to_flushed) {
-            break;
+        std::sort(writers.begin(), writers.end(),
+                  [](const DeltaWriter* lhs, const DeltaWriter* rhs) {
+                      return lhs->get_memtable_consumption_snapshot() >
+                             rhs->get_memtable_consumption_snapshot();
+                  });
+
+        // Decide which writes should be flushed to reduce mem consumption.
+        // The main idea is to flush at least one third of the mem_limit.
+        // This is mainly to solve the following scenarios.
+        // Suppose there are N tablets in this TabletsChannel, and the mem limit is M.
+        // If the data is evenly distributed, when each tablet memory accumulates to M/N,
+        // the reduce memory operation will be triggered.
+        // At this time, the value of M/N may be much smaller than the value of `write_buffer_size`.
+        // If we flush all the tablets at this time, each tablet will generate a lot of small files.
+        // So here we only flush part of the tablet, and the next time the reduce memory operation is triggered,
+        // the tablet that has not been flushed before will accumulate more data, thereby reducing the number of flushes.
+
+        int64_t mem_to_flushed = mem_limit / 3;
Review Comment:
It seems reasonable to pick the largest tablets channel within the largest load channel, since a load channel usually has only one or very few tablets channels. Changing `mem_limit / 3` here to `tablets channel mem usage / 3` seems fine.
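
A rough sketch of that suggestion, for illustration only: it reuses the selection loop already in this function and only swaps the flush budget. `_mem_tracker->consumption()` below is just a placeholder for whatever accessor reports this tablets channel's current memory usage.

```cpp
// Sketch, not part of the patch: flush roughly one third of the channel's
// current memory usage instead of one third of mem_limit.
// `_mem_tracker->consumption()` is an assumed/placeholder accessor.
int64_t channel_mem_usage = _mem_tracker->consumption();
int64_t mem_to_flushed = channel_mem_usage / 3;

int counter = 0;
int64_t sum = 0;
// `writers` is already sorted by memtable consumption in descending order.
for (auto writer : writers) {
    if (writer->memtable_consumption() <= 0) {
        break;
    }
    ++counter;
    sum += writer->memtable_consumption();
    if (sum > mem_to_flushed) {
        break;
    }
}
```

This keeps the "flush about one third" behavior but scales the budget to what the channel actually holds, so a channel that is far below `mem_limit` is not asked to flush a third of the whole limit.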