This is an automated email from the ASF dual-hosted git repository. lichaoyong pushed a commit to branch branch-0.12 in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/branch-0.12 by this push: new f4473fc Fix bug when use ZoneMap/BloomFiter on column with REPLACE/REPLACE_IF_NOT_NULL (#3288) f4473fc is described below commit f4473fce0a492d585a96843336b917d24e5c6ff2 Author: lichaoyong <lichaoy...@baidu.com> AuthorDate: Fri Apr 10 10:22:21 2020 +0800 Fix bug when use ZoneMap/BloomFiter on column with REPLACE/REPLACE_IF_NOT_NULL (#3288) Now, column with REPLACE/REPLACE_IF_NOT_NULL can be filtered by ZoneMap/BloomFilter when the rowset is base(version starts with zero). Always we think is an optimization. But when some case, it will occurs bug. create table test( k1 int, v1 int replace, v2 int sum ); If I have two records on different two versions 1 2 2 on version [0-10] 1 3 1 on version 11 If I perform a query select * from test where k1 = 1 and v1 = 3; The result will be 1 3 1, this is not right because of the first record is filtered. The right answer is 1 3 3, the v2 should be summed. Remove this optimization is necessity to make the result is right. --- be/src/olap/rowset/beta_rowset_writer.cpp | 1 - be/src/olap/rowset/segment_reader.cpp | 10 ++-------- be/src/olap/rowset/segment_v2/segment_writer.cpp | 16 ---------------- be/src/olap/rowset/segment_v2/segment_writer.h | 2 -- be/test/olap/rowset/segment_v2/segment_test.cpp | 4 +--- .../org/apache/doris/common/util/PropertyAnalyzer.java | 4 +--- .../org/apache/doris/common/PropertyAnalyzerTest.java | 4 ++-- 7 files changed, 6 insertions(+), 35 deletions(-) diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp b/be/src/olap/rowset/beta_rowset_writer.cpp index 11e2d78..c75f056 100644 --- a/be/src/olap/rowset/beta_rowset_writer.cpp +++ b/be/src/olap/rowset/beta_rowset_writer.cpp @@ -192,7 +192,6 @@ OLAPStatus BetaRowsetWriter::_create_segment_writer() { DCHECK(wblock != nullptr); segment_v2::SegmentWriterOptions writer_options; - writer_options.whether_to_filter_value = _context.version.first == 0; _segment_writer.reset(new segment_v2::SegmentWriter( wblock.get(), _num_segment, _context.tablet_schema, writer_options)); _wblocks.push_back(std::move(wblock)); diff --git a/be/src/olap/rowset/segment_reader.cpp b/be/src/olap/rowset/segment_reader.cpp index 8c74fa5..b6dddbf 100644 --- a/be/src/olap/rowset/segment_reader.cpp +++ b/be/src/olap/rowset/segment_reader.cpp @@ -491,10 +491,7 @@ OLAPStatus SegmentReader::_pick_row_groups(uint32_t first_block, uint32_t last_b for (auto& i : _conditions->columns()) { FieldAggregationMethod aggregation = _get_aggregation_by_index(i.first); - bool is_continue = (aggregation == OLAP_FIELD_AGGREGATION_NONE - || ((aggregation == OLAP_FIELD_AGGREGATION_REPLACE || - aggregation == OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL) - && _segment_group->version().first == 0)); + bool is_continue = (aggregation == OLAP_FIELD_AGGREGATION_NONE); if (!is_continue) { continue; } @@ -533,10 +530,7 @@ OLAPStatus SegmentReader::_pick_row_groups(uint32_t first_block, uint32_t last_b for (uint32_t i : _load_bf_columns) { FieldAggregationMethod aggregation = _get_aggregation_by_index(i); - bool is_continue = (aggregation == OLAP_FIELD_AGGREGATION_NONE - || ((aggregation == OLAP_FIELD_AGGREGATION_REPLACE || - aggregation == OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL) - && _segment_group->version().first == 0)); + bool is_continue = (aggregation == OLAP_FIELD_AGGREGATION_NONE); if (!is_continue) { continue; } diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index 5a354f2..a93eaf9 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -71,25 +71,9 @@ Status SegmentWriter::init(uint32_t write_mbytes_per_sec) { } if (column.is_bf_column()) { opts.need_bloom_filter = true; - if ((column.aggregation() == OLAP_FIELD_AGGREGATION_REPLACE - || column.aggregation() == OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL) - && !_opts.whether_to_filter_value) { - // if the column's Aggregation type is OLAP_FIELD_AGGREGATION_REPLACE or - // OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL and the segment is not in base rowset, - // do not write the bloom filter index because it is useless - opts.need_bloom_filter = false; - } } if (column.has_bitmap_index()) { opts.need_bitmap_index = true; - if ((column.aggregation() == OLAP_FIELD_AGGREGATION_REPLACE - || column.aggregation() == OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL) - && !_opts.whether_to_filter_value) { - // if the column's Aggregation type is OLAP_FIELD_AGGREGATION_REPLACE or - // OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL and the segment is not in base rowset, - // do not write the bitmap index because it is useless - opts.need_bitmap_index = false; - } } std::unique_ptr<ColumnWriter> writer( diff --git a/be/src/olap/rowset/segment_v2/segment_writer.h b/be/src/olap/rowset/segment_v2/segment_writer.h index 2040714..4703f68 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.h +++ b/be/src/olap/rowset/segment_v2/segment_writer.h @@ -46,8 +46,6 @@ extern const uint32_t k_segment_magic_length; struct SegmentWriterOptions { uint32_t num_rows_per_block = 1024; - // whether to filter value column against bloom filter/zone map - bool whether_to_filter_value = false; }; class SegmentWriter { diff --git a/be/test/olap/rowset/segment_v2/segment_test.cpp b/be/test/olap/rowset/segment_v2/segment_test.cpp index 5d40baf..a6a1638 100644 --- a/be/test/olap/rowset/segment_v2/segment_test.cpp +++ b/be/test/olap/rowset/segment_v2/segment_test.cpp @@ -1124,14 +1124,12 @@ TEST_F(SegmentReaderWriterTest, TestBloomFilterIndexUniqueModel) { // for not base segment SegmentWriterOptions opts1; - opts1.whether_to_filter_value = false; shared_ptr<Segment> seg1; build_segment(opts1, schema, schema, 100, DefaultIntGenerator, &seg1); - ASSERT_FALSE(column_contains_index(seg1->footer().columns(3), BLOOM_FILTER_INDEX)); + ASSERT_TRUE(column_contains_index(seg1->footer().columns(3), BLOOM_FILTER_INDEX)); // for base segment SegmentWriterOptions opts2; - opts2.whether_to_filter_value = true; shared_ptr<Segment> seg2; build_segment(opts2, schema, schema, 100, DefaultIntGenerator, &seg2); ASSERT_TRUE(column_contains_index(seg2->footer().columns(3), BLOOM_FILTER_INDEX)); diff --git a/fe/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java b/fe/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java index e645727..f2a2a0d 100644 --- a/fe/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java +++ b/fe/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java @@ -284,9 +284,7 @@ public class PropertyAnalyzer { throw new AnalysisException(type + " is not supported in bloom filter index. " + "invalid column: " + bfColumn); } else if (column.isKey() - || column.getAggregationType() == AggregateType.NONE - || column.getAggregationType() == AggregateType.REPLACE - || column.getAggregationType() == AggregateType.REPLACE_IF_NOT_NULL) { + || column.getAggregationType() == AggregateType.NONE) { if (!bfColumnSet.add(bfColumn)) { throw new AnalysisException("Reduplicated bloom filter column: " + bfColumn); } diff --git a/fe/src/test/java/org/apache/doris/common/PropertyAnalyzerTest.java b/fe/src/test/java/org/apache/doris/common/PropertyAnalyzerTest.java index 45430e4..3aaaf20 100644 --- a/fe/src/test/java/org/apache/doris/common/PropertyAnalyzerTest.java +++ b/fe/src/test/java/org/apache/doris/common/PropertyAnalyzerTest.java @@ -49,10 +49,10 @@ public class PropertyAnalyzerTest { columns.get(1).setIsKey(true); Map<String, String> properties = Maps.newHashMap(); - properties.put(PropertyAnalyzer.PROPERTIES_BF_COLUMNS, "k1,v1"); + properties.put(PropertyAnalyzer.PROPERTIES_BF_COLUMNS, "k1"); Set<String> bfColumns = PropertyAnalyzer.analyzeBloomFilterColumns(properties, columns); - Assert.assertEquals(Sets.newHashSet("k1", "v1"), bfColumns); + Assert.assertEquals(Sets.newHashSet("k1"), bfColumns); } @Test --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org