This is an automated email from the ASF dual-hosted git repository. eldenmoon pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new 5a44f57ba4b [Pick](Row store) allow to set row_store_page_size for tables, change default value to 16KB 5a44f57ba4b is described below commit 5a44f57ba4b1ec5cb65dbd7af79da2b26ac11993 Author: Xr Ling <63634816+lxr...@users.noreply.github.com> AuthorDate: Wed Jul 17 15:19:49 2024 +0800 [Pick](Row store) allow to set row_store_page_size for tables, change default value to 16KB pick #37145 --- be/src/common/config.cpp | 2 -- be/src/common/config.h | 2 -- be/src/olap/rowset/segment_v2/options.h | 2 ++ be/src/olap/rowset/segment_v2/segment_writer.cpp | 2 +- .../rowset/segment_v2/vertical_segment_writer.cpp | 2 +- be/src/olap/tablet_meta.cpp | 3 +++ be/src/olap/tablet_schema.cpp | 4 +++ be/src/olap/tablet_schema.h | 4 +++ .../org/apache/doris/alter/CloudRollupJobV2.java | 3 ++- .../apache/doris/alter/CloudSchemaChangeJobV2.java | 3 ++- .../java/org/apache/doris/alter/RollupJobV2.java | 3 ++- .../org/apache/doris/alter/SchemaChangeJobV2.java | 3 ++- .../java/org/apache/doris/backup/RestoreJob.java | 3 ++- .../main/java/org/apache/doris/catalog/Env.java | 4 +++ .../java/org/apache/doris/catalog/OlapTable.java | 14 ++++++++++ .../org/apache/doris/catalog/TableProperty.java | 13 +++++++++ .../cloud/datasource/CloudInternalCatalog.java | 12 ++++++--- .../apache/doris/common/util/PropertyAnalyzer.java | 29 ++++++++++++++++++++ .../apache/doris/datasource/InternalCatalog.java | 27 ++++++++++++++----- .../org/apache/doris/master/ReportHandler.java | 3 ++- .../org/apache/doris/task/CreateReplicaTask.java | 6 ++++- .../java/org/apache/doris/task/AgentTaskTest.java | 3 ++- gensrc/proto/olap_file.proto | 2 ++ gensrc/thrift/AgentService.thrift | 1 + .../data/query_p0/system/test_table_options.out | 14 +++++----- .../query_p0/system/test_query_sys_tables.groovy | 1 + .../query_p0/system/test_table_options.groovy | 31 ++++++++++++++++++++++ 27 files changed, 166 insertions(+), 30 deletions(-) diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index 3590c7afd30..98243e74577 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -1059,8 +1059,6 @@ DEFINE_mInt64(max_tablet_io_errors, "-1"); DEFINE_Int32(tablet_path_check_interval_seconds, "-1"); DEFINE_mInt32(tablet_path_check_batch_size, "1000"); -// Page size of row column, default 4KB -DEFINE_mInt64(row_column_page_size, "4096"); // it must be larger than or equal to 5MB DEFINE_mInt64(s3_write_buffer_size, "5242880"); // Log interval when doing s3 upload task diff --git a/be/src/common/config.h b/be/src/common/config.h index a52a0357eb1..a1b9a504278 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -1102,8 +1102,6 @@ DECLARE_mInt64(max_tablet_io_errors); DECLARE_Int32(tablet_path_check_interval_seconds); DECLARE_mInt32(tablet_path_check_batch_size); -// Page size of row column, default 4KB -DECLARE_mInt64(row_column_page_size); // it must be larger than or equal to 5MB DECLARE_mInt64(s3_write_buffer_size); // Log interval when doing s3 upload task diff --git a/be/src/olap/rowset/segment_v2/options.h b/be/src/olap/rowset/segment_v2/options.h index 19041f4c51d..93ec03df452 100644 --- a/be/src/olap/rowset/segment_v2/options.h +++ b/be/src/olap/rowset/segment_v2/options.h @@ -24,6 +24,8 @@ namespace segment_v2 { static constexpr size_t DEFAULT_PAGE_SIZE = 1024 * 1024; // default size: 1M +constexpr long ROW_STORE_PAGE_SIZE_DEFAULT_VALUE = 16384; // default row store page size: 16KB + struct PageBuilderOptions { size_t data_page_size = DEFAULT_PAGE_SIZE; diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index 729e2500384..26802ed4bab 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -255,7 +255,7 @@ Status SegmentWriter::_create_column_writer(uint32_t cid, const TabletColumn& co if (column.is_row_store_column()) { // smaller page size for row store column - opts.data_page_size = config::row_column_page_size; + opts.data_page_size = _tablet_schema->row_store_page_size(); } std::unique_ptr<ColumnWriter> writer; RETURN_IF_ERROR(ColumnWriter::create(opts, &column, _file_writer, &writer)); diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp index 0930325d6d8..7467e28bd79 100644 --- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp @@ -218,7 +218,7 @@ Status VerticalSegmentWriter::_create_column_writer(uint32_t cid, const TabletCo if (column.is_row_store_column()) { // smaller page size for row store column - opts.data_page_size = config::row_column_page_size; + opts.data_page_size = _tablet_schema->row_store_page_size(); } std::unique_ptr<ColumnWriter> writer; RETURN_IF_ERROR(ColumnWriter::create(opts, &column, _file_writer, &writer)); diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp index cced41a86ee..a3526781ddd 100644 --- a/be/src/olap/tablet_meta.cpp +++ b/be/src/olap/tablet_meta.cpp @@ -317,6 +317,9 @@ TabletMeta::TabletMeta(int64_t table_id, int64_t partition_id, int64_t tablet_id if (tablet_schema.__isset.store_row_column) { schema->set_store_row_column(tablet_schema.store_row_column); } + if (tablet_schema.__isset.row_store_page_size) { + schema->set_row_store_page_size(tablet_schema.row_store_page_size); + } if (tablet_schema.__isset.skip_write_index_on_load) { schema->set_skip_write_index_on_load(tablet_schema.skip_write_index_on_load); } diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp index ec887f14a91..c27db2bd3d9 100644 --- a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -990,6 +990,7 @@ void TabletSchema::init_from_pb(const TabletSchemaPB& schema, bool ignore_extrac _sort_type = schema.sort_type(); _sort_col_num = schema.sort_col_num(); _compression_type = schema.compression_type(); + _row_store_page_size = schema.row_store_page_size(); _schema_version = schema.schema_version(); // Default to V1 inverted index storage format for backward compatibility if not specified in schema. if (!schema.has_inverted_index_storage_format()) { @@ -1050,6 +1051,7 @@ void TabletSchema::build_current_tablet_schema(int64_t index_id, int32_t version _skip_write_index_on_load = ori_tablet_schema.skip_write_index_on_load(); _sort_type = ori_tablet_schema.sort_type(); _sort_col_num = ori_tablet_schema.sort_col_num(); + _row_store_page_size = ori_tablet_schema.row_store_page_size(); // copy from table_schema_param _schema_version = version; @@ -1203,6 +1205,7 @@ void TabletSchema::to_schema_pb(TabletSchemaPB* tablet_schema_pb) const { tablet_schema_pb->set_sort_col_num(_sort_col_num); tablet_schema_pb->set_schema_version(_schema_version); tablet_schema_pb->set_compression_type(_compression_type); + tablet_schema_pb->set_row_store_page_size(_row_store_page_size); tablet_schema_pb->set_version_col_idx(_version_col_idx); tablet_schema_pb->set_inverted_index_storage_format(_inverted_index_storage_format); tablet_schema_pb->mutable_row_store_column_unique_ids()->Assign( @@ -1522,6 +1525,7 @@ bool operator==(const TabletSchema& a, const TabletSchema& b) { if (a._disable_auto_compaction != b._disable_auto_compaction) return false; if (a._enable_single_replica_compaction != b._enable_single_replica_compaction) return false; if (a._store_row_column != b._store_row_column) return false; + if (a._row_store_page_size != b._row_store_page_size) return false; if (a._skip_write_index_on_load != b._skip_write_index_on_load) return false; return true; } diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h index 3a78f2e4748..8cf6e20208c 100644 --- a/be/src/olap/tablet_schema.h +++ b/be/src/olap/tablet_schema.h @@ -36,6 +36,7 @@ #include "common/status.h" #include "gutil/stringprintf.h" #include "olap/olap_common.h" +#include "olap/rowset/segment_v2/options.h" #include "runtime/define_primitive_type.h" #include "runtime/descriptors.h" #include "util/string_util.h" @@ -359,6 +360,8 @@ public: void set_version_col_idx(int32_t version_col_idx) { _version_col_idx = version_col_idx; } int32_t version_col_idx() const { return _version_col_idx; } segment_v2::CompressionTypePB compression_type() const { return _compression_type; } + void set_row_store_page_size(long page_size) { _row_store_page_size = page_size; } + long row_store_page_size() const { return _row_store_page_size; } const std::vector<TabletIndex>& indexes() const { return _indexes; } bool has_inverted_index() const { @@ -508,6 +511,7 @@ private: size_t _num_rows_per_row_block = 0; CompressKind _compress_kind = COMPRESS_NONE; segment_v2::CompressionTypePB _compression_type = segment_v2::CompressionTypePB::LZ4F; + long _row_store_page_size = segment_v2::ROW_STORE_PAGE_SIZE_DEFAULT_VALUE; size_t _next_column_unique_id = 0; std::string _auto_increment_column; diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/CloudRollupJobV2.java b/fe/fe-core/src/main/java/org/apache/doris/alter/CloudRollupJobV2.java index 5764a8fbc3c..01027eb1396 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/CloudRollupJobV2.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/CloudRollupJobV2.java @@ -209,7 +209,8 @@ public class CloudRollupJobV2 extends RollupJobV2 { tbl.getTimeSeriesCompactionEmptyRowsetsThreshold(), tbl.getTimeSeriesCompactionLevelThreshold(), tbl.disableAutoCompaction(), - tbl.getRowStoreColumnsUniqueIds(rowStoreColumns), null); + tbl.getRowStoreColumnsUniqueIds(rowStoreColumns), + null, tbl.rowStorePageSize()); requestBuilder.addTabletMetas(builder); } // end for rollupTablets ((CloudInternalCatalog) Env.getCurrentInternalCatalog()) diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/CloudSchemaChangeJobV2.java b/fe/fe-core/src/main/java/org/apache/doris/alter/CloudSchemaChangeJobV2.java index 2c7c4c27bff..3589d0265d2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/CloudSchemaChangeJobV2.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/CloudSchemaChangeJobV2.java @@ -228,7 +228,8 @@ public class CloudSchemaChangeJobV2 extends SchemaChangeJobV2 { tbl.getTimeSeriesCompactionLevelThreshold(), tbl.disableAutoCompaction(), tbl.getRowStoreColumnsUniqueIds(rowStoreColumns), - tbl.getInvertedIndexFileStorageFormat()); + tbl.getInvertedIndexFileStorageFormat(), + tbl.rowStorePageSize()); requestBuilder.addTabletMetas(builder); } // end for rollupTablets ((CloudInternalCatalog) Env.getCurrentInternalCatalog()) diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/RollupJobV2.java b/fe/fe-core/src/main/java/org/apache/doris/alter/RollupJobV2.java index ef7d7de6307..037139704b6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/RollupJobV2.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/RollupJobV2.java @@ -265,7 +265,8 @@ public class RollupJobV2 extends AlterJobV2 implements GsonPostProcessable { tbl.storeRowColumn(), binlogConfig, tbl.getRowStoreColumnsUniqueIds(tbl.getTableProperty().getCopiedRowStoreColumns()), - objectPool); + objectPool, + tbl.rowStorePageSize()); createReplicaTask.setBaseTablet(tabletIdMap.get(rollupTabletId), baseSchemaHash); if (this.storageFormat != null) { createReplicaTask.setStorageFormat(this.storageFormat); diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeJobV2.java b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeJobV2.java index a0c7058f535..d58e23ebbbc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeJobV2.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeJobV2.java @@ -293,7 +293,8 @@ public class SchemaChangeJobV2 extends AlterJobV2 { tbl.storeRowColumn(), binlogConfig, tbl.getRowStoreColumnsUniqueIds(rowStoreColumns), - objectPool); + objectPool, + tbl.rowStorePageSize()); createReplicaTask.setBaseTablet(partitionIndexTabletMap.get(partitionId, shadowIdxId) .get(shadowTabletId), originSchemaHash); diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java b/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java index 098c473d32e..adad0a9c5f4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java @@ -1126,7 +1126,8 @@ public class RestoreJob extends AbstractJob implements GsonPostProcessable { localTbl.storeRowColumn(), binlogConfig, localTbl.getRowStoreColumnsUniqueIds(rowStoreColumns), - objectPool); + objectPool, + localTbl.rowStorePageSize()); task.setInvertedIndexFileStorageFormat(localTbl.getInvertedIndexFileStorageFormat()); task.setInRestoreMode(true); batchTask.addTask(task); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java index 700b51b8a9b..92bb62043a7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java @@ -3510,6 +3510,10 @@ public class Env { sb.append(",\n\"").append(PropertyAnalyzer.PROPERTIES_STORE_ROW_COLUMN).append("\" = \""); sb.append(olapTable.storeRowColumn()).append("\""); } + + // row store page size + sb.append(",\n\"").append(PropertyAnalyzer.PROPERTIES_ROW_STORE_PAGE_SIZE).append("\" = \""); + sb.append(olapTable.rowStorePageSize()).append("\""); } // skip inverted index on load diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java index 81666227986..a7863fa65d7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java @@ -2506,6 +2506,20 @@ public class OlapTable extends Table implements MTMVRelatedTableIf, GsonPostProc tableProperty.buildCompressionType(); } + public void setRowStorePageSize(long pageSize) { + TableProperty tableProperty = getOrCreatTableProperty(); + tableProperty.modifyTableProperties(PropertyAnalyzer.PROPERTIES_ROW_STORE_PAGE_SIZE, + Long.valueOf(pageSize).toString()); + tableProperty.buildRowStorePageSize(); + } + + public long rowStorePageSize() { + if (tableProperty != null) { + return tableProperty.rowStorePageSize(); + } + return PropertyAnalyzer.ROW_STORE_PAGE_SIZE_DEFAULT_VALUE; + } + public void setStorageFormat(TStorageFormat storageFormat) { TableProperty tableProperty = getOrCreatTableProperty(); tableProperty.modifyTableProperties(PropertyAnalyzer.PROPERTIES_STORAGE_FORMAT, storageFormat.name()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableProperty.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableProperty.java index 59c483020fe..8b6a9ec52d1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableProperty.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableProperty.java @@ -102,6 +102,8 @@ public class TableProperty implements Writable, GsonPostProcessable { private boolean skipWriteIndexOnLoad = false; + private long rowStorePageSize = PropertyAnalyzer.ROW_STORE_PAGE_SIZE_DEFAULT_VALUE; + private String compactionPolicy = PropertyAnalyzer.SIZE_BASED_COMPACTION_POLICY; private long timeSeriesCompactionGoalSizeMbytes @@ -267,6 +269,17 @@ public class TableProperty implements Writable, GsonPostProcessable { return storeRowColumn; } + public TableProperty buildRowStorePageSize() { + rowStorePageSize = Long.parseLong( + properties.getOrDefault(PropertyAnalyzer.PROPERTIES_ROW_STORE_PAGE_SIZE, + Long.toString(PropertyAnalyzer.ROW_STORE_PAGE_SIZE_DEFAULT_VALUE))); + return this; + } + + public long rowStorePageSize() { + return rowStorePageSize; + } + public TableProperty buildSkipWriteIndexOnLoad() { skipWriteIndexOnLoad = Boolean.parseBoolean( properties.getOrDefault(PropertyAnalyzer.PROPERTIES_SKIP_WRITE_INDEX_ON_LOAD, "false")); diff --git a/fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java index 541b884da14..24e81384b93 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java @@ -101,7 +101,8 @@ public class CloudInternalCatalog extends InternalCatalog { String storagePolicy, IdGeneratorBuffer idGeneratorBuffer, BinlogConfig binlogConfig, - boolean isStorageMediumSpecified, List<Integer> clusterKeyIndexes) + boolean isStorageMediumSpecified, + List<Integer> clusterKeyIndexes, long pageSize) throws DdlException { // create base index first. Preconditions.checkArgument(tbl.getBaseIndexId() != -1); @@ -157,7 +158,7 @@ public class CloudInternalCatalog extends InternalCatalog { } Cloud.CreateTabletsRequest.Builder requestBuilder = Cloud.CreateTabletsRequest.newBuilder(); List<String> rowStoreColumns = - tbl.getTableProperty().getCopiedRowStoreColumns(); + tbl.getTableProperty().getCopiedRowStoreColumns(); for (Tablet tablet : index.getTablets()) { OlapFile.TabletMetaCloudPB.Builder builder = createTabletMetaBuilder(tbl.getId(), indexId, partitionId, tablet, tabletType, schemaHash, keysType, shortKeyColumnCount, @@ -171,7 +172,8 @@ public class CloudInternalCatalog extends InternalCatalog { tbl.getTimeSeriesCompactionLevelThreshold(), tbl.disableAutoCompaction(), tbl.getRowStoreColumnsUniqueIds(rowStoreColumns), - tbl.getInvertedIndexFileStorageFormat()); + tbl.getInvertedIndexFileStorageFormat(), + tbl.rowStorePageSize()); requestBuilder.addTabletMetas(builder); } if (!storageVaultIdSet && ((CloudEnv) Env.getCurrentEnv()).getEnableStorageVault()) { @@ -219,7 +221,7 @@ public class CloudInternalCatalog extends InternalCatalog { Long timeSeriesCompactionTimeThresholdSeconds, Long timeSeriesCompactionEmptyRowsetsThreshold, Long timeSeriesCompactionLevelThreshold, boolean disableAutoCompaction, List<Integer> rowStoreColumnUniqueIds, - TInvertedIndexFileStorageFormat invertedIndexFileStorageFormat) throws DdlException { + TInvertedIndexFileStorageFormat invertedIndexFileStorageFormat, long pageSize) throws DdlException { OlapFile.TabletMetaCloudPB.Builder builder = OlapFile.TabletMetaCloudPB.newBuilder(); builder.setTableId(tableId); builder.setIndexId(indexId); @@ -344,6 +346,8 @@ public class CloudInternalCatalog extends InternalCatalog { schemaBuilder.setInvertedIndexStorageFormat(OlapFile.InvertedIndexStorageFormatPB.V2); } } + schemaBuilder.setRowStorePageSize(pageSize); + OlapFile.TabletSchemaCloudPB schema = schemaBuilder.build(); builder.setSchema(schema); // rowset diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java index 29ed40676b3..5fe91aac8ff 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java @@ -95,6 +95,10 @@ public class PropertyAnalyzer { public static final String PROPERTIES_TIMEOUT = "timeout"; public static final String PROPERTIES_COMPRESSION = "compression"; + // row store page size, default 16KB + public static final String PROPERTIES_ROW_STORE_PAGE_SIZE = "row_store_page_size"; + public static final long ROW_STORE_PAGE_SIZE_DEFAULT_VALUE = 16384; + public static final String PROPERTIES_ENABLE_LIGHT_SCHEMA_CHANGE = "light_schema_change"; public static final String PROPERTIES_DISTRIBUTION_TYPE = "distribution_type"; @@ -1013,6 +1017,31 @@ public class PropertyAnalyzer { } } + public static long alignTo4K(long size) { + return (size + 4095) & ~4095; + } + + // analyzeRowStorePageSize will parse the row_store_page_size from properties + public static long analyzeRowStorePageSize(Map<String, String> properties) throws AnalysisException { + long rowStorePageSize = ROW_STORE_PAGE_SIZE_DEFAULT_VALUE; + if (properties != null && properties.containsKey(PROPERTIES_ROW_STORE_PAGE_SIZE)) { + String rowStorePageSizeStr = properties.get(PROPERTIES_ROW_STORE_PAGE_SIZE); + try { + rowStorePageSize = alignTo4K(Long.parseLong(rowStorePageSizeStr)); + } catch (NumberFormatException e) { + throw new AnalysisException("Invalid row store page size: " + rowStorePageSizeStr); + } + + if (rowStorePageSize <= 0) { + throw new AnalysisException("Row store page size should larger than 0."); + } + + properties.remove(PROPERTIES_ROW_STORE_PAGE_SIZE); + } + + return rowStorePageSize; + } + // analyzeStorageFormat will parse the storage format from properties // sql: alter table tablet_name set ("storage_format" = "v2") // Use this sql to convert all tablets(base and rollup index) to a new format segment diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java index 2ffc472fab0..b66b40743dc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java @@ -1559,6 +1559,10 @@ public class InternalCatalog implements CatalogIf<Database> { properties.put(PropertyAnalyzer.PROPERTIES_STORE_ROW_COLUMN, olapTable.storeRowColumn().toString()); } + if (!properties.containsKey(PropertyAnalyzer.PROPERTIES_ROW_STORE_PAGE_SIZE)) { + properties.put(PropertyAnalyzer.PROPERTIES_ROW_STORE_PAGE_SIZE, + Long.toString(olapTable.rowStorePageSize())); + } if (!properties.containsKey(PropertyAnalyzer.PROPERTIES_SKIP_WRITE_INDEX_ON_LOAD)) { properties.put(PropertyAnalyzer.PROPERTIES_SKIP_WRITE_INDEX_ON_LOAD, olapTable.skipWriteIndexOnLoad().toString()); @@ -1692,7 +1696,7 @@ public class InternalCatalog implements CatalogIf<Database> { singlePartitionDesc.isInMemory(), singlePartitionDesc.getTabletType(), storagePolicy, idGeneratorBuffer, - binlogConfig, dataProperty.isStorageMediumSpecified(), null); + binlogConfig, dataProperty.isStorageMediumSpecified(), null, olapTable.rowStorePageSize()); // TODO cluster key ids // check again @@ -1980,7 +1984,8 @@ public class InternalCatalog implements CatalogIf<Database> { String storagePolicy, IdGeneratorBuffer idGeneratorBuffer, BinlogConfig binlogConfig, - boolean isStorageMediumSpecified, List<Integer> clusterKeyIndexes) + boolean isStorageMediumSpecified, + List<Integer> clusterKeyIndexes, long rowStorePageSize) throws DdlException { // create base index first. Preconditions.checkArgument(tbl.getBaseIndexId() != -1); @@ -2064,7 +2069,7 @@ public class InternalCatalog implements CatalogIf<Database> { tbl.getTimeSeriesCompactionLevelThreshold(), tbl.storeRowColumn(), binlogConfig, tbl.getRowStoreColumnsUniqueIds(rowStoreColumns), - objectPool); + objectPool, rowStorePageSize); task.setStorageFormat(tbl.getStorageFormat()); task.setInvertedIndexFileStorageFormat(tbl.getInvertedIndexFileStorageFormat()); @@ -2439,6 +2444,16 @@ public class InternalCatalog implements CatalogIf<Database> { } olapTable.setCompressionType(compressionType); + // get row_store_page_size + long rowStorePageSize = PropertyAnalyzer.ROW_STORE_PAGE_SIZE_DEFAULT_VALUE; + try { + rowStorePageSize = PropertyAnalyzer.analyzeRowStorePageSize(properties); + } catch (AnalysisException e) { + throw new DdlException(e.getMessage()); + } + + olapTable.setRowStorePageSize(rowStorePageSize); + // check data sort properties int keyColumnSize = CollectionUtils.isEmpty(keysDesc.getClusterKeysColumnIds()) ? keysDesc.keysColumnSize() : keysDesc.getClusterKeysColumnIds().size(); @@ -2839,7 +2854,7 @@ public class InternalCatalog implements CatalogIf<Database> { idGeneratorBuffer, binlogConfigForTask, partitionInfo.getDataProperty(partitionId).isStorageMediumSpecified(), - keysDesc.getClusterKeysColumnIds()); + keysDesc.getClusterKeysColumnIds(), olapTable.rowStorePageSize()); afterCreatePartitions(db.getId(), olapTable.getId(), null, olapTable.getIndexIdList(), true); olapTable.addPartition(partition); @@ -2922,7 +2937,7 @@ public class InternalCatalog implements CatalogIf<Database> { partionStoragePolicy, idGeneratorBuffer, binlogConfigForTask, dataProperty.isStorageMediumSpecified(), - keysDesc.getClusterKeysColumnIds()); + keysDesc.getClusterKeysColumnIds(), olapTable.rowStorePageSize()); olapTable.addPartition(partition); olapTable.getPartitionInfo().getDataProperty(partition.getId()) .setStoragePolicy(partionStoragePolicy); @@ -3384,7 +3399,7 @@ public class InternalCatalog implements CatalogIf<Database> { olapTable.getPartitionInfo().getDataProperty(oldPartitionId).getStoragePolicy(), idGeneratorBuffer, binlogConfig, copiedTbl.getPartitionInfo().getDataProperty(oldPartitionId).isStorageMediumSpecified(), - clusterKeyIdxes); + clusterKeyIdxes, olapTable.rowStorePageSize()); newPartitions.add(newPartition); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java b/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java index d36144af46c..f2cd9ad5843 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java @@ -886,7 +886,8 @@ public class ReportHandler extends Daemon { olapTable.storeRowColumn(), binlogConfig, olapTable.getRowStoreColumnsUniqueIds(rowStoreColumns), - objectPool); + objectPool, + olapTable.rowStorePageSize()); createReplicaTask.setIsRecoverTask(true); createReplicaTask.setInvertedIndexFileStorageFormat(olapTable diff --git a/fe/fe-core/src/main/java/org/apache/doris/task/CreateReplicaTask.java b/fe/fe-core/src/main/java/org/apache/doris/task/CreateReplicaTask.java index 022a8be67a9..7864ce2d4c2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/task/CreateReplicaTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/task/CreateReplicaTask.java @@ -65,6 +65,7 @@ public class CreateReplicaTask extends AgentTask { private TStorageType storageType; private TStorageMedium storageMedium; private TCompressionType compressionType; + private long rowStorePageSize; private List<Column> columns; @@ -151,7 +152,8 @@ public class CreateReplicaTask extends AgentTask { boolean storeRowColumn, BinlogConfig binlogConfig, List<Integer> rowStoreColumnUniqueIds, - Map<Object, Object> objectPool) { + Map<Object, Object> objectPool, + long rowStorePageSize) { super(null, backendId, TTaskType.CREATE, dbId, tableId, partitionId, indexId, tabletId); this.replicaId = replicaId; @@ -197,6 +199,7 @@ public class CreateReplicaTask extends AgentTask { this.storeRowColumn = storeRowColumn; this.binlogConfig = binlogConfig; this.objectPool = objectPool; + this.rowStorePageSize = rowStorePageSize; } public void setIsRecoverTask(boolean isRecoverTask) { @@ -339,6 +342,7 @@ public class CreateReplicaTask extends AgentTask { tSchema.setEnableSingleReplicaCompaction(enableSingleReplicaCompaction); tSchema.setSkipWriteIndexOnLoad(skipWriteIndexOnLoad); tSchema.setStoreRowColumn(storeRowColumn); + tSchema.setRowStorePageSize(rowStorePageSize); createTabletReq.setTabletSchema(tSchema); createTabletReq.setVersion(version); diff --git a/fe/fe-core/src/test/java/org/apache/doris/task/AgentTaskTest.java b/fe/fe-core/src/test/java/org/apache/doris/task/AgentTaskTest.java index f2295dcd5bc..5eea07e5ffe 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/task/AgentTaskTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/task/AgentTaskTest.java @@ -73,6 +73,7 @@ public class AgentTaskTest { private long version = 1L; private TStorageType storageType = TStorageType.COLUMN; + private long rowStorePageSize = 16384L; private List<Column> columns; private MarkedCountDownLatch<Long, Long> latch = new MarkedCountDownLatch<Long, Long>(3); @@ -107,7 +108,7 @@ public class AgentTaskTest { createReplicaTask = new CreateReplicaTask(backendId1, dbId, tableId, partitionId, indexId1, tabletId1, replicaId1, shortKeyNum, schemaHash1, version, KeysType.AGG_KEYS, storageType, TStorageMedium.SSD, columns, null, 0, latch, null, false, TTabletType.TABLET_TYPE_DISK, null, - TCompressionType.LZ4F, false, "", false, false, false, "", 0, 0, 0, 0, 0, false, null, null, objectPool); + TCompressionType.LZ4F, false, "", false, false, false, "", 0, 0, 0, 0, 0, false, null, null, objectPool, rowStorePageSize); // drop dropTask = new DropReplicaTask(backendId1, tabletId1, replicaId1, schemaHash1, false); diff --git a/gensrc/proto/olap_file.proto b/gensrc/proto/olap_file.proto index 1f8f88801be..053d4471bc7 100644 --- a/gensrc/proto/olap_file.proto +++ b/gensrc/proto/olap_file.proto @@ -383,6 +383,7 @@ message TabletSchemaPB { optional InvertedIndexStorageFormatPB inverted_index_storage_format = 25 [default=V1]; // column unique ids for row store columns repeated int32 row_store_column_unique_ids = 26; + optional int64 row_store_page_size = 27 [default=16384]; } message TabletSchemaCloudPB { @@ -411,6 +412,7 @@ message TabletSchemaCloudPB { optional InvertedIndexStorageFormatPB inverted_index_storage_format = 25 [default=V1]; // column unique ids for row store columns repeated int32 row_store_column_unique_ids = 26; + optional int64 row_store_page_size = 27 [default=16384]; optional bool is_dynamic_schema = 100 [default=false]; } diff --git a/gensrc/thrift/AgentService.thrift b/gensrc/thrift/AgentService.thrift index 76066f9d566..835cbaafaae 100644 --- a/gensrc/thrift/AgentService.thrift +++ b/gensrc/thrift/AgentService.thrift @@ -47,6 +47,7 @@ struct TTabletSchema { 19: optional list<i32> cluster_key_idxes // col unique id for row store column 20: optional list<i32> row_store_col_cids + 21: optional i64 row_store_page_size = 16384; } // this enum stands for different storage format in src_backends diff --git a/regression-test/data/query_p0/system/test_table_options.out b/regression-test/data/query_p0/system/test_table_options.out index 71b7de92f83..66856b811fb 100644 --- a/regression-test/data/query_p0/system/test_table_options.out +++ b/regression-test/data/query_p0/system/test_table_options.out @@ -1,9 +1,11 @@ -- This file is automatically generated. You should know what you did if you want to edit this -- !select -- -aggregate_table internal test_table_options_db AGG user_id,date,city,age,sex user_id HASH 1 1 {"min_load_replica_num":"-1","data_sort.col_num":"5","group_commit_interval_ms":"10000","data_sort.sort_type":"LEXICAL","is_being_synced":"false","binlog.enable":"false","binlog.ttl_seconds":"86400","inverted_index_storage_format":"V2","time_series_compaction_empty_rowsets_threshold":"5","default.replication_allocation":"tag.location.default: 1","time_series_compaction_level_threshold":"1","time [...] -duplicate_table internal test_table_options_db DUP timestamp,type,error_code type HASH 1 1 {"min_load_replica_num":"-1","data_sort.col_num":"3","group_commit_interval_ms":"10000","data_sort.sort_type":"LEXICAL","is_being_synced":"false","binlog.enable":"false","binlog.ttl_seconds":"86400","inverted_index_storage_format":"V2","time_series_compaction_empty_rowsets_threshold":"5","default.replication_allocation":"tag.location.default: 1","time_series_compaction_level_threshold":"1","time_se [...] -listtable internal test_table_options_db AGG user_id,date,timestamp,city,age,sex user_id HASH 16 3 {"min_load_replica_num":"-1","data_sort.col_num":"6","group_commit_interval_ms":"10000","data_sort.sort_type":"LEXICAL","is_being_synced":"false","binlog.enable":"false","binlog.ttl_seconds":"86400","inverted_index_storage_format":"V2","time_series_compaction_empty_rowsets_threshold":"5","default.replication_allocation":"tag.location.default: 1","time_series_compaction_level_threshold":"1", [...] -randomtable internal test_table_options_db DUP user_id,date,timestamp RANDOM RANDOM 16 1 {"min_load_replica_num":"-1","data_sort.col_num":"3","group_commit_interval_ms":"10000","data_sort.sort_type":"LEXICAL","is_being_synced":"false","binlog.enable":"false","binlog.ttl_seconds":"86400","inverted_index_storage_format":"V2","time_series_compaction_empty_rowsets_threshold":"5","default.replication_allocation":"tag.location.default: 1","time_series_compaction_level_threshold":"1","time_seri [...] -rangetable internal test_table_options_db AGG user_id,date,timestamp,city,age,sex user_id HASH 8 3 {"min_load_replica_num":"-1","data_sort.col_num":"6","group_commit_interval_ms":"10000","data_sort.sort_type":"LEXICAL","is_being_synced":"false","binlog.enable":"false","binlog.ttl_seconds":"86400","inverted_index_storage_format":"V2","time_series_compaction_empty_rowsets_threshold":"5","default.replication_allocation":"tag.location.default: 1","time_series_compaction_level_threshold":"1", [...] -unique_table internal test_table_options_db UNI user_id,username user_id HASH 1 1 {"min_load_replica_num":"-1","data_sort.col_num":"2","group_commit_interval_ms":"10000","data_sort.sort_type":"LEXICAL","is_being_synced":"false","binlog.enable":"false","binlog.ttl_seconds":"86400","inverted_index_storage_format":"V2","time_series_compaction_empty_rowsets_threshold":"5","default.replication_allocation":"tag.location.default: 1","time_series_compaction_level_threshold":"1","time_series_comp [...] +aggregate_table internal test_table_options_db AGG user_id,date,city,age,sex user_id HASH 1 1 {"min_load_replica_num":"-1","data_sort.col_num":"5","group_commit_interval_ms":"10000","data_sort.sort_type":"LEXICAL","is_being_synced":"false","binlog.enable":"false","enable_mow_light_delete":"false","binlog.ttl_seconds":"86400","inverted_index_storage_format":"V2","time_series_compaction_empty_rowsets_threshold":"5","default.replication_allocation":"tag.location.default: 1","time_series_com [...] +duplicate_table internal test_table_options_db DUP timestamp,type,error_code type HASH 1 1 {"min_load_replica_num":"-1","data_sort.col_num":"3","group_commit_interval_ms":"10000","data_sort.sort_type":"LEXICAL","is_being_synced":"false","binlog.enable":"false","enable_mow_light_delete":"false","binlog.ttl_seconds":"86400","inverted_index_storage_format":"V2","time_series_compaction_empty_rowsets_threshold":"5","default.replication_allocation":"tag.location.default: 1","time_series_compac [...] +listtable internal test_table_options_db AGG user_id,date,timestamp,city,age,sex user_id HASH 16 3 {"min_load_replica_num":"-1","data_sort.col_num":"6","group_commit_interval_ms":"10000","data_sort.sort_type":"LEXICAL","is_being_synced":"false","binlog.enable":"false","enable_mow_light_delete":"false","binlog.ttl_seconds":"86400","inverted_index_storage_format":"V2","time_series_compaction_empty_rowsets_threshold":"5","default.replication_allocation":"tag.location.default: 1","time_serie [...] +randomtable internal test_table_options_db DUP user_id,date,timestamp RANDOM RANDOM 16 1 {"min_load_replica_num":"-1","data_sort.col_num":"3","group_commit_interval_ms":"10000","data_sort.sort_type":"LEXICAL","is_being_synced":"false","binlog.enable":"false","enable_mow_light_delete":"false","binlog.ttl_seconds":"86400","inverted_index_storage_format":"V2","time_series_compaction_empty_rowsets_threshold":"5","default.replication_allocation":"tag.location.default: 1","time_series_compacti [...] +rangetable internal test_table_options_db AGG user_id,date,timestamp,city,age,sex user_id HASH 8 3 {"min_load_replica_num":"-1","data_sort.col_num":"6","group_commit_interval_ms":"10000","data_sort.sort_type":"LEXICAL","is_being_synced":"false","binlog.enable":"false","enable_mow_light_delete":"false","binlog.ttl_seconds":"86400","inverted_index_storage_format":"V2","time_series_compaction_empty_rowsets_threshold":"5","default.replication_allocation":"tag.location.default: 1","time_serie [...] +test_row_column_page_size1 internal test_table_options_db DUP aaa aaa HASH 1 1 {"min_load_replica_num":"-1","data_sort.col_num":"1","group_commit_interval_ms":"10000","data_sort.sort_type":"LEXICAL","is_being_synced":"false","binlog.enable":"false","enable_mow_light_delete":"false","binlog.ttl_seconds":"86400","inverted_index_storage_format":"V2","time_series_compaction_empty_rowsets_threshold":"5","default.replication_allocation":"tag.location.default: 1","time_series_compaction_level_t [...] +test_row_column_page_size2 internal test_table_options_db DUP aaa aaa HASH 1 1 {"min_load_replica_num":"-1","data_sort.col_num":"1","group_commit_interval_ms":"10000","data_sort.sort_type":"LEXICAL","is_being_synced":"false","binlog.enable":"false","enable_mow_light_delete":"false","binlog.ttl_seconds":"86400","inverted_index_storage_format":"V2","time_series_compaction_empty_rowsets_threshold":"5","default.replication_allocation":"tag.location.default: 1","time_series_compaction_level_t [...] +unique_table internal test_table_options_db UNI user_id,username user_id HASH 1 1 {"min_load_replica_num":"-1","data_sort.col_num":"2","group_commit_interval_ms":"10000","data_sort.sort_type":"LEXICAL","is_being_synced":"false","binlog.enable":"false","enable_mow_light_delete":"false","binlog.ttl_seconds":"86400","inverted_index_storage_format":"V2","time_series_compaction_empty_rowsets_threshold":"5","default.replication_allocation":"tag.location.default: 1","time_series_compaction_leve [...] diff --git a/regression-test/suites/query_p0/system/test_query_sys_tables.groovy b/regression-test/suites/query_p0/system/test_query_sys_tables.groovy index b8f14da041b..72198b32ead 100644 --- a/regression-test/suites/query_p0/system/test_query_sys_tables.groovy +++ b/regression-test/suites/query_p0/system/test_query_sys_tables.groovy @@ -238,6 +238,7 @@ suite("test_query_sys_tables", "query,p0") { AS SELECT ccc as a FROM ${tbName1} """ + sql("use information_schema") qt_views("select TABLE_NAME, VIEW_DEFINITION from views where TABLE_SCHEMA = '${dbName1}'") diff --git a/regression-test/suites/query_p0/system/test_table_options.groovy b/regression-test/suites/query_p0/system/test_table_options.groovy index e53f2ba64e9..dd898d4e208 100644 --- a/regression-test/suites/query_p0/system/test_table_options.groovy +++ b/regression-test/suites/query_p0/system/test_table_options.groovy @@ -153,6 +153,37 @@ suite("test_table_options") { "replication_allocation" = "tag.location.default: 1" ); """ + + // test row column page size + sql """ + CREATE TABLE IF NOT EXISTS test_row_column_page_size1 ( + `aaa` varchar(170) NOT NULL COMMENT "", + `bbb` varchar(20) NOT NULL COMMENT "", + `ccc` INT NULL COMMENT "", + `ddd` SMALLINT NULL COMMENT "" + ) + DISTRIBUTED BY HASH(`aaa`) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1", + "store_row_column" = "true" + ); + """ + + sql """ + CREATE TABLE IF NOT EXISTS test_row_column_page_size2 ( + `aaa` varchar(170) NOT NULL COMMENT "", + `bbb` varchar(20) NOT NULL COMMENT "", + `ccc` INT NULL COMMENT "", + `ddd` SMALLINT NULL COMMENT "" + ) + DISTRIBUTED BY HASH(`aaa`) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1", + "store_row_column" = "true", + "row_store_page_size" = "8190" + ); + """ + qt_select """select * from information_schema.table_options where table_schema=\"${dbName}\" order by TABLE_NAME; """ sql "drop database if exists ${dbName}" } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org