xiaokang commented on code in PR #34089: URL: https://github.com/apache/doris/pull/34089#discussion_r1641601545
########## fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java: ########## @@ -768,7 +773,43 @@ public static Boolean analyzeEnableDuplicateWithoutKeysByDefault(Map<String, Str + " must be `true` or `false`"); } - public static Boolean analyzeStoreRowColumn(Map<String, String> properties) throws AnalysisException { + public static List<String> analyzeRowStoreColumns(Map<String, String> properties, + List<String> columns, + boolean stripProperty) throws AnalysisException { + List<String> rowStoreColumns = Lists.newArrayList(); + String value = properties.get(PROPERTIES_ROW_STORE_COLUMNS); + // set empty row store columns by default + if (null == value) { + return null; + } + if (stripProperty) { + properties.remove(PROPERTIES_ROW_STORE_COLUMNS); + } + String[] rsColumnArr = value.split(COMMA_SEPARATOR); + rowStoreColumns.addAll(Arrays.asList(rsColumnArr)); + if (rowStoreColumns.isEmpty()) { + throw new AnalysisException(PROPERTIES_ROW_STORE_COLUMNS + " must not be empty"); + } + // check columns in column def + List<String> invalidColumns = rowStoreColumns.stream() + .filter(expectedColName -> columns.stream().noneMatch( + column -> column.equalsIgnoreCase(expectedColName))) + .collect(Collectors.toList()); + // if (invalidColumns.size() == 1 && invalidColumns.get(0).equalsIgnoreCase("__all__")) { + // // __all__ represents all the columns are encoded to row store + // rowStoreColumns.clear(); + // return rowStoreColumns; + // + if (!invalidColumns.isEmpty()) { + throw new AnalysisException( + "Column does not exist in table. Invalid columns: " + + invalidColumns.stream().collect(Collectors.joining(", ", "", ""))); + } + return rowStoreColumns; + } + + public static Boolean analyzeStoreRowColumn(Map<String, String> properties, + boolean stripProperty) throws AnalysisException { Review Comment: stripProperty -> removeProperty ########## fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java: ########## @@ -2546,17 +2548,29 @@ private boolean createOlapTable(Database db, CreateTableStmt stmt) throws UserEx } } - boolean storeRowColumn = false; + // analyze row store columns try { - storeRowColumn = PropertyAnalyzer.analyzeStoreRowColumn(properties); + boolean storeRowColumn = false; + storeRowColumn = PropertyAnalyzer.analyzeStoreRowColumn(properties, true); if (storeRowColumn && !enableLightSchemaChange) { throw new DdlException( "Row store column rely on light schema change, enable light schema change first"); } + olapTable.setStoreRowColumn(storeRowColumn); + List<String> rowStoreColumns; + try { + rowStoreColumns = PropertyAnalyzer.analyzeRowStoreColumns(properties, + baseSchema.stream().map(Column::getName).collect(Collectors.toList()), true); + if (rowStoreColumns != null && rowStoreColumns.isEmpty()) { + rowStoreColumns = null; + } + olapTable.setRowStoreColumns(rowStoreColumns); Review Comment: Do you need to set `olapTable.setStoreRowColumn(true)` here? ########## be/src/olap/base_tablet.h: ########## @@ -26,6 +26,7 @@ #include "olap/rowset/segment_v2/segment.h" #include "olap/tablet_fwd.h" #include "olap/tablet_meta.h" +#include "olap/tablet_schema.h" Review Comment: useless include ########## fe/fe-core/src/main/java/org/apache/doris/task/AlterReplicaTask.java: ########## @@ -172,7 +172,8 @@ public TAlterTabletReqV2 toThrift() { if (value == null) { List<TColumn> columns = new ArrayList<TColumn>(); for (Column column : baseSchemaColumns) { - columns.add(column.toThrift()); + TColumn tColumn = column.toThrift(); Review Comment: What's the difference? I think it's legacy code after rollback. ########## fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java: ########## @@ -1223,6 +1223,18 @@ public void setBloomFilterInfo(Set<String> bfColumns, double bfFpp) { this.bfFpp = bfFpp; } + public List<Integer> getRowStoreColumnsUniqueIds(List<String> rsColumnNames) { Review Comment: rsColumnNames -> rowStoreColumns ########## fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java: ########## @@ -768,7 +773,43 @@ public static Boolean analyzeEnableDuplicateWithoutKeysByDefault(Map<String, Str + " must be `true` or `false`"); } - public static Boolean analyzeStoreRowColumn(Map<String, String> properties) throws AnalysisException { + public static List<String> analyzeRowStoreColumns(Map<String, String> properties, + List<String> columns, + boolean stripProperty) throws AnalysisException { + List<String> rowStoreColumns = Lists.newArrayList(); + String value = properties.get(PROPERTIES_ROW_STORE_COLUMNS); + // set empty row store columns by default + if (null == value) { + return null; + } + if (stripProperty) { + properties.remove(PROPERTIES_ROW_STORE_COLUMNS); + } + String[] rsColumnArr = value.split(COMMA_SEPARATOR); + rowStoreColumns.addAll(Arrays.asList(rsColumnArr)); + if (rowStoreColumns.isEmpty()) { + throw new AnalysisException(PROPERTIES_ROW_STORE_COLUMNS + " must not be empty"); + } + // check columns in column def + List<String> invalidColumns = rowStoreColumns.stream() + .filter(expectedColName -> columns.stream().noneMatch( + column -> column.equalsIgnoreCase(expectedColName))) + .collect(Collectors.toList()); + // if (invalidColumns.size() == 1 && invalidColumns.get(0).equalsIgnoreCase("__all__")) { Review Comment: delete useless code ########## gensrc/proto/olap_file.proto: ########## @@ -380,6 +380,8 @@ message TabletSchemaPB { optional bool skip_write_index_on_load = 23 [default=false]; repeated int32 cluster_key_idxes = 24; optional InvertedIndexStorageFormatPB inverted_index_storage_format = 25 [default=V1]; + // column unique ids for row store columns + repeated int32 row_store_column_cids = 26; Review Comment: suggestion name: row_store_column_ids or row_store_cids ########## fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java: ########## @@ -768,7 +773,43 @@ public static Boolean analyzeEnableDuplicateWithoutKeysByDefault(Map<String, Str + " must be `true` or `false`"); } - public static Boolean analyzeStoreRowColumn(Map<String, String> properties) throws AnalysisException { + public static List<String> analyzeRowStoreColumns(Map<String, String> properties, + List<String> columns, + boolean stripProperty) throws AnalysisException { Review Comment: stripProperty -> removeProperty ########## gensrc/thrift/PaloInternalService.thrift: ########## @@ -301,6 +301,8 @@ struct TQueryOptions { 112: optional i32 max_column_reader_num = 0 113: optional bool enable_local_merge_sort = false; + + 114: optional bool enable_short_circuit_query_access_column_store = false; Review Comment: What's the disvantage if set it default true? ########## fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateMTMVInfo.java: ########## @@ -340,7 +340,7 @@ private void getColumns(Plan plan) { if (properties != null) { try { boolean storeRowColumn = - PropertyAnalyzer.analyzeStoreRowColumn(Maps.newHashMap(properties)); + PropertyAnalyzer.analyzeStoreRowColumn(Maps.newHashMap(properties), true); Review Comment: Why new map? ########## fe/fe-core/src/main/java/org/apache/doris/catalog/TableProperty.java: ########## @@ -238,13 +245,18 @@ public boolean enableSingleReplicaCompaction() { public TableProperty buildStoreRowColumn() { storeRowColumn = Boolean.parseBoolean( properties.getOrDefault(PropertyAnalyzer.PROPERTIES_STORE_ROW_COLUMN, "false")); - // Remove deprecated prefix and try again - String deprecatedPrefix = "deprecated_"; - if (!storeRowColumn && PropertyAnalyzer.PROPERTIES_STORE_ROW_COLUMN.startsWith(deprecatedPrefix)) { - storeRowColumn = Boolean.parseBoolean( - properties.getOrDefault( - PropertyAnalyzer.PROPERTIES_STORE_ROW_COLUMN.substring(deprecatedPrefix.length()), "false")); + return this; + } + + public TableProperty buildRowStoreColumns() { Review Comment: can you call PropertyAnalyzer.analyzeRowStoreColumns() to reuse code. ########## fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java: ########## @@ -446,7 +446,7 @@ public void analyze(Analyzer analyzer) throws UserException { } } // add a hidden column as row store - if (properties != null && PropertyAnalyzer.analyzeStoreRowColumn(new HashMap<>(properties))) { + if (properties != null && PropertyAnalyzer.analyzeStoreRowColumn(new HashMap<>(properties), true)) { Review Comment: Why create a new Map? ########## fe/fe-core/src/main/java/org/apache/doris/task/AlterReplicaTask.java: ########## @@ -60,16 +60,16 @@ public class AlterReplicaTask extends AgentTask { private long expiration; private String vaultId; - /** * AlterReplicaTask constructor. * */ + public AlterReplicaTask(long backendId, long dbId, long tableId, long partitionId, long rollupIndexId, long baseIndexId, long rollupTabletId, long baseTabletId, long newReplicaId, int newSchemaHash, int baseSchemaHash, long version, long jobId, AlterJobV2.JobType jobType, Map<String, Expr> defineExprs, DescriptorTable descTable, List<Column> baseSchemaColumns, Map<Object, Object> objectPool, - Expr whereClause, long expiration, String vaultId) { + Expr whereClause, long expiration, String vaultIds) { Review Comment: typo vaultIds? ########## fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java: ########## @@ -1920,6 +1946,20 @@ public int getAsInt() { } enableLightSchemaChange(db, olapTable); return; + } else if (properties.containsKey(PropertyAnalyzer.PROPERTIES_STORE_ROW_COLUMN) + || properties.containsKey(PropertyAnalyzer.PROPERTIES_ROW_STORE_COLUMNS)) { + String value = properties.get(PropertyAnalyzer.PROPERTIES_STORE_ROW_COLUMN); + if (value != null && value.equalsIgnoreCase("false")) { + throw new DdlException("Can not alter store_row_column from true to false currently"); Review Comment: User may want to drop row store since it's space consuming. ########## be/src/olap/tablet_schema.h: ########## @@ -474,6 +478,8 @@ class TabletSchema { void update_tablet_columns(const TabletSchema& tablet_schema, const std::vector<TColumn>& t_columns); + const std::vector<int32_t>& row_columns_cids() const { return _rowstore_column_cids; } Review Comment: suggest consistent name: row_store_column_ids() ########## be/src/service/point_query_executor.cpp: ########## @@ -335,23 +406,63 @@ Status PointQueryExecutor::_lookup_row_data() { _reusable->get_data_type_serdes(), _row_read_ctxs[i]._cached_row_data.data().data, _row_read_ctxs[i]._cached_row_data.data().size, _reusable->get_col_uid_to_idx(), - *_result_block, _reusable->get_col_default_values()); + *_result_block, _reusable->get_col_default_values(), + _reusable->include_col_uids()); continue; } if (!_row_read_ctxs[i]._row_location.has_value()) { continue; } std::string value; - RETURN_IF_ERROR(_tablet->lookup_row_data( - _row_read_ctxs[i]._primary_key, _row_read_ctxs[i]._row_location.value(), - *(_row_read_ctxs[i]._rowset_ptr), _reusable->tuple_desc(), - _profile_metrics.read_stats, value, - !config::disable_storage_row_cache /*whether write row cache*/)); - // serilize value to block, currently only jsonb row formt - vectorized::JsonbSerializeUtil::jsonb_to_block( - _reusable->get_data_type_serdes(), value.data(), value.size(), - _reusable->get_col_uid_to_idx(), *_result_block, - _reusable->get_col_default_values()); + // fill block by row store + if (_reusable->rs_column_uid() != -1) { + bool use_row_cache = !config::disable_storage_row_cache && Review Comment: If row cache can not be used for partial row store, how much performance is reduced? ########## be/src/service/point_query_executor.cpp: ########## @@ -335,23 +406,63 @@ Status PointQueryExecutor::_lookup_row_data() { _reusable->get_data_type_serdes(), _row_read_ctxs[i]._cached_row_data.data().data, _row_read_ctxs[i]._cached_row_data.data().size, _reusable->get_col_uid_to_idx(), - *_result_block, _reusable->get_col_default_values()); + *_result_block, _reusable->get_col_default_values(), + _reusable->include_col_uids()); continue; } if (!_row_read_ctxs[i]._row_location.has_value()) { continue; } std::string value; - RETURN_IF_ERROR(_tablet->lookup_row_data( - _row_read_ctxs[i]._primary_key, _row_read_ctxs[i]._row_location.value(), - *(_row_read_ctxs[i]._rowset_ptr), _reusable->tuple_desc(), - _profile_metrics.read_stats, value, - !config::disable_storage_row_cache /*whether write row cache*/)); - // serilize value to block, currently only jsonb row formt - vectorized::JsonbSerializeUtil::jsonb_to_block( - _reusable->get_data_type_serdes(), value.data(), value.size(), - _reusable->get_col_uid_to_idx(), *_result_block, - _reusable->get_col_default_values()); + // fill block by row store + if (_reusable->rs_column_uid() != -1) { + bool use_row_cache = !config::disable_storage_row_cache && Review Comment: What's the problem if row cache is used for partial row store? I think it's still OK. ########## be/src/olap/tablet_schema.h: ########## @@ -515,6 +521,10 @@ class TabletSchema { bool _store_row_column = false; bool _skip_write_index_on_load = false; InvertedIndexStorageFormatPB _inverted_index_storage_format = InvertedIndexStorageFormatPB::V1; + + // Contains column ids of which columns should be encoded into row store. + // ATTN: For compability reason empty cids means all columns of tablet schema are encoded to row column + std::vector<int32_t> _rowstore_column_cids; Review Comment: suggest consistent name: _row_store_column_ids ########## be/src/olap/tablet_schema.cpp: ########## @@ -1034,7 +1037,7 @@ void TabletSchema::build_current_tablet_schema(int64_t index_id, int32_t version _is_in_memory = ori_tablet_schema.is_in_memory(); _disable_auto_compaction = ori_tablet_schema.disable_auto_compaction(); _enable_single_replica_compaction = ori_tablet_schema.enable_single_replica_compaction(); - _store_row_column = ori_tablet_schema.store_row_column(); + _store_row_column = ori_tablet_schema._store_row_column; Review Comment: Where is columns ids copied? ########## be/src/olap/tablet_schema.h: ########## @@ -342,8 +344,10 @@ class TabletSchema { _enable_single_replica_compaction = enable_single_replica_compaction; } bool enable_single_replica_compaction() const { return _enable_single_replica_compaction; } - void set_store_row_column(bool store_row_column) { _store_row_column = store_row_column; } - bool store_row_column() const { return _store_row_column; } + // indicate if full row store column(all the columns encodes as row) exists + bool has_full_row_store_column() const { Review Comment: suggest more clear name: has_row_store_for_all_columns() -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org