xiaokang commented on code in PR #34089:
URL: https://github.com/apache/doris/pull/34089#discussion_r1641601545


##########
fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java:
##########
@@ -768,7 +773,43 @@ public static Boolean 
analyzeEnableDuplicateWithoutKeysByDefault(Map<String, Str
                 + " must be `true` or `false`");
     }
 
-    public static Boolean analyzeStoreRowColumn(Map<String, String> 
properties) throws AnalysisException {
+    public static List<String> analyzeRowStoreColumns(Map<String, String> 
properties,
+            List<String> columns,
+             boolean stripProperty) throws AnalysisException {
+        List<String> rowStoreColumns = Lists.newArrayList();
+        String value = properties.get(PROPERTIES_ROW_STORE_COLUMNS);
+        // set empty row store columns by default
+        if (null == value) {
+            return null;
+        }
+        if (stripProperty) {
+            properties.remove(PROPERTIES_ROW_STORE_COLUMNS);
+        }
+        String[] rsColumnArr = value.split(COMMA_SEPARATOR);
+        rowStoreColumns.addAll(Arrays.asList(rsColumnArr));
+        if (rowStoreColumns.isEmpty()) {
+            throw new AnalysisException(PROPERTIES_ROW_STORE_COLUMNS + " must 
not be empty");
+        }
+        // check columns in column def
+        List<String> invalidColumns = rowStoreColumns.stream()
+                .filter(expectedColName -> columns.stream().noneMatch(
+                        column -> column.equalsIgnoreCase(expectedColName)))
+                .collect(Collectors.toList());
+        // if (invalidColumns.size() == 1 && 
invalidColumns.get(0).equalsIgnoreCase("__all__")) {
+        //     // __all__ represents all the columns are encoded to row store
+        //     rowStoreColumns.clear();
+        //     return rowStoreColumns;
+        //
+        if (!invalidColumns.isEmpty()) {
+            throw new AnalysisException(
+                    "Column does not exist in table. Invalid columns: "
+                            + 
invalidColumns.stream().collect(Collectors.joining(", ", "", "")));
+        }
+        return rowStoreColumns;
+    }
+
+    public static Boolean analyzeStoreRowColumn(Map<String, String> properties,
+                       boolean stripProperty) throws AnalysisException {

Review Comment:
   stripProperty -> removeProperty



##########
fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java:
##########
@@ -2546,17 +2548,29 @@ private boolean createOlapTable(Database db, 
CreateTableStmt stmt) throws UserEx
             }
         }
 
-        boolean storeRowColumn = false;
+        // analyze row store columns
         try {
-            storeRowColumn = 
PropertyAnalyzer.analyzeStoreRowColumn(properties);
+            boolean storeRowColumn = false;
+            storeRowColumn = 
PropertyAnalyzer.analyzeStoreRowColumn(properties, true);
             if (storeRowColumn && !enableLightSchemaChange) {
                 throw new DdlException(
                         "Row store column rely on light schema change, enable 
light schema change first");
             }
+            olapTable.setStoreRowColumn(storeRowColumn);
+            List<String> rowStoreColumns;
+            try {
+                rowStoreColumns = 
PropertyAnalyzer.analyzeRowStoreColumns(properties,
+                            
baseSchema.stream().map(Column::getName).collect(Collectors.toList()), true);
+                if (rowStoreColumns != null && rowStoreColumns.isEmpty()) {
+                    rowStoreColumns = null;
+                }
+                olapTable.setRowStoreColumns(rowStoreColumns);

Review Comment:
   Do you need to set `olapTable.setStoreRowColumn(true)` here?



##########
be/src/olap/base_tablet.h:
##########
@@ -26,6 +26,7 @@
 #include "olap/rowset/segment_v2/segment.h"
 #include "olap/tablet_fwd.h"
 #include "olap/tablet_meta.h"
+#include "olap/tablet_schema.h"

Review Comment:
   useless include



##########
fe/fe-core/src/main/java/org/apache/doris/task/AlterReplicaTask.java:
##########
@@ -172,7 +172,8 @@ public TAlterTabletReqV2 toThrift() {
             if (value == null) {
                 List<TColumn> columns = new ArrayList<TColumn>();
                 for (Column column : baseSchemaColumns) {
-                    columns.add(column.toThrift());
+                    TColumn tColumn = column.toThrift();

Review Comment:
   What's the difference? I think it's legacy code after rollback.



##########
fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java:
##########
@@ -1223,6 +1223,18 @@ public void setBloomFilterInfo(Set<String> bfColumns, 
double bfFpp) {
         this.bfFpp = bfFpp;
     }
 
+    public List<Integer> getRowStoreColumnsUniqueIds(List<String> 
rsColumnNames) {

Review Comment:
   rsColumnNames -> rowStoreColumns



##########
fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java:
##########
@@ -768,7 +773,43 @@ public static Boolean 
analyzeEnableDuplicateWithoutKeysByDefault(Map<String, Str
                 + " must be `true` or `false`");
     }
 
-    public static Boolean analyzeStoreRowColumn(Map<String, String> 
properties) throws AnalysisException {
+    public static List<String> analyzeRowStoreColumns(Map<String, String> 
properties,
+            List<String> columns,
+             boolean stripProperty) throws AnalysisException {
+        List<String> rowStoreColumns = Lists.newArrayList();
+        String value = properties.get(PROPERTIES_ROW_STORE_COLUMNS);
+        // set empty row store columns by default
+        if (null == value) {
+            return null;
+        }
+        if (stripProperty) {
+            properties.remove(PROPERTIES_ROW_STORE_COLUMNS);
+        }
+        String[] rsColumnArr = value.split(COMMA_SEPARATOR);
+        rowStoreColumns.addAll(Arrays.asList(rsColumnArr));
+        if (rowStoreColumns.isEmpty()) {
+            throw new AnalysisException(PROPERTIES_ROW_STORE_COLUMNS + " must 
not be empty");
+        }
+        // check columns in column def
+        List<String> invalidColumns = rowStoreColumns.stream()
+                .filter(expectedColName -> columns.stream().noneMatch(
+                        column -> column.equalsIgnoreCase(expectedColName)))
+                .collect(Collectors.toList());
+        // if (invalidColumns.size() == 1 && 
invalidColumns.get(0).equalsIgnoreCase("__all__")) {

Review Comment:
   delete useless code



##########
gensrc/proto/olap_file.proto:
##########
@@ -380,6 +380,8 @@ message TabletSchemaPB {
     optional bool skip_write_index_on_load = 23 [default=false];
     repeated int32 cluster_key_idxes = 24;
     optional InvertedIndexStorageFormatPB inverted_index_storage_format = 25 
[default=V1];
+    // column unique ids for row store columns
+    repeated int32 row_store_column_cids = 26;

Review Comment:
   suggestion name: row_store_column_ids or row_store_cids



##########
fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java:
##########
@@ -768,7 +773,43 @@ public static Boolean 
analyzeEnableDuplicateWithoutKeysByDefault(Map<String, Str
                 + " must be `true` or `false`");
     }
 
-    public static Boolean analyzeStoreRowColumn(Map<String, String> 
properties) throws AnalysisException {
+    public static List<String> analyzeRowStoreColumns(Map<String, String> 
properties,
+            List<String> columns,
+             boolean stripProperty) throws AnalysisException {

Review Comment:
   stripProperty -> removeProperty



##########
gensrc/thrift/PaloInternalService.thrift:
##########
@@ -301,6 +301,8 @@ struct TQueryOptions {
   112: optional i32 max_column_reader_num = 0
 
   113: optional bool enable_local_merge_sort = false;
+
+  114: optional bool enable_short_circuit_query_access_column_store = false;

Review Comment:
   What's the disvantage if set it default true?



##########
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateMTMVInfo.java:
##########
@@ -340,7 +340,7 @@ private void getColumns(Plan plan) {
         if (properties != null) {
             try {
                 boolean storeRowColumn =
-                        
PropertyAnalyzer.analyzeStoreRowColumn(Maps.newHashMap(properties));
+                        
PropertyAnalyzer.analyzeStoreRowColumn(Maps.newHashMap(properties), true);

Review Comment:
   Why new map?



##########
fe/fe-core/src/main/java/org/apache/doris/catalog/TableProperty.java:
##########
@@ -238,13 +245,18 @@ public boolean enableSingleReplicaCompaction() {
     public TableProperty buildStoreRowColumn() {
         storeRowColumn = Boolean.parseBoolean(
                 
properties.getOrDefault(PropertyAnalyzer.PROPERTIES_STORE_ROW_COLUMN, "false"));
-        // Remove deprecated prefix and try again
-        String deprecatedPrefix = "deprecated_";
-        if (!storeRowColumn && 
PropertyAnalyzer.PROPERTIES_STORE_ROW_COLUMN.startsWith(deprecatedPrefix)) {
-            storeRowColumn = Boolean.parseBoolean(
-                properties.getOrDefault(
-                    
PropertyAnalyzer.PROPERTIES_STORE_ROW_COLUMN.substring(deprecatedPrefix.length()),
 "false"));
+        return this;
+    }
+
+    public TableProperty buildRowStoreColumns() {

Review Comment:
   can you call PropertyAnalyzer.analyzeRowStoreColumns() to reuse code.



##########
fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java:
##########
@@ -446,7 +446,7 @@ public void analyze(Analyzer analyzer) throws UserException 
{
             }
         }
         // add a hidden column as row store
-        if (properties != null && PropertyAnalyzer.analyzeStoreRowColumn(new 
HashMap<>(properties))) {
+        if (properties != null && PropertyAnalyzer.analyzeStoreRowColumn(new 
HashMap<>(properties), true)) {

Review Comment:
   Why create a new Map?



##########
fe/fe-core/src/main/java/org/apache/doris/task/AlterReplicaTask.java:
##########
@@ -60,16 +60,16 @@ public class AlterReplicaTask extends AgentTask {
     private long expiration;
 
     private String vaultId;
-
     /**
      * AlterReplicaTask constructor.
      *
      */
+
     public AlterReplicaTask(long backendId, long dbId, long tableId, long 
partitionId, long rollupIndexId,
             long baseIndexId, long rollupTabletId, long baseTabletId, long 
newReplicaId, int newSchemaHash,
             int baseSchemaHash, long version, long jobId, AlterJobV2.JobType 
jobType, Map<String, Expr> defineExprs,
             DescriptorTable descTable, List<Column> baseSchemaColumns, 
Map<Object, Object> objectPool,
-            Expr whereClause, long expiration, String vaultId) {
+            Expr whereClause, long expiration, String vaultIds) {

Review Comment:
   typo vaultIds?



##########
fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java:
##########
@@ -1920,6 +1946,20 @@ public int getAsInt() {
                         }
                         enableLightSchemaChange(db, olapTable);
                         return;
+                    } else if 
(properties.containsKey(PropertyAnalyzer.PROPERTIES_STORE_ROW_COLUMN)
+                                || 
properties.containsKey(PropertyAnalyzer.PROPERTIES_ROW_STORE_COLUMNS)) {
+                        String value = 
properties.get(PropertyAnalyzer.PROPERTIES_STORE_ROW_COLUMN);
+                        if (value != null && value.equalsIgnoreCase("false")) {
+                            throw new DdlException("Can not alter 
store_row_column from true to false currently");

Review Comment:
   User may want to drop row store since it's space consuming.



##########
be/src/olap/tablet_schema.h:
##########
@@ -474,6 +478,8 @@ class TabletSchema {
     void update_tablet_columns(const TabletSchema& tablet_schema,
                                const std::vector<TColumn>& t_columns);
 
+    const std::vector<int32_t>& row_columns_cids() const { return 
_rowstore_column_cids; }

Review Comment:
   suggest consistent name: row_store_column_ids()



##########
be/src/service/point_query_executor.cpp:
##########
@@ -335,23 +406,63 @@ Status PointQueryExecutor::_lookup_row_data() {
                     _reusable->get_data_type_serdes(),
                     _row_read_ctxs[i]._cached_row_data.data().data,
                     _row_read_ctxs[i]._cached_row_data.data().size, 
_reusable->get_col_uid_to_idx(),
-                    *_result_block, _reusable->get_col_default_values());
+                    *_result_block, _reusable->get_col_default_values(),
+                    _reusable->include_col_uids());
             continue;
         }
         if (!_row_read_ctxs[i]._row_location.has_value()) {
             continue;
         }
         std::string value;
-        RETURN_IF_ERROR(_tablet->lookup_row_data(
-                _row_read_ctxs[i]._primary_key, 
_row_read_ctxs[i]._row_location.value(),
-                *(_row_read_ctxs[i]._rowset_ptr), _reusable->tuple_desc(),
-                _profile_metrics.read_stats, value,
-                !config::disable_storage_row_cache /*whether write row 
cache*/));
-        // serilize value to block, currently only jsonb row formt
-        vectorized::JsonbSerializeUtil::jsonb_to_block(
-                _reusable->get_data_type_serdes(), value.data(), value.size(),
-                _reusable->get_col_uid_to_idx(), *_result_block,
-                _reusable->get_col_default_values());
+        // fill block by row store
+        if (_reusable->rs_column_uid() != -1) {
+            bool use_row_cache = !config::disable_storage_row_cache &&

Review Comment:
   If row cache can not be used for partial row store, how much performance is 
reduced?



##########
be/src/service/point_query_executor.cpp:
##########
@@ -335,23 +406,63 @@ Status PointQueryExecutor::_lookup_row_data() {
                     _reusable->get_data_type_serdes(),
                     _row_read_ctxs[i]._cached_row_data.data().data,
                     _row_read_ctxs[i]._cached_row_data.data().size, 
_reusable->get_col_uid_to_idx(),
-                    *_result_block, _reusable->get_col_default_values());
+                    *_result_block, _reusable->get_col_default_values(),
+                    _reusable->include_col_uids());
             continue;
         }
         if (!_row_read_ctxs[i]._row_location.has_value()) {
             continue;
         }
         std::string value;
-        RETURN_IF_ERROR(_tablet->lookup_row_data(
-                _row_read_ctxs[i]._primary_key, 
_row_read_ctxs[i]._row_location.value(),
-                *(_row_read_ctxs[i]._rowset_ptr), _reusable->tuple_desc(),
-                _profile_metrics.read_stats, value,
-                !config::disable_storage_row_cache /*whether write row 
cache*/));
-        // serilize value to block, currently only jsonb row formt
-        vectorized::JsonbSerializeUtil::jsonb_to_block(
-                _reusable->get_data_type_serdes(), value.data(), value.size(),
-                _reusable->get_col_uid_to_idx(), *_result_block,
-                _reusable->get_col_default_values());
+        // fill block by row store
+        if (_reusable->rs_column_uid() != -1) {
+            bool use_row_cache = !config::disable_storage_row_cache &&

Review Comment:
   What's the problem if row cache is used for partial row store? I think it's 
still OK.



##########
be/src/olap/tablet_schema.h:
##########
@@ -515,6 +521,10 @@ class TabletSchema {
     bool _store_row_column = false;
     bool _skip_write_index_on_load = false;
     InvertedIndexStorageFormatPB _inverted_index_storage_format = 
InvertedIndexStorageFormatPB::V1;
+
+    // Contains column ids of which columns should be encoded into row store.
+    // ATTN: For compability reason empty cids means all columns of tablet 
schema are encoded to row column
+    std::vector<int32_t> _rowstore_column_cids;

Review Comment:
   suggest consistent name: _row_store_column_ids



##########
be/src/olap/tablet_schema.cpp:
##########
@@ -1034,7 +1037,7 @@ void TabletSchema::build_current_tablet_schema(int64_t 
index_id, int32_t version
     _is_in_memory = ori_tablet_schema.is_in_memory();
     _disable_auto_compaction = ori_tablet_schema.disable_auto_compaction();
     _enable_single_replica_compaction = 
ori_tablet_schema.enable_single_replica_compaction();
-    _store_row_column = ori_tablet_schema.store_row_column();
+    _store_row_column = ori_tablet_schema._store_row_column;

Review Comment:
   Where is columns ids copied?



##########
be/src/olap/tablet_schema.h:
##########
@@ -342,8 +344,10 @@ class TabletSchema {
         _enable_single_replica_compaction = enable_single_replica_compaction;
     }
     bool enable_single_replica_compaction() const { return 
_enable_single_replica_compaction; }
-    void set_store_row_column(bool store_row_column) { _store_row_column = 
store_row_column; }
-    bool store_row_column() const { return _store_row_column; }
+    // indicate if full row store column(all the columns encodes as row) exists
+    bool has_full_row_store_column() const {

Review Comment:
   suggest more clear name: has_row_store_for_all_columns()



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to