morrySnow commented on code in PR #32792:
URL: https://github.com/apache/doris/pull/32792#discussion_r1539060195


##########
fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java:
##########
@@ -100,52 +102,42 @@ public static TableStatsMeta read(DataInput dataInput) 
throws IOException {
         String json = Text.readString(dataInput);
         TableStatsMeta tableStats = GsonUtils.GSON.fromJson(json, 
TableStatsMeta.class);
         // Might be null counterintuitively, for compatible
-        if (tableStats.colNameToColStatsMeta == null) {
-            tableStats.colNameToColStatsMeta = new ConcurrentHashMap<>();
+        if (tableStats.colToColStatsMeta == null) {
+            tableStats.colToColStatsMeta = new ConcurrentHashMap<>();
+        }
+        if (tableStats.deprecatedColNameToColStatsMeta != null) {
+            tableStats.convertDeprecatedColStatsToNewVersion();
         }
         return tableStats;
     }
 
-    public long findColumnLastUpdateTime(String colName) {
-        ColStatsMeta colStatsMeta = colNameToColStatsMeta.get(colName);
+    public long findColumnLastUpdateTime(String colName, String indexId) {
+        ColStatsMeta colStatsMeta = colToColStatsMeta.get(Pair.of(colName, 
indexId));
         if (colStatsMeta == null) {
             return 0;
         }
         return colStatsMeta.updatedTime;
     }
 
-    public ColStatsMeta findColumnStatsMeta(String colName) {
-        return colNameToColStatsMeta.get(colName);
-    }
-
-    public void removeColumn(String colName) {
-        colNameToColStatsMeta.remove(colName);
+    public ColStatsMeta findColumnStatsMeta(String colName, String indexName) {

Review Comment:
   indexName, colName



##########
fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java:
##########
@@ -1088,25 +1044,16 @@ public void registerSysJob(AnalysisInfo jobInfo, 
Map<Long, BaseAnalysisTask> tas
         analysisJobIdToTaskMap.put(jobInfo.jobId, taskInfos);
     }
 
-    // Remove col stats status from TableStats if failed load some col stats 
after analyze corresponding column so that
-    // we could make sure it would be analyzed again soon if user or system 
submit job for that column again.
-    public void removeColStatsStatus(long tblId, String colName) {
-        TableStatsMeta tableStats = findTableStatsStatus(tblId);
-        if (tableStats != null) {
-            tableStats.removeColumn(colName);
-        }
-    }
-
     public void removeTableStats(long tableId) {
         idToTblStats.remove(tableId);
     }
 
-    public ColStatsMeta findColStatsMeta(long tblId, String colName) {
+    public ColStatsMeta findColStatsMeta(long tblId, String colName, String 
indexName) {

Review Comment:
   change order of args to `tblid, indexName, colName` is better.



##########
fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java:
##########
@@ -64,7 +62,11 @@ public class TableStatsMeta implements Writable {
     public long updatedTime;
 
     @SerializedName("colNameToColStatsMeta")
-    private ConcurrentMap<String, ColStatsMeta> colNameToColStatsMeta = new 
ConcurrentHashMap<>();
+    private ConcurrentMap<String, ColStatsMeta> 
deprecatedColNameToColStatsMeta = new ConcurrentHashMap<>();
+
+    @SerializedName("colToColStatsMeta")
+    // <ColumnName, IndexId> -> ColStatsMeta
+    private ConcurrentMap<Pair<String, String>, ColStatsMeta> 
colToColStatsMeta = new ConcurrentHashMap<>();

Review Comment:
   indexId, columnName is better. why not use index -> columns -> colStats? 



##########
fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java:
##########
@@ -75,7 +75,7 @@ public AnalysisInfoBuilder(AnalysisInfo info) {
         catalogId = info.catalogId;
         dbId = info.dbId;
         tblId = info.tblId;
-        colToPartitions = info.colToPartitions;
+        jobColumnList = info.jobColumnList;

Review Comment:
   all list use plural as nam pattern. so jobColumnList -> jobColumns



##########
fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java:
##########
@@ -661,4 +657,9 @@ public List<Long> getChunkSizes() {
     public long fetchRowCount() {
         return 0;
     }
+
+    @Override
+    public List<Pair<String, String>> getColumnIndexPairList(Set<String> 
columns) {
+        return null;

Review Comment:
   return null is very danger. Api such as containsAll could not handle null. 
return empty list or remove the default impl?



##########
fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java:
##########
@@ -159,21 +151,27 @@ public void update(AnalysisInfo analyzedJob, TableIf 
tableIf) {
             if (tableIf instanceof OlapTable) {
                 rowCount = analyzedJob.emptyJob ? 0 : tableIf.getRowCount();
             }
-            if (!analyzedJob.emptyJob && analyzedJob.colToPartitions.keySet()
-                    .containsAll(tableIf.getBaseSchema().stream()
-                            .filter(c -> 
!StatisticsUtil.isUnsupportedType(c.getType()))
-                            
.map(Column::getName).collect(Collectors.toSet()))) {
+            if (analyzedJob.emptyJob) {
+                return;
+            }
+            if (analyzedJob.jobColumnList.containsAll(
+                    tableIf.getColumnIndexPairList(
+                    
tableIf.getSchemaAllIndexes(false).stream().map(Column::getName).collect(Collectors.toSet()))))
 {
                 updatedRows.set(0);
                 newPartitionLoaded.set(false);
             }
             if (tableIf instanceof OlapTable) {
                 PartitionInfo partitionInfo = ((OlapTable) 
tableIf).getPartitionInfo();
-                if (partitionInfo != null && 
analyzedJob.colToPartitions.keySet()
-                        
.containsAll(partitionInfo.getPartitionColumns().stream()
-                            
.map(Column::getName).collect(Collectors.toSet()))) {
+                if (partitionInfo != null && analyzedJob.jobColumnList
+                        
.containsAll(tableIf.getColumnIndexPairList(partitionInfo.getPartitionColumns().stream()
+                            
.map(Column::getName).collect(Collectors.toSet())))) {
                     newPartitionLoaded.set(false);
                 }
             }
         }
     }
+
+    public void convertDeprecatedColStatsToNewVersion() {
+        deprecatedColNameToColStatsMeta = null;
+    }

Review Comment:
   not impl?



##########
fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java:
##########
@@ -1296,34 +1296,20 @@ public boolean needReAnalyzeTable(TableStatsMeta 
tblStats) {
     }
 
     @Override
-    public Map<String, Set<String>> findReAnalyzeNeededPartitions() {
-        TableStatsMeta tableStats = 
Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(getId());
-        Set<String> allPartitions = 
getPartitionNames().stream().map(this::getPartition)
-                
.filter(Partition::hasData).map(Partition::getName).collect(Collectors.toSet());
-        if (tableStats == null) {
-            Map<String, Set<String>> ret = Maps.newHashMap();
-            for (Column col : getSchemaAllIndexes(false)) {
-                if (StatisticsUtil.isUnsupportedType(col.getType())) {
+    public List<Pair<String, String>> getColumnIndexPairList(Set<String> 
columns) {

Review Comment:
   ```suggestion
       public List<Pair<String, String>> getColumnIndexPairs(Set<String> 
columns) {
   ```



##########
fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java:
##########
@@ -144,97 +139,32 @@ public List<Column> getSchemaAllIndexes(boolean full) {
         StatisticsAutoCollector saa = new StatisticsAutoCollector();
         List<AnalysisInfo> analysisInfoList = saa.constructAnalysisInfo(new 
Database(1, "anydb"));
         Assertions.assertEquals(1, analysisInfoList.size());
-        Assertions.assertEquals("c1", 
analysisInfoList.get(0).colName.split(",")[0]);
+        Assertions.assertNull(analysisInfoList.get(0).colName);
     }
 
     @Test
-    public void testGetReAnalyzeRequiredPart0() {
+    public void testSkipWideTable() {
 
         TableIf tableIf = new OlapTable();
 
         new MockUp<OlapTable>() {
-            @Mock
-            protected Map<String, Set<String>> findReAnalyzeNeededPartitions() 
{
-                Set<String> partitionNames = new HashSet<>();
-                partitionNames.add("p1");
-                partitionNames.add("p2");
-                Map<String, Set<String>> map = new HashMap<>();
-                map.put("col1", partitionNames);
-                return map;
-            }
-
-            @Mock
-            public long getRowCount() {
-                return 100;
-            }
-
             @Mock
             public List<Column> getBaseSchema() {
                 return Lists.newArrayList(new Column("col1", Type.INT), new 
Column("col2", Type.INT));
             }
-        };
-
-        new MockUp<StatisticsUtil>() {
-            @Mock
-            public TableIf findTable(long catalogName, long dbName, long 
tblName) {
-                return tableIf;
-            }
-        };
-        AnalysisInfo analysisInfo = new 
AnalysisInfoBuilder().setAnalysisMethod(AnalysisMethod.FULL)
-                .setColToPartitions(new HashMap<>()).setAnalysisType(
-                
AnalysisType.FUNDAMENTALS).setColName("col1").setJobType(JobType.SYSTEM).build();
-        new MockUp<AnalysisManager>() {
-
-            int count = 0;
-
-            TableStatsMeta[] tableStatsArr =
-                    new TableStatsMeta[] {new TableStatsMeta(0, analysisInfo, 
tableIf),
-                            new TableStatsMeta(0, analysisInfo, tableIf), 
null};
-
-            {
-                tableStatsArr[0].updatedRows.addAndGet(100);
-                tableStatsArr[1].updatedRows.addAndGet(0);
-            }
-
-            @Mock
-            public TableStatsMeta findTableStatsStatus(long tblId) {
-                return tableStatsArr[count++];
-            }
-        };
-
-        new MockUp<StatisticsAutoCollector>() {
-            @Mock
-            public AnalysisInfo getAnalysisJobInfo(AnalysisInfo jobInfo, 
TableIf table,
-                    Set<String> needRunPartitions) {
-                return new AnalysisInfoBuilder().build();
-            }
-        };
-        StatisticsAutoCollector statisticsAutoCollector = new 
StatisticsAutoCollector();
-        AnalysisInfo analysisInfo2 = new AnalysisInfoBuilder()
-                .setCatalogId(0)
-                .setDBId(0)
-                .setTblId(0).build();
-        
Assertions.assertNotNull(statisticsAutoCollector.getReAnalyzeRequiredPart(analysisInfo2));
-        // uncomment it when updatedRows gets ready
-        // 
Assertions.assertNull(statisticsAutoCollector.getReAnalyzeRequiredPart(analysisInfo2));
-        
Assertions.assertNotNull(statisticsAutoCollector.getReAnalyzeRequiredPart(analysisInfo2));
-    }
-
-    @Test
-    public void testSkipWideTable() {
-
-        TableIf tableIf = new OlapTable();
 
-        new MockUp<OlapTable>() {
             @Mock
-            public List<Column> getBaseSchema() {
-                return Lists.newArrayList(new Column("col1", Type.INT), new 
Column("col2", Type.INT));
+            public List<Pair<String, String>> 
getColumnIndexPairList(Set<String> columns) {

Review Comment:
   ```suggestion
               public List<Pair<String, String>> 
getColumnIndexPairs(Set<String> columns) {
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to