morrySnow commented on code in PR #32792: URL: https://github.com/apache/doris/pull/32792#discussion_r1539060195
########## fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java: ########## @@ -100,52 +102,42 @@ public static TableStatsMeta read(DataInput dataInput) throws IOException { String json = Text.readString(dataInput); TableStatsMeta tableStats = GsonUtils.GSON.fromJson(json, TableStatsMeta.class); // Might be null counterintuitively, for compatible - if (tableStats.colNameToColStatsMeta == null) { - tableStats.colNameToColStatsMeta = new ConcurrentHashMap<>(); + if (tableStats.colToColStatsMeta == null) { + tableStats.colToColStatsMeta = new ConcurrentHashMap<>(); + } + if (tableStats.deprecatedColNameToColStatsMeta != null) { + tableStats.convertDeprecatedColStatsToNewVersion(); } return tableStats; } - public long findColumnLastUpdateTime(String colName) { - ColStatsMeta colStatsMeta = colNameToColStatsMeta.get(colName); + public long findColumnLastUpdateTime(String colName, String indexId) { + ColStatsMeta colStatsMeta = colToColStatsMeta.get(Pair.of(colName, indexId)); if (colStatsMeta == null) { return 0; } return colStatsMeta.updatedTime; } - public ColStatsMeta findColumnStatsMeta(String colName) { - return colNameToColStatsMeta.get(colName); - } - - public void removeColumn(String colName) { - colNameToColStatsMeta.remove(colName); + public ColStatsMeta findColumnStatsMeta(String colName, String indexName) { Review Comment: indexName, colName ########## fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java: ########## @@ -1088,25 +1044,16 @@ public void registerSysJob(AnalysisInfo jobInfo, Map<Long, BaseAnalysisTask> tas analysisJobIdToTaskMap.put(jobInfo.jobId, taskInfos); } - // Remove col stats status from TableStats if failed load some col stats after analyze corresponding column so that - // we could make sure it would be analyzed again soon if user or system submit job for that column again. - public void removeColStatsStatus(long tblId, String colName) { - TableStatsMeta tableStats = findTableStatsStatus(tblId); - if (tableStats != null) { - tableStats.removeColumn(colName); - } - } - public void removeTableStats(long tableId) { idToTblStats.remove(tableId); } - public ColStatsMeta findColStatsMeta(long tblId, String colName) { + public ColStatsMeta findColStatsMeta(long tblId, String colName, String indexName) { Review Comment: change order of args to `tblid, indexName, colName` is better. ########## fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java: ########## @@ -64,7 +62,11 @@ public class TableStatsMeta implements Writable { public long updatedTime; @SerializedName("colNameToColStatsMeta") - private ConcurrentMap<String, ColStatsMeta> colNameToColStatsMeta = new ConcurrentHashMap<>(); + private ConcurrentMap<String, ColStatsMeta> deprecatedColNameToColStatsMeta = new ConcurrentHashMap<>(); + + @SerializedName("colToColStatsMeta") + // <ColumnName, IndexId> -> ColStatsMeta + private ConcurrentMap<Pair<String, String>, ColStatsMeta> colToColStatsMeta = new ConcurrentHashMap<>(); Review Comment: indexId, columnName is better. why not use index -> columns -> colStats? ########## fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java: ########## @@ -75,7 +75,7 @@ public AnalysisInfoBuilder(AnalysisInfo info) { catalogId = info.catalogId; dbId = info.dbId; tblId = info.tblId; - colToPartitions = info.colToPartitions; + jobColumnList = info.jobColumnList; Review Comment: all list use plural as nam pattern. so jobColumnList -> jobColumns ########## fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java: ########## @@ -661,4 +657,9 @@ public List<Long> getChunkSizes() { public long fetchRowCount() { return 0; } + + @Override + public List<Pair<String, String>> getColumnIndexPairList(Set<String> columns) { + return null; Review Comment: return null is very danger. Api such as containsAll could not handle null. return empty list or remove the default impl? ########## fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java: ########## @@ -159,21 +151,27 @@ public void update(AnalysisInfo analyzedJob, TableIf tableIf) { if (tableIf instanceof OlapTable) { rowCount = analyzedJob.emptyJob ? 0 : tableIf.getRowCount(); } - if (!analyzedJob.emptyJob && analyzedJob.colToPartitions.keySet() - .containsAll(tableIf.getBaseSchema().stream() - .filter(c -> !StatisticsUtil.isUnsupportedType(c.getType())) - .map(Column::getName).collect(Collectors.toSet()))) { + if (analyzedJob.emptyJob) { + return; + } + if (analyzedJob.jobColumnList.containsAll( + tableIf.getColumnIndexPairList( + tableIf.getSchemaAllIndexes(false).stream().map(Column::getName).collect(Collectors.toSet())))) { updatedRows.set(0); newPartitionLoaded.set(false); } if (tableIf instanceof OlapTable) { PartitionInfo partitionInfo = ((OlapTable) tableIf).getPartitionInfo(); - if (partitionInfo != null && analyzedJob.colToPartitions.keySet() - .containsAll(partitionInfo.getPartitionColumns().stream() - .map(Column::getName).collect(Collectors.toSet()))) { + if (partitionInfo != null && analyzedJob.jobColumnList + .containsAll(tableIf.getColumnIndexPairList(partitionInfo.getPartitionColumns().stream() + .map(Column::getName).collect(Collectors.toSet())))) { newPartitionLoaded.set(false); } } } } + + public void convertDeprecatedColStatsToNewVersion() { + deprecatedColNameToColStatsMeta = null; + } Review Comment: not impl? ########## fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java: ########## @@ -1296,34 +1296,20 @@ public boolean needReAnalyzeTable(TableStatsMeta tblStats) { } @Override - public Map<String, Set<String>> findReAnalyzeNeededPartitions() { - TableStatsMeta tableStats = Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(getId()); - Set<String> allPartitions = getPartitionNames().stream().map(this::getPartition) - .filter(Partition::hasData).map(Partition::getName).collect(Collectors.toSet()); - if (tableStats == null) { - Map<String, Set<String>> ret = Maps.newHashMap(); - for (Column col : getSchemaAllIndexes(false)) { - if (StatisticsUtil.isUnsupportedType(col.getType())) { + public List<Pair<String, String>> getColumnIndexPairList(Set<String> columns) { Review Comment: ```suggestion public List<Pair<String, String>> getColumnIndexPairs(Set<String> columns) { ``` ########## fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java: ########## @@ -144,97 +139,32 @@ public List<Column> getSchemaAllIndexes(boolean full) { StatisticsAutoCollector saa = new StatisticsAutoCollector(); List<AnalysisInfo> analysisInfoList = saa.constructAnalysisInfo(new Database(1, "anydb")); Assertions.assertEquals(1, analysisInfoList.size()); - Assertions.assertEquals("c1", analysisInfoList.get(0).colName.split(",")[0]); + Assertions.assertNull(analysisInfoList.get(0).colName); } @Test - public void testGetReAnalyzeRequiredPart0() { + public void testSkipWideTable() { TableIf tableIf = new OlapTable(); new MockUp<OlapTable>() { - @Mock - protected Map<String, Set<String>> findReAnalyzeNeededPartitions() { - Set<String> partitionNames = new HashSet<>(); - partitionNames.add("p1"); - partitionNames.add("p2"); - Map<String, Set<String>> map = new HashMap<>(); - map.put("col1", partitionNames); - return map; - } - - @Mock - public long getRowCount() { - return 100; - } - @Mock public List<Column> getBaseSchema() { return Lists.newArrayList(new Column("col1", Type.INT), new Column("col2", Type.INT)); } - }; - - new MockUp<StatisticsUtil>() { - @Mock - public TableIf findTable(long catalogName, long dbName, long tblName) { - return tableIf; - } - }; - AnalysisInfo analysisInfo = new AnalysisInfoBuilder().setAnalysisMethod(AnalysisMethod.FULL) - .setColToPartitions(new HashMap<>()).setAnalysisType( - AnalysisType.FUNDAMENTALS).setColName("col1").setJobType(JobType.SYSTEM).build(); - new MockUp<AnalysisManager>() { - - int count = 0; - - TableStatsMeta[] tableStatsArr = - new TableStatsMeta[] {new TableStatsMeta(0, analysisInfo, tableIf), - new TableStatsMeta(0, analysisInfo, tableIf), null}; - - { - tableStatsArr[0].updatedRows.addAndGet(100); - tableStatsArr[1].updatedRows.addAndGet(0); - } - - @Mock - public TableStatsMeta findTableStatsStatus(long tblId) { - return tableStatsArr[count++]; - } - }; - - new MockUp<StatisticsAutoCollector>() { - @Mock - public AnalysisInfo getAnalysisJobInfo(AnalysisInfo jobInfo, TableIf table, - Set<String> needRunPartitions) { - return new AnalysisInfoBuilder().build(); - } - }; - StatisticsAutoCollector statisticsAutoCollector = new StatisticsAutoCollector(); - AnalysisInfo analysisInfo2 = new AnalysisInfoBuilder() - .setCatalogId(0) - .setDBId(0) - .setTblId(0).build(); - Assertions.assertNotNull(statisticsAutoCollector.getReAnalyzeRequiredPart(analysisInfo2)); - // uncomment it when updatedRows gets ready - // Assertions.assertNull(statisticsAutoCollector.getReAnalyzeRequiredPart(analysisInfo2)); - Assertions.assertNotNull(statisticsAutoCollector.getReAnalyzeRequiredPart(analysisInfo2)); - } - - @Test - public void testSkipWideTable() { - - TableIf tableIf = new OlapTable(); - new MockUp<OlapTable>() { @Mock - public List<Column> getBaseSchema() { - return Lists.newArrayList(new Column("col1", Type.INT), new Column("col2", Type.INT)); + public List<Pair<String, String>> getColumnIndexPairList(Set<String> columns) { Review Comment: ```suggestion public List<Pair<String, String>> getColumnIndexPairs(Set<String> columns) { ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org