morrySnow commented on code in PR #32792:
URL: https://github.com/apache/doris/pull/32792#discussion_r1539060195
##########
fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java:
##########
@@ -100,52 +102,42 @@ public static TableStatsMeta read(DataInput dataInput)
throws IOException {
String json = Text.readString(dataInput);
TableStatsMeta tableStats = GsonUtils.GSON.fromJson(json,
TableStatsMeta.class);
// Might be null counterintuitively, for compatible
- if (tableStats.colNameToColStatsMeta == null) {
- tableStats.colNameToColStatsMeta = new ConcurrentHashMap<>();
+ if (tableStats.colToColStatsMeta == null) {
+ tableStats.colToColStatsMeta = new ConcurrentHashMap<>();
+ }
+ if (tableStats.deprecatedColNameToColStatsMeta != null) {
+ tableStats.convertDeprecatedColStatsToNewVersion();
}
return tableStats;
}
- public long findColumnLastUpdateTime(String colName) {
- ColStatsMeta colStatsMeta = colNameToColStatsMeta.get(colName);
+ public long findColumnLastUpdateTime(String colName, String indexId) {
+ ColStatsMeta colStatsMeta = colToColStatsMeta.get(Pair.of(colName,
indexId));
if (colStatsMeta == null) {
return 0;
}
return colStatsMeta.updatedTime;
}
- public ColStatsMeta findColumnStatsMeta(String colName) {
- return colNameToColStatsMeta.get(colName);
- }
-
- public void removeColumn(String colName) {
- colNameToColStatsMeta.remove(colName);
+ public ColStatsMeta findColumnStatsMeta(String colName, String indexName) {
Review Comment:
indexName, colName
##########
fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java:
##########
@@ -1088,25 +1044,16 @@ public void registerSysJob(AnalysisInfo jobInfo,
Map<Long, BaseAnalysisTask> tas
analysisJobIdToTaskMap.put(jobInfo.jobId, taskInfos);
}
- // Remove col stats status from TableStats if failed load some col stats
after analyze corresponding column so that
- // we could make sure it would be analyzed again soon if user or system
submit job for that column again.
- public void removeColStatsStatus(long tblId, String colName) {
- TableStatsMeta tableStats = findTableStatsStatus(tblId);
- if (tableStats != null) {
- tableStats.removeColumn(colName);
- }
- }
-
public void removeTableStats(long tableId) {
idToTblStats.remove(tableId);
}
- public ColStatsMeta findColStatsMeta(long tblId, String colName) {
+ public ColStatsMeta findColStatsMeta(long tblId, String colName, String
indexName) {
Review Comment:
change order of args to `tblid, indexName, colName` is better.
##########
fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java:
##########
@@ -64,7 +62,11 @@ public class TableStatsMeta implements Writable {
public long updatedTime;
@SerializedName("colNameToColStatsMeta")
- private ConcurrentMap<String, ColStatsMeta> colNameToColStatsMeta = new
ConcurrentHashMap<>();
+ private ConcurrentMap<String, ColStatsMeta>
deprecatedColNameToColStatsMeta = new ConcurrentHashMap<>();
+
+ @SerializedName("colToColStatsMeta")
+ // <ColumnName, IndexId> -> ColStatsMeta
+ private ConcurrentMap<Pair<String, String>, ColStatsMeta>
colToColStatsMeta = new ConcurrentHashMap<>();
Review Comment:
indexId, columnName is better. why not use index -> columns -> colStats?
##########
fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java:
##########
@@ -75,7 +75,7 @@ public AnalysisInfoBuilder(AnalysisInfo info) {
catalogId = info.catalogId;
dbId = info.dbId;
tblId = info.tblId;
- colToPartitions = info.colToPartitions;
+ jobColumnList = info.jobColumnList;
Review Comment:
all list use plural as nam pattern. so jobColumnList -> jobColumns
##########
fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java:
##########
@@ -661,4 +657,9 @@ public List<Long> getChunkSizes() {
public long fetchRowCount() {
return 0;
}
+
+ @Override
+ public List<Pair<String, String>> getColumnIndexPairList(Set<String>
columns) {
+ return null;
Review Comment:
return null is very danger. Api such as containsAll could not handle null.
return empty list or remove the default impl?
##########
fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java:
##########
@@ -159,21 +151,27 @@ public void update(AnalysisInfo analyzedJob, TableIf
tableIf) {
if (tableIf instanceof OlapTable) {
rowCount = analyzedJob.emptyJob ? 0 : tableIf.getRowCount();
}
- if (!analyzedJob.emptyJob && analyzedJob.colToPartitions.keySet()
- .containsAll(tableIf.getBaseSchema().stream()
- .filter(c ->
!StatisticsUtil.isUnsupportedType(c.getType()))
-
.map(Column::getName).collect(Collectors.toSet()))) {
+ if (analyzedJob.emptyJob) {
+ return;
+ }
+ if (analyzedJob.jobColumnList.containsAll(
+ tableIf.getColumnIndexPairList(
+
tableIf.getSchemaAllIndexes(false).stream().map(Column::getName).collect(Collectors.toSet()))))
{
updatedRows.set(0);
newPartitionLoaded.set(false);
}
if (tableIf instanceof OlapTable) {
PartitionInfo partitionInfo = ((OlapTable)
tableIf).getPartitionInfo();
- if (partitionInfo != null &&
analyzedJob.colToPartitions.keySet()
-
.containsAll(partitionInfo.getPartitionColumns().stream()
-
.map(Column::getName).collect(Collectors.toSet()))) {
+ if (partitionInfo != null && analyzedJob.jobColumnList
+
.containsAll(tableIf.getColumnIndexPairList(partitionInfo.getPartitionColumns().stream()
+
.map(Column::getName).collect(Collectors.toSet())))) {
newPartitionLoaded.set(false);
}
}
}
}
+
+ public void convertDeprecatedColStatsToNewVersion() {
+ deprecatedColNameToColStatsMeta = null;
+ }
Review Comment:
not impl?
##########
fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java:
##########
@@ -1296,34 +1296,20 @@ public boolean needReAnalyzeTable(TableStatsMeta
tblStats) {
}
@Override
- public Map<String, Set<String>> findReAnalyzeNeededPartitions() {
- TableStatsMeta tableStats =
Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(getId());
- Set<String> allPartitions =
getPartitionNames().stream().map(this::getPartition)
-
.filter(Partition::hasData).map(Partition::getName).collect(Collectors.toSet());
- if (tableStats == null) {
- Map<String, Set<String>> ret = Maps.newHashMap();
- for (Column col : getSchemaAllIndexes(false)) {
- if (StatisticsUtil.isUnsupportedType(col.getType())) {
+ public List<Pair<String, String>> getColumnIndexPairList(Set<String>
columns) {
Review Comment:
```suggestion
public List<Pair<String, String>> getColumnIndexPairs(Set<String>
columns) {
```
##########
fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java:
##########
@@ -144,97 +139,32 @@ public List<Column> getSchemaAllIndexes(boolean full) {
StatisticsAutoCollector saa = new StatisticsAutoCollector();
List<AnalysisInfo> analysisInfoList = saa.constructAnalysisInfo(new
Database(1, "anydb"));
Assertions.assertEquals(1, analysisInfoList.size());
- Assertions.assertEquals("c1",
analysisInfoList.get(0).colName.split(",")[0]);
+ Assertions.assertNull(analysisInfoList.get(0).colName);
}
@Test
- public void testGetReAnalyzeRequiredPart0() {
+ public void testSkipWideTable() {
TableIf tableIf = new OlapTable();
new MockUp<OlapTable>() {
- @Mock
- protected Map<String, Set<String>> findReAnalyzeNeededPartitions()
{
- Set<String> partitionNames = new HashSet<>();
- partitionNames.add("p1");
- partitionNames.add("p2");
- Map<String, Set<String>> map = new HashMap<>();
- map.put("col1", partitionNames);
- return map;
- }
-
- @Mock
- public long getRowCount() {
- return 100;
- }
-
@Mock
public List<Column> getBaseSchema() {
return Lists.newArrayList(new Column("col1", Type.INT), new
Column("col2", Type.INT));
}
- };
-
- new MockUp<StatisticsUtil>() {
- @Mock
- public TableIf findTable(long catalogName, long dbName, long
tblName) {
- return tableIf;
- }
- };
- AnalysisInfo analysisInfo = new
AnalysisInfoBuilder().setAnalysisMethod(AnalysisMethod.FULL)
- .setColToPartitions(new HashMap<>()).setAnalysisType(
-
AnalysisType.FUNDAMENTALS).setColName("col1").setJobType(JobType.SYSTEM).build();
- new MockUp<AnalysisManager>() {
-
- int count = 0;
-
- TableStatsMeta[] tableStatsArr =
- new TableStatsMeta[] {new TableStatsMeta(0, analysisInfo,
tableIf),
- new TableStatsMeta(0, analysisInfo, tableIf),
null};
-
- {
- tableStatsArr[0].updatedRows.addAndGet(100);
- tableStatsArr[1].updatedRows.addAndGet(0);
- }
-
- @Mock
- public TableStatsMeta findTableStatsStatus(long tblId) {
- return tableStatsArr[count++];
- }
- };
-
- new MockUp<StatisticsAutoCollector>() {
- @Mock
- public AnalysisInfo getAnalysisJobInfo(AnalysisInfo jobInfo,
TableIf table,
- Set<String> needRunPartitions) {
- return new AnalysisInfoBuilder().build();
- }
- };
- StatisticsAutoCollector statisticsAutoCollector = new
StatisticsAutoCollector();
- AnalysisInfo analysisInfo2 = new AnalysisInfoBuilder()
- .setCatalogId(0)
- .setDBId(0)
- .setTblId(0).build();
-
Assertions.assertNotNull(statisticsAutoCollector.getReAnalyzeRequiredPart(analysisInfo2));
- // uncomment it when updatedRows gets ready
- //
Assertions.assertNull(statisticsAutoCollector.getReAnalyzeRequiredPart(analysisInfo2));
-
Assertions.assertNotNull(statisticsAutoCollector.getReAnalyzeRequiredPart(analysisInfo2));
- }
-
- @Test
- public void testSkipWideTable() {
-
- TableIf tableIf = new OlapTable();
- new MockUp<OlapTable>() {
@Mock
- public List<Column> getBaseSchema() {
- return Lists.newArrayList(new Column("col1", Type.INT), new
Column("col2", Type.INT));
+ public List<Pair<String, String>>
getColumnIndexPairList(Set<String> columns) {
Review Comment:
```suggestion
public List<Pair<String, String>>
getColumnIndexPairs(Set<String> columns) {
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]