This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new d278f400d4 [enhancement](show data skew) Support show avg_row_count 
for data skew of one table (#10790)
d278f400d4 is described below

commit d278f400d459caff75936588c82b07d52586d24f
Author: caiconghui <55968745+caicong...@users.noreply.github.com>
AuthorDate: Wed Jul 13 08:27:20 2022 +0800

    [enhancement](show data skew) Support show avg_row_count for data skew of 
one table (#10790)
---
 docs/.vuepress/sidebar/en/docs.js                  |  1 +
 docs/.vuepress/sidebar/zh-CN/docs.js               |  1 +
 .../Show-Statements/SHOW-DATA-SKEW.md              | 56 ++++++++++++++++++++++
 .../Show-Statements/SHOW-DATA-SKEW.md              | 56 ++++++++++++++++++++++
 .../apache/doris/analysis/ShowDataSkewStmt.java    |  2 +-
 .../org/apache/doris/catalog/MetadataViewer.java   | 21 ++++----
 .../main/java/org/apache/doris/catalog/Tablet.java |  6 +++
 .../doris/common/proc/TabletHealthProcDir.java     | 10 ++--
 .../doris/analysis/AdminShowReplicaTest.java       |  4 +-
 9 files changed, 141 insertions(+), 16 deletions(-)

diff --git a/docs/.vuepress/sidebar/en/docs.js 
b/docs/.vuepress/sidebar/en/docs.js
index 8901be9be2..111470a313 100644
--- a/docs/.vuepress/sidebar/en/docs.js
+++ b/docs/.vuepress/sidebar/en/docs.js
@@ -750,6 +750,7 @@ module.exports = [
               "SHOW-CREATE-TABLE",
               "SHOW-CREATE-MATERIALIZED-VIEW",
               "SHOW-DATA",
+              "SHOW-DATA-SKEW",
               "SHOW-DATABASE-ID",
               "SHOW-DATABASES",
               "SHOW-DELETE",
diff --git a/docs/.vuepress/sidebar/zh-CN/docs.js 
b/docs/.vuepress/sidebar/zh-CN/docs.js
index 428e9e65bd..a4ef26fc7d 100644
--- a/docs/.vuepress/sidebar/zh-CN/docs.js
+++ b/docs/.vuepress/sidebar/zh-CN/docs.js
@@ -750,6 +750,7 @@ module.exports = [
               "SHOW-CREATE-TABLE",
               "SHOW-CREATE-MATERIALIZED-VIEW",
               "SHOW-DATA",
+              "SHOW-DATA-SKEW",
               "SHOW-DATABASE-ID",
               "SHOW-DATABASES",
               "SHOW-DELETE",
diff --git 
a/docs/en/docs/sql-manual/sql-reference/Show-Statements/SHOW-DATA-SKEW.md 
b/docs/en/docs/sql-manual/sql-reference/Show-Statements/SHOW-DATA-SKEW.md
new file mode 100644
index 0000000000..c54587eb72
--- /dev/null
+++ b/docs/en/docs/sql-manual/sql-reference/Show-Statements/SHOW-DATA-SKEW.md
@@ -0,0 +1,56 @@
+---
+{
+"title": "SHOW DATA SKEW",
+"language": "en"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## SHOW-DATA-SKEW
+
+### Name
+
+SHOW DATA SKEW
+
+### Description
+
+    This statement is used to view the data skew of a table or a partition.
+
+    Syntax:
+
+        SHOW DATA SKEW FROM [db_name.]tbl_name [PARTITION (p1)];
+
+       Description:
+
+               1. Exactly one partition must be specified. For non-partitioned 
tables, the partition name is the same as the table name.
+               2. The result will show row count and data volume of each 
bucket under the specified partition, and the proportion of the data volume of 
each bucket in the total data volume.
+
+### Example
+
+    1. View the data skew of the table
+
+        SHOW DATA SKEW FROM db1.test PARTITION(p1);
+
+### Keywords
+
+    SHOW, DATA, SKEW
+
+### Best Practice
diff --git 
a/docs/zh-CN/docs/sql-manual/sql-reference/Show-Statements/SHOW-DATA-SKEW.md 
b/docs/zh-CN/docs/sql-manual/sql-reference/Show-Statements/SHOW-DATA-SKEW.md
new file mode 100644
index 0000000000..fb1de1c6be
--- /dev/null
+++ b/docs/zh-CN/docs/sql-manual/sql-reference/Show-Statements/SHOW-DATA-SKEW.md
@@ -0,0 +1,56 @@
+---
+{
+"title": "SHOW-DATA-SKEW",
+"language": "zh-CN"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## SHOW-DATA-SKEW
+
+### Name
+
+SHOW DATA SKEW
+
+### Description
+
+    该语句用于查看表或某个分区的数据倾斜情况。
+
+    语法:
+
+        SHOW DATA SKEW FROM [db_name.]tbl_name PARTITION (partition_name);
+
+    说明:
+
+        1. 必须指定且仅指定一个分区。对于非分区表,分区名称同表名。
+        2. 结果将展示指定分区下,各个分桶的数据行数,数据量,以及每个分桶数据量在总数据量中的占比。
+
+### Example
+
+    1. 查看表的数据倾斜情况
+
+        SHOW DATA SKEW FROM db1.test PARTITION(p1);
+
+### Keywords
+
+    SHOW,DATA,SKEW
+
+### Best Practice
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowDataSkewStmt.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowDataSkewStmt.java
index 21d7a5c2a3..49cb093f05 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowDataSkewStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowDataSkewStmt.java
@@ -34,7 +34,7 @@ import com.google.common.collect.ImmutableList;
 // show data skew from tbl [partition(p1, p2, ...)]
 public class ShowDataSkewStmt extends ShowStmt {
     public static final ImmutableList<String> TITLE_NAMES = new 
ImmutableList.Builder<String>()
-            .add("BucketIdx").add("AvgDataSize")
+            .add("BucketIdx").add("AvgRowCount").add("AvgDataSize")
             .add("Graph").add("Percent")
             .build();
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/MetadataViewer.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/MetadataViewer.java
index 56d6411f4a..c1aa7000c7 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/MetadataViewer.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/MetadataViewer.java
@@ -282,9 +282,11 @@ public class MetadataViewer {
                 break;
             }
             DistributionInfo distributionInfo = 
partition.getDistributionInfo();
-            List<Long> tabletInfos = 
Lists.newArrayListWithCapacity(distributionInfo.getBucketNum());
+            List<Long> rowCountTabletInfos = 
Lists.newArrayListWithCapacity(distributionInfo.getBucketNum());
+            List<Long> dataSizeTabletInfos = 
Lists.newArrayListWithCapacity(distributionInfo.getBucketNum());
             for (long i = 0; i < distributionInfo.getBucketNum(); i++) {
-                tabletInfos.add(0L);
+                rowCountTabletInfos.add(0L);
+                dataSizeTabletInfos.add(0L);
             }
 
             long totalSize = 0;
@@ -292,20 +294,23 @@ public class MetadataViewer {
                 List<Long> tabletIds = mIndex.getTabletIdsInOrder();
                 for (int i = 0; i < tabletIds.size(); i++) {
                     Tablet tablet = mIndex.getTablet(tabletIds.get(i));
+                    long rowCount = tablet.getRowCount(true);
                     long dataSize = tablet.getDataSize(true);
-                    tabletInfos.set(i, tabletInfos.get(i) + dataSize);
+                    rowCountTabletInfos.set(i, rowCountTabletInfos.get(i) + 
rowCount);
+                    dataSizeTabletInfos.set(i, dataSizeTabletInfos.get(i) + 
dataSize);
                     totalSize += dataSize;
                 }
             }
 
             // graph
-            for (int i = 0; i < tabletInfos.size(); i++) {
+            for (int i = 0; i < distributionInfo.getBucketNum(); i++) {
                 List<String> row = Lists.newArrayList();
                 row.add(String.valueOf(i));
-                row.add(tabletInfos.get(i).toString());
-                row.add(graph(tabletInfos.get(i), totalSize));
-                row.add(totalSize == tabletInfos.get(i)
-                        ? "100.00%" : df.format((double) tabletInfos.get(i) / 
totalSize));
+                row.add(rowCountTabletInfos.get(i).toString());
+                row.add(dataSizeTabletInfos.get(i).toString());
+                row.add(graph(dataSizeTabletInfos.get(i), totalSize));
+                row.add(totalSize == dataSizeTabletInfos.get(i)
+                        ? "100.00%" : df.format((double) 
dataSizeTabletInfos.get(i) / totalSize));
                 result.add(row);
             }
         } finally {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java
index 5642455c4d..007c5410f9 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java
@@ -394,6 +394,12 @@ public class Tablet extends MetaObject implements Writable 
{
         return singleReplica ? 
Double.valueOf(s.average().orElse(0)).longValue() : s.sum();
     }
 
+    public long getRowCount(boolean singleReplica) {
+        LongStream s = replicas.stream().filter(r -> r.getState() == 
ReplicaState.NORMAL)
+                .mapToLong(Replica::getRowCount);
+        return singleReplica ? 
Double.valueOf(s.average().orElse(0)).longValue() : s.sum();
+    }
+
     /**
      * A replica is healthy only if
      * 1. the backend is available
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/common/proc/TabletHealthProcDir.java
 
b/fe/fe-core/src/main/java/org/apache/doris/common/proc/TabletHealthProcDir.java
index c88377924b..fd64d76be9 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/common/proc/TabletHealthProcDir.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/common/proc/TabletHealthProcDir.java
@@ -290,11 +290,11 @@ public class TabletHealthProcDir implements 
ProcDirInterface {
                 this.colocateMismatchNum += other.colocateMismatchNum;
                 this.colocateRedundantNum += other.colocateRedundantNum;
                 this.needFurtherRepairNum += other.needFurtherRepairNum;
-                this.unrecoverableNum += unrecoverableNum;
-                this.replicaCompactionTooSlowNum += 
replicaCompactionTooSlowNum;
-                this.inconsistentNum += inconsistentNum;
-                this.oversizeNum += oversizeNum;
-                this.cloningNum += cloningNum;
+                this.unrecoverableNum += other.unrecoverableNum;
+                this.replicaCompactionTooSlowNum += 
other.replicaCompactionTooSlowNum;
+                this.inconsistentNum += other.inconsistentNum;
+                this.oversizeNum += other.oversizeNum;
+                this.cloningNum += other.cloningNum;
                 return this;
             } else if (other.summary) {
                 return other.reduce(this);
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/analysis/AdminShowReplicaTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/analysis/AdminShowReplicaTest.java
index 7eab559cd4..81dfc8e462 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/analysis/AdminShowReplicaTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/analysis/AdminShowReplicaTest.java
@@ -64,7 +64,7 @@ public class AdminShowReplicaTest extends TestWithFeService {
         executor = new ShowExecutor(connectContext, skewStmt);
         resultSet = executor.execute();
         Assert.assertEquals(10, resultSet.getResultRows().size());
-        Assert.assertEquals(4, resultSet.getResultRows().get(0).size());
+        Assert.assertEquals(5, resultSet.getResultRows().get(0).size());
 
         // update tablets' data size and row count
         Database db = 
Catalog.getCurrentInternalCatalog().getDbOrAnalysisException("default_cluster:test");
@@ -88,7 +88,7 @@ public class AdminShowReplicaTest extends TestWithFeService {
         resultSet = executor.execute();
         Assert.assertEquals(10, resultSet.getResultRows().size());
         Assert.assertEquals("4", resultSet.getResultRows().get(4).get(0));
-        Assert.assertEquals(4, resultSet.getResultRows().get(0).size());
+        Assert.assertEquals(5, resultSet.getResultRows().get(0).size());
     }
 
     @Test


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to