This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new c39914c0a0 [feature](partition)add default list partition (#15509)
c39914c0a0 is described below

commit c39914c0a0d31c73a0ef392e0436021e1bc568c4
Author: AlexYue <yj976240...@gmail.com>
AuthorDate: Fri Feb 24 15:24:59 2023 +0800

    [feature](partition)add default list partition (#15509)
    
    This PR implements the default list partition described in the related
    issue #15507. It is similar to Greenplum's default partition: it stores
    all data that does not satisfy the constraints of any other partition
    key. The optimizer never prunes the default partition, which means the
    default partition is scanned every time you select data from a table
    that has one.
    
    Users can either create a table with a default partition or add a
    default partition to an existing table with ALTER TABLE.
    
    ```sql
    PARTITION LIST(key) {
    PARTITION p1 values in (xx,xx),
    PARTITION DEFAULT
    }
    
    ALTER TABLE XXX ADD PARTITION DEFAULT
    ```
    
    Data already stored in the default partition is not migrated
    automatically when a new partition is added with ALTER TABLE, even if
    that data satisfies the new partition key's constraints. Users should
    select the matching rows from the default partition, using the new
    constraints as the predicate, and insert them into the new partition.
    
    ```sql
    insert into tbl select * from tbl partition default where partition_key=xx;
    ```
---
 be/src/exec/tablet_info.cpp                        |  14 ++-
 be/src/exec/tablet_info.h                          |   2 +
 docs/en/docs/data-table/data-partition.md          |  13 +--
 .../Alter/ALTER-TABLE-PARTITION.md                 |   2 +
 docs/zh-CN/docs/data-table/data-partition.md       |  16 ++-
 .../Alter/ALTER-TABLE-PARTITION.md                 |   2 +
 fe/fe-core/src/main/cup/sql_parser.cup             |  13 ++-
 .../apache/doris/catalog/ListPartitionInfo.java    |  11 ++-
 .../apache/doris/catalog/ListPartitionItem.java    |  10 ++
 .../org/apache/doris/catalog/PartitionItem.java    |   4 +
 .../org/apache/doris/catalog/PartitionKey.java     |  36 ++++++-
 .../apache/doris/catalog/RangePartitionItem.java   |   5 +
 .../org/apache/doris/planner/OlapTableSink.java    |   1 +
 .../doris/planner/PartitionPrunerV2Base.java       |  31 +++++-
 gensrc/thrift/Descriptors.thrift                   |   3 +-
 .../test_list_default_multi_col_partition.out      |  18 ++++
 .../list_partition/test_list_default_partition.out |  18 ++++
 .../test_list_partition_data_migration.out         |  30 ++++++
 .../test_list_default_multi_col_partition.groovy   | 110 +++++++++++++++++++++
 .../test_list_default_partition.groovy             | 110 +++++++++++++++++++++
 .../test_list_partition_data_migration.groovy      |  73 ++++++++++++++
 21 files changed, 492 insertions(+), 30 deletions(-)

diff --git a/be/src/exec/tablet_info.cpp b/be/src/exec/tablet_info.cpp
index ca65f5c58b..d2d2e07a5c 100644
--- a/be/src/exec/tablet_info.cpp
+++ b/be/src/exec/tablet_info.cpp
@@ -246,6 +246,9 @@ Status VOlapTablePartitionParam::init() {
                 RETURN_IF_ERROR(_create_partition_keys(
                         keys, &part->in_keys.emplace_back(&_partition_block, 
-1)));
             }
+            if (t_part.__isset.is_default_partition && 
t_part.is_default_partition) {
+                _default_partition = part;
+            }
         }
 
         part->num_buckets = t_part.num_buckets;
@@ -293,14 +296,15 @@ bool VOlapTablePartitionParam::find_partition(BlockRow* 
block_row,
                                               const VOlapTablePartition** 
partition) const {
     auto it = _is_in_partition ? _partitions_map->find(block_row)
                                : _partitions_map->upper_bound(block_row);
-    if (it == _partitions_map->end()) {
-        return false;
+    // for list partition it might result in default partition
+    if (_is_in_partition) {
+        *partition = (it != _partitions_map->end()) ? it->second : 
_default_partition;
+        it = _partitions_map->end();
     }
-    if (_is_in_partition || _part_contains(it->second, block_row)) {
+    if (it != _partitions_map->end() && _part_contains(it->second, block_row)) 
{
         *partition = it->second;
-        return true;
     }
-    return false;
+    return (*partition != nullptr);
 }
 
 uint32_t VOlapTablePartitionParam::find_tablet(BlockRow* block_row,
diff --git a/be/src/exec/tablet_info.h b/be/src/exec/tablet_info.h
index 38244d30ff..9753b71349 100644
--- a/be/src/exec/tablet_info.h
+++ b/be/src/exec/tablet_info.h
@@ -189,6 +189,8 @@ private:
 
     bool _is_in_partition = false;
     uint32_t _mem_usage = 0;
+    // only works when using list partition, the resource is owned by 
_partitions
+    VOlapTablePartition* _default_partition = nullptr;
 };
 
 using TabletLocation = TTabletLocation;
diff --git a/docs/en/docs/data-table/data-partition.md 
b/docs/en/docs/data-table/data-partition.md
index db31d88b41..c6506846ce 100644
--- a/docs/en/docs/data-table/data-partition.md
+++ b/docs/en/docs/data-table/data-partition.md
@@ -110,7 +110,8 @@ PARTITION BY LIST(`city`)
 (
     PARTITION `p_cn` VALUES IN ("Beijing", "Shanghai", "Hong Kong"),
     PARTITION `p_usa` VALUES IN ("New York", "San Francisco"),
-    PARTITION `p_jp` VALUES IN ("Tokyo")
+    PARTITION `p_jp` VALUES IN ("Tokyo"),
+    PARTITION `default`
 )
 DISTRIBUTED BY HASH(`user_id`) BUCKETS 16
 PROPERTIES
@@ -279,6 +280,7 @@ Range partitioning also supports batch partitioning. For 
example, you can create
     p_jp: ("Tokyo")
     ```
 
+
   * If we add Partition p_uk VALUES IN ("London"), the results will be as 
follows:
 
     ```
@@ -303,21 +305,14 @@ Range partitioning also supports batch partitioning. For 
example, you can create
   PARTITION BY LIST(`id`, `city`)
   (
       PARTITION `p1_city` VALUES IN (("1", "Beijing"), ("1", "Shanghai")),
-      PARTITION `p2_city` VALUES IN (("2", "Beijing"), ("2", "Shanghai")),
-      PARTITION `p3_city` VALUES IN (("3", "Beijing"), ("3", "Shanghai"))
-  )
-  ```
-
-  In the above example, we specify `id` (INT type) and `city` (VARCHAR type) 
as the partitioning columns. The partitions are as follows:
 
   ```
-  * p1_city: [("1", "Beijing"), ("1", "Shanghai")]
+  p1_city: [("1", "Beijing"), ("1", "Shanghai")]
   * p2_city: [("2", "Beijing"), ("2", "Shanghai")]
   * p3_city: [("3", "Beijing"), ("3", "Shanghai")]
   ```
 
   When data are imported, the system will compare them with the partition 
values in order, and put the data in their corresponding partitions. Examples 
are as follows:
-
   ```
   Data ---> Partition
   1, Beijing  ---> p1_city
diff --git 
a/docs/en/docs/sql-manual/sql-reference/Data-Definition-Statements/Alter/ALTER-TABLE-PARTITION.md
 
b/docs/en/docs/sql-manual/sql-reference/Data-Definition-Statements/Alter/ALTER-TABLE-PARTITION.md
index 69b81b5c18..5733fda17c 100644
--- 
a/docs/en/docs/sql-manual/sql-reference/Data-Definition-Statements/Alter/ALTER-TABLE-PARTITION.md
+++ 
b/docs/en/docs/sql-manual/sql-reference/Data-Definition-Statements/Alter/ALTER-TABLE-PARTITION.md
@@ -64,6 +64,8 @@ Notice:
 - If the bucketing method is specified, only the number of buckets can be 
modified, not the bucketing method or the bucketing column. If the bucketing 
method is specified but the number of buckets not be specified, the default 
value `10` will be used for bucket number instead of the number specified when 
the table is created. If the number of buckets modified, the bucketing method 
needs to be specified simultaneously.
 - The ["key"="value"] section can set some attributes of the partition, see 
[CREATE TABLE](../Create/CREATE-TABLE)
 - If the user does not explicitly create a partition when creating a table, 
adding a partition by ALTER is not supported
+- If the user uses list partition then they can add default partition to the 
table
+  -  ALTER TABLE ADD PARTITION DEFAULT
 
 2. Delete the partition
 
diff --git a/docs/zh-CN/docs/data-table/data-partition.md 
b/docs/zh-CN/docs/data-table/data-partition.md
index 0d690e2402..9e2080a9e3 100644
--- a/docs/zh-CN/docs/data-table/data-partition.md
+++ b/docs/zh-CN/docs/data-table/data-partition.md
@@ -112,7 +112,8 @@ PARTITION BY LIST(`city`)
 (
     PARTITION `p_cn` VALUES IN ("Beijing", "Shanghai", "Hong Kong"),
     PARTITION `p_usa` VALUES IN ("New York", "San Francisco"),
-    PARTITION `p_jp` VALUES IN ("Tokyo")
+    PARTITION `p_jp` VALUES IN ("Tokyo"),
+    PARTITION `default`
 )
 DISTRIBUTED BY HASH(`user_id`) BUCKETS 16
 PROPERTIES
@@ -284,6 +285,7 @@ Doris 支持两层的数据划分。第一层是 Partition,支持 Range 和 Li
        p_cn: ("Beijing", "Shanghai", "Hong Kong")
        p_usa: ("New York", "San Francisco")
        p_jp: ("Tokyo")
+       default
        ```
 
      - 当我们增加一个分区 p_uk VALUES IN ("London"),分区结果如下:
@@ -293,6 +295,7 @@ Doris 支持两层的数据划分。第一层是 Partition,支持 Range 和 Li
        p_usa: ("New York", "San Francisco")
        p_jp: ("Tokyo")
        p_uk: ("London")
+       default
        ```
 
      - 当我们删除分区 p_jp,分区结果如下:
@@ -301,6 +304,7 @@ Doris 支持两层的数据划分。第一层是 Partition,支持 Range 和 Li
        p_cn: ("Beijing", "Shanghai", "Hong Kong")
        p_usa: ("New York", "San Francisco")
        p_uk: ("London")
+       default
        ```
 
    List分区也支持**多列分区**,示例如下:
@@ -310,7 +314,8 @@ Doris 支持两层的数据划分。第一层是 Partition,支持 Range 和 Li
    (
        PARTITION `p1_city` VALUES IN (("1", "Beijing"), ("1", "Shanghai")),
        PARTITION `p2_city` VALUES IN (("2", "Beijing"), ("2", "Shanghai")),
-       PARTITION `p3_city` VALUES IN (("3", "Beijing"), ("3", "Shanghai"))
+       PARTITION `p3_city` VALUES IN (("3", "Beijing"), ("3", "Shanghai")),
+       PARTITION `default`
    )
    ```
    
@@ -320,6 +325,7 @@ Doris 支持两层的数据划分。第一层是 Partition,支持 Range 和 Li
    * p1_city: [("1", "Beijing"), ("1", "Shanghai")]
    * p2_city: [("2", "Beijing"), ("2", "Shanghai")]
    * p3_city: [("3", "Beijing"), ("3", "Shanghai")]
+   * default:
    ```
    
    当用户插入数据时,分区列值会按照顺序依次比较,最终得到对应的分区。举例如下:
@@ -330,10 +336,12 @@ Doris 支持两层的数据划分。第一层是 Partition,支持 Range 和 Li
    * 1, Shanghai    ---> p1_city
    * 2, Shanghai    ---> p2_city
    * 3, Beijing     ---> p3_city
-   * 1, Tianjin     ---> 无法导入
-   * 4, Beijing     ---> 无法导入
+   * 1, Tianjin     ---> 无法导入,但在存在默认分区时会导入进默认分区
+   * 4, Beijing     ---> 无法导入,但在存在默认分区时会导入进默认分区
    ```
 
+   
List分区支持默认分区,当用户指定默认分区时,所有不满足其他分区条件约束的数据都会存放到默认分区.在查询存在默认分区的表时会默认查询默认分区的数据,默认分区**可能会拖慢查询速度**.
+
 2. **Bucket**
 
    - 如果使用了 Partition,则 `DISTRIBUTED ...` 语句描述的是数据在**各个分区内**的划分规则。如果不使用 
Partition,则描述的是对整个表的数据的划分规则。
diff --git 
a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Definition-Statements/Alter/ALTER-TABLE-PARTITION.md
 
b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Definition-Statements/Alter/ALTER-TABLE-PARTITION.md
index 67ce90fe86..8229ac4a00 100644
--- 
a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Definition-Statements/Alter/ALTER-TABLE-PARTITION.md
+++ 
b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Definition-Statements/Alter/ALTER-TABLE-PARTITION.md
@@ -64,6 +64,8 @@ partition_desc ["key"="value"]
 - 
如指定分桶方式,只能修改分桶数,不可修改分桶方式或分桶列。如果指定了分桶方式,但是没有指定分桶数,则分桶数会使用默认值10,不会使用建表时指定的分桶数。如果要指定分桶数,则必须指定分桶方式。
 - ["key"="value"] 部分可以设置分区的一些属性,具体说明见 [CREATE TABLE](../Create/CREATE-TABLE)
 - 如果建表时用户未显式创建Partition,则不支持通过ALTER的方式增加分区
+- 如果用户使用的是List Partition则可以增加default partition
+  -  ALTER TABLE ADD PARTITION DEFAULT
 
 2. 删除分区
 
diff --git a/fe/fe-core/src/main/cup/sql_parser.cup 
b/fe/fe-core/src/main/cup/sql_parser.cup
index ac621dafea..823c7b89e0 100644
--- a/fe/fe-core/src/main/cup/sql_parser.cup
+++ b/fe/fe-core/src/main/cup/sql_parser.cup
@@ -3128,6 +3128,11 @@ single_partition_desc ::=
         RESULT = new SinglePartitionDesc(ifNotExists, partName, desc, 
properties);
     :}
     /* list partition */
+    | KW_PARTITION opt_if_not_exists:ifNotExists ident:partName 
list_partition_key_desc:desc
+            opt_key_value_map:properties
+    {:
+        RESULT = new SinglePartitionDesc(ifNotExists, partName, desc, 
properties);
+    :}
     | KW_PARTITION opt_if_not_exists:ifNotExists ident:partName KW_VALUES 
KW_IN list_partition_key_desc:desc
         opt_key_value_map:properties
     {:
@@ -3164,7 +3169,13 @@ partition_key_desc ::=
 
 /* list partition PartitionKeyDesc */
 list_partition_key_desc ::=
-    LPAREN list_partition_values_list:keys RPAREN
+    /* empty */
+    {:
+        ArrayList<List<PartitionValue>> keys = new ArrayList();
+        keys.add(new ArrayList());
+        RESULT = PartitionKeyDesc.createIn(keys);
+    :}
+    | LPAREN list_partition_values_list:keys RPAREN
     {:
         RESULT = PartitionKeyDesc.createIn(keys);
     :}
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/ListPartitionInfo.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/ListPartitionInfo.java
index bf57814a2a..c7a6b5e5a1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/ListPartitionInfo.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/ListPartitionInfo.java
@@ -65,12 +65,18 @@ public class ListPartitionInfo extends PartitionInfo {
         // get partition key
         PartitionKeyDesc partitionKeyDesc = desc.getPartitionKeyDesc();
 
+        // we might receive one whole empty values list, we should add default 
partition value for
+        // such occasion
         for (List<PartitionValue> values : partitionKeyDesc.getInValues()) {
+            if (values.isEmpty()) {
+                continue;
+            }
             Preconditions.checkArgument(values.size() == 
partitionColumns.size(),
                     "partition key desc list size[" + values.size() + "] is 
not equal to "
                             + "partition column size[" + 
partitionColumns.size() + "]");
         }
         List<PartitionKey> partitionKeys = new ArrayList<>();
+        boolean isDefaultListPartition = false;
         try {
             for (List<PartitionValue> values : partitionKeyDesc.getInValues()) 
{
                 PartitionKey partitionKey = 
PartitionKey.createListPartitionKey(values, partitionColumns);
@@ -80,11 +86,14 @@ public class ListPartitionInfo extends PartitionInfo {
                             + partitionKeyDesc.toSql() + "] has duplicate item 
[" + partitionKey.toSql() + "].");
                 }
                 partitionKeys.add(partitionKey);
+                isDefaultListPartition = 
partitionKey.isDefaultListPartitionKey();
             }
         } catch (AnalysisException e) {
             throw new DdlException("Invalid list value format: " + 
e.getMessage());
         }
-        return new ListPartitionItem(partitionKeys);
+        ListPartitionItem item = new ListPartitionItem(partitionKeys);
+        item.setDefaultPartition(isDefaultListPartition);
+        return item;
     }
 
     private void checkNewPartitionKey(PartitionKey newKey, PartitionKeyDesc 
keyDesc,
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/ListPartitionItem.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/ListPartitionItem.java
index b2ac6a43b2..04577eb306 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/ListPartitionItem.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/ListPartitionItem.java
@@ -29,6 +29,7 @@ public class ListPartitionItem extends PartitionItem {
     public static ListPartitionItem DUMMY_ITEM = new 
ListPartitionItem(Lists.newArrayList());
 
     private final List<PartitionKey> partitionKeys;
+    private boolean isDefaultPartition = false;
 
     public ListPartitionItem(List<PartitionKey> partitionKeys) {
         this.partitionKeys = partitionKeys;
@@ -48,6 +49,15 @@ public class ListPartitionItem extends PartitionItem {
         return partitionKeys;
     }
 
+    @Override
+    public boolean isDefaultPartition() {
+        return isDefaultPartition;
+    }
+
+    public void setDefaultPartition(boolean isDefaultPartition) {
+        this.isDefaultPartition = isDefaultPartition;
+    }
+
     @Override
     public PartitionItem getIntersect(PartitionItem newItem) {
         List<PartitionKey> newKeys = newItem.getItems();
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/PartitionItem.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/PartitionItem.java
index b62ece4474..578eae340c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/PartitionItem.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/PartitionItem.java
@@ -30,4 +30,8 @@ public abstract class PartitionItem implements 
Comparable<PartitionItem>, Writab
 
     public abstract PartitionItem getIntersect(PartitionItem newItem);
 
+    public boolean isDefaultPartition() {
+        return false;
+    }
+
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/PartitionKey.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/PartitionKey.java
index 6f26d4ff3a..44ff7ab0b2 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/PartitionKey.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/PartitionKey.java
@@ -47,6 +47,7 @@ public class PartitionKey implements 
Comparable<PartitionKey>, Writable {
     private static final Logger LOG = LogManager.getLogger(PartitionKey.class);
     private List<LiteralExpr> keys;
     private List<PrimitiveType> types;
+    private boolean isDefaultListPartitionKey = false;
 
     // constructor for partition prune
     public PartitionKey() {
@@ -54,6 +55,14 @@ public class PartitionKey implements 
Comparable<PartitionKey>, Writable {
         types = Lists.newArrayList();
     }
 
+    public void setDefaultListPartition(boolean isDefaultListPartitionKey) {
+        this.isDefaultListPartitionKey = isDefaultListPartitionKey;
+    }
+
+    public boolean isDefaultListPartitionKey() {
+        return isDefaultListPartitionKey;
+    }
+
     // Factory methods
     public static PartitionKey createInfinityPartitionKey(List<Column> 
columns, boolean isMax)
             throws AnalysisException {
@@ -92,7 +101,9 @@ public class PartitionKey implements 
Comparable<PartitionKey>, Writable {
         // PARTITION BY LIST(k1, k2)
         // (
         //     PARTITION p1 VALUES IN (("1","beijing"), ("1", "shanghai")),
-        //     PARTITION p2 VALUES IN (("2","shanghai"))
+        //     PARTITION p2 VALUES IN (("2","shanghai")),
+        //     PARTITION p3 VALUES IN,
+        //     PARTITION p4,
         // )
         //
         // for single list partition:
@@ -104,18 +115,29 @@ public class PartitionKey implements 
Comparable<PartitionKey>, Writable {
         //     PARTITION p3 VALUES IN ("11", "12", "13", "14", "15"),
         //     PARTITION p4 VALUES IN ("16", "17", "18", "19", "20"),
         //     PARTITION p5 VALUES IN ("21", "22", "23", "24", "25"),
-        //     PARTITION p6 VALUES IN ("26")
+        //     PARTITION p6 VALUES IN ("26"),
+        //     PARTITION p5 VALUES IN,
+        //     PARTITION p7
         // )
         //
-        Preconditions.checkArgument(values.size() == types.size(),
-                "in value size[" + values.size() + "] is not equal to 
partition column size[" + types.size() + "].");
+        // ListPartitionInfo::createAndCheckPartitionItem has checked
+        Preconditions.checkArgument(values.size() <= types.size(),
+                "in value size[" + values.size() + "] is not less than 
partition column size[" + types.size() + "].");
 
         PartitionKey partitionKey = new PartitionKey();
         for (int i = 0; i < values.size(); i++) {
             partitionKey.keys.add(values.get(i).getValue(types.get(i)));
             partitionKey.types.add(types.get(i).getPrimitiveType());
         }
+        if (values.isEmpty()) {
+            for (int i = 0; i < types.size(); ++i) {
+                partitionKey.keys.add(LiteralExpr.createInfinity(types.get(i), 
false));
+                partitionKey.types.add(types.get(i).getPrimitiveType());
+            }
+            partitionKey.setDefaultListPartition(true);
+        }
 
+        Preconditions.checkState(partitionKey.keys.size() == types.size());
         return partitionKey;
     }
 
@@ -246,7 +268,11 @@ public class PartitionKey implements 
Comparable<PartitionKey>, Writable {
         builder.append("]; ");
 
         builder.append("keys: [");
-        builder.append(toString(keys));
+        if (isDefaultListPartitionKey()) {
+            builder.append("default key");
+        } else {
+            builder.append(toString(keys));
+        }
         builder.append("]; ");
 
         return builder.toString();
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/RangePartitionItem.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/RangePartitionItem.java
index db3fe21edc..603f7682a1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/RangePartitionItem.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/RangePartitionItem.java
@@ -40,6 +40,11 @@ public class RangePartitionItem extends PartitionItem {
         return partitionKeyRange;
     }
 
+    @Override
+    public boolean isDefaultPartition() {
+        return false;
+    }
+
     @Override
     public void write(DataOutput out) throws IOException {
         RangeUtils.writeRange(out, partitionKeyRange);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java 
b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java
index 726f62fff9..584788b337 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java
@@ -349,6 +349,7 @@ public class OlapTableSink extends DataSink {
                     
tExprNodes.add(partitionKey.getKeys().get(i).treeToThrift().getNodes().get(0));
                 }
                 tPartition.addToInKeys(tExprNodes);
+                
tPartition.setIsDefaultPartition(partitionItem.isDefaultPartition());
             }
         }
     }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/planner/PartitionPrunerV2Base.java 
b/fe/fe-core/src/main/java/org/apache/doris/planner/PartitionPrunerV2Base.java
index e1772509ee..e760a85d84 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/planner/PartitionPrunerV2Base.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/planner/PartitionPrunerV2Base.java
@@ -43,6 +43,8 @@ public abstract class PartitionPrunerV2Base implements 
PartitionPruner {
     protected final Map<String, ColumnRange> columnNameToRange;
     // used for single column partition
     protected RangeMap<ColumnBound, UniqueId> singleColumnRangeMap = null;
+    // currently only used for list partition
+    private Map.Entry<Long, PartitionItem> defaultPartition;
 
     public PartitionPrunerV2Base(Map<Long, PartitionItem> idToPartitionItem,
             List<Column> partitionColumns,
@@ -50,6 +52,7 @@ public abstract class PartitionPrunerV2Base implements 
PartitionPruner {
         this.idToPartitionItem = idToPartitionItem;
         this.partitionColumns = partitionColumns;
         this.columnNameToRange = columnNameToRange;
+        findDefaultPartition(idToPartitionItem);
     }
 
     // pass singleColumnRangeMap from outside
@@ -61,6 +64,23 @@ public abstract class PartitionPrunerV2Base implements 
PartitionPruner {
         this.partitionColumns = partitionColumns;
         this.columnNameToRange = columnNameToRange;
         this.singleColumnRangeMap = singleColumnRangeMap;
+        findDefaultPartition(idToPartitionItem);
+    }
+
+    private Collection<Long> handleDefaultPartition(Collection<Long> result) {
+        if (this.defaultPartition != null) {
+            Set<Long> r = result.stream().collect(Collectors.toSet());
+            r.add(this.defaultPartition.getKey());
+            return r;
+        }
+        return result;
+    }
+
+    private void findDefaultPartition(Map<Long, PartitionItem> 
idToPartitionItem) {
+        this.defaultPartition = idToPartitionItem.entrySet().stream()
+                                .filter(entry -> 
(entry.getValue().isDefaultPartition()))
+                                .findAny()
+                                .orElse(null);
     }
 
     @Override
@@ -75,13 +95,16 @@ public abstract class PartitionPrunerV2Base implements 
PartitionPruner {
             }
         }
 
+        Collection<Long> result;
         if (partitionColumns.size() == 1) {
-            return pruneSingleColumnPartition(columnToFilters);
+            result = pruneSingleColumnPartition(columnToFilters);
         } else if (partitionColumns.size() > 1) {
-            return pruneMultipleColumnPartition(columnToFilters);
+            result = pruneMultipleColumnPartition(columnToFilters);
         } else {
-            return Lists.newArrayList();
+            result = Lists.newArrayList();
         }
+
+        return handleDefaultPartition(result);
     }
 
     abstract void genSingleColumnRangeMap();
@@ -119,7 +142,7 @@ public abstract class PartitionPrunerV2Base implements 
PartitionPruner {
         FinalFilters finalFilters = 
columnToFilters.get(partitionColumns.get(0));
         switch (finalFilters.type) {
             case CONSTANT_FALSE_FILTERS:
-                return Collections.emptyList();
+                return Collections.emptySet();
             case HAVE_FILTERS:
                 genSingleColumnRangeMap();
                 Preconditions.checkNotNull(singleColumnRangeMap);
diff --git a/gensrc/thrift/Descriptors.thrift b/gensrc/thrift/Descriptors.thrift
index 9cc204cea2..39309d94b8 100644
--- a/gensrc/thrift/Descriptors.thrift
+++ b/gensrc/thrift/Descriptors.thrift
@@ -162,8 +162,9 @@ struct TOlapTablePartition {
     6: optional list<Exprs.TExprNode> start_keys
     7: optional list<Exprs.TExprNode> end_keys
     8: optional list<list<Exprs.TExprNode>> in_keys
-
     9: optional bool is_mutable = true
+    // only used in List Partition
+    10: optional bool is_default_partition;
 }
 
 struct TOlapTablePartitionParam {
diff --git 
a/regression-test/data/partition_p0/list_partition/test_list_default_multi_col_partition.out
 
b/regression-test/data/partition_p0/list_partition/test_list_default_multi_col_partition.out
new file mode 100644
index 0000000000..7c7c9bc616
--- /dev/null
+++ 
b/regression-test/data/partition_p0/list_partition/test_list_default_multi_col_partition.out
@@ -0,0 +1,18 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !sql --
+1      2       1       1       24453.325       1.0     1.0
+10     1       1       1       24453.325       1.0     1.0
+11     1       1       1       24453.325       1.0     1.0
+
+-- !sql --
+10     1       1       1       24453.325       1.0     1.0
+
+-- !sql --
+
+-- !sql --
+10     1       1       1       24453.325       1.0     1.0
+
+-- !sql --
+
+-- !sql --
+
diff --git 
a/regression-test/data/partition_p0/list_partition/test_list_default_partition.out
 
b/regression-test/data/partition_p0/list_partition/test_list_default_partition.out
new file mode 100644
index 0000000000..c936d7d5fc
--- /dev/null
+++ 
b/regression-test/data/partition_p0/list_partition/test_list_default_partition.out
@@ -0,0 +1,18 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !sql --
+1      1       1       1       24453.325       1.0     1.0
+10     1       1       1       24453.325       1.0     1.0
+11     1       1       1       24453.325       1.0     1.0
+
+-- !sql --
+10     1       1       1       24453.325       1.0     1.0
+
+-- !sql --
+
+-- !sql --
+10     1       1       1       24453.325       1.0     1.0
+
+-- !sql --
+
+-- !sql --
+
diff --git 
a/regression-test/data/partition_p0/list_partition/test_list_partition_data_migration.out
 
b/regression-test/data/partition_p0/list_partition/test_list_partition_data_migration.out
new file mode 100644
index 0000000000..9399078547
--- /dev/null
+++ 
b/regression-test/data/partition_p0/list_partition/test_list_partition_data_migration.out
@@ -0,0 +1,30 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !sql --
+1      1       1       1       24453.325       1.0     1.0
+10     1       1       1       24453.325       1.0     1.0
+11     1       1       1       24453.325       1.0     1.0
+
+-- !sql --
+1      1       1       1       24453.325       1.0     1.0
+10     1       1       1       24453.325       1.0     1.0
+10     1       1       1       24453.325       1.0     1.0
+11     1       1       1       24453.325       1.0     1.0
+
+-- !sql --
+1
+
+-- !sql --
+1      1       1       1       24453.325       1.0     1.0
+
+-- !sql --
+10     1       1       1       24453.325       1.0     1.0
+11     1       1       1       24453.325       1.0     1.0
+
+-- !sql --
+11     1       1       1       24453.325       1.0     1.0
+
+-- !sql --
+1      1       1       1       24453.325       1.0     1.0
+10     1       1       1       24453.325       1.0     2.0
+11     1       1       1       24453.325       1.0     1.0
+
diff --git 
a/regression-test/suites/partition_p0/list_partition/test_list_default_multi_col_partition.groovy
 
b/regression-test/suites/partition_p0/list_partition/test_list_default_multi_col_partition.groovy
new file mode 100644
index 0000000000..408609639f
--- /dev/null
+++ 
b/regression-test/suites/partition_p0/list_partition/test_list_default_multi_col_partition.groovy
@@ -0,0 +1,110 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_list_default_multi_col_partition") {
+    sql "drop table if exists list_default_multi_col_par"
+    sql """
+        CREATE TABLE IF NOT EXISTS list_default_multi_col_par ( 
+            k1 tinyint NOT NULL, 
+            k2 smallint NOT NULL, 
+            k3 int NOT NULL, 
+            k4 bigint NOT NULL, 
+            k5 decimal(9, 3) NOT NULL,
+            k8 double max NOT NULL, 
+            k9 float sum NOT NULL ) 
+        AGGREGATE KEY(k1,k2,k3,k4,k5)
+        PARTITION BY LIST(k1,k2) ( 
+            PARTITION p1 VALUES IN (("1","2"),("3","4")), 
+            PARTITION p2 VALUES IN (("5","6"),("7","8")), 
+            PARTITION p3 ) 
+        DISTRIBUTED BY HASH(k1) BUCKETS 5 properties("replication_num" = "1")
+        """
+
+    sql """insert into list_default_multi_col_par values 
(1,2,1,1,24453.325,1,1)"""
+    // the following two rows should be inserted into default partition 
successfully
+    sql """insert into list_default_multi_col_par values 
(10,1,1,1,24453.325,1,1)"""
+    sql """insert into list_default_multi_col_par values 
(11,1,1,1,24453.325,1,1)"""
+    qt_sql """select * from list_default_multi_col_par order by k1"""
+
+    List<List<Object>> result1  = sql "show partitions from 
list_default_multi_col_par"
+    logger.info("${result1}")
+    assertEquals(result1.size(), 3)
+
+    // adding a second partition without VALUES IN (i.e. another default partition) must be rejected
+    try {
+        test {
+        sql """alter table list_default_multi_col_par add partition p5"""
+        exception "errCode = 2, detailMessage = Invalid list value format: 
errCode = 2, detailMessage = The partition key"
+    }
+    } finally {
+    }
+
+    sql """alter table list_default_multi_col_par drop partition p3"""
+
+    sql "drop table list_default_multi_col_par"
+
+
+    // recreate the table without a default partition; NOTE(review): this one uses LIST(k1) only, not (k1,k2) - confirm intended
+    sql """
+        CREATE TABLE IF NOT EXISTS list_default_multi_col_par ( 
+            k1 tinyint NOT NULL, 
+            k2 smallint NOT NULL, 
+            k3 int NOT NULL, 
+            k4 bigint NOT NULL, 
+            k5 decimal(9, 3) NOT NULL,
+            k8 double max NOT NULL, 
+            k9 float sum NOT NULL ) 
+        AGGREGATE KEY(k1,k2,k3,k4,k5)
+        PARTITION BY LIST(k1) ( 
+            PARTITION p1 VALUES IN ("1","2","3","4"), 
+            PARTITION p2 VALUES IN ("5","6","7","8") 
+        ) 
+        DISTRIBUTED BY HASH(k1) BUCKETS 5 properties("replication_num" = "1")
+        """
+    // with no default partition, a key outside p1/p2 is expected to fail the insert in strict mode
+    try {
+        test {
+        sql """insert into list_default_multi_col_par values 
(10,1,1,1,24453.325,1,1)"""
+        exception """Insert has filtered data in strict mode"""
+    }
+    } finally{
+    }
+
+    // add a default partition (no VALUES IN clause) through ALTER TABLE
+    sql """alter table list_default_multi_col_par add partition p3"""
+
+    // the previously rejected row should now be accepted
+    sql """insert into list_default_multi_col_par values 
(10,1,1,1,24453.325,1,1)"""
+
+    qt_sql """select * from list_default_multi_col_par order by k1"""
+    qt_sql """select * from list_default_multi_col_par partition p1 order by 
k1"""
+    qt_sql """select * from list_default_multi_col_par partition p3 order by 
k1"""
+
+    // drop the default partition again and re-query the table
+    sql """alter table list_default_multi_col_par drop partition p3"""
+    qt_sql """select * from list_default_multi_col_par order by k1"""
+
+    // once the default partition is gone, the same insert is expected to fail again
+    try {
+        test {
+        sql """insert into list_default_multi_col_par values 
(10,1,1,1,24453.325,1,1)"""
+        exception """Insert has filtered data in strict mode"""
+    }
+    } finally{
+    }
+    qt_sql """select * from list_default_multi_col_par order by k1"""
+}
diff --git 
a/regression-test/suites/partition_p0/list_partition/test_list_default_partition.groovy
 
b/regression-test/suites/partition_p0/list_partition/test_list_default_partition.groovy
new file mode 100644
index 0000000000..db92043e42
--- /dev/null
+++ 
b/regression-test/suites/partition_p0/list_partition/test_list_default_partition.groovy
@@ -0,0 +1,110 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_list_default_partition") {
+    sql "drop table if exists list_default_par"
+    sql """
+        CREATE TABLE IF NOT EXISTS list_default_par ( 
+            k1 tinyint NOT NULL, 
+            k2 smallint NOT NULL, 
+            k3 int NOT NULL, 
+            k4 bigint NOT NULL, 
+            k5 decimal(9, 3) NOT NULL,
+            k8 double max NOT NULL, 
+            k9 float sum NOT NULL ) 
+        AGGREGATE KEY(k1,k2,k3,k4,k5)
+        PARTITION BY LIST(k1) ( 
+            PARTITION p1 VALUES IN ("1","2","3","4"), 
+            PARTITION p2 VALUES IN ("5","6","7","8"), 
+            PARTITION p3 ) 
+        DISTRIBUTED BY HASH(k1) BUCKETS 5 properties("replication_num" = "1")
+        """
+
+    sql """insert into list_default_par values (1,1,1,1,24453.325,1,1)"""
+    // the following two rows should be inserted into default partition 
successfully
+    sql """insert into list_default_par values (10,1,1,1,24453.325,1,1)"""
+    sql """insert into list_default_par values (11,1,1,1,24453.325,1,1)"""
+    qt_sql """select * from list_default_par order by k1"""
+
+    List<List<Object>> result1  = sql "show partitions from list_default_par"
+    logger.info("${result1}")
+    assertEquals(result1.size(), 3)
+
+    // adding a second partition without VALUES IN (i.e. another default partition) must be rejected
+    try {
+        test {
+        sql """alter table list_default_par add partition p5"""
+        exception "errCode = 2, detailMessage = Invalid list value format: 
errCode = 2, detailMessage = The partition key"
+    }
+    } finally {
+    }
+
+    sql """alter table list_default_par drop partition p3"""
+
+    sql "drop table list_default_par"
+
+
+    // recreate the table, this time without a default partition
+    sql """
+        CREATE TABLE IF NOT EXISTS list_default_par ( 
+            k1 tinyint NOT NULL, 
+            k2 smallint NOT NULL, 
+            k3 int NOT NULL, 
+            k4 bigint NOT NULL, 
+            k5 decimal(9, 3) NOT NULL,
+            k8 double max NOT NULL, 
+            k9 float sum NOT NULL ) 
+        AGGREGATE KEY(k1,k2,k3,k4,k5)
+        PARTITION BY LIST(k1) ( 
+            PARTITION p1 VALUES IN ("1","2","3","4"), 
+            PARTITION p2 VALUES IN ("5","6","7","8") 
+        ) 
+        DISTRIBUTED BY HASH(k1) BUCKETS 5 properties("replication_num" = "1")
+        """
+    // with no default partition, a key outside p1/p2 is expected to fail the insert in strict mode
+    try {
+        test {
+        sql """insert into list_default_par values (10,1,1,1,24453.325,1,1)"""
+        exception """Insert has filtered data in strict mode"""
+    }
+    } finally{
+    }
+
+    // add a default partition (no VALUES IN clause) through ALTER TABLE
+    sql """alter table list_default_par add partition p3"""
+
+    // the previously rejected row should now be accepted
+    sql """insert into list_default_par values (10,1,1,1,24453.325,1,1)"""
+
+    qt_sql """select * from list_default_par order by k1"""
+    qt_sql """select * from list_default_par partition p1 order by k1"""
+    qt_sql """select * from list_default_par partition p3 order by k1"""
+
+    // drop the default partition again and re-query the table
+    sql """alter table list_default_par drop partition p3"""
+    qt_sql """select * from list_default_par order by k1"""
+
+    // once the default partition is gone, the same insert is expected to fail again
+    try {
+        test {
+        sql """insert into list_default_par values (10,1,1,1,24453.325,1,1)"""
+        exception """Insert has filtered data in strict mode"""
+    }
+    } finally{
+    }
+    qt_sql """select * from list_default_par order by k1"""
+}
diff --git 
a/regression-test/suites/partition_p0/list_partition/test_list_partition_data_migration.groovy
 
b/regression-test/suites/partition_p0/list_partition/test_list_partition_data_migration.groovy
new file mode 100644
index 0000000000..1e03922473
--- /dev/null
+++ 
b/regression-test/suites/partition_p0/list_partition/test_list_partition_data_migration.groovy
@@ -0,0 +1,73 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_list_partition_data_migration") {
+    sql "drop table if exists list_par_data_migration"
+    sql """
+        CREATE TABLE IF NOT EXISTS list_par_data_migration ( 
+            k1 tinyint NOT NULL, 
+            k2 smallint NOT NULL, 
+            k3 int NOT NULL, 
+            k4 bigint NOT NULL, 
+            k5 decimal(9, 3) NOT NULL,
+            k8 double max NOT NULL, 
+            k9 float sum NOT NULL ) 
+        AGGREGATE KEY(k1,k2,k3,k4,k5)
+        PARTITION BY LIST(k1) ( 
+            PARTITION p1 VALUES IN ("1","2","3","4"), 
+            PARTITION p2 VALUES IN ("5","6","7","8"), 
+            PARTITION p3 ) 
+        DISTRIBUTED BY HASH(k1) BUCKETS 5 properties("replication_num" = "1")
+        """
+
+    sql """insert into list_par_data_migration values 
(1,1,1,1,24453.325,1,1)"""
+    sql """insert into list_par_data_migration values 
(10,1,1,1,24453.325,1,1)"""
+    sql """insert into list_par_data_migration values 
(11,1,1,1,24453.325,1,1)"""
+    qt_sql """select * from list_par_data_migration order by k1"""
+
+    List<List<Object>> result1  = sql "show partitions from 
list_par_data_migration"
+    logger.info("${result1}")
+    assertEquals(result1.size(), 3)
+
+    // adding a second partition without VALUES IN (i.e. another default partition) must be rejected
+    try {
+        test {
+        sql """alter table list_par_data_migration add partition p5"""
+        exception "errCode = 2, detailMessage = Invalid list value format: 
errCode = 2, detailMessage = The partition key"
+    }
+    } finally {
+    }
+
+    sql """alter table list_par_data_migration add partition p4 values in 
("10")"""
+    sql """insert into list_par_data_migration values 
(10,1,1,1,24453.325,1,1)"""
+    qt_sql """select * from list_par_data_migration order by k1"""
+    qt_sql """select count(*) from list_par_data_migration partition p4"""
+
+    sql """insert into list_par_data_migration select * from 
list_par_data_migration partition p3 where k1=10 order by k1"""
+
+    // the order in which partitions return their rows appears to be nondeterministic,
+    // and there is no way to sort the result by partition, so this check stays disabled:
+    // qt_sql """select * from list_par_data_migration order by k1"""
+    qt_sql """select * from list_par_data_migration partition p1 order by k1"""
+    qt_sql """select * from list_par_data_migration partition p3 order by k1"""
+
+    sql """delete from list_par_data_migration partition p3 where k1=10"""
+
+    qt_sql """select * from list_par_data_migration partition p3 order by k1"""
+
+    qt_sql """select * from list_par_data_migration order by k1"""
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org


Reply via email to