This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris-website.git
The following commit(s) were added to refs/heads/master by this push:
new 69e86174374 update any-value/approx-count-distinct/avg-weighted (#2687)
69e86174374 is described below
commit 69e861743744064c1bc233d11898459211bd9d17
Author: Pxl <[email protected]>
AuthorDate: Fri Aug 1 19:42:51 2025 +0800
update any-value/approx-count-distinct/avg-weighted (#2687)
---
.../sql-functions/aggregate-functions/any-value.md | 274 +++++++++++++++++++-
.../aggregate-functions/approx-count-distinct.md | 277 ++++++++++++++++++++-
.../aggregate-functions/avg-weighted.md | 87 +++++--
.../sql-functions/aggregate-functions/avg.md | 140 +++++++----
.../sql-functions/aggregate-functions/any-value.md | 184 +++++++++++++-
.../aggregate-functions/approx-count-distinct.md | 275 +++++++++++++++++++-
.../aggregate-functions/avg-weighted.md | 87 +++++--
.../sql-functions/aggregate-functions/avg.md | 142 +++++++----
8 files changed, 1297 insertions(+), 169 deletions(-)
diff --git a/docs/sql-manual/sql-functions/aggregate-functions/any-value.md
b/docs/sql-manual/sql-functions/aggregate-functions/any-value.md
index d9c4de99405..44dedda3537 100644
--- a/docs/sql-manual/sql-functions/aggregate-functions/any-value.md
+++ b/docs/sql-manual/sql-functions/aggregate-functions/any-value.md
@@ -17,29 +17,287 @@ Returns any value from the expression or column in the
group. If there is a non-
```sql
ANY_VALUE(<expr>)
+ANY(<expr>)
```
## Parameters
| Parameter | Description |
| -- | -- |
-| `<expr>` | The column or expression to be aggregated. |
+| `<expr>` | The column or expression to be aggregated. Supported types are
String, Date, DateTime, IPv4, IPv6, Bool, TinyInt, SmallInt, Integer, BigInt,
LargeInt, Float, Double, Decimal. |
## Return Value
Returns any non-NULL value if a non-NULL value exists, otherwise returns NULL.
+The return type is consistent with the input expr type.
## Example
```sql
-select id, any_value(name) from cost2 group by id;
+-- setup
+create table t1(
+ k1 int,
+ k_string varchar(100),
+ k_date date,
+ k_datetime datetime,
+ k_ipv4 ipv4,
+ k_ipv6 ipv6,
+ k_bool boolean,
+ k_tinyint tinyint,
+ k_smallint smallint,
+ k_bigint bigint,
+ k_largeint largeint,
+ k_float float,
+ k_double double,
+ k_decimal decimal(10, 2)
+) distributed by hash (k1) buckets 1
+properties ("replication_num"="1");
+insert into t1 values
+ (1, 'apple', '2023-01-01', '2023-01-01 10:00:00', '192.168.1.1', '::1',
true, 10, 100, 1000, 10000, 1.1, 1.11, 10.01),
+ (1, 'banana', '2023-01-02', '2023-01-02 11:00:00', '192.168.1.2',
'2001:db8::1', false, 20, 200, 2000, 20000, 2.2, 2.22, 20.02),
+ (1, 'apple', '2023-01-01', '2023-01-01 10:00:00', '192.168.1.1', '::1',
true, 10, 100, 1000, 10000, 1.1, 1.11, 10.01),
+ (2, 'orange', '2023-02-01', '2023-02-01 12:00:00', '10.0.0.1',
'2001:db8::2', true, 30, 300, 3000, 30000, 3.3, 3.33, 30.03),
+ (2, 'orange', '2023-02-01', '2023-02-01 12:00:00', '10.0.0.1',
'2001:db8::2', false, 40, 400, 4000, 40000, 4.4, 4.44, 40.04),
+ (2, 'grape', '2023-02-02', '2023-02-02 13:00:00', '10.0.0.2',
'2001:db8::3', true, 50, 500, 5000, 50000, 5.5, 5.55, 50.05),
+ (3, null, null, null, null, null, null, null, null, null, null, null,
null, null);
```
+```sql
+select any_value(k_string) from t1;
+```
+
+String type: Get any value from all k_string values, NULL values are excluded.
+
+```text
++---------------------+
+| any_value(k_string) |
++---------------------+
+| orange |
++---------------------+
+```
+
+```sql
+select any_value(k_date) from t1;
+```
+
+Date type: Get any value from all k_date values.
+
+```text
++-------------------+
+| any_value(k_date) |
++-------------------+
+| 2023-01-01 |
++-------------------+
+```
+
+```sql
+select any_value(k_datetime) from t1;
+```
+
+DateTime type: Get any value from all k_datetime values.
+
+```text
++-----------------------+
+| any_value(k_datetime) |
++-----------------------+
+| 2023-01-01 10:00:00 |
++-----------------------+
+```
+
+```sql
+select any_value(k_ipv4) from t1;
+```
+
+IPv4 type: Get any value from all k_ipv4 values.
+
+```text
++-------------------+
+| any_value(k_ipv4) |
++-------------------+
+| 192.168.1.1 |
++-------------------+
+```
+
+```sql
+select any_value(k_ipv6) from t1;
+```
+
+IPv6 type: Get any value from all k_ipv6 values.
+
+```text
++-------------------+
+| any_value(k_ipv6) |
++-------------------+
+| ::1 |
++-------------------+
+```
+
+```sql
+select any_value(k_bool) from t1;
+```
+
+Bool type: Get any value from all k_bool values.
+
+```text
++-------------------+
+| any_value(k_bool) |
++-------------------+
+| 1 |
++-------------------+
+```
+
+```sql
+select any_value(k_tinyint) from t1;
+```
+
+TinyInt type: Get any value from all k_tinyint values.
+
+```text
++----------------------+
+| any_value(k_tinyint) |
++----------------------+
+| 10 |
++----------------------+
+```
+
+```sql
+select any_value(k_smallint) from t1;
+```
+
+SmallInt type: Get any value from all k_smallint values.
+
+```text
++-----------------------+
+| any_value(k_smallint) |
++-----------------------+
+| 100 |
++-----------------------+
+```
+
+```sql
+select any_value(k1) from t1;
+```
+
+Integer type: Get any value from all k1 values.
+
+```text
++---------------+
+| any_value(k1) |
++---------------+
+| 1 |
++---------------+
+```
+
+```sql
+select any_value(k_bigint) from t1;
+```
+
+BigInt type: Get any value from all k_bigint values.
+
+```text
++---------------------+
+| any_value(k_bigint) |
++---------------------+
+| 1000 |
++---------------------+
+```
+
+```sql
+select any_value(k_largeint) from t1;
+```
+
+LargeInt type: Get any value from all k_largeint values.
+
+```text
++-----------------------+
+| any_value(k_largeint) |
++-----------------------+
+| 10000 |
++-----------------------+
+```
+
+```sql
+select any_value(k_float) from t1;
+```
+
+Float type: Get any value from all k_float values.
+
+```text
++--------------------+
+| any_value(k_float) |
++--------------------+
+| 1.1 |
++--------------------+
+```
+
+```sql
+select any_value(k_double) from t1;
+```
+
+Double type: Get any value from all k_double values.
+
+```text
++---------------------+
+| any_value(k_double) |
++---------------------+
+| 1.11 |
++---------------------+
+```
+
+```sql
+select any_value(k_decimal) from t1;
+```
+
+Decimal type: Get any value from all k_decimal values.
+
+```text
++----------------------+
+| any_value(k_decimal) |
++----------------------+
+| 10.01 |
++----------------------+
+```
+
+```sql
+select k1, any_value(k_string) from t1 group by k1;
+```
+
+Group by k1 and get any value from k_string in each group. When all records in
a group are NULL, returns NULL.
+
+```text
++------+---------------------+
+| k1 | any_value(k_string) |
++------+---------------------+
+| 1 | apple |
+| 2 | orange |
+| 3 | NULL |
++------+---------------------+
+```
+
+```sql
+select any(k_string) from t1;
+```
+
+Using alias ANY, same effect as ANY_VALUE.
+
+```text
++---------------+
+| any(k_string) |
++---------------+
+| orange |
++---------------+
+```
+
+```sql
+select any_value(k_string) from t1 where k1 = 999;
+```
+
+When query result is empty, returns NULL.
+
```text
-+------+-------------------+
-| id | any_value(`name`) |
-+------+-------------------+
-| 3 | jack |
-| 2 | jack |
-+------+-------------------+
++---------------------+
+| any_value(k_string) |
++---------------------+
+| NULL |
++---------------------+
```
diff --git
a/docs/sql-manual/sql-functions/aggregate-functions/approx-count-distinct.md
b/docs/sql-manual/sql-functions/aggregate-functions/approx-count-distinct.md
index b8d2854f785..7aef4f13a52 100644
--- a/docs/sql-manual/sql-functions/aggregate-functions/approx-count-distinct.md
+++ b/docs/sql-manual/sql-functions/aggregate-functions/approx-count-distinct.md
@@ -7,35 +7,294 @@
## Description
-The APPROX_COUNT_DISTINCT function is implemented based on the HyperLogLog
algorithm, which uses a fixed size of memory to estimate the column base. The
algorithm is based on the assumption of a null distribution in the tails, and
the accuracy depends on the data distribution. Based on the fixed bucket size
used by Doris, the relative standard error of the algorithm is 0.8125%.
+Returns the number of distinct non-NULL elements.
+This function is implemented based on the HyperLogLog algorithm, which uses a
fixed size of memory to estimate the column base. The algorithm is based on the
assumption of a null distribution in the tails, and the accuracy depends on the
data distribution. Based on the fixed bucket size used by Doris, the relative
standard error of the algorithm is 0.8125%.
For a more detailed and specific analysis, see [related
paper](https://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf)
## Syntax
```sql
APPROX_COUNT_DISTINCT(<expr>)
+NDV(<expr>)
```
## Parameters
| Parameters | Description |
| -- | -- |
-| `<expr>` | The expression needs to be obtained |
+| `<expr>` | The expression to get the value. Supported types are String,
Date, DateTime, IPv4, IPv6, Bool, TinyInt, SmallInt, Integer, BigInt, LargeInt,
Float, Double, Decimal. |
## Return Value
Returns a value of type BIGINT.
-### Example
+## Example
```sql
-select approx_count_distinct(query_id) from log_statis group by datetime;
+-- setup
+create table t1(
+ k1 int,
+ k_string varchar(100),
+ k_date date,
+ k_datetime datetime,
+ k_ipv4 ipv4,
+ k_ipv6 ipv6,
+ k_bool boolean,
+ k_tinyint tinyint,
+ k_smallint smallint,
+ k_bigint bigint,
+ k_largeint largeint,
+ k_float float,
+ k_double double,
+ k_decimal decimal(10, 2)
+) distributed by hash (k1) buckets 1
+properties ("replication_num"="1");
+insert into t1 values
+ (1, 'apple', '2023-01-01', '2023-01-01 10:00:00', '192.168.1.1', '::1',
true, 10, 100, 1000, 10000, 1.1, 1.11, 10.01),
+ (1, 'banana', '2023-01-02', '2023-01-02 11:00:00', '192.168.1.2',
'2001:db8::1', false, 20, 200, 2000, 20000, 2.2, 2.22, 20.02),
+ (1, 'apple', '2023-01-01', '2023-01-01 10:00:00', '192.168.1.1', '::1',
true, 10, 100, 1000, 10000, 1.1, 1.11, 10.01),
+ (2, 'orange', '2023-02-01', '2023-02-01 12:00:00', '10.0.0.1',
'2001:db8::2', true, 30, 300, 3000, 30000, 3.3, 3.33, 30.03),
+ (2, 'orange', '2023-02-01', '2023-02-01 12:00:00', '10.0.0.1',
'2001:db8::2', false, 40, 400, 4000, 40000, 4.4, 4.44, 40.04),
+ (2, 'grape', '2023-02-02', '2023-02-02 13:00:00', '10.0.0.2',
'2001:db8::3', true, 50, 500, 5000, 50000, 5.5, 5.55, 50.05),
+ (3, null, null, null, null, null, null, null, null, null, null, null,
null, null);
```
+```sql
+select approx_count_distinct(k_string) from t1;
+```
+
+String type: Calculate the approximate distinct count of all k_string values,
NULL values are not included in the calculation.
+
+```text
++---------------------------------+
+| approx_count_distinct(k_string) |
++---------------------------------+
+| 4 |
++---------------------------------+
+```
+
+```sql
+select approx_count_distinct(k_date) from t1;
+```
+
+Date type: Calculate the approximate distinct count of all k_date values.
+
+```text
++-------------------------------+
+| approx_count_distinct(k_date) |
++-------------------------------+
+| 4 |
++-------------------------------+
+```
+
+```sql
+select approx_count_distinct(k_datetime) from t1;
+```
+
+DateTime type: Calculate the approximate distinct count of all k_datetime
values.
+
+```text
++-----------------------------------+
+| approx_count_distinct(k_datetime) |
++-----------------------------------+
+| 4 |
++-----------------------------------+
+```
+
+```sql
+select approx_count_distinct(k_ipv4) from t1;
+```
+
+IPv4 type: Calculate the approximate distinct count of all k_ipv4 values.
+
+```text
++-------------------------------+
+| approx_count_distinct(k_ipv4) |
++-------------------------------+
+| 4 |
++-------------------------------+
+```
+
+```sql
+select approx_count_distinct(k_ipv6) from t1;
+```
+
+IPv6 type: Calculate the approximate distinct count of all k_ipv6 values.
+
+```text
++-------------------------------+
+| approx_count_distinct(k_ipv6) |
++-------------------------------+
+| 4 |
++-------------------------------+
+```
+
+```sql
+select approx_count_distinct(k_bool) from t1;
+```
+
+Bool type: Calculate the approximate distinct count of all k_bool values.
+
+```text
++-------------------------------+
+| approx_count_distinct(k_bool) |
++-------------------------------+
+| 2 |
++-------------------------------+
+```
+
+```sql
+select approx_count_distinct(k_tinyint) from t1;
+```
+
+TinyInt type: Calculate the approximate distinct count of all k_tinyint values.
+
+```text
++----------------------------------+
+| approx_count_distinct(k_tinyint) |
++----------------------------------+
+| 5 |
++----------------------------------+
+```
+
+```sql
+select approx_count_distinct(k_smallint) from t1;
+```
+
+SmallInt type: Calculate the approximate distinct count of all k_smallint
values.
+
+```text
++-----------------------------------+
+| approx_count_distinct(k_smallint) |
++-----------------------------------+
+| 5 |
++-----------------------------------+
+```
+
+```sql
+select approx_count_distinct(k1) from t1;
+```
+
+Integer type: Calculate the approximate distinct count of all k1 values.
+
+```text
++---------------------------+
+| approx_count_distinct(k1) |
++---------------------------+
+| 3 |
++---------------------------+
+```
+
+```sql
+select approx_count_distinct(k_bigint) from t1;
+```
+
+BigInt type: Calculate the approximate distinct count of all k_bigint values.
+
+```text
++---------------------------------+
+| approx_count_distinct(k_bigint) |
++---------------------------------+
+| 5 |
++---------------------------------+
+```
+
+```sql
+select approx_count_distinct(k_largeint) from t1;
+```
+
+LargeInt type: Calculate the approximate distinct count of all k_largeint
values.
+
+```text
++-----------------------------------+
+| approx_count_distinct(k_largeint) |
++-----------------------------------+
+| 5 |
++-----------------------------------+
+```
+
+```sql
+select approx_count_distinct(k_float) from t1;
+```
+
+Float type: Calculate the approximate distinct count of all k_float values.
+
+```text
++--------------------------------+
+| approx_count_distinct(k_float) |
++--------------------------------+
+| 5 |
++--------------------------------+
+```
+
+```sql
+select approx_count_distinct(k_double) from t1;
+```
+
+Double type: Calculate the approximate distinct count of all k_double values.
+
+```text
++---------------------------------+
+| approx_count_distinct(k_double) |
++---------------------------------+
+| 5 |
++---------------------------------+
+```
+
+```sql
+select approx_count_distinct(k_decimal) from t1;
+```
+
+Decimal type: Calculate the approximate distinct count of all k_decimal values.
+
+```text
++----------------------------------+
+| approx_count_distinct(k_decimal) |
++----------------------------------+
+| 5 |
++----------------------------------+
+```
+
+```sql
+select k1, approx_count_distinct(k_string) from t1 group by k1;
+```
+
+Group by k1 and calculate the approximate distinct count of k_string in each
group. When all records in the group are NULL, returns 0.
+
+```text
++------+---------------------------------+
+| k1 | approx_count_distinct(k_string) |
++------+---------------------------------+
+| 1 | 2 |
+| 2 | 2 |
+| 3 | 0 |
++------+---------------------------------+
+```
+
+```sql
+select ndv(k_string) from t1;
+```
+
+Using alias NDV has the same effect as APPROX_COUNT_DISTINCT.
+
+```text
++---------------+
+| ndv(k_string) |
++---------------+
+| 4 |
++---------------+
+```
+
+```sql
+select approx_count_distinct(k_string) from t1 where k1 = 999;
+```
+
+When the query result is empty, returns 0.
+
```text
-+-----------------+
-| approx_count_distinct(`query_id`) |
-+-----------------+
-| 17721 |
-+-----------------+
++---------------------------------+
+| approx_count_distinct(k_string) |
++---------------------------------+
+| 0 |
++---------------------------------+
```
diff --git a/docs/sql-manual/sql-functions/aggregate-functions/avg-weighted.md
b/docs/sql-manual/sql-functions/aggregate-functions/avg-weighted.md
index fddbd9c1c13..7168f6b1792 100644
--- a/docs/sql-manual/sql-functions/aggregate-functions/avg-weighted.md
+++ b/docs/sql-manual/sql-functions/aggregate-functions/avg-weighted.md
@@ -7,7 +7,7 @@
## Description
-Calculates the weighted arithmetic mean, i.e., the result is the sum of the
products of corresponding values and weights, divided by the total sum of
weights. If the total sum of weights is 0, it will return NaN.
+Calculates the weighted arithmetic mean, i.e., the result is the sum of the
products of corresponding values and weights, divided by the total sum of
weights. If the total sum of weights is 0, it will return NaN. The calculation
is always performed using Double type.
## Syntax
@@ -19,38 +19,89 @@ AVG_WEIGHTED(<x>, <weight>)
| Parameter | Description |
| -- | -- |
-| `<x>` | It is the numeric expression for which the average needs to be
calculated, and can be a column name, constant, or a complex numeric
expression. |
-| `<weight>` | It is a numeric expression, typically a column name, constant,
or the result of another numeric calculation. |
+| `<x>` | It is the numeric expression for which the average needs to be
calculated, and can be a column name, constant, or a complex numeric
expression. Supported type is Double. |
+| `<weight>` | It is a numeric expression, typically a column name, constant,
or the result of another numeric calculation. Supported type is Double. |
## Return Value
The sum of the products of corresponding values and weights is accumulated,
divided by the total sum of weights. If the total sum of weights equals 0, NaN
will be returned.
+The return type is always Double.
## Example
```sql
-select k1,k2 from test_doris_avg_weighted;
+-- setup
+create table t1(
+ k1 int,
+ k2 int,
+ k3 decimal(10, 2),
+ k4 double,
+ category varchar(50)
+) distributed by hash (k1) buckets 1
+properties ("replication_num"="1");
+insert into t1 values
+ (10, 100, 5.5, 1.0, 'A'),
+ (20, 200, 10.0, 2.0, 'A'),
+ (30, 300, 15.5, 3.0, 'B'),
+ (40, 400, 20.0, 4.0, 'B'),
+ (50, 0, 25.0, 0.0, 'C'),
+ (60, 600, 30.0, 5.0, 'C');
```
+```sql
+select avg_weighted(k2, k1) from t1;
+```
+
+Calculate the weighted average of all records: (100*10 + 200*20 + 300*30 +
400*40 + 0*50 + 600*60) / (10+20+30+40+50+60) ≈ 314.2857
+
```text
-+------+------+
-| k1 | k2 |
-+------+------+
-| 10 | 100 |
-| 20 | 200 |
-| 30 | 300 |
-| 40 | 400 |
-+------+------+
++----------------------+
+| avg_weighted(k2, k1) |
++----------------------+
+| 314.2857142857143 |
++----------------------+
```
```sql
-select avg_weighted(k2,k1) from test_doris_avg_weighted;
+select category, avg_weighted(k2, k1) from t1 group by category;
+```
+
+Group by category and calculate the weighted average for each group.
+
+```text
++----------+----------------------+
+| category | avg_weighted(k2, k1) |
++----------+----------------------+
+| A | 166.66666666666666 |
+| B | 357.14285714285717 |
+| C | 327.27272727272725 |
++----------+----------------------+
```
+```sql
+select avg_weighted(k2, 0) from t1;
+```
+
+When all weights are 0, returns NaN.
+
+```text
++---------------------+
+| avg_weighted(k2, 0) |
++---------------------+
+| NaN |
++---------------------+
+```
+
+```sql
+select avg_weighted(k2, k1) from t1 where k1 > 100;
+```
+
+When the query result is empty, returns NULL.
+
```text
-+--------------------------------------+
-| avg_weighted(k2, cast(k1 as DOUBLE)) |
-+--------------------------------------+
-| 300 |
-+--------------------------------------+
++----------------------+
+| avg_weighted(k2, k1) |
++----------------------+
+| NULL |
++----------------------+
```
diff --git a/docs/sql-manual/sql-functions/aggregate-functions/avg.md
b/docs/sql-manual/sql-functions/aggregate-functions/avg.md
index a342b615e27..d1fce4afe56 100644
--- a/docs/sql-manual/sql-functions/aggregate-functions/avg.md
+++ b/docs/sql-manual/sql-functions/aggregate-functions/avg.md
@@ -19,7 +19,7 @@ AVG([DISTINCT] <expr>)
| Parameter | Description |
| -- | -- |
-| `<expr>` | It is an expression or column, typically a numeric column or an
expression that can be converted to a numeric value. |
+| `<expr>` | It is an expression or column, typically a numeric column or an
expression that can be converted to a numeric value. Supported types are
TinyInt, SmallInt, Integer, BigInt, LargeInt, Double, Decimal. |
| `[DISTINCT]` | It is an optional keyword that indicates the calculation of
the average value after removing duplicate values from expr. |
## Return Value
@@ -32,105 +32,143 @@ For decimal type input, return value will be decimal as
well, other numric type
```sql
-- setup
create table t1(
- k1 int,
- kd decimalv3(10, 5),
- kstr varchar(100),
- kstr_invalid varchar(100),
- knull int,
- kbigint bigint
-) distributed by hash (k1) buckets 1
+ k_tinyint tinyint,
+ k_smallint smallint,
+ k_int int,
+ k_bigint bigint,
+ k_largeint largeint,
+ k_double double,
+ k_decimal decimalv3(10, 5),
+ k_null_int int
+) distributed by hash (k_int) buckets 1
properties ("replication_num"="1");
insert into t1 values
- (1, 222.222, '1.5', 'test', null, 100),
- (2, 444.444, '2.5', '1', null, 100),
- (3, null, '3.5', '2', null, 1);
+ (1, 10, 100, 1000, 10000, 1.1, 222.222, null),
+ (2, 20, 200, 2000, 20000, 2.2, 444.444, null),
+ (3, 30, 300, 3000, 30000, 3.3, null, null);
```
+```sql
+select avg(k_tinyint) from t1;
+```
+
+TinyInt type average calculation, the average of [1,2,3] is 2.
+
+```text
++----------------+
+| avg(k_tinyint) |
++----------------+
+| 2 |
++----------------+
+```
```sql
-select avg(k1) from t1;
+select avg(k_smallint) from t1;
```
-The average of [1,2,3] is 2.
+SmallInt type average calculation, the average of [10,20,30] is 20.
```text
-+---------+
-| avg(k1) |
-+---------+
-| 2 |
-+---------+
++-----------------+
+| avg(k_smallint) |
++-----------------+
+| 20 |
++-----------------+
```
+```sql
+select avg(k_int) from t1;
+```
+
+Integer type average calculation, the average of [100,200,300] is 200.
+
+```text
++------------+
+| avg(k_int) |
++------------+
+| 200 |
++------------+
+```
+
+```sql
+select avg(k_bigint) from t1;
+```
+
+BigInt type average calculation, the average of [1000,2000,3000] is 2000.
+
+```text
++---------------+
+| avg(k_bigint) |
++---------------+
+| 2000 |
++---------------+
+```
```sql
-select avg(kd) from t1;
+select avg(k_largeint) from t1;
```
-The average of [222.222,444.444,null] is 333.333.
+LargeInt type average calculation, the average of [10000,20000,30000] is 20000.
```text
-+-----------+
-| avg(kd) |
-+-----------+
-| 333.33300 |
-+-----------+
++-----------------+
+| avg(k_largeint) |
++-----------------+
+| 20000 |
++-----------------+
```
```sql
-select avg(kstr) from t1;
+select avg(k_double) from t1;
```
-The input Varchar type will be implicitly converted to Double.
-The average of [1.5,2.5,3.5] is 2.5.
+Double type average calculation, the average of [1.1,2.2,3.3] is approximately
2.2.
```text
-+-----------+
-| avg(kstr) |
-+-----------+
-| 2.5 |
-+-----------+
+| avg(k_double) |
++--------------------+
+| 2.1999999999999997 |
```
```sql
-select avg(kstr_invalid) from t1;
+select avg(k_decimal) from t1;
```
-Invalid strings will be converted to NULL values during implicit conversion.
-The average of [null,1,2] is 1.5.
+Decimal type average calculation, the average of [222.222,444.444,null] is
333.333.
```text
-+-------------------+
-| avg(kstr_invalid) |
-+-------------------+
-| 1.5 |
-+-------------------+
++----------------+
+| avg(k_decimal) |
++----------------+
+| 333.33300 |
++----------------+
```
```sql
-select avg(knull) from t1;
+select avg(k_null_int) from t1;
```
For cases where all input data are NULL values, return NULL value.
```text
-+------------+
-| avg(knull) |
-+------------+
-| NULL |
-+------------+
++-----------------+
+| avg(k_null_int) |
++-----------------+
+| NULL |
++-----------------+
```
```sql
-select avg(distinct kbigint) from t1;
+select avg(distinct k_bigint) from t1;
```
-After deduplication, [100,100,1] becomes [100,1], with an average of 50.5.
+Using the DISTINCT keyword for deduplication calculation, [1000,2000,3000]
after deduplication has an average of 2000.
```text
+-----------------------+
-| avg(distinct kbigint) |
+| avg(distinct k_bigint) |
+-----------------------+
-| 50.5 |
+| 2000 |
+-----------------------+
```
diff --git
a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/aggregate-functions/any-value.md
b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/aggregate-functions/any-value.md
index d3fbdccfdd6..b802162b62c 100644
---
a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/aggregate-functions/any-value.md
+++
b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/aggregate-functions/any-value.md
@@ -17,29 +17,203 @@
```sql
ANY_VALUE(<expr>)
+ANY(<expr>)
```
## 参数
| 参数 | 说明 |
| -- | -- |
-| `<expr>` | 要聚合的列或表达式 |
+| `<expr>` | 要聚合的列或表达式,支持类型为
String,Date,DateTime,IPv4,IPv6,Bool,TinyInt,SmallInt,Integer,BigInt,LargeInt,Float,Double,Decimal。
|
## 返回值
如果存在非 NULL 值,返回任意非 NULL 值,否则返回 NULL。
+返回值的类型与输入的 expr 类型一致。
## 举例
```sql
-select id, any_value(name) from cost2 group by id;
+-- setup
+create table t1(
+ k1 int,
+ k_string varchar(100),
+ k_date date,
+ k_datetime datetime,
+ k_ipv4 ipv4,
+ k_ipv6 ipv6,
+ k_bool boolean,
+ k_tinyint tinyint,
+ k_smallint smallint,
+ k_bigint bigint,
+ k_largeint largeint,
+ k_float float,
+ k_double double,
+ k_decimal decimal(10, 2)
+) distributed by hash (k1) buckets 1
+properties ("replication_num"="1");
+insert into t1 values
+ (1, 'apple', '2023-01-01', '2023-01-01 10:00:00', '192.168.1.1', '::1',
true, 10, 100, 1000, 10000, 1.1, 1.11, 10.01),
+ (1, 'banana', '2023-01-02', '2023-01-02 11:00:00', '192.168.1.2',
'2001:db8::1', false, 20, 200, 2000, 20000, 2.2, 2.22, 20.02),
+ (2, 'orange', '2023-02-01', '2023-02-01 12:00:00', '10.0.0.1',
'2001:db8::2', true, 30, 300, 3000, 30000, 3.3, 3.33, 30.03),
+ (2, null, null, null, null, null, null, null, null, null, null, null,
null, null),
+ (3, null, null, null, null, null, null, null, null, null, null, null,
null, null);
```
+```sql
+select k1, any_value(k_string) from t1 group by k1;
+```
+
+String 类型:对于每个分组,返回任意一个非 NULL 值。
+
```text
+------+-------------------+
-| id | any_value(`name`) |
+| k1 | any_value(k_string) |
+------+-------------------+
-| 3 | jack |
-| 2 | jack |
+| 1 | apple |
+| 2 | orange |
+| 3 | NULL |
+------+-------------------+
```
+
+```sql
+select k1, any_value(k_date) from t1 group by k1;
+```
+
+Date 类型:返回任意一个非 NULL 的日期值。
+
+```text
++------+-----------------+
+| k1 | any_value(k_date) |
++------+-----------------+
+| 1 | 2023-01-01 |
+| 2 | 2023-02-01 |
+| 3 | NULL |
++------+-----------------+
+```
+
+```sql
+select k1, any_value(k_datetime) from t1 group by k1;
+```
+
+DateTime 类型:返回任意一个非 NULL 的日期时间值。
+
+```text
++------+---------------------+
+| k1 | any_value(k_datetime) |
++------+---------------------+
+| 1 | 2023-01-01 10:00:00 |
+| 2 | 2023-02-01 12:00:00 |
+| 3 | NULL |
++------+---------------------+
+```
+
+```sql
+select k1, any_value(k_ipv4) from t1 group by k1;
+```
+
+IPv4 类型:返回任意一个非 NULL 的 IPv4 地址值。
+
+```text
++------+-----------------+
+| k1 | any_value(k_ipv4) |
++------+-----------------+
+| 1 | 192.168.1.1 |
+| 2 | 10.0.0.1 |
+| 3 | NULL |
++------+-----------------+
+```
+
+```sql
+select k1, any_value(k_ipv6) from t1 group by k1;
+```
+
+IPv6 类型:返回任意一个非 NULL 的 IPv6 地址值。
+
+```text
++------+-----------------+
+| k1 | any_value(k_ipv6) |
++------+-----------------+
+| 1 | ::1 |
+| 2 | 2001:db8::2 |
+| 3 | NULL |
++------+-----------------+
+```
+
+```sql
+select k1, any_value(k_bool) from t1 group by k1;
+```
+
+Bool 类型:返回任意一个非 NULL 的布尔值。
+
+```text
++------+-----------------+
+| k1 | any_value(k_bool) |
++------+-----------------+
+| 1 | 1 |
+| 2 | 1 |
+| 3 | NULL |
++------+-----------------+
+```
+
+```sql
+select k1, any_value(k_tinyint) from t1 group by k1;
+```
+
+TinyInt 类型:返回任意一个非 NULL 的微小整数值。
+
+```text
++------+--------------------+
+| k1 | any_value(k_tinyint) |
++------+--------------------+
+| 1 | 10 |
+| 2 | 30 |
+| 3 | NULL |
++------+--------------------+
+```
+
+```sql
+select k1, any_value(k_decimal) from t1 group by k1;
+```
+
+Decimal 类型:返回任意一个非 NULL 的高精度小数值。
+
+```text
++------+--------------------+
+| k1 | any_value(k_decimal) |
++------+--------------------+
+| 1 | 10.01 |
+| 2 | 30.03 |
+| 3 | NULL |
++------+--------------------+
+```
+
+```sql
+select any_value(k_string) from t1 where k1 = 3;
+```
+
+当组内所有值都为 NULL 时,返回 NULL。
+
+```text
++-------------------+
+| any_value(k_string) |
++-------------------+
+| NULL |
++-------------------+
+```
+
+```sql
+select k1, any(k_string) from t1 group by k1;
+```
+
+使用别名 ANY 的效果与 ANY_VALUE 相同。
+
+```text
++------+---------------+
+| k1 | any(k_string) |
++------+---------------+
+| 1 | apple |
+| 2 | orange |
+| 3 | NULL |
++------+---------------+
+```
diff --git
a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/aggregate-functions/approx-count-distinct.md
b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/aggregate-functions/approx-count-distinct.md
index 337e169e0ca..2067414d6da 100644
---
a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/aggregate-functions/approx-count-distinct.md
+++
b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/aggregate-functions/approx-count-distinct.md
@@ -7,7 +7,8 @@
## 描述
-APPROX_COUNT_DISTINCT 函数基于 HyperLogLog 算法实现,使用固定大小的内存估算列基数。
+返回非 NULL 的不同元素数量。
+基于 HyperLogLog 算法实现,使用固定大小的内存估算列基数。
该算法基于尾部零分布假设进行计算,具体精确程度取决于数据分布。基于 Doris 使用的固定桶大小,该算法相对标准误差为 0.8125%
更详细具体的分析,详见[相关论文](https://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf)
@@ -15,13 +16,14 @@ APPROX_COUNT_DISTINCT 函数基于 HyperLogLog 算法实现,使用固定大小
```sql
APPROX_COUNT_DISTINCT(<expr>)
+NDV(<expr>)
```
## 参数说明
| 参数 | 说明 |
| -- | -- |
-| `<expr>` | 需要获取值的表达式 |
+| `<expr>` | 需要获取值的表达式,支持类型为
String,Date,DateTime,IPv4,IPv6,Bool,TinyInt,SmallInt,Integer,BigInt,LargeInt,Float,Double,Decimal。|
## 返回值
@@ -30,13 +32,270 @@ APPROX_COUNT_DISTINCT(<expr>)
## 举例
```sql
-select approx_count_distinct(query_id) from log_statis group by datetime;
+-- setup
+create table t1(
+ k1 int,
+ k_string varchar(100),
+ k_date date,
+ k_datetime datetime,
+ k_ipv4 ipv4,
+ k_ipv6 ipv6,
+ k_bool boolean,
+ k_tinyint tinyint,
+ k_smallint smallint,
+ k_bigint bigint,
+ k_largeint largeint,
+ k_float float,
+ k_double double,
+ k_decimal decimal(10, 2)
+) distributed by hash (k1) buckets 1
+properties ("replication_num"="1");
+insert into t1 values
+ (1, 'apple', '2023-01-01', '2023-01-01 10:00:00', '192.168.1.1', '::1',
true, 10, 100, 1000, 10000, 1.1, 1.11, 10.01),
+ (1, 'banana', '2023-01-02', '2023-01-02 11:00:00', '192.168.1.2',
'2001:db8::1', false, 20, 200, 2000, 20000, 2.2, 2.22, 20.02),
+ (1, 'apple', '2023-01-01', '2023-01-01 10:00:00', '192.168.1.1', '::1',
true, 10, 100, 1000, 10000, 1.1, 1.11, 10.01),
+ (2, 'orange', '2023-02-01', '2023-02-01 12:00:00', '10.0.0.1',
'2001:db8::2', true, 30, 300, 3000, 30000, 3.3, 3.33, 30.03),
+ (2, 'orange', '2023-02-01', '2023-02-01 12:00:00', '10.0.0.1',
'2001:db8::2', false, 40, 400, 4000, 40000, 4.4, 4.44, 40.04),
+ (2, 'grape', '2023-02-02', '2023-02-02 13:00:00', '10.0.0.2',
'2001:db8::3', true, 50, 500, 5000, 50000, 5.5, 5.55, 50.05),
+ (3, null, null, null, null, null, null, null, null, null, null, null,
null, null);
```
+```sql
+select approx_count_distinct(k_string) from t1;
+```
+
+String 类型:计算所有 k_string 值的近似去重数量,NULL 值不参与计算。
+
+```text
++---------------------------------+
+| approx_count_distinct(k_string) |
++---------------------------------+
+| 4 |
++---------------------------------+
+```
+
+```sql
+select approx_count_distinct(k_date) from t1;
+```
+
+Date 类型:计算所有 k_date 值的近似去重数量。
+
+```text
++-------------------------------+
+| approx_count_distinct(k_date) |
++-------------------------------+
+| 4 |
++-------------------------------+
+```
+
+```sql
+select approx_count_distinct(k_datetime) from t1;
+```
+
+DateTime 类型:计算所有 k_datetime 值的近似去重数量。
+
+```text
++-----------------------------------+
+| approx_count_distinct(k_datetime) |
++-----------------------------------+
+| 4 |
++-----------------------------------+
+```
+
+```sql
+select approx_count_distinct(k_ipv4) from t1;
+```
+
+IPv4 类型:计算所有 k_ipv4 值的近似去重数量。
+
+```text
++-------------------------------+
+| approx_count_distinct(k_ipv4) |
++-------------------------------+
+| 4 |
++-------------------------------+
+```
+
+```sql
+select approx_count_distinct(k_ipv6) from t1;
+```
+
+IPv6 类型:计算所有 k_ipv6 值的近似去重数量。
+
+```text
++-------------------------------+
+| approx_count_distinct(k_ipv6) |
++-------------------------------+
+| 4 |
++-------------------------------+
+```
+
+```sql
+select approx_count_distinct(k_bool) from t1;
+```
+
+Bool 类型:计算所有 k_bool 值的近似去重数量。
+
+```text
++-------------------------------+
+| approx_count_distinct(k_bool) |
++-------------------------------+
+| 2 |
++-------------------------------+
+```
+
+```sql
+select approx_count_distinct(k_tinyint) from t1;
+```
+
+TinyInt 类型:计算所有 k_tinyint 值的近似去重数量。
+
+```text
++----------------------------------+
+| approx_count_distinct(k_tinyint) |
++----------------------------------+
+| 5 |
++----------------------------------+
+```
+
+```sql
+select approx_count_distinct(k_smallint) from t1;
+```
+
+SmallInt 类型:计算所有 k_smallint 值的近似去重数量。
+
+```text
++-----------------------------------+
+| approx_count_distinct(k_smallint) |
++-----------------------------------+
+| 5 |
++-----------------------------------+
+```
+
+```sql
+select approx_count_distinct(k1) from t1;
+```
+
+Integer 类型:计算所有 k1 值的近似去重数量。
+
+```text
++---------------------------+
+| approx_count_distinct(k1) |
++---------------------------+
+| 3 |
++---------------------------+
+```
+
+```sql
+select approx_count_distinct(k_bigint) from t1;
+```
+
+BigInt 类型:计算所有 k_bigint 值的近似去重数量。
+
+```text
++---------------------------------+
+| approx_count_distinct(k_bigint) |
++---------------------------------+
+| 5 |
++---------------------------------+
+```
+
+```sql
+select approx_count_distinct(k_largeint) from t1;
+```
+
+LargeInt 类型:计算所有 k_largeint 值的近似去重数量。
+
+```text
++-----------------------------------+
+| approx_count_distinct(k_largeint) |
++-----------------------------------+
+| 5 |
++-----------------------------------+
+```
+
+```sql
+select approx_count_distinct(k_float) from t1;
+```
+
+Float 类型:计算所有 k_float 值的近似去重数量。
+
+```text
++--------------------------------+
+| approx_count_distinct(k_float) |
++--------------------------------+
+| 5 |
++--------------------------------+
+```
+
+```sql
+select approx_count_distinct(k_double) from t1;
+```
+
+Double 类型:计算所有 k_double 值的近似去重数量。
+
+```text
++---------------------------------+
+| approx_count_distinct(k_double) |
++---------------------------------+
+| 5 |
++---------------------------------+
+```
+
+```sql
+select approx_count_distinct(k_decimal) from t1;
+```
+
+Decimal 类型:计算所有 k_decimal 值的近似去重数量。
+
+```text
++----------------------------------+
+| approx_count_distinct(k_decimal) |
++----------------------------------+
+| 5 |
++----------------------------------+
+```
+
+```sql
+select k1, approx_count_distinct(k_string) from t1 group by k1;
+```
+
+按 k1 分组,计算每组中 k_string 的近似去重数量。组内记录都为 NULL 时,返回 0。
+
+```text
++------+---------------------------------+
+| k1 | approx_count_distinct(k_string) |
++------+---------------------------------+
+| 1 | 2 |
+| 2 | 2 |
+| 3 | 0 |
++------+---------------------------------+
+```
+
+```sql
+select ndv(k_string) from t1;
+```
+
+使用别名 NDV 的效果与 APPROX_COUNT_DISTINCT 相同。
+
+```text
++---------------+
+| ndv(k_string) |
++---------------+
+| 4 |
++---------------+
+```
+
+```sql
+select approx_count_distinct(k_string) from t1 where k1 = 999;
+```
+
+当查询结果为空时,返回 0。
+
```text
-+-----------------+
-| approx_count_distinct(`query_id`) |
-+-----------------+
-| 17721 |
-+-----------------+
++---------------------------------+
+| approx_count_distinct(k_string) |
++---------------------------------+
+| 0 |
++---------------------------------+
```
diff --git
a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/aggregate-functions/avg-weighted.md
b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/aggregate-functions/avg-weighted.md
index 31bca72ccec..7c2263dc9b0 100644
---
a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/aggregate-functions/avg-weighted.md
+++
b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/aggregate-functions/avg-weighted.md
@@ -7,7 +7,7 @@
## 描述
-计算加权算术平均值,即返回结果为:所有对应数值和权重的乘积相累加,除总的权重和。如果所有的权重和等于 0, 将返回 NaN。
+计算加权算术平均值,即返回结果为:所有对应数值和权重的乘积相累加,除总的权重和。如果所有的权重和等于 0, 将返回 NaN。计算过程中总是用 Double
类型进行计算。
## 语法
@@ -19,38 +19,89 @@ AVG_WEIGHTED(<x>, <weight>)
| 参数 | 说明 |
| -- | -- |
-| `<x>` | 是需要计算平均值的数值表达式,可以是一个列名、常量或复杂的数值表达式 |
-| `<weight>` | 是一个数值表达式,通常可以是一个列名、常量或其他数值计算结果 |
+| `<x>` | 是需要计算平均值的数值表达式,可以是一个列名、常量或复杂的数值表达式,支持类型为 Double。 |
+| `<weight>` | 是一个数值表达式,通常可以是一个列名、常量或其他数值计算结果,支持类型为 Double。 |
## 返回值
所有对应数值和权重的乘积相累加,除总的权重和,如果所有的权重和等于 0, 将返回 NaN。
+返回值的类型总是为 Double 。
## 举例
```sql
-select k1,k2 from test_doris_avg_weighted;
+-- setup
+create table t1(
+ k1 int,
+ k2 int,
+ k3 decimal(10, 2),
+ k4 double,
+ category varchar(50)
+) distributed by hash (k1) buckets 1
+properties ("replication_num"="1");
+insert into t1 values
+ (10, 100, 5.5, 1.0, 'A'),
+ (20, 200, 10.0, 2.0, 'A'),
+ (30, 300, 15.5, 3.0, 'B'),
+ (40, 400, 20.0, 4.0, 'B'),
+ (50, 0, 25.0, 0.0, 'C'),
+ (60, 600, 30.0, 5.0, 'C');
```
+```sql
+select avg_weighted(k2, k1) from t1;
+```
+
+计算所有记录的加权平均值:(100*10 + 200*20 + 300*30 + 400*40 + 0*50 + 600*60) /
(10+20+30+40+50+60) ≈ 314.2857
+
```text
-+------+------+
-| k1 | k2 |
-+------+------+
-| 10 | 100 |
-| 20 | 200 |
-| 30 | 300 |
-| 40 | 400 |
-+------+------+
++----------------------+
+| avg_weighted(k2, k1) |
++----------------------+
+| 314.2857142857143 |
++----------------------+
```
```sql
-select avg_weighted(k2,k1) from test_doris_avg_weighted;
+select category, avg_weighted(k2, k1) from t1 group by category;
+```
+
+按类别分组计算加权平均值。
+
+```text
++----------+----------------------+
+| category | avg_weighted(k2, k1) |
++----------+----------------------+
+| A | 166.66666666666666 |
+| B | 357.14285714285717 |
+| C | 327.27272727272725 |
++----------+----------------------+
```
+```sql
+select avg_weighted(k2, 0) from t1;
+```
+
+当所有权重都为 0 时,返回 NaN。
+
+```text
++---------------------+
+| avg_weighted(k2, 0) |
++---------------------+
+| NaN |
++---------------------+
+```
+
+```sql
+select avg_weighted(k2, k1) from t1 where k1 > 100;
+```
+
+当查询结果为空时,返回 NULL。
+
```text
-+--------------------------------------+
-| avg_weighted(k2, cast(k1 as DOUBLE)) |
-+--------------------------------------+
-| 300 |
-+--------------------------------------+
++----------------------+
+| avg_weighted(k2, k1) |
++----------------------+
+| NULL |
++----------------------+
```
diff --git
a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/aggregate-functions/avg.md
b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/aggregate-functions/avg.md
index df8ff06d4b7..aa1dc31bf4b 100644
---
a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/aggregate-functions/avg.md
+++
b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-functions/aggregate-functions/avg.md
@@ -19,8 +19,8 @@ AVG([DISTINCT] <expr>)
| 参数 | 说明 |
| -- | -- |
-| `<expr>` | 是一个表达式或列,通常是一个数值列或者能够转换为数值的表达式 |
-| `[DISTINCT]` | 是一个可选的关键字,表示对 expr 中的重复值进行去重后再计算平均值 |
+| `<expr>` | 是一个表达式或列,通常是一个数值列或者能够转换为数值的表达式,支持类型为
TinyInt,SmallInt,Integer,BigInt,LargeInt,Double,Decimal 。|
+| `[DISTINCT]` | 是一个可选的关键字,表示对 expr 中的重复值进行去重后再计算平均值。 |
## 返回值
@@ -32,105 +32,143 @@ AVG([DISTINCT] <expr>)
```sql
-- setup
create table t1(
- k1 int,
- kd decimalv3(10, 5),
- kstr varchar(100),
- kstr_invalid varchar(100),
- knull int,
- kbigint bigint
-) distributed by hash (k1) buckets 1
+ k_tinyint tinyint,
+ k_smallint smallint,
+ k_int int,
+ k_bigint bigint,
+ k_largeint largeint,
+ k_double double,
+ k_decimal decimalv3(10, 5),
+ k_null_int int
+) distributed by hash (k_int) buckets 1
properties ("replication_num"="1");
insert into t1 values
- (1, 222.222, '1.5', 'test', null, 100),
- (2, 444.444, '2.5', '1', null, 100),
- (3, null, '3.5', '2', null, 1);
+ (1, 10, 100, 1000, 10000, 1.1, 222.222, null),
+ (2, 20, 200, 2000, 20000, 2.2, 444.444, null),
+ (3, 30, 300, 3000, 30000, 3.3, null, null);
```
+```sql
+select avg(k_tinyint) from t1;
+```
+
+TinyInt 类型的平均值计算,[1,2,3]的平均值为2。
+
+```text
++----------------+
+| avg(k_tinyint) |
++----------------+
+| 2 |
++----------------+
+```
```sql
-select avg(k1) from t1;
+select avg(k_smallint) from t1;
```
-[1,2,3]的平均值为2。
+SmallInt 类型的平均值计算,[10,20,30]的平均值为20。
```text
-+---------+
-| avg(k1) |
-+---------+
-| 2 |
-+---------+
++-----------------+
+| avg(k_smallint) |
++-----------------+
+| 20 |
++-----------------+
```
+```sql
+select avg(k_int) from t1;
+```
+
+Integer 类型的平均值计算,[100,200,300]的平均值为200。
+
+```text
++------------+
+| avg(k_int) |
++------------+
+| 200 |
++------------+
+```
+
+```sql
+select avg(k_bigint) from t1;
+```
+
+BigInt 类型的平均值计算,[1000,2000,3000]的平均值为2000。
+
+```text
++---------------+
+| avg(k_bigint) |
++---------------+
+| 2000 |
++---------------+
+```
```sql
-select avg(kd) from t1;
+select avg(k_largeint) from t1;
```
-[222.222,444.444,null]的平均值为333.333。
+LargeInt 类型的平均值计算,[10000,20000,30000]的平均值为20000。
```text
-+-----------+
-| avg(kd) |
-+-----------+
-| 333.33300 |
-+-----------+
++-----------------+
+| avg(k_largeint) |
++-----------------+
+| 20000 |
++-----------------+
```
```sql
-select avg(kstr) from t1;
+select avg(k_double) from t1;
```
-输入的 Varchar 类型会被隐式转换为 Double。
-[1.5,2.5,3.5]的平均值为2.5。
+Double 类型的平均值计算,[1.1,2.2,3.3]的平均值为2.2。
```text
-+-----------+
-| avg(kstr) |
-+-----------+
-| 2.5 |
-+-----------+
+| avg(k_double) |
++--------------------+
+| 2.1999999999999997 |
```
```sql
-select avg(kstr_invalid) from t1;
+select avg(k_decimal) from t1;
```
-非法的字符串会在隐式转换中变成 NULL 值。
-[null,1,2]的平均值为1.5。
+Decimal 类型的平均值计算,[222.222,444.444,null]的平均值为333.333。
```text
-+-------------------+
-| avg(kstr_invalid) |
-+-------------------+
-| 1.5 |
-+-------------------+
++----------------+
+| avg(k_decimal) |
++----------------+
+| 333.33300 |
++----------------+
```
```sql
-select avg(knull) from t1;
+select avg(k_null_int) from t1;
```
对于输入数据均为 NULL 值的情况,返回 NULL 值。
```text
-+------------+
-| avg(knull) |
-+------------+
-| NULL |
-+------------+
++-----------------+
+| avg(k_null_int) |
++-----------------+
+| NULL |
++-----------------+
```
```sql
-select avg(distinct kbigint) from t1;
+select avg(distinct k_bigint) from t1;
```
-[100,100,1]去重之后为[100,1],平均值为50.5。
+使用 DISTINCT 关键字进行去重计算,[1000,2000,3000]去重后平均值为2000。
```text
+-----------------------+
-| avg(distinct kbigint) |
+| avg(distinct k_bigint) |
+-----------------------+
-| 50.5 |
+| 2000 |
+-----------------------+
```
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]