This is an automated email from the ASF dual-hosted git repository. xxyu pushed a commit to branch document in repository https://gitbox.apache.org/repos/asf/kylin.git
commit 3fd1a813e47ff164ea22d21b1981c9b59155cc76 Author: yaqian.zhang <598593...@qq.com> AuthorDate: Tue Jun 16 14:39:23 2020 +0800 Add doc for intersect_count and intersect_value --- website/_docs31/tutorial/sql_reference.cn.md | 45 ++++++++++++++++++++++++++++ website/_docs31/tutorial/sql_reference.md | 45 ++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+) diff --git a/website/_docs31/tutorial/sql_reference.cn.md b/website/_docs31/tutorial/sql_reference.cn.md index 01912b6..2bfe755 100644 --- a/website/_docs31/tutorial/sql_reference.cn.md +++ b/website/_docs31/tutorial/sql_reference.cn.md @@ -47,6 +47,8 @@ Apache Kylin 使用 Apache Calcite 做 SQL 解析和优化。作为一款 OLAP [SUSTRING](#SUBSTRING) [COALESCE](#COALESCE) [STDDEV_SUM](#STDDEV_SUM) +[INTERSECT_COUNT](#INTERSECT_COUNT) +[INTERSECT_VALUE](#INTERSECT_VALUE) ## 数据类型 [数据类型](#datatype) @@ -393,6 +395,49 @@ from ( group by A {% endhighlight %} +## [INTERSECT_COUNT]{#INTERSECT_COUNT} +INTERSECT_COUNT函数用于计算留存率,计算留存率的measure必须经过count_distinct精确去重的预计算。 +例子1: 参考[intersect_count](http://kylin.apache.org/blog/2016/11/28/intersect-count/) +{% highlight Groff markup %} +select city, version, +intersect_count(uuid, dt, array['20161014']) as first_day, +intersect_count(uuid, dt, array['20161015']) as second_day, +intersect_count(uuid, dt, array['20161016']) as third_day, +intersect_count(uuid, dt, array['20161014', '20161015']) as retention_oneday, +intersect_count(uuid, dt, array['20161014', '20161015', '20161016']) as retention_twoday +from visit_log +where dt in ('20161014', '20161015', '20161016') +group by city, version +{% endhighlight %} + +例子2: 参考KYLIN-4314 +{% highlight Groff markup %} +select city, version, +intersect_count(uuid, dt, array['20161014']) as first_day, +intersect_count(uuid, dt, array['20161015']) as second_day, +intersect_count(uuid, dt, array['20161016']) as third_day, +intersect_count(uuid, dt, array['20161014', '20161015']) as retention_oneday, +intersect_count(uuid, dt, array['20161014|20161015', 
'20161016']) as retention_twoday +from visit_log +where dt in ('20161014', '20161015', '20161016') +group by city, version +{% endhighlight %} + +## [INTERSECT_VALUE]{#INTERSECT_VALUE} +INTERSECT_VALUE函数用于返回留存值的bitmap明细,使用它之前必须经过count_distinct精确去重的预计算。 +例子: +{% highlight Groff markup %} +select city, version, +intersect_value(uuid, dt, array['20161014']) as first_day, +intersect_value(uuid, dt, array['20161015']) as second_day, +intersect_value(uuid, dt, array['20161016']) as third_day, +intersect_value(uuid, dt, array['20161014', '20161015']) as retention_oneday, +intersect_value(uuid, dt, array['20161014|20161015', '20161016']) as retention_twoday +from visit_log +where dt in ('20161014', '20161015', '20161016') +group by city, version +{% endhighlight %} + ## 数据类型 {#datatype} | ---------- | ---------- | ---------- | ---------- | -------------------- | diff --git a/website/_docs31/tutorial/sql_reference.md b/website/_docs31/tutorial/sql_reference.md index ffe6032..1e8b18f 100644 --- a/website/_docs31/tutorial/sql_reference.md +++ b/website/_docs31/tutorial/sql_reference.md @@ -46,6 +46,8 @@ Apache Kylin relies on Apache Calcite to parse and optimize the SQL statements. [SUSTRING](#SUBSTRING) [COALESCE](#COALESCE) [STDDEV_SUM](#STDDEV_SUM) +[INTERSECT_COUNT](#INTERSECT_COUNT) +[INTERSECT_VALUE](#INTERSECT_VALUE) ## Data Type @@ -396,6 +398,49 @@ from ( group by A {% endhighlight %} +## [INTERSECT_COUNT]{#INTERSECT_COUNT} +INTERSECT_COUNT is used to calculate the retention rate. The measure used in the calculation must be defined as a precise count distinct measure. 
+Example 1: Refer to [intersect_count](http://kylin.apache.org/blog/2016/11/28/intersect-count/) +{% highlight Groff markup %} +select city, version, +intersect_count(uuid, dt, array['20161014']) as first_day, +intersect_count(uuid, dt, array['20161015']) as second_day, +intersect_count(uuid, dt, array['20161016']) as third_day, +intersect_count(uuid, dt, array['20161014', '20161015']) as retention_oneday, +intersect_count(uuid, dt, array['20161014', '20161015', '20161016']) as retention_twoday +from visit_log +where dt in ('20161014', '20161015', '20161016') +group by city, version +{% endhighlight %} + +Example 2: Refer to KYLIN-4314 +{% highlight Groff markup %} +select city, version, +intersect_count(uuid, dt, array['20161014']) as first_day, +intersect_count(uuid, dt, array['20161015']) as second_day, +intersect_count(uuid, dt, array['20161016']) as third_day, +intersect_count(uuid, dt, array['20161014', '20161015']) as retention_oneday, +intersect_count(uuid, dt, array['20161014|20161015', '20161016']) as retention_twoday +from visit_log +where dt in ('20161014', '20161015', '20161016') +group by city, version +{% endhighlight %} + +## [INTERSECT_VALUE]{#INTERSECT_VALUE} +INTERSECT_VALUE returns the bitmap details of the retained values. The measure used in the calculation must be defined as a precise count distinct measure. +Example: +{% highlight Groff markup %} +select city, version, +intersect_value(uuid, dt, array['20161014']) as first_day, +intersect_value(uuid, dt, array['20161015']) as second_day, +intersect_value(uuid, dt, array['20161016']) as third_day, +intersect_value(uuid, dt, array['20161014', '20161015']) as retention_oneday, +intersect_value(uuid, dt, array['20161014|20161015', '20161016']) as retention_twoday +from visit_log +where dt in ('20161014', '20161015', '20161016') +group by city, version +{% endhighlight %} + ## DATA TYPE {#DATATYPE} | ---------- | ---------- | ---------- | ---------- | -------------------- |