This is an automated email from the ASF dual-hosted git repository.

kosiew pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-python.git


The following commit(s) were added to refs/heads/main by this push:
     new 85a35954 Add docstring examples for Aggregate window functions (#1418)
85a35954 is described below

commit 85a3595444e7946dc4eaa166cb4843bee2bf2f07
Author: Nick <[email protected]>
AuthorDate: Wed Mar 18 22:06:31 2026 -0400

    Add docstring examples for Aggregate window functions (#1418)
    
    * Add docstring examples for Aggregate window functions
    
    Add example usage to docstrings for Aggregate window functions to improve 
documentation.
    
    Co-Authored-By: Claude Opus 4.6 <[email protected]>
    
    * Remove for example for example docstring
    
    * Actually remove all for example calls in favor of docstrings
    
    * Remove builtins
    
    * Make google docstyle
    
    * Fix bad merge leading to duplicate xample
    
    ---------
    
    Co-authored-by: Claude Opus 4.6 <[email protected]>
---
 python/datafusion/functions.py | 127 ++++++++++++++++++++++++++++-------------
 1 file changed, 87 insertions(+), 40 deletions(-)

diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py
index a4933a74..e85d710e 100644
--- a/python/datafusion/functions.py
+++ b/python/datafusion/functions.py
@@ -3323,10 +3323,6 @@ def array_agg(
         filter: If provided, only compute against rows for which the filter is 
True
         order_by: Order the resultant array values. Accepts column names or 
expressions.
 
-    For example::
-
-        df.aggregate([], array_agg(col("a"), order_by="b"))
-
     Examples:
     ---------
     >>> ctx = dfn.SessionContext()
@@ -4047,9 +4043,14 @@ def first_value(
             column names or expressions.
         null_treatment: Assign whether to respect or ignore null values.
 
-    For example::
-
-        df.aggregate([], first_value(col("a"), order_by="ts"))
+    Examples:
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [10, 20, 30]})
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.first_value(dfn.col("a")).alias("v")]
+        ... )
+        >>> result.collect_column("v")[0].as_py()
+        10
     """
     order_by_raw = sort_list_to_raw_sort_list(order_by)
     filter_raw = filter.expr if filter is not None else None
@@ -4084,9 +4085,14 @@ def last_value(
             column names or expressions.
         null_treatment: Assign whether to respect or ignore null values.
 
-    For example::
-
-        df.aggregate([], last_value(col("a"), order_by="ts"))
+    Examples:
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [10, 20, 30]})
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.last_value(dfn.col("a")).alias("v")]
+        ... )
+        >>> result.collect_column("v")[0].as_py()
+        30
     """
     order_by_raw = sort_list_to_raw_sort_list(order_by)
     filter_raw = filter.expr if filter is not None else None
@@ -4123,9 +4129,14 @@ def nth_value(
             column names or expressions.
         null_treatment: Assign whether to respect or ignore null values.
 
-    For example::
-
-        df.aggregate([], nth_value(col("a"), 2, order_by="ts"))
+    Examples:
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [10, 20, 30]})
+        >>> result = df.aggregate(
+        ...     [], [dfn.functions.nth_value(dfn.col("a"), 2).alias("v")]
+        ... )
+        >>> result.collect_column("v")[0].as_py()
+        20
     """
     order_by_raw = sort_list_to_raw_sort_list(order_by)
     filter_raw = filter.expr if filter is not None else None
@@ -4303,9 +4314,14 @@ def lead(
         order_by: Set ordering within the window frame. Accepts
             column names or expressions.
 
-    For example::
-
-        lead(col("b"), order_by="ts")
+    Examples:
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [1, 2, 3]})
+        >>> result = df.select(
+        ...     dfn.col("a"), dfn.functions.lead(dfn.col("a"), shift_offset=1,
+        ...     default_value=0, order_by="a").alias("lead"))
+        >>> result.sort(dfn.col("a")).collect_column("lead").to_pylist()
+        [2, 3, 0]
     """
     if not isinstance(default_value, pa.Scalar) and default_value is not None:
         default_value = pa.scalar(default_value)
@@ -4358,9 +4374,14 @@ def lag(
         order_by: Set ordering within the window frame. Accepts
             column names or expressions.
 
-    For example::
-
-        lag(col("b"), order_by="ts")
+    Examples:
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [1, 2, 3]})
+        >>> result = df.select(
+        ...     dfn.col("a"), dfn.functions.lag(dfn.col("a"), shift_offset=1,
+        ...     default_value=0, order_by="a").alias("lag"))
+        >>> result.sort(dfn.col("a")).collect_column("lag").to_pylist()
+        [0, 1, 2]
     """
     if not isinstance(default_value, pa.Scalar):
         default_value = pa.scalar(default_value)
@@ -4403,9 +4424,13 @@ def row_number(
         order_by: Set ordering within the window frame. Accepts
             column names or expressions.
 
-    For example::
-
-        row_number(order_by="points")
+    Examples:
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [10, 20, 30]})
+        >>> result = df.select(
+        ...     dfn.col("a"), 
dfn.functions.row_number(order_by="a").alias("rn"))
+        >>> result.sort(dfn.col("a")).collect_column("rn").to_pylist()
+        [1, 2, 3]
     """
     partition_by_raw = expr_list_to_raw_expr_list(partition_by)
     order_by_raw = sort_list_to_raw_sort_list(order_by)
@@ -4447,9 +4472,14 @@ def rank(
         order_by: Set ordering within the window frame. Accepts
             column names or expressions.
 
-    For example::
-
-        rank(order_by="points")
+    Examples:
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [10, 10, 20]})
+        >>> result = df.select(
+        ...     dfn.col("a"), dfn.functions.rank(order_by="a").alias("rnk")
+        ... )
+        >>> result.sort(dfn.col("a")).collect_column("rnk").to_pylist()
+        [1, 1, 3]
     """
     partition_by_raw = expr_list_to_raw_expr_list(partition_by)
     order_by_raw = sort_list_to_raw_sort_list(order_by)
@@ -4486,9 +4516,13 @@ def dense_rank(
         order_by: Set ordering within the window frame. Accepts
             column names or expressions.
 
-    For example::
-
-        dense_rank(order_by="points")
+    Examples:
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [10, 10, 20]})
+        >>> result = df.select(
+        ...     dfn.col("a"), 
dfn.functions.dense_rank(order_by="a").alias("dr"))
+        >>> result.sort(dfn.col("a")).collect_column("dr").to_pylist()
+        [1, 1, 2]
     """
     partition_by_raw = expr_list_to_raw_expr_list(partition_by)
     order_by_raw = sort_list_to_raw_sort_list(order_by)
@@ -4526,9 +4560,14 @@ def percent_rank(
         order_by: Set ordering within the window frame. Accepts
             column names or expressions.
 
-    For example::
 
-        percent_rank(order_by="points")
+    Examples:
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [10, 20, 30]})
+        >>> result = df.select(
+        ...     dfn.col("a"), 
dfn.functions.percent_rank(order_by="a").alias("pr"))
+        >>> result.sort(dfn.col("a")).collect_column("pr").to_pylist()
+        [0.0, 0.5, 1.0]
     """
     partition_by_raw = expr_list_to_raw_expr_list(partition_by)
     order_by_raw = sort_list_to_raw_sort_list(order_by)
@@ -4566,9 +4605,17 @@ def cume_dist(
         order_by: Set ordering within the window frame. Accepts
             column names or expressions.
 
-    For example::
-
-        cume_dist(order_by="points")
+    Examples:
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [1., 2., 2., 3.]})
+        >>> result = df.select(
+        ...     dfn.col("a"),
+        ...     dfn.functions.cume_dist(
+        ...         order_by="a"
+        ...     ).alias("cd")
+        ... )
+        >>> result.collect_column("cd").to_pylist()
+        [0.25..., 0.75..., 0.75..., 1.0...]
     """
     partition_by_raw = expr_list_to_raw_expr_list(partition_by)
     order_by_raw = sort_list_to_raw_sort_list(order_by)
@@ -4610,9 +4657,13 @@ def ntile(
         order_by: Set ordering within the window frame. Accepts
             column names or expressions.
 
-    For example::
-
-        ntile(3, order_by="points")
+    Examples:
+        >>> ctx = dfn.SessionContext()
+        >>> df = ctx.from_pydict({"a": [10, 20, 30, 40]})
+        >>> result = df.select(
+        ...     dfn.col("a"), dfn.functions.ntile(2, order_by="a").alias("nt"))
+        >>> result.sort(dfn.col("a")).collect_column("nt").to_pylist()
+        [1, 1, 2, 2]
     """
     partition_by_raw = expr_list_to_raw_expr_list(partition_by)
     order_by_raw = sort_list_to_raw_sort_list(order_by)
@@ -4648,10 +4699,6 @@ def string_agg(
         order_by: Set the ordering of the expression to evaluate. Accepts
             column names or expressions.
 
-    For example::
-
-        df.aggregate([], string_agg(col("a"), ",", order_by="b"))
-
     Examples:
     ---------
     >>> ctx = dfn.SessionContext()


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to