This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-4.1 by this push:
     new d160af39fea2 [MINOR][PYTHON][DOCS] Fix the doctest of `pivot`
d160af39fea2 is described below

commit d160af39fea29c020e414ca18cb9a8340115110f
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Sun Nov 2 20:24:19 2025 -0800

    [MINOR][PYTHON][DOCS] Fix the doctest of `pivot`
    
    ### What changes were proposed in this pull request?
    Re-enable the doctest of `GroupedData.pivot` (drop the `# doctest: +SKIP` directives and adjust the expected output).
    
    ### Why are the changes needed?
    Fix the doctest of `pivot`, to make sure the example works
    
    ### Does this PR introduce _any_ user-facing change?
    doc-only change
    
    ### How was this patch tested?
    enabled doc-test
    
    ### Was this patch authored or co-authored using generative AI tooling?
    no
    
    Closes #52814 from zhengruifeng/py_test_pivot.
    
    Authored-by: Ruifeng Zheng <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
    (cherry picked from commit 2063c365860bb1620fdd4106ce227089d2691bf0)
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 python/pyspark/sql/group.py | 35 +++++++++++++++++++----------------
 1 file changed, 19 insertions(+), 16 deletions(-)

diff --git a/python/pyspark/sql/group.py b/python/pyspark/sql/group.py
index 05021aabb50f..939f7ff6b610 100644
--- a/python/pyspark/sql/group.py
+++ b/python/pyspark/sql/group.py
@@ -456,7 +456,7 @@ class GroupedData(PandasGroupedOpsMixin):
 
         Examples
         --------
-        >>> from pyspark.sql import Row
+        >>> from pyspark.sql import Row, functions as sf
         >>> df1 = spark.createDataFrame([
         ...     Row(course="dotNET", year=2012, earnings=10000),
         ...     Row(course="Java", year=2012, earnings=20000),
@@ -474,28 +474,30 @@ class GroupedData(PandasGroupedOpsMixin):
         |dotNET|2013|   48000|
         |  Java|2013|   30000|
         +------+----+--------+
+
         >>> df2 = spark.createDataFrame([
         ...     Row(training="expert", sales=Row(course="dotNET", year=2012, earnings=10000)),
         ...     Row(training="junior", sales=Row(course="Java", year=2012, earnings=20000)),
         ...     Row(training="expert", sales=Row(course="dotNET", year=2012, earnings=5000)),
         ...     Row(training="junior", sales=Row(course="dotNET", year=2013, earnings=48000)),
         ...     Row(training="expert", sales=Row(course="Java", year=2013, earnings=30000)),
-        ... ])  # doctest: +SKIP
-        >>> df2.show()  # doctest: +SKIP
-        +--------+--------------------+
-        |training|               sales|
-        +--------+--------------------+
-        |  expert|{dotNET, 2012, 10...|
-        |  junior| {Java, 2012, 20000}|
-        |  expert|{dotNET, 2012, 5000}|
-        |  junior|{dotNET, 2013, 48...|
-        |  expert| {Java, 2013, 30000}|
-        +--------+--------------------+
+        ... ])
+        >>> df2.show(truncate=False)
+        +--------+---------------------+
+        |training|sales                |
+        +--------+---------------------+
+        |expert  |{dotNET, 2012, 10000}|
+        |junior  |{Java, 2012, 20000}  |
+        |expert  |{dotNET, 2012, 5000} |
+        |junior  |{dotNET, 2013, 48000}|
+        |expert  |{Java, 2013, 30000}  |
+        +--------+---------------------+
 
         Compute the sum of earnings for each year by course with each course as a separate column
 
         >>> df1.groupBy("year").pivot(
-        ...     "course", ["dotNET", "Java"]).sum("earnings").sort("year").show()
+        ...    "course", ["dotNET", "Java"]
+        ... ).sum("earnings").sort("year").show()
         +----+------+-----+
         |year|dotNET| Java|
         +----+------+-----+
@@ -512,9 +514,10 @@ class GroupedData(PandasGroupedOpsMixin):
         |2012|20000| 15000|
         |2013|30000| 48000|
         +----+-----+------+
-        >>> df2.groupBy(
-        ...     "sales.year").pivot("sales.course").sum("sales.earnings").sort("year").show()
-        ... # doctest: +SKIP
+
+        >>> df2.groupBy("sales.year").pivot(
+        ...     "sales.course"
+        ... ).agg(sf.sum("sales.earnings")).sort("year").show()
         +----+-----+------+
         |year| Java|dotNET|
         +----+-----+------+


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to