This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 779a526a015d [SPARK-50396][PYTHON][DOCS] Refine the docstring for datetime functions - part 3
779a526a015d is described below
commit 779a526a015d6e03dd69443a28bf5d17837bd93e
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Sat Nov 23 11:33:16 2024 +0100
[SPARK-50396][PYTHON][DOCS] Refine the docstring for datetime functions - part 3
### What changes were proposed in this pull request?
Refine the docstrings for datetime functions (part 3).
### Why are the changes needed?
To improve the documentation and test coverage.
### Does this PR introduce _any_ user-facing change?
No, this is a documentation-only change.
### How was this patch tested?
New doctests.
### Was this patch authored or co-authored using generative AI tooling?
no
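For reference, the refined examples standardize on `show()`-based doctests with an explicit `sf` import. A minimal sketch of that style, adapted from the examples in this diff (not part of the patch; assumes a local SparkSession):

    import pyspark.sql.functions as sf
    from pyspark.sql import SparkSession

    spark = SparkSession.builder.getOrCreate()

    # Pin the session time zone so unix-timestamp values are deterministic.
    spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles")

    # A format column lets each row carry its own datetime pattern.
    df = spark.createDataFrame(
        [('2015-04-08', 'yyyy-MM-dd'), ('2025+01+09', 'yyyy+MM+dd')], ['dt', 'fmt'])
    df.select('*', sf.to_unix_timestamp('dt', 'fmt')).show()
    df.select('*', sf.to_timestamp_ltz('dt', 'fmt')).show()

    spark.conf.unset("spark.sql.session.timeZone")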
Closes #48935 from zhengruifeng/py_doc_8.
Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
---
python/pyspark/sql/functions/builtin.py | 494 ++++++++++++++++++++++++++------
1 file changed, 410 insertions(+), 84 deletions(-)
diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py
index d2873a388617..68b51440278c 100644
--- a/python/pyspark/sql/functions/builtin.py
+++ b/python/pyspark/sql/functions/builtin.py
@@ -8864,6 +8864,13 @@ def curdate() -> Column:
:class:`~pyspark.sql.Column`
current date.
+ See Also
+ --------
+ :meth:`pyspark.sql.functions.now`
+ :meth:`pyspark.sql.functions.current_date`
+ :meth:`pyspark.sql.functions.current_timestamp`
+ :meth:`pyspark.sql.functions.localtimestamp`
+
Examples
--------
>>> import pyspark.sql.functions as sf
@@ -8893,6 +8900,13 @@ def current_date() -> Column:
:class:`~pyspark.sql.Column`
current date.
+ See Also
+ --------
+ :meth:`pyspark.sql.functions.now`
+ :meth:`pyspark.sql.functions.curdate`
+ :meth:`pyspark.sql.functions.current_timestamp`
+ :meth:`pyspark.sql.functions.localtimestamp`
+
Examples
--------
>>> from pyspark.sql import functions as sf
@@ -8920,14 +8934,26 @@ def current_timezone() -> Column:
Examples
--------
- >>> from pyspark.sql import functions as sf
>>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles")
+
+ >>> from pyspark.sql import functions as sf
>>> spark.range(1).select(sf.current_timezone()).show()
+-------------------+
| current_timezone()|
+-------------------+
|America/Los_Angeles|
+-------------------+
+
+ Switch the timezone to Shanghai.
+
+ >>> spark.conf.set("spark.sql.session.timeZone", "Asia/Shanghai")
+ >>> spark.range(1).select(sf.current_timezone()).show()
+ +------------------+
+ |current_timezone()|
+ +------------------+
+ | Asia/Shanghai|
+ +------------------+
+
>>> spark.conf.unset("spark.sql.session.timeZone")
"""
return _invoke_function("current_timezone")
@@ -8949,6 +8975,13 @@ def current_timestamp() -> Column:
:class:`~pyspark.sql.Column`
current date and time.
+ See Also
+ --------
+ :meth:`pyspark.sql.functions.now`
+ :meth:`pyspark.sql.functions.curdate`
+ :meth:`pyspark.sql.functions.current_date`
+ :meth:`pyspark.sql.functions.localtimestamp`
+
Examples
--------
>>> from pyspark.sql import functions as sf
@@ -8974,6 +9007,13 @@ def now() -> Column:
:class:`~pyspark.sql.Column`
current timestamp at the start of query evaluation.
+ See Also
+ --------
+ :meth:`pyspark.sql.functions.curdate`
+ :meth:`pyspark.sql.functions.current_date`
+ :meth:`pyspark.sql.functions.current_timestamp`
+ :meth:`pyspark.sql.functions.localtimestamp`
+
Examples
--------
>>> from pyspark.sql import functions as sf
@@ -9004,6 +9044,13 @@ def localtimestamp() -> Column:
:class:`~pyspark.sql.Column`
current local date and time.
+ See Also
+ --------
+ :meth:`pyspark.sql.functions.now`
+ :meth:`pyspark.sql.functions.curdate`
+ :meth:`pyspark.sql.functions.current_date`
+ :meth:`pyspark.sql.functions.current_timestamp`
+
Examples
--------
>>> from pyspark.sql import functions as sf
@@ -9044,6 +9091,15 @@ def date_format(date: "ColumnOrName", format: str) -> Column:
format: literal string
format to use to represent datetime values.
+ See Also
+ --------
+ :meth:`pyspark.sql.functions.to_date`
+ :meth:`pyspark.sql.functions.to_timestamp`
+ :meth:`pyspark.sql.functions.to_timestamp_ltz`
+ :meth:`pyspark.sql.functions.to_timestamp_ntz`
+ :meth:`pyspark.sql.functions.to_utc_timestamp`
+ :meth:`pyspark.sql.functions.try_to_timestamp`
+
Returns
-------
:class:`~pyspark.sql.Column`
@@ -9130,6 +9186,18 @@ def year(col: "ColumnOrName") -> Column:
:class:`~pyspark.sql.Column`
year part of the date/timestamp as integer.
+ See Also
+ --------
+ :meth:`pyspark.sql.functions.quarter`
+ :meth:`pyspark.sql.functions.month`
+ :meth:`pyspark.sql.functions.day`
+ :meth:`pyspark.sql.functions.hour`
+ :meth:`pyspark.sql.functions.minute`
+ :meth:`pyspark.sql.functions.second`
+ :meth:`pyspark.sql.functions.extract`
+ :meth:`pyspark.sql.functions.datepart`
+ :meth:`pyspark.sql.functions.date_part`
+
Examples
--------
Example 1: Extract the year from a string column representing dates
@@ -9209,6 +9277,18 @@ def quarter(col: "ColumnOrName") -> Column:
:class:`~pyspark.sql.Column`
quarter of the date/timestamp as integer.
+ See Also
+ --------
+ :meth:`pyspark.sql.functions.year`
+ :meth:`pyspark.sql.functions.month`
+ :meth:`pyspark.sql.functions.day`
+ :meth:`pyspark.sql.functions.hour`
+ :meth:`pyspark.sql.functions.minute`
+ :meth:`pyspark.sql.functions.second`
+ :meth:`pyspark.sql.functions.extract`
+ :meth:`pyspark.sql.functions.datepart`
+ :meth:`pyspark.sql.functions.date_part`
+
Examples
--------
Example 1: Extract the quarter from a string column representing dates
@@ -9288,6 +9368,19 @@ def month(col: "ColumnOrName") -> Column:
:class:`~pyspark.sql.Column`
month part of the date/timestamp as integer.
+ See Also
+ --------
+ :meth:`pyspark.sql.functions.year`
+ :meth:`pyspark.sql.functions.quarter`
+ :meth:`pyspark.sql.functions.day`
+ :meth:`pyspark.sql.functions.hour`
+ :meth:`pyspark.sql.functions.minute`
+ :meth:`pyspark.sql.functions.second`
+ :meth:`pyspark.sql.functions.monthname`
+ :meth:`pyspark.sql.functions.extract`
+ :meth:`pyspark.sql.functions.datepart`
+ :meth:`pyspark.sql.functions.date_part`
+
Examples
--------
Example 1: Extract the month from a string column representing dates
@@ -9368,6 +9461,12 @@ def dayofweek(col: "ColumnOrName") -> Column:
:class:`~pyspark.sql.Column`
day of the week for given date/timestamp as integer.
+ See Also
+ --------
+ :meth:`pyspark.sql.functions.day`
+ :meth:`pyspark.sql.functions.dayofyear`
+ :meth:`pyspark.sql.functions.dayofmonth`
+
Examples
--------
Example 1: Extract the day of the week from a string column representing dates
@@ -9442,6 +9541,12 @@ def dayofmonth(col: "ColumnOrName") -> Column:
col : :class:`~pyspark.sql.Column` or column name
target date/timestamp column to work on.
+ See Also
+ --------
+ :meth:`pyspark.sql.functions.day`
+ :meth:`pyspark.sql.functions.dayofyear`
+ :meth:`pyspark.sql.functions.dayofweek`
+
Returns
-------
:class:`~pyspark.sql.Column`
@@ -9523,6 +9628,22 @@ def day(col: "ColumnOrName") -> Column:
:class:`~pyspark.sql.Column`
day of the month for given date/timestamp as integer.
+ See Also
+ --------
+ :meth:`pyspark.sql.functions.year`
+ :meth:`pyspark.sql.functions.quarter`
+ :meth:`pyspark.sql.functions.month`
+ :meth:`pyspark.sql.functions.hour`
+ :meth:`pyspark.sql.functions.minute`
+ :meth:`pyspark.sql.functions.second`
+ :meth:`pyspark.sql.functions.dayname`
+ :meth:`pyspark.sql.functions.dayofyear`
+ :meth:`pyspark.sql.functions.dayofmonth`
+ :meth:`pyspark.sql.functions.dayofweek`
+ :meth:`pyspark.sql.functions.extract`
+ :meth:`pyspark.sql.functions.datepart`
+ :meth:`pyspark.sql.functions.date_part`
+
Examples
--------
Example 1: Extract the day of the month from a string column representing dates
@@ -9602,6 +9723,12 @@ def dayofyear(col: "ColumnOrName") -> Column:
:class:`~pyspark.sql.Column`
day of the year for given date/timestamp as integer.
+ See Also
+ --------
+ :meth:`pyspark.sql.functions.day`
+ :meth:`pyspark.sql.functions.dayofyear`
+ :meth:`pyspark.sql.functions.dayofmonth`
+
Examples
--------
Example 1: Extract the day of the year from a string column representing dates
@@ -9681,6 +9808,18 @@ def hour(col: "ColumnOrName") -> Column:
:class:`~pyspark.sql.Column`
hour part of the timestamp as integer.
+ See Also
+ --------
+ :meth:`pyspark.sql.functions.year`
+ :meth:`pyspark.sql.functions.quarter`
+ :meth:`pyspark.sql.functions.month`
+ :meth:`pyspark.sql.functions.day`
+ :meth:`pyspark.sql.functions.minute`
+ :meth:`pyspark.sql.functions.second`
+ :meth:`pyspark.sql.functions.extract`
+ :meth:`pyspark.sql.functions.datepart`
+ :meth:`pyspark.sql.functions.date_part`
+
Examples
--------
Example 1: Extract the hours from a string column representing timestamp
@@ -9728,6 +9867,18 @@ def minute(col: "ColumnOrName") -> Column:
col : :class:`~pyspark.sql.Column` or column name
target date/timestamp column to work on.
+ See Also
+ --------
+ :meth:`pyspark.sql.functions.year`
+ :meth:`pyspark.sql.functions.quarter`
+ :meth:`pyspark.sql.functions.month`
+ :meth:`pyspark.sql.functions.day`
+ :meth:`pyspark.sql.functions.hour`
+ :meth:`pyspark.sql.functions.second`
+ :meth:`pyspark.sql.functions.extract`
+ :meth:`pyspark.sql.functions.datepart`
+ :meth:`pyspark.sql.functions.date_part`
+
Returns
-------
:class:`~pyspark.sql.Column`
@@ -9785,6 +9936,18 @@ def second(col: "ColumnOrName") -> Column:
:class:`~pyspark.sql.Column`
`seconds` part of the timestamp as integer.
+ See Also
+ --------
+ :meth:`pyspark.sql.functions.year`
+ :meth:`pyspark.sql.functions.quarter`
+ :meth:`pyspark.sql.functions.month`
+ :meth:`pyspark.sql.functions.day`
+ :meth:`pyspark.sql.functions.hour`
+ :meth:`pyspark.sql.functions.minute`
+ :meth:`pyspark.sql.functions.extract`
+ :meth:`pyspark.sql.functions.datepart`
+ :meth:`pyspark.sql.functions.date_part`
+
Examples
--------
Example 1: Extract the seconds from a string column representing timestamp
@@ -9839,6 +10002,10 @@ def weekofyear(col: "ColumnOrName") -> Column:
:class:`~pyspark.sql.Column`
`week` of the year for given date as integer.
+ See Also
+ --------
+ :meth:`pyspark.sql.functions.weekday`
+
Examples
--------
Example 1: Extract the week of the year from a string column representing dates
@@ -9915,6 +10082,11 @@ def weekday(col: "ColumnOrName") -> Column:
:class:`~pyspark.sql.Column`
the day of the week for date/timestamp (0 = Monday, 1 = Tuesday, ..., 6 = Sunday).
+ See Also
+ --------
+ :meth:`pyspark.sql.functions.day`
+ :meth:`pyspark.sql.functions.weekofyear`
+
Examples
--------
Example 1: Extract the day of the week from a string column representing dates
@@ -9991,6 +10163,11 @@ def monthname(col: "ColumnOrName") -> Column:
:class:`~pyspark.sql.Column`
the three-letter abbreviation of month name for date/timestamp (Jan, Feb, Mar...)
+ See Also
+ --------
+ :meth:`pyspark.sql.functions.month`
+ :meth:`pyspark.sql.functions.dayname`
+
Examples
--------
Example 1: Extract the month name from a string column representing dates
@@ -10067,6 +10244,11 @@ def dayname(col: "ColumnOrName") -> Column:
:class:`~pyspark.sql.Column`
the three-letter abbreviation of day name for date/timestamp (Mon, Tue, Wed...)
+ See Also
+ --------
+ :meth:`pyspark.sql.functions.day`
+ :meth:`pyspark.sql.functions.monthname`
+
Examples
--------
Example 1: Extract the weekday name from a string column representing dates
@@ -10147,6 +10329,13 @@ def extract(field: Column, source: "ColumnOrName") -> Column:
See Also
--------
+ :meth:`pyspark.sql.functions.year`
+ :meth:`pyspark.sql.functions.quarter`
+ :meth:`pyspark.sql.functions.month`
+ :meth:`pyspark.sql.functions.day`
+ :meth:`pyspark.sql.functions.hour`
+ :meth:`pyspark.sql.functions.minute`
+ :meth:`pyspark.sql.functions.second`
:meth:`pyspark.sql.functions.datepart`
:meth:`pyspark.sql.functions.date_part`
@@ -10195,6 +10384,13 @@ def date_part(field: Column, source: "ColumnOrName") -> Column:
See Also
--------
+ :meth:`pyspark.sql.functions.year`
+ :meth:`pyspark.sql.functions.quarter`
+ :meth:`pyspark.sql.functions.month`
+ :meth:`pyspark.sql.functions.day`
+ :meth:`pyspark.sql.functions.hour`
+ :meth:`pyspark.sql.functions.minute`
+ :meth:`pyspark.sql.functions.second`
:meth:`pyspark.sql.functions.datepart`
:meth:`pyspark.sql.functions.extract`
@@ -10243,6 +10439,13 @@ def datepart(field: Column, source: "ColumnOrName") -> Column:
See Also
--------
+ :meth:`pyspark.sql.functions.year`
+ :meth:`pyspark.sql.functions.quarter`
+ :meth:`pyspark.sql.functions.month`
+ :meth:`pyspark.sql.functions.day`
+ :meth:`pyspark.sql.functions.hour`
+ :meth:`pyspark.sql.functions.minute`
+ :meth:`pyspark.sql.functions.second`
:meth:`pyspark.sql.functions.date_part`
:meth:`pyspark.sql.functions.extract`
@@ -10780,7 +10983,11 @@ def to_date(col: "ColumnOrName", format: Optional[str] = None) -> Column:
See Also
--------
:meth:`pyspark.sql.functions.to_timestamp`
+ :meth:`pyspark.sql.functions.to_timestamp_ltz`
+ :meth:`pyspark.sql.functions.to_timestamp_ntz`
+ :meth:`pyspark.sql.functions.to_utc_timestamp`
:meth:`pyspark.sql.functions.try_to_timestamp`
+ :meth:`pyspark.sql.functions.date_format`
Examples
--------
@@ -11018,7 +11225,12 @@ def to_timestamp(col: "ColumnOrName", format: Optional[str] = None) -> Column:
See Also
--------
:meth:`pyspark.sql.functions.to_date`
+ :meth:`pyspark.sql.functions.to_timestamp_ltz`
+ :meth:`pyspark.sql.functions.to_timestamp_ntz`
+ :meth:`pyspark.sql.functions.to_utc_timestamp`
+ :meth:`pyspark.sql.functions.to_unix_timestamp`
:meth:`pyspark.sql.functions.try_to_timestamp`
+ :meth:`pyspark.sql.functions.date_format`
Examples
--------
@@ -11072,6 +11284,8 @@ def try_to_timestamp(col: "ColumnOrName", format: Optional["ColumnOrName"] = None) -> Column:
--------
:meth:`pyspark.sql.functions.to_date`
:meth:`pyspark.sql.functions.to_timestamp`
+ :meth:`pyspark.sql.functions.to_utc_timestamp`
+ :meth:`pyspark.sql.functions.date_format`
Examples
--------
@@ -11646,6 +11860,9 @@ def from_utc_timestamp(timestamp: "ColumnOrName", tz: Union[Column, str]) -> Column:
See Also
--------
:meth:`pyspark.sql.functions.to_utc_timestamp`
+ :meth:`pyspark.sql.functions.to_timestamp`
+ :meth:`pyspark.sql.functions.to_timestamp_ltz`
+ :meth:`pyspark.sql.functions.to_timestamp_ntz`
Examples
--------
@@ -11712,6 +11929,9 @@ def to_utc_timestamp(timestamp: "ColumnOrName", tz: Union[Column, str]) -> Column:
See Also
--------
:meth:`pyspark.sql.functions.from_utc_timestamp`
+ :meth:`pyspark.sql.functions.to_timestamp`
+ :meth:`pyspark.sql.functions.to_timestamp_ltz`
+ :meth:`pyspark.sql.functions.to_timestamp_ntz`
Examples
--------
@@ -12034,22 +12254,22 @@ def window(
Parameters
----------
- timeColumn : :class:`~pyspark.sql.Column`
+ timeColumn : :class:`~pyspark.sql.Column` or column name
The column or the expression to use as the timestamp for windowing by time.
The time column must be of TimestampType or TimestampNTZType.
- windowDuration : str
+ windowDuration : literal string
A string specifying the width of the window, e.g. `10 minutes`,
`1 second`. Check `org.apache.spark.unsafe.types.CalendarInterval` for
valid duration identifiers. Note that the duration is a fixed length of
time, and does not vary over time according to a calendar. For example,
`1 day` always means 86,400,000 milliseconds, not a calendar day.
- slideDuration : str, optional
+ slideDuration : literal string, optional
A new window will be generated every `slideDuration`. Must be less than
or equal to the `windowDuration`. Check
`org.apache.spark.unsafe.types.CalendarInterval` for valid duration
identifiers. This duration is likewise absolute, and does not vary
according to a calendar.
- startTime : str, optional
+ startTime : literal string, optional
The offset with respect to 1970-01-01 00:00:00 UTC with which to start
window intervals. For example, in order to have hourly tumbling windows that
start 15 minutes past the hour, e.g. 12:15-13:15, 13:15-14:15... provide
@@ -12060,24 +12280,30 @@ def window(
:class:`~pyspark.sql.Column`
the column for computed results.
+ See Also
+ --------
+ :meth:`pyspark.sql.functions.window_time`
+ :meth:`pyspark.sql.functions.session_window`
+
Examples
--------
>>> import datetime
>>> from pyspark.sql import functions as sf
- >>> df = spark.createDataFrame(
- ... [(datetime.datetime(2016, 3, 11, 9, 0, 7), 1)],
- ... ).toDF("date", "val")
- >>> w = df.groupBy(sf.window("date", "5 seconds")).agg(sf.sum("val").alias("sum"))
- >>> w.select(
- ... w.window.start.cast("string").alias("start"),
- ... w.window.end.cast("string").alias("end"),
- ... "sum"
- ... ).show()
- +-------------------+-------------------+---+
- | start| end|sum|
- +-------------------+-------------------+---+
- |2016-03-11 09:00:05|2016-03-11 09:00:10| 1|
- +-------------------+-------------------+---+
+ >>> df = spark.createDataFrame([(datetime.datetime(2016, 3, 11, 9, 0, 7), 1)], ['dt', 'v'])
+ >>> df2 = df.groupBy(sf.window('dt', '5 seconds')).agg(sf.sum('v'))
+ >>> df2.show(truncate=False)
+ +------------------------------------------+------+
+ |window |sum(v)|
+ +------------------------------------------+------+
+ |{2016-03-11 09:00:05, 2016-03-11 09:00:10}|1 |
+ +------------------------------------------+------+
+
+ >>> df2.printSchema()
+ root
+ |-- window: struct (nullable = false)
+ | |-- start: timestamp (nullable = true)
+ | |-- end: timestamp (nullable = true)
+ |-- sum(v): long (nullable = true)
"""
from pyspark.sql.classic.column import _to_java_column
@@ -12123,7 +12349,7 @@ def window_time(
Parameters
----------
- windowColumn : :class:`~pyspark.sql.Column`
+ windowColumn : :class:`~pyspark.sql.Column` or column name
The window column of a window aggregate records.
Returns
@@ -12131,29 +12357,29 @@ def window_time(
:class:`~pyspark.sql.Column`
the column for computed results.
- Notes
- -----
- Supports Spark Connect.
+ See Also
+ --------
+ :meth:`pyspark.sql.functions.window`
+ :meth:`pyspark.sql.functions.session_window`
Examples
--------
>>> import datetime
- >>> df = spark.createDataFrame(
- ... [(datetime.datetime(2016, 3, 11, 9, 0, 7), 1)],
- ... ).toDF("date", "val")
+ >>> from pyspark.sql import functions as sf
+ >>> df = spark.createDataFrame([(datetime.datetime(2016, 3, 11, 9, 0, 7), 1)], ['dt', 'v'])
Group the data into 5 second time windows and aggregate as sum.
- >>> w = df.groupBy(window("date", "5 seconds")).agg(sum("val").alias("sum"))
+ >>> df2 = df.groupBy(sf.window('dt', '5 seconds')).agg(sf.sum('v'))
Extract the window event time using the window_time function.
- >>> w.select(
- ... w.window.end.cast("string").alias("end"),
- ... window_time(w.window).cast("string").alias("window_time"),
- ... "sum"
- ... ).collect()
- [Row(end='2016-03-11 09:00:10', window_time='2016-03-11 09:00:09.999999', sum=1)]
+ >>> df2.select('*', sf.window_time('window')).show(truncate=False)
+ +------------------------------------------+------+--------------------------+
+ |window                                    |sum(v)|window_time(window)       |
+ +------------------------------------------+------+--------------------------+
+ |{2016-03-11 09:00:05, 2016-03-11 09:00:10}|1     |2016-03-11 09:00:09.999999|
+ +------------------------------------------+------+--------------------------+
"""
from pyspark.sql.classic.column import _to_java_column
@@ -12187,10 +12413,10 @@ def session_window(timeColumn: "ColumnOrName", gapDuration: Union[Column, str])
Parameters
----------
- timeColumn : :class:`~pyspark.sql.Column` or str
+ timeColumn : :class:`~pyspark.sql.Column` or column name
The column name or column to use as the timestamp for windowing by time.
The time column must be of TimestampType or TimestampNTZType.
- gapDuration : :class:`~pyspark.sql.Column` or str
+ gapDuration : :class:`~pyspark.sql.Column` or literal string
A Python string literal or column specifying the timeout of the session. It could be
static value, e.g. `10 minutes`, `1 second`, or an expression/UDF that specifies gap
duration dynamically based on the input row.
@@ -12200,17 +12426,29 @@ def session_window(timeColumn: "ColumnOrName", gapDuration: Union[Column, str])
:class:`~pyspark.sql.Column`
the column for computed results.
+ See Also
+ --------
+ :meth:`pyspark.sql.functions.window`
+ :meth:`pyspark.sql.functions.window_time`
+
Examples
--------
- >>> df = spark.createDataFrame([("2016-03-11 09:00:07", 1)]).toDF("date",
"val")
- >>> w = df.groupBy(session_window("date", "5
seconds")).agg(sum("val").alias("sum"))
- >>> w.select(w.session_window.start.cast("string").alias("start"),
- ... w.session_window.end.cast("string").alias("end"),
"sum").collect()
- [Row(start='2016-03-11 09:00:07', end='2016-03-11 09:00:12', sum=1)]
- >>> w = df.groupBy(session_window("date", lit("5
seconds"))).agg(sum("val").alias("sum"))
- >>> w.select(w.session_window.start.cast("string").alias("start"),
- ... w.session_window.end.cast("string").alias("end"),
"sum").collect()
- [Row(start='2016-03-11 09:00:07', end='2016-03-11 09:00:12', sum=1)]
+ >>> from pyspark.sql import functions as sf
+ >>> df = spark.createDataFrame([('2016-03-11 09:00:07', 1)], ['dt', 'v'])
+ >>> df2 = df.groupBy(sf.session_window('dt', '5 seconds')).agg(sf.sum('v'))
+ >>> df2.show(truncate=False)
+ +------------------------------------------+------+
+ |session_window |sum(v)|
+ +------------------------------------------+------+
+ |{2016-03-11 09:00:07, 2016-03-11 09:00:12}|1 |
+ +------------------------------------------+------+
+
+ >>> df2.printSchema()
+ root
+ |-- session_window: struct (nullable = false)
+ | |-- start: timestamp (nullable = true)
+ | |-- end: timestamp (nullable = true)
+ |-- sum(v): long (nullable = true)
"""
from pyspark.sql.classic.column import _to_java_column
@@ -12240,37 +12478,57 @@ def to_unix_timestamp(
Parameters
----------
- timestamp : :class:`~pyspark.sql.Column` or str
+ timestamp : :class:`~pyspark.sql.Column` or column name
Input column or strings.
- format : :class:`~pyspark.sql.Column` or str, optional
+ format : :class:`~pyspark.sql.Column` or column name, optional
format to use to convert UNIX timestamp values.
+ See Also
+ --------
+ :meth:`pyspark.sql.functions.to_date`
+ :meth:`pyspark.sql.functions.to_timestamp`
+ :meth:`pyspark.sql.functions.to_timestamp_ltz`
+ :meth:`pyspark.sql.functions.to_timestamp_ntz`
+ :meth:`pyspark.sql.functions.to_utc_timestamp`
+
Examples
--------
>>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles")
- Example 1: Using default format 'yyyy-MM-dd HH:mm:ss' parses the timestamp string.
+ Example 1: Using default format to parse the timestamp string.
>>> import pyspark.sql.functions as sf
- >>> time_df = spark.createDataFrame([('2015-04-08 12:12:12',)], ['dt'])
- >>> time_df.select(sf.to_unix_timestamp('dt').alias('unix_time')).show()
- +----------+
- | unix_time|
- +----------+
- |1428520332|
- +----------+
+ >>> df = spark.createDataFrame([('2015-04-08 12:12:12',)], ['ts'])
+ >>> df.select('*', sf.to_unix_timestamp('ts')).show()
+ +-------------------+------------------------------------------+
+ | ts|to_unix_timestamp(ts, yyyy-MM-dd HH:mm:ss)|
+ +-------------------+------------------------------------------+
+ |2015-04-08 12:12:12| 1428520332|
+ +-------------------+------------------------------------------+
- Example 2: Using user-specified format 'yyyy-MM-dd' parses the timestamp string.
+ Example 2: Using user-specified format 'yyyy-MM-dd' to parse the date string.
>>> import pyspark.sql.functions as sf
- >>> time_df = spark.createDataFrame([('2015-04-08',)], ['dt'])
- >>> time_df.select(
- ... sf.to_unix_timestamp('dt', sf.lit('yyyy-MM-dd')).alias('unix_time')).show()
- +----------+
- | unix_time|
- +----------+
- |1428476400|
- +----------+
+ >>> df = spark.createDataFrame([('2015-04-08',)], ['dt'])
+ >>> df.select('*', sf.to_unix_timestamp(df.dt, sf.lit('yyyy-MM-dd'))).show()
+ +----------+---------------------------------+
+ | dt|to_unix_timestamp(dt, yyyy-MM-dd)|
+ +----------+---------------------------------+
+ |2015-04-08| 1428476400|
+ +----------+---------------------------------+
+
+ Example 3: Using a format column to represent different formats.
+
+ >>> import pyspark.sql.functions as sf
+ >>> df = spark.createDataFrame(
+ ... [('2015-04-08', 'yyyy-MM-dd'), ('2025+01+09', 'yyyy+MM+dd')], ['dt', 'fmt'])
+ >>> df.select('*', sf.to_unix_timestamp('dt', 'fmt')).show()
+ +----------+----------+--------------------------+
+ | dt| fmt|to_unix_timestamp(dt, fmt)|
+ +----------+----------+--------------------------+
+ |2015-04-08|yyyy-MM-dd| 1428476400|
+ |2025+01+09|yyyy+MM+dd| 1736409600|
+ +----------+----------+--------------------------+
>>> spark.conf.unset("spark.sql.session.timeZone")
"""
@@ -12286,29 +12544,63 @@ def to_timestamp_ltz(
format: Optional["ColumnOrName"] = None,
) -> Column:
"""
- Parses the `timestamp` with the `format` to a timestamp without time zone.
+ Parses the `timestamp` with the `format` to a timestamp with time zone.
Returns null with invalid input.
.. versionadded:: 3.5.0
Parameters
----------
- timestamp : :class:`~pyspark.sql.Column` or str
+ timestamp : :class:`~pyspark.sql.Column` or column name
Input column or strings.
- format : :class:`~pyspark.sql.Column` or str, optional
+ format : :class:`~pyspark.sql.Column` or column name, optional
format to use to convert type `TimestampType` timestamp values.
+ See Also
+ --------
+ :meth:`pyspark.sql.functions.to_date`
+ :meth:`pyspark.sql.functions.to_timestamp`
+ :meth:`pyspark.sql.functions.to_timestamp_ntz`
+ :meth:`pyspark.sql.functions.to_utc_timestamp`
+ :meth:`pyspark.sql.functions.to_unix_timestamp`
+ :meth:`pyspark.sql.functions.date_format`
+
Examples
--------
- >>> df = spark.createDataFrame([("2016-12-31",)], ["e"])
- >>> df.select(to_timestamp_ltz(df.e, lit("yyyy-MM-dd")).alias('r')).collect()
- ... # doctest: +SKIP
- [Row(r=datetime.datetime(2016, 12, 31, 0, 0))]
+ Example 1: Using default format to parse the timestamp string.
- >>> df = spark.createDataFrame([("2016-12-31",)], ["e"])
- >>> df.select(to_timestamp_ltz(df.e).alias('r')).collect()
- ... # doctest: +SKIP
- [Row(r=datetime.datetime(2016, 12, 31, 0, 0))]
+ >>> import pyspark.sql.functions as sf
+ >>> df = spark.createDataFrame([('2015-04-08 12:12:12',)], ['ts'])
+ >>> df.select('*', sf.to_timestamp_ltz('ts')).show()
+ +-------------------+--------------------+
+ | ts|to_timestamp_ltz(ts)|
+ +-------------------+--------------------+
+ |2015-04-08 12:12:12| 2015-04-08 12:12:12|
+ +-------------------+--------------------+
+
+ Example 2: Using user-specified format to parse the date string.
+
+ >>> import pyspark.sql.functions as sf
+ >>> df = spark.createDataFrame([('2016-12-31',)], ['dt'])
+ >>> df.select('*', sf.to_timestamp_ltz(df.dt, sf.lit('yyyy-MM-dd'))).show()
+ +----------+--------------------------------+
+ | dt|to_timestamp_ltz(dt, yyyy-MM-dd)|
+ +----------+--------------------------------+
+ |2016-12-31| 2016-12-31 00:00:00|
+ +----------+--------------------------------+
+
+ Example 3: Using a format column to represent different formats.
+
+ >>> import pyspark.sql.functions as sf
+ >>> df = spark.createDataFrame(
+ ... [('2015-04-08', 'yyyy-MM-dd'), ('2025+01+09', 'yyyy+MM+dd')], ['dt', 'fmt'])
+ >>> df.select('*', sf.to_timestamp_ltz('dt', 'fmt')).show()
+ +----------+----------+-------------------------+
+ | dt| fmt|to_timestamp_ltz(dt, fmt)|
+ +----------+----------+-------------------------+
+ |2015-04-08|yyyy-MM-dd| 2015-04-08 00:00:00|
+ |2025+01+09|yyyy+MM+dd| 2025-01-09 00:00:00|
+ +----------+----------+-------------------------+
"""
if format is not None:
return _invoke_function_over_columns("to_timestamp_ltz", timestamp,
format)
@@ -12329,22 +12621,56 @@ def to_timestamp_ntz(
Parameters
----------
- timestamp : :class:`~pyspark.sql.Column` or str
+ timestamp : :class:`~pyspark.sql.Column` or column name
Input column or strings.
- format : :class:`~pyspark.sql.Column` or str, optional
+ format : :class:`~pyspark.sql.Column` or column name, optional
format to use to convert type `TimestampNTZType` timestamp values.
+ See Also
+ --------
+ :meth:`pyspark.sql.functions.to_date`
+ :meth:`pyspark.sql.functions.to_timestamp`
+ :meth:`pyspark.sql.functions.to_timestamp_ltz`
+ :meth:`pyspark.sql.functions.to_utc_timestamp`
+ :meth:`pyspark.sql.functions.to_unix_timestamp`
+ :meth:`pyspark.sql.functions.date_format`
+
Examples
--------
- >>> df = spark.createDataFrame([("2016-04-08",)], ["e"])
- >>> df.select(to_timestamp_ntz(df.e, lit("yyyy-MM-dd")).alias('r')).collect()
- ... # doctest: +SKIP
- [Row(r=datetime.datetime(2016, 4, 8, 0, 0))]
+ Example 1: Using default format to parse the timestamp string.
- >>> df = spark.createDataFrame([("2016-04-08",)], ["e"])
- >>> df.select(to_timestamp_ntz(df.e).alias('r')).collect()
- ... # doctest: +SKIP
- [Row(r=datetime.datetime(2016, 4, 8, 0, 0))]
+ >>> import pyspark.sql.functions as sf
+ >>> df = spark.createDataFrame([('2015-04-08 12:12:12',)], ['ts'])
+ >>> df.select('*', sf.to_timestamp_ntz('ts')).show()
+ +-------------------+--------------------+
+ | ts|to_timestamp_ntz(ts)|
+ +-------------------+--------------------+
+ |2015-04-08 12:12:12| 2015-04-08 12:12:12|
+ +-------------------+--------------------+
+
+ Example 2: Using user-specified format 'yyyy-MM-dd' to parse the date string.
+
+ >>> import pyspark.sql.functions as sf
+ >>> df = spark.createDataFrame([('2016-12-31',)], ['dt'])
+ >>> df.select('*', sf.to_timestamp_ntz(df.dt, sf.lit('yyyy-MM-dd'))).show()
+ +----------+--------------------------------+
+ | dt|to_timestamp_ntz(dt, yyyy-MM-dd)|
+ +----------+--------------------------------+
+ |2016-12-31| 2016-12-31 00:00:00|
+ +----------+--------------------------------+
+
+ Example 3: Using a format column to represent different formats.
+
+ >>> import pyspark.sql.functions as sf
+ >>> df = spark.createDataFrame(
+ ... [('2015-04-08', 'yyyy-MM-dd'), ('2025+01+09', 'yyyy+MM+dd')], ['dt', 'fmt'])
+ >>> df.select('*', sf.to_timestamp_ntz('dt', 'fmt')).show()
+ +----------+----------+-------------------------+
+ | dt| fmt|to_timestamp_ntz(dt, fmt)|
+ +----------+----------+-------------------------+
+ |2015-04-08|yyyy-MM-dd| 2015-04-08 00:00:00|
+ |2025+01+09|yyyy+MM+dd| 2025-01-09 00:00:00|
+ +----------+----------+-------------------------+
"""
if format is not None:
return _invoke_function_over_columns("to_timestamp_ntz", timestamp,
format)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]