This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new a0fb5f0fff80 [SPARK-51564] TIME parsing in the 12hr clock format
a0fb5f0fff80 is described below
commit a0fb5f0fff807fdf3b9ed4827756531e6e362ba1
Author: Uros Bojanic <[email protected]>
AuthorDate: Sat Jul 19 16:59:07 2025 +0200
[SPARK-51564] TIME parsing in the 12hr clock format
### What changes were proposed in this pull request?
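Update the STRING-to-TIME parsing logic (`stringToTime`) to support the 12-hour clock format: a time string may now end with a case-insensitive AM/PM suffix, with or without a space before it.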
### Why are the changes needed?
To extend TIME type support to time strings written in the 12-hour clock format (AM/PM).
### Does this PR introduce _any_ user-facing change?
Yes, strings in the 12-hour clock format (e.g. `12:30:45 AM`, `9:15:20PM`) can now be parsed as TIME values.
### How was this patch tested?
Updated relevant unit tests for STRING to TIME parsing logic.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #51549 from uros-db/12hr_format.
Authored-by: Uros Bojanic <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
---
.../sql/catalyst/util/SparkDateTimeUtils.scala | 54 +++++++++++-
.../sql/catalyst/util/DateTimeUtilsSuite.scala | 98 +++++++++++++++++++++-
2 files changed, 146 insertions(+), 6 deletions(-)
diff --git
a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala
b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala
index c6846ef480ba..9ce9d14ed316 100644
---
a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala
+++
b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala
@@ -717,14 +717,62 @@ trait SparkDateTimeUtils {
*/
def stringToTime(s: UTF8String): Option[Long] = {
try {
- val (segments, zoneIdOpt, justTime) = parseTimestampString(s)
+ // Check for the AM/PM suffix.
+ val trimmed = s.trimRight
+ val numChars = trimmed.numChars()
+ var (isAM, isPM, hasSuffix) = (false, false, false)
+ if (numChars > 2) {
+ val lc = trimmed.getChar(numChars - 1)
+ if (lc == 'M' || lc == 'm') {
+ val slc = trimmed.getChar(numChars - 2)
+ isAM = slc == 'A' || slc == 'a'
+ isPM = slc == 'P' || slc == 'p'
+ hasSuffix = isAM || isPM
+ }
+ }
+ val timeString = if (hasSuffix) {
+ trimmed.substring(0, numChars - 2)
+ } else {
+ trimmed
+ }
+
+ val (segments, zoneIdOpt, justTime) = parseTimestampString(timeString)
+
// If the input string can't be parsed as a time, or it contains not only
// the time part or has time zone information, return None.
if (segments.isEmpty || !justTime || zoneIdOpt.isDefined) {
return None
}
- val nanoseconds = MICROSECONDS.toNanos(segments(6))
- val localTime = LocalTime.of(segments(3), segments(4), segments(5),
nanoseconds.toInt)
+
+ // Unpack the segments.
+ var (hr, min, sec, ms) = (segments(3), segments(4), segments(5),
segments(6))
+
+ // Handle AM/PM conversion in separate cases.
+ if (!hasSuffix) {
+ // For 24-hour format, validate hour range: 0-23.
+ if (hr < 0 || hr > 23) {
+ return None
+ }
+ } else {
+ // For 12-hour format, validate hour range: 1-12.
+ if (hr < 1 || hr > 12) {
+ return None
+ }
+ // For 12-hour format, convert to 24-hour format.
+ if (isAM) {
+ // AM: 12:xx:xx becomes 00:xx:xx, 1-11:xx:xx stays the same.
+ if (hr == 12) {
+ hr = 0
+ }
+ } else {
+ // PM: 12:xx:xx stays 12:xx:xx, 1-11:xx:xx becomes 13-23:xx:xx.
+ if (hr != 12) {
+ hr += 12
+ }
+ }
+ }
+
+ val localTime = LocalTime.of(hr, min, sec,
MICROSECONDS.toNanos(ms).toInt)
Some(localTimeToNanos(localTime))
} catch {
case NonFatal(_) => None
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
index 2af311107027..a13a7b50223d 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
@@ -1138,6 +1138,9 @@ class DateTimeUtilsSuite extends SparkFunSuite with
Matchers with SQLHelper {
assert(stringToTime(UTF8String.fromString(str)) === expected)
}
+ // Existing 24-hour format tests.
+
+ // Various valid 24-hour format tests.
checkStringToTime("00:00", Some(localTime()))
checkStringToTime("00:00:00", Some(localTime()))
checkStringToTime("00:00:00.1", Some(localTime(micros = 100000)))
@@ -1153,9 +1156,98 @@ class DateTimeUtilsSuite extends SparkFunSuite with
Matchers with SQLHelper {
checkStringToTime("1:2:3.0", Some(localTime(hour = 1, minute = 2, sec =
3)))
checkStringToTime("T1:02:3.04", Some(localTime(hour = 1, minute = 2, sec =
3, micros = 40000)))
- // Negative tests
- Seq("2025-03-09 00:00:00", "00", "00:01:02 UTC").foreach { invalidTime =>
- checkStringToTime(invalidTime, None)
+ checkStringToTime("00:00 ", Some(localTime()))
+ checkStringToTime(" 00:00", Some(localTime()))
+ checkStringToTime(" 00:00 ", Some(localTime()))
+ checkStringToTime("1:2:3.0 ", Some(localTime(hour = 1, minute = 2, sec =
3)))
+ checkStringToTime(" 1:2:3.0", Some(localTime(hour = 1, minute = 2, sec =
3)))
+ checkStringToTime(" 1:2:3.0 ", Some(localTime(hour = 1, minute = 2, sec =
3)))
+
+ // Invalid 24-hour format tests (out of range).
+ Seq("24:00:00", "25:00:00", "-1:00:00", "23:60:00", "23:00:60",
"99:99:99").foreach {
+ invalidTime =>
+ checkStringToTime(invalidTime, None)
+ }
+
+ // 12-hour format tests (with AM/PM).
+
+ // Midnight hour [12 AM, 1 AM).
+ checkStringToTime("12:00:00 AM",
+ Some(localTime(0, 0, 0, 0)))
+ checkStringToTime("12:30:45 AM",
+ Some(localTime(0, 30, 45, 0)))
+ checkStringToTime("12:59:59.999 AM",
+ Some(localTime(0, 59, 59, 999000)))
+ checkStringToTime("12:59:59.999999 AM",
+ Some(localTime(0, 59, 59, 999999)))
+
+ // Morning hours [1AM, 12PM).
+ checkStringToTime("1:00:00 AM",
+ Some(localTime(hour = 1, minute = 0, sec = 0)))
+ checkStringToTime("11:59:59 AM",
+ Some(localTime(hour = 11, minute = 59, sec = 59)))
+ checkStringToTime("5:30:15.123456 AM",
+ Some(localTime(hour = 5, minute = 30, sec = 15, micros = 123456)))
+
+ // Noon hour [12 PM, 1PM).
+ checkStringToTime("12:00:00 PM",
+ Some(localTime(hour = 12, minute = 0, sec = 0)))
+ checkStringToTime("12:30:45 PM",
+ Some(localTime(hour = 12, minute = 30, sec = 45)))
+ checkStringToTime("12:59:59.999 PM",
+ Some(localTime(hour = 12, minute = 59, sec = 59, micros = 999000)))
+ checkStringToTime("12:59:59.999999 PM",
+ Some(localTime(hour = 12, minute = 59, sec = 59, micros = 999999)))
+
+ // Afternoon hours [1PM, 12AM).
+ checkStringToTime("1:00:00 PM",
+ Some(localTime(hour = 13, minute = 0, sec = 0)))
+ checkStringToTime("11:59:59 PM",
+ Some(localTime(hour = 23, minute = 59, sec = 59)))
+ checkStringToTime("6:45:30.987654 PM",
+ Some(localTime(hour = 18, minute = 45, sec = 30, micros = 987654)))
+ checkStringToTime("11:59:59.999 PM",
+ Some(localTime(hour = 23, minute = 59, sec = 59, micros = 999000)))
+ checkStringToTime("11:59:59.999999 PM",
+ Some(localTime(hour = 23, minute = 59, sec = 59, micros = 999999)))
+
+ // Test without space before AM/PM.
+ checkStringToTime("12:00:00AM", Some(localTime(hour = 0, minute = 0, sec =
0)))
+ checkStringToTime("12:00:00PM", Some(localTime(hour = 12, minute = 0, sec
= 0)))
+ checkStringToTime("3:30:45AM", Some(localTime(hour = 3, minute = 30, sec =
45)))
+ checkStringToTime("9:15:20PM", Some(localTime(hour = 21, minute = 15, sec
= 20)))
+
+ // Test case insensitive.
+ checkStringToTime("10:30:00Am ", Some(localTime(hour = 10, minute = 30,
sec = 0)))
+ checkStringToTime("10:30:00 am", Some(localTime(hour = 10, minute = 30,
sec = 0)))
+ checkStringToTime("2:45:30 Pm", Some(localTime(hour = 14, minute = 45, sec
= 30)))
+ checkStringToTime("2:45:30pm ", Some(localTime(hour = 14, minute = 45, sec
= 30)))
+ checkStringToTime("7:00:00aM", Some(localTime(hour = 7, minute = 0, sec =
0)))
+ checkStringToTime("8:00:00Pm", Some(localTime(hour = 20, minute = 0, sec =
0)))
+
+ // Invalid 12-hour format tests (out of range).
+ Seq(
+ "0:00:00 AM",
+ "0:00:00 PM",
+ "13:00:00 AM",
+ "13:00:00 PM",
+ "24:00:00 AM",
+ "24:00:00 PM",
+ "12:60:00 AM",
+ "12:60:00 PM",
+ "12:00:60 AM",
+ "12:00:60 PM",
+ "99:99:99 AM",
+ "99:99:99 PM"
+ ).foreach {
+ invalidTime =>
+ checkStringToTime(invalidTime, None)
+ }
+
+ // Negative tests (invalid time string).
+ Seq("2025-03-09 00:00:00", "00", "00:01:02 UTC", "XYZ", "ABCD", " ",
"").foreach {
+ invalidTime =>
+ checkStringToTime(invalidTime, None)
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]