This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new a0fb5f0fff80 [SPARK-51564] TIME parsing in the 12hr clock format a0fb5f0fff80 is described below commit a0fb5f0fff807fdf3b9ed4827756531e6e362ba1 Author: Uros Bojanic <uros.boja...@databricks.com> AuthorDate: Sat Jul 19 16:59:07 2025 +0200 [SPARK-51564] TIME parsing in the 12hr clock format ### What changes were proposed in this pull request? Update STRING to TIME parsing logic to support 12hr time format. ### Why are the changes needed? Extending the support for TIME type to 12hr time (AM/PM). ### Does this PR introduce _any_ user-facing change? Yes, TIME can now be represented using 12hr formats. ### How was this patch tested? Updated relevant unit tests for STRING to TIME parsing logic. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #51549 from uros-db/12hr_format. Authored-by: Uros Bojanic <uros.boja...@databricks.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- .../sql/catalyst/util/SparkDateTimeUtils.scala | 54 +++++++++++- .../sql/catalyst/util/DateTimeUtilsSuite.scala | 98 +++++++++++++++++++++- 2 files changed, 146 insertions(+), 6 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala index c6846ef480ba..9ce9d14ed316 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala @@ -717,14 +717,62 @@ trait SparkDateTimeUtils { */ def stringToTime(s: UTF8String): Option[Long] = { try { - val (segments, zoneIdOpt, justTime) = parseTimestampString(s) + // Check for the AM/PM suffix. + val trimmed = s.trimRight + val numChars = trimmed.numChars() + var (isAM, isPM, hasSuffix) = (false, false, false) + if (numChars > 2) { + val lc = trimmed.getChar(numChars - 1) + if (lc == 'M' || lc == 'm') { + val slc = trimmed.getChar(numChars - 2) + isAM = slc == 'A' || slc == 'a' + isPM = slc == 'P' || slc == 'p' + hasSuffix = isAM || isPM + } + } + val timeString = if (hasSuffix) { + trimmed.substring(0, numChars - 2) + } else { + trimmed + } + + val (segments, zoneIdOpt, justTime) = parseTimestampString(timeString) + // If the input string can't be parsed as a time, or it contains not only // the time part or has time zone information, return None. if (segments.isEmpty || !justTime || zoneIdOpt.isDefined) { return None } - val nanoseconds = MICROSECONDS.toNanos(segments(6)) - val localTime = LocalTime.of(segments(3), segments(4), segments(5), nanoseconds.toInt) + + // Unpack the segments. + var (hr, min, sec, ms) = (segments(3), segments(4), segments(5), segments(6)) + + // Handle AM/PM conversion in separate cases. + if (!hasSuffix) { + // For 24-hour format, validate hour range: 0-23. + if (hr < 0 || hr > 23) { + return None + } + } else { + // For 12-hour format, validate hour range: 1-12. + if (hr < 1 || hr > 12) { + return None + } + // For 12-hour format, convert to 24-hour format. + if (isAM) { + // AM: 12:xx:xx becomes 00:xx:xx, 1-11:xx:xx stays the same. + if (hr == 12) { + hr = 0 + } + } else { + // PM: 12:xx:xx stays 12:xx:xx, 1-11:xx:xx becomes 13-23:xx:xx. + if (hr != 12) { + hr += 12 + } + } + } + + val localTime = LocalTime.of(hr, min, sec, MICROSECONDS.toNanos(ms).toInt) Some(localTimeToNanos(localTime)) } catch { case NonFatal(_) => None diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 2af311107027..a13a7b50223d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -1138,6 +1138,9 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { assert(stringToTime(UTF8String.fromString(str)) === expected) } + // Existing 24-hour format tests. + + // Various valid 24-hour format tests. checkStringToTime("00:00", Some(localTime())) checkStringToTime("00:00:00", Some(localTime())) checkStringToTime("00:00:00.1", Some(localTime(micros = 100000))) @@ -1153,9 +1156,98 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { checkStringToTime("1:2:3.0", Some(localTime(hour = 1, minute = 2, sec = 3))) checkStringToTime("T1:02:3.04", Some(localTime(hour = 1, minute = 2, sec = 3, micros = 40000))) - // Negative tests - Seq("2025-03-09 00:00:00", "00", "00:01:02 UTC").foreach { invalidTime => - checkStringToTime(invalidTime, None) + checkStringToTime("00:00 ", Some(localTime())) + checkStringToTime(" 00:00", Some(localTime())) + checkStringToTime(" 00:00 ", Some(localTime())) + checkStringToTime("1:2:3.0 ", Some(localTime(hour = 1, minute = 2, sec = 3))) + checkStringToTime(" 1:2:3.0", Some(localTime(hour = 1, minute = 2, sec = 3))) + checkStringToTime(" 1:2:3.0 ", Some(localTime(hour = 1, minute = 2, sec = 3))) + + // Invalid 24-hour format tests (out of range). + Seq("24:00:00", "25:00:00", "-1:00:00", "23:60:00", "23:00:60", "99:99:99").foreach { + invalidTime => + checkStringToTime(invalidTime, None) + } + + // 12-hour format tests (with AM/PM). + + // Midnight hour [12 AM, 1 AM). + checkStringToTime("12:00:00 AM", + Some(localTime(0, 0, 0, 0))) + checkStringToTime("12:30:45 AM", + Some(localTime(0, 30, 45, 0))) + checkStringToTime("12:59:59.999 AM", + Some(localTime(0, 59, 59, 999000))) + checkStringToTime("12:59:59.999999 AM", + Some(localTime(0, 59, 59, 999999))) + + // Morning hours [1AM, 12PM). + checkStringToTime("1:00:00 AM", + Some(localTime(hour = 1, minute = 0, sec = 0))) + checkStringToTime("11:59:59 AM", + Some(localTime(hour = 11, minute = 59, sec = 59))) + checkStringToTime("5:30:15.123456 AM", + Some(localTime(hour = 5, minute = 30, sec = 15, micros = 123456))) + + // Noon hour [12 PM, 1PM). + checkStringToTime("12:00:00 PM", + Some(localTime(hour = 12, minute = 0, sec = 0))) + checkStringToTime("12:30:45 PM", + Some(localTime(hour = 12, minute = 30, sec = 45))) + checkStringToTime("12:59:59.999 PM", + Some(localTime(hour = 12, minute = 59, sec = 59, micros = 999000))) + checkStringToTime("12:59:59.999999 PM", + Some(localTime(hour = 12, minute = 59, sec = 59, micros = 999999))) + + // Afternoon hours [1PM, 12AM). + checkStringToTime("1:00:00 PM", + Some(localTime(hour = 13, minute = 0, sec = 0))) + checkStringToTime("11:59:59 PM", + Some(localTime(hour = 23, minute = 59, sec = 59))) + checkStringToTime("6:45:30.987654 PM", + Some(localTime(hour = 18, minute = 45, sec = 30, micros = 987654))) + checkStringToTime("11:59:59.999 PM", + Some(localTime(hour = 23, minute = 59, sec = 59, micros = 999000))) + checkStringToTime("11:59:59.999999 PM", + Some(localTime(hour = 23, minute = 59, sec = 59, micros = 999999))) + + // Test without space before AM/PM. + checkStringToTime("12:00:00AM", Some(localTime(hour = 0, minute = 0, sec = 0))) + checkStringToTime("12:00:00PM", Some(localTime(hour = 12, minute = 0, sec = 0))) + checkStringToTime("3:30:45AM", Some(localTime(hour = 3, minute = 30, sec = 45))) + checkStringToTime("9:15:20PM", Some(localTime(hour = 21, minute = 15, sec = 20))) + + // Test case insensitive. + checkStringToTime("10:30:00Am ", Some(localTime(hour = 10, minute = 30, sec = 0))) + checkStringToTime("10:30:00 am", Some(localTime(hour = 10, minute = 30, sec = 0))) + checkStringToTime("2:45:30 Pm", Some(localTime(hour = 14, minute = 45, sec = 30))) + checkStringToTime("2:45:30pm ", Some(localTime(hour = 14, minute = 45, sec = 30))) + checkStringToTime("7:00:00aM", Some(localTime(hour = 7, minute = 0, sec = 0))) + checkStringToTime("8:00:00Pm", Some(localTime(hour = 20, minute = 0, sec = 0))) + + // Invalid 12-hour format tests (out of range). + Seq( + "0:00:00 AM", + "0:00:00 PM", + "13:00:00 AM", + "13:00:00 PM", + "24:00:00 AM", + "24:00:00 PM", + "12:60:00 AM", + "12:60:00 PM", + "12:00:60 AM", + "12:00:60 PM", + "99:99:99 AM", + "99:99:99 PM" + ).foreach { + invalidTime => + checkStringToTime(invalidTime, None) + } + + // Negative tests (invalid time string). + Seq("2025-03-09 00:00:00", "00", "00:01:02 UTC", "XYZ", "ABCD", " ", "").foreach { + invalidTime => + checkStringToTime(invalidTime, None) } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org