This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new a8fd878ec2c branch-3.0: [fix](mc)Fixed the issue that maxcompute catalog can only read part of the timestamp data #49600 (#49705) a8fd878ec2c is described below commit a8fd878ec2cd535e03d04d1d11a9bd6e677693f7 Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> AuthorDate: Tue Apr 22 14:16:56 2025 +0800 branch-3.0: [fix](mc)Fixed the issue that maxcompute catalog can only read part of the timestamp data #49600 (#49705) Cherry-picked from #49600 Co-authored-by: daidai <changyu...@selectdb.com> --- .../doris/maxcompute/MaxComputeColumnValue.java | 63 ++++++--------------- .../maxcompute/source/MaxComputeScanNode.java | 2 +- .../maxcompute/test_max_compute_timestamp.out | Bin 3944 -> 5359 bytes .../maxcompute/test_max_compute_timestamp.groovy | 54 ++++++++++++++++++ 4 files changed, 71 insertions(+), 48 deletions(-) diff --git a/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeColumnValue.java b/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeColumnValue.java index 5a49395780e..ec6c40a1376 100644 --- a/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeColumnValue.java +++ b/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeColumnValue.java @@ -29,9 +29,9 @@ import org.apache.arrow.vector.Float4Vector; import org.apache.arrow.vector.Float8Vector; import org.apache.arrow.vector.IntVector; import org.apache.arrow.vector.SmallIntVector; +import org.apache.arrow.vector.TimeStampMicroTZVector; +import org.apache.arrow.vector.TimeStampMicroVector; import org.apache.arrow.vector.TimeStampMilliTZVector; -import org.apache.arrow.vector.TimeStampNanoTZVector; -import org.apache.arrow.vector.TimeStampNanoVector; import org.apache.arrow.vector.TinyIntVector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.VarBinaryVector; @@ -39,7 +39,7 @@ import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.MapVector; import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.holders.NullableTimeStampNanoHolder; +import org.apache.arrow.vector.holders.NullableTimeStampMicroHolder; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.log4j.Logger; @@ -237,48 +237,12 @@ public class MaxComputeColumnValue implements ColumnValue { if (timestampType.getUnit() == org.apache.arrow.vector.types.TimeUnit.MILLISECOND) { //DATETIME result = convertToLocalDateTime((TimeStampMilliTZVector) column, idx); } else if (timestampType.getTimezone() == null) { // TIMESTAMP_NTZ - NullableTimeStampNanoHolder valueHoder = new NullableTimeStampNanoHolder(); - ((TimeStampNanoVector) column).get(idx, valueHoder); - long timestampNanos = valueHoder.value; - - result = LocalDateTime.ofEpochSecond(timestampNanos / 1_000_000_000, - (int) (timestampNanos % 1_000_000_000), java.time.ZoneOffset.UTC); + NullableTimeStampMicroHolder valueHoder = new NullableTimeStampMicroHolder(); + ((TimeStampMicroVector) column).get(idx, valueHoder); + result = microsToInstant(valueHoder.value).atZone(java.time.ZoneOffset.UTC).toLocalDateTime(); } else { // TIMESTAMP - result = convertToLocalDateTime((TimeStampNanoTZVector) column, idx); + result = convertToLocalDateTime((TimeStampMicroTZVector) column, idx); } - - /* - timestampType.getUnit() - result = switch (timestampType.getUnit()) { - case MICROSECOND -> convertToLocalDateTime((TimeStampMicroTZVector) column, idx); - case SECOND -> convertToLocalDateTime((TimeStampSecTZVector) column, idx); - case MILLISECOND -> convertToLocalDateTime((TimeStampMilliTZVector) column, idx); - case NANOSECOND -> convertToLocalDateTime((TimeStampNanoTZVector) column, idx); - }; - - Because : - MaxCompute type => Doris Type - DATETIME => ScalarType.createDatetimeV2Type(3) - TIMESTAMP_NTZ => ScalarType.createDatetimeV2Type(6); - - and - TableBatchReadSession - .withArrowOptions ( - ArrowOptions.newBuilder() - .withDatetimeUnit(TimestampUnit.MILLI) - .withTimestampUnit(TimestampUnit.NANO) - .build() - ) - , - TIMESTAMP_NTZ is NTZ => column is TimeStampNanoVector - - So: - case SECOND -> convertToLocalDateTime((TimeStampSecTZVector) column, idx); - case MICROSECOND -> convertToLocalDateTime((TimeStampMicroTZVector) column, idx); - case NANOSECOND -> convertToLocalDateTime((TimeStampNanoTZVector) column, idx); - may never be used. - */ - return result; } @@ -333,9 +297,14 @@ public class MaxComputeColumnValue implements ColumnValue { return LocalDateTime.ofInstant(Instant.ofEpochMilli(timestampMillis), timeZone); } - public LocalDateTime convertToLocalDateTime(TimeStampNanoTZVector nanoTZVector, int index) { - long timestampNano = nanoTZVector.get(index); - return Instant.ofEpochSecond(timestampNano / 1_000_000_000, timestampNano % 1_000_000_000) - .atZone(timeZone).toLocalDateTime(); + public LocalDateTime convertToLocalDateTime(TimeStampMicroTZVector nanoTZVector, int index) { + long timestampMicro = nanoTZVector.get(index); + return microsToInstant(timestampMicro).atZone(timeZone).toLocalDateTime(); + } + + private static Instant microsToInstant(long timestampMicro) { + long epochSecond = Math.floorDiv(timestampMicro, 1_000_000); + long microAdjustment = timestampMicro - epochSecond * 1_000_000; + return Instant.ofEpochSecond(epochSecond, microAdjustment * 1000); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeScanNode.java index 9235c237134..063aeea68eb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeScanNode.java @@ -202,7 +202,7 @@ public class MaxComputeScanNode extends FileQueryScanNode { .withArrowOptions( ArrowOptions.newBuilder() .withDatetimeUnit(TimestampUnit.MILLI) - .withTimestampUnit(TimestampUnit.NANO) + .withTimestampUnit(TimestampUnit.MICRO) .build() ).buildBatchReadSession(); } diff --git a/regression-test/data/external_table_p2/maxcompute/test_max_compute_timestamp.out b/regression-test/data/external_table_p2/maxcompute/test_max_compute_timestamp.out index 125148aabc6..bcc9710d485 100644 Binary files a/regression-test/data/external_table_p2/maxcompute/test_max_compute_timestamp.out and b/regression-test/data/external_table_p2/maxcompute/test_max_compute_timestamp.out differ diff --git a/regression-test/suites/external_table_p2/maxcompute/test_max_compute_timestamp.groovy b/regression-test/suites/external_table_p2/maxcompute/test_max_compute_timestamp.groovy index c7e8d7d035c..b6c1cc54d83 100644 --- a/regression-test/suites/external_table_p2/maxcompute/test_max_compute_timestamp.groovy +++ b/regression-test/suites/external_table_p2/maxcompute/test_max_compute_timestamp.groovy @@ -31,6 +31,56 @@ INSERT INTO TABLE timestamp_tb1 VALUES(timestamp "2023-02-02 00:00:00.123456789" drop table if EXISTS timestamp_tb2; CREATE TABLE timestamp_tb2 (col1 TIMESTAMP,col2 TIMESTAMP_NTZ); INSERT INTO TABLE timestamp_tb2 VALUES(timestamp "2023-02-02 00:00:00.123456", timestamp_ntz "2023-02-02 00:00:00.123456" ); + + +drop table if EXISTS datetime_tb2; +CREATE TABLE datetime_tb2 (col1 datetime); +INSERT INTO TABLE datetime_tb2 VALUES + (datetime '0001-01-01 00:00:00'), + (datetime '1523-03-10 08:15:30'), + (datetime '1969-02-02 00:00:00'), + (datetime '1969-12-31 00:00:01'), + (datetime "2023-02-02 00:00:00"), + (datetime '3256-07-22 14:45:10'), + (datetime '4789-09-05 20:30:45'), + (datetime '6210-12-17 03:55:20'), + (datetime '7854-05-29 12:10:05'), + (datetime '9234-11-11 18:40:50'), + (datetime '9999-12-31 23:59:59'); + + + + +drop table if EXISTS timestamp_tb3; +CREATE TABLE timestamp_tb3 (col1 TIMESTAMP,col2 TIMESTAMP_NTZ); +INSERT INTO TABLE timestamp_tb3 VALUES + (timestamp '0001-01-01 00:00:00.000000', timestamp_ntz '0001-01-01 00:00:00.000000'), + (timestamp '1523-03-10 08:15:30.987654', timestamp_ntz '1523-03-10 08:15:30.987654'), + (timestamp '1969-02-02 00:00:00.543210', timestamp_ntz '1969-02-02 00:00:00.543210'), + (timestamp '1969-12-31 00:00:01.678901', timestamp_ntz '1969-12-31 00:00:01.678901'), + (timestamp '2023-02-02 00:00:00.123456', timestamp_ntz '2023-02-02 00:00:00.123456'), + (timestamp '3256-07-22 14:45:10.234567', timestamp_ntz '3256-07-22 14:45:10.234567'), + (timestamp '4789-09-05 20:30:45.876543', timestamp_ntz '4789-09-05 20:30:45.876543'), + (timestamp '6210-12-17 03:55:20.345678', timestamp_ntz '6210-12-17 03:55:20.345678'), + (timestamp '7854-05-29 12:10:05.456789', timestamp_ntz '7854-05-29 12:10:05.456789'), + (timestamp '9234-11-11 18:40:50.567890', timestamp_ntz '9234-11-11 18:40:50.567890'), + (timestamp '9999-12-31 23:59:59.999999', timestamp_ntz '9999-12-31 23:59:59.999999'); + + +drop table if EXISTS timestamp_tb4; +CREATE TABLE timestamp_tb4 (col1 TIMESTAMP,col2 TIMESTAMP_NTZ); +INSERT INTO TABLE timestamp_tb4 VALUES + (timestamp '0001-01-01 00:00:00.654321789', timestamp_ntz '0001-01-01 00:00:00.654321789'), + (timestamp '1523-03-10 08:15:30.987654123', timestamp_ntz '1523-03-10 08:15:30.987654123'), + (timestamp '1969-02-02 00:00:00.543210567', timestamp_ntz '1969-02-02 00:00:00.543210567'), + (timestamp '1969-12-31 00:00:01.678901234', timestamp_ntz '1969-12-31 00:00:01.678901234'), + (timestamp '2023-02-02 00:00:00.123456890', timestamp_ntz '2023-02-02 00:00:00.123456890'), + (timestamp '3256-07-22 14:45:10.234567345', timestamp_ntz '3256-07-22 14:45:10.234567345'), + (timestamp '4789-09-05 20:30:45.876543678', timestamp_ntz '4789-09-05 20:30:45.876543678'), + (timestamp '6210-12-17 03:55:20.345678901', timestamp_ntz '6210-12-17 03:55:20.345678901'), + (timestamp '7854-05-29 12:10:05.456789432', timestamp_ntz '7854-05-29 12:10:05.456789432'), + (timestamp '9234-11-11 18:40:50.567890765', timestamp_ntz '9234-11-11 18:40:50.567890765'), + (timestamp '9999-12-31 23:59:59.999999876', timestamp_ntz '9999-12-31 23:59:59.999999876'); */ suite("test_max_compute_timestamp", "p2,external,maxcompute,external_remote,external_remote_maxcompute") { @@ -57,6 +107,10 @@ suite("test_max_compute_timestamp", "p2,external,maxcompute,external_remote,exte sql """ switch ${mc_catalog_name} """ sql """ use ${mc_db}""" + qt_0_1 """ select * from datetime_tb2 order by col1""" + qt_0_2 """ select * from timestamp_tb3 order by col1 """ + qt_0_3 """ select * from timestamp_tb4 order by col1 """ + sql """ set time_zone = "Asia/Shanghai" """ qt_1_1 """ select * from datetime_tb1;""" qt_1_2 """ select * from datetime_tb1 where col1 > "2023-02-02 00:00:00.000";""" --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org