This is an automated email from the ASF dual-hosted git repository. jackie pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new 9735291f02 Refactor DateTime field specs to reduce overhead (#8960) 9735291f02 is described below commit 9735291f025cb470c28baa085030ceaa02b761e3 Author: Xiaotian (Jackie) Jiang <17555551+jackie-ji...@users.noreply.github.com> AuthorDate: Thu Jun 30 06:21:01 2022 -0700 Refactor DateTime field specs to reduce overhead (#8960) - Integrate the validation logic into the constructor to avoid the overhead of extra validation and regex match - Remove the unnecessary format reconstruction - Cache `DateTimeFormatSpec` and `DateTimeGranularitySpec` within `DateTimeFieldSpec` to avoid parsing the spec string multiple times - Use the newly introduced pattern format in #8632 - This can improve the performance of the `dateTimeConvert` function --- .../routing/segmentpruner/TimeSegmentPruner.java | 2 +- .../routing/timeboundary/TimeBoundaryManager.java | 4 +- .../broker/broker/HelixBrokerStarterTest.java | 7 +- .../routing/segmentpruner/SegmentPrunerTest.java | 3 +- .../pinot/common/data/DateTimeFormatSpecTest.java | 87 ++---- .../apache/pinot/common/data/FieldSpecTest.java | 2 +- pinot-common/src/test/resources/schemaTest.schema | 2 +- .../java/org/apache/pinot/compat/StreamOp.java | 3 +- .../controller/util/AutoAddInvertedIndex.java | 3 +- .../realtime/HLRealtimeSegmentDataManager.java | 3 +- .../processing/timehandler/EpochTimeHandler.java | 2 +- .../apache/pinot/core/util/SchemaUtilsTest.java | 77 +++-- .../batch/common/SegmentGenerationTaskRunner.java | 2 +- .../hadoop/job/mappers/SegmentCreationMapper.java | 2 +- .../job/mappers/SegmentPreprocessingMapper.java | 37 ++- .../job/preprocess/DataPreprocessingHelper.java | 2 +- .../spark/jobs/SparkSegmentCreationFunction.java | 11 +- .../pinot/plugin/inputformat/avro/AvroUtils.java | 10 +- .../recordtransformer/NullValueTransformer.java | 5 +- .../pinot/segment/local/utils/IngestionUtils.java | 2 +- .../pinot/segment/local/utils/SchemaUtils.java | 41 +-- 
.../local/segment/readers/PinotSegmentUtil.java | 4 +- .../spi/creator/SegmentGeneratorConfig.java | 2 +- .../creator/name/SegmentNameGeneratorFactory.java | 2 +- .../NormalizedDateSegmentNameGeneratorTest.java | 32 +- .../apache/pinot/spi/data/DateTimeFieldSpec.java | 32 +- .../pinot/spi/data/DateTimeFormatPatternSpec.java | 120 +++---- .../apache/pinot/spi/data/DateTimeFormatSpec.java | 346 +++++++++++---------- .../pinot/spi/data/DateTimeFormatUnitSpec.java | 35 +-- .../pinot/spi/data/DateTimeGranularitySpec.java | 98 +++--- .../java/org/apache/pinot/spi/data/Schema.java | 28 +- .../java/org/apache/pinot/spi/utils/JsonUtils.java | 10 +- .../org/apache/pinot/spi/utils/StringUtil.java | 32 ++ .../spi/data/DateTimeFormatPatternSpecTest.java | 72 ++++- .../pinot/spi/data/DateTimeFormatSpecTest.java | 73 ++++- 35 files changed, 651 insertions(+), 542 deletions(-) diff --git a/pinot-broker/src/main/java/org/apache/pinot/broker/routing/segmentpruner/TimeSegmentPruner.java b/pinot-broker/src/main/java/org/apache/pinot/broker/routing/segmentpruner/TimeSegmentPruner.java index ff9df4e004..0b57c38422 100644 --- a/pinot-broker/src/main/java/org/apache/pinot/broker/routing/segmentpruner/TimeSegmentPruner.java +++ b/pinot-broker/src/main/java/org/apache/pinot/broker/routing/segmentpruner/TimeSegmentPruner.java @@ -85,7 +85,7 @@ public class TimeSegmentPruner implements SegmentPruner { DateTimeFieldSpec dateTimeSpec = schema.getSpecForTimeColumn(_timeColumn); Preconditions.checkNotNull(dateTimeSpec, "Field spec must be specified in schema for time column: %s of table: %s", _timeColumn, _tableNameWithType); - _timeFormatSpec = new DateTimeFormatSpec(dateTimeSpec.getFormat()); + _timeFormatSpec = dateTimeSpec.getFormatSpec(); } @Override diff --git a/pinot-broker/src/main/java/org/apache/pinot/broker/routing/timeboundary/TimeBoundaryManager.java b/pinot-broker/src/main/java/org/apache/pinot/broker/routing/timeboundary/TimeBoundaryManager.java index e781443c1c..10ae73b18a 100644 
--- a/pinot-broker/src/main/java/org/apache/pinot/broker/routing/timeboundary/TimeBoundaryManager.java +++ b/pinot-broker/src/main/java/org/apache/pinot/broker/routing/timeboundary/TimeBoundaryManager.java @@ -78,7 +78,7 @@ public class TimeBoundaryManager { DateTimeFieldSpec dateTimeSpec = schema.getSpecForTimeColumn(_timeColumn); Preconditions.checkNotNull(dateTimeSpec, "Field spec must be specified in schema for time column: %s of table: %s", _timeColumn, _offlineTableName); - _timeFormatSpec = new DateTimeFormatSpec(dateTimeSpec.getFormat()); + _timeFormatSpec = dateTimeSpec.getFormatSpec(); Preconditions.checkNotNull(_timeFormatSpec.getColumnUnit(), "Time unit must be configured in the field spec for time column: %s of table: %s", _timeColumn, _offlineTableName); @@ -91,7 +91,7 @@ public class TimeBoundaryManager { _timeOffsetMs = isHourlyTable ? TimeUnit.HOURS.toMillis(1) : TimeUnit.DAYS.toMillis(1); LOGGER.info("Constructed TimeBoundaryManager with timeColumn: {}, timeFormat: {}, isHourlyTable: {} for table: {}", - _timeColumn, _timeFormatSpec.getFormat(), isHourlyTable, _offlineTableName); + _timeColumn, dateTimeSpec.getFormat(), isHourlyTable, _offlineTableName); } /** diff --git a/pinot-broker/src/test/java/org/apache/pinot/broker/broker/HelixBrokerStarterTest.java b/pinot-broker/src/test/java/org/apache/pinot/broker/broker/HelixBrokerStarterTest.java index ca5cc76de7..ddab2f7bf9 100644 --- a/pinot-broker/src/test/java/org/apache/pinot/broker/broker/HelixBrokerStarterTest.java +++ b/pinot-broker/src/test/java/org/apache/pinot/broker/broker/HelixBrokerStarterTest.java @@ -35,9 +35,6 @@ import org.apache.pinot.controller.utils.SegmentMetadataMockUtils; import org.apache.pinot.core.routing.RoutingTable; import org.apache.pinot.spi.config.table.TableConfig; import org.apache.pinot.spi.config.table.TableType; -import org.apache.pinot.spi.data.DateTimeFieldSpec; -import org.apache.pinot.spi.data.DateTimeFormatSpec; -import 
org.apache.pinot.spi.data.DateTimeGranularitySpec; import org.apache.pinot.spi.data.FieldSpec; import org.apache.pinot.spi.data.Schema; import org.apache.pinot.spi.env.PinotConfiguration; @@ -90,9 +87,7 @@ public class HelixBrokerStarterTest extends ControllerTest { addFakeServerInstancesToAutoJoinHelixCluster(NUM_SERVERS, true); Schema schema = new Schema.SchemaBuilder().setSchemaName(RAW_TABLE_NAME) - .addDateTime(TIME_COLUMN_NAME, FieldSpec.DataType.INT, new DateTimeFormatSpec(1, TimeUnit.DAYS.toString(), - DateTimeFieldSpec.TimeFormat.EPOCH.toString()).getFormat(), - new DateTimeGranularitySpec(1, TimeUnit.DAYS).getGranularity()).build(); + .addDateTime(TIME_COLUMN_NAME, FieldSpec.DataType.INT, "EPOCH|DAYS", "1:DAYS").build(); _helixResourceManager.addSchema(schema, true); TableConfig offlineTableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(RAW_TABLE_NAME).setTimeColumnName(TIME_COLUMN_NAME) diff --git a/pinot-broker/src/test/java/org/apache/pinot/broker/routing/segmentpruner/SegmentPrunerTest.java b/pinot-broker/src/test/java/org/apache/pinot/broker/routing/segmentpruner/SegmentPrunerTest.java index 53ef6fc6e3..70d41313bc 100644 --- a/pinot-broker/src/test/java/org/apache/pinot/broker/routing/segmentpruner/SegmentPrunerTest.java +++ b/pinot-broker/src/test/java/org/apache/pinot/broker/routing/segmentpruner/SegmentPrunerTest.java @@ -543,8 +543,7 @@ public class SegmentPrunerTest extends ControllerTest { TimeSegmentPruner segmentPruner = new TimeSegmentPruner(tableConfig, _propertyStore); Schema schema = ZKMetadataProvider.getTableSchema(_propertyStore, RAW_TABLE_NAME); - DateTimeFormatSpec dateTimeFormatSpec = - new DateTimeFormatSpec(schema.getSpecForTimeColumn(TIME_COLUMN).getFormat()); + DateTimeFormatSpec dateTimeFormatSpec = schema.getSpecForTimeColumn(TIME_COLUMN).getFormatSpec(); Set<String> onlineSegments = new HashSet<>(); String segment0 = "segment0"; diff --git 
a/pinot-common/src/test/java/org/apache/pinot/common/data/DateTimeFormatSpecTest.java b/pinot-common/src/test/java/org/apache/pinot/common/data/DateTimeFormatSpecTest.java index 5f33b0227a..3a1ad600ce 100644 --- a/pinot-common/src/test/java/org/apache/pinot/common/data/DateTimeFormatSpecTest.java +++ b/pinot-common/src/test/java/org/apache/pinot/common/data/DateTimeFormatSpecTest.java @@ -249,44 +249,45 @@ public class DateTimeFormatSpecTest { }); entries.add(new Object[]{ - "1:DAYS:SIMPLE_DATE_FORMAT:yyyyMMdd", 1, TimeUnit.DAYS, DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, + "1:DAYS:SIMPLE_DATE_FORMAT:yyyyMMdd", 1, TimeUnit.MILLISECONDS, DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd", DateTimeZone.UTC }); entries.add(new Object[]{ - "1:DAYS:SIMPLE_DATE_FORMAT:yyyyMMdd tz(IST)", 1, TimeUnit.DAYS, DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, - "yyyyMMdd", DateTimeZone.forTimeZone(TimeZone.getTimeZone("IST")) + "1:DAYS:SIMPLE_DATE_FORMAT:yyyyMMdd tz(IST)", 1, TimeUnit.MILLISECONDS, + DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd", DateTimeZone.forTimeZone( + TimeZone.getTimeZone("IST")) }); entries.add(new Object[]{ - "1:DAYS:SIMPLE_DATE_FORMAT:yyyyMMdd tz(IST)", 1, TimeUnit.DAYS, + "1:DAYS:SIMPLE_DATE_FORMAT:yyyyMMdd tz(IST)", 1, TimeUnit.MILLISECONDS, DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd", DateTimeZone.forTimeZone(TimeZone.getTimeZone("IST")) }); entries.add(new Object[]{ - "1:DAYS:SIMPLE_DATE_FORMAT:yyyyMMdd tz ( IST ) ", 1, TimeUnit.DAYS, + "1:DAYS:SIMPLE_DATE_FORMAT:yyyyMMdd tz ( IST ) ", 1, TimeUnit.MILLISECONDS, DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd", DateTimeZone.forTimeZone(TimeZone.getTimeZone("IST")) }); entries.add(new Object[]{ - "1:HOURS:SIMPLE_DATE_FORMAT:yyyyMMdd HH", 1, TimeUnit.HOURS, DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, - "yyyyMMdd HH", DateTimeZone.UTC + "1:HOURS:SIMPLE_DATE_FORMAT:yyyyMMdd HH", 1, TimeUnit.MILLISECONDS, + 
DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd HH", DateTimeZone.UTC }); entries.add(new Object[]{ - "1:HOURS:SIMPLE_DATE_FORMAT:yyyyMMdd HH tz(dummy)", 1, TimeUnit.HOURS, + "1:HOURS:SIMPLE_DATE_FORMAT:yyyyMMdd HH tz(dummy)", 1, TimeUnit.MILLISECONDS, DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd HH", DateTimeZone.UTC }); entries.add(new Object[]{ - "1:HOURS:SIMPLE_DATE_FORMAT:M/d/yyyy h:mm:ss a", 1, TimeUnit.HOURS, + "1:HOURS:SIMPLE_DATE_FORMAT:M/d/yyyy h:mm:ss a", 1, TimeUnit.MILLISECONDS, DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, "M/d/yyyy h:mm:ss a", DateTimeZone.UTC }); entries.add(new Object[]{ - "1:HOURS:SIMPLE_DATE_FORMAT:M/d/yyyy h:mm:ss a tz(Asia/Tokyo)", 1, TimeUnit.HOURS, + "1:HOURS:SIMPLE_DATE_FORMAT:M/d/yyyy h:mm:ss a tz(Asia/Tokyo)", 1, TimeUnit.MILLISECONDS, DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, "M/d/yyyy h:mm:ss a", DateTimeZone.forTimeZone(TimeZone.getTimeZone("Asia/Tokyo")) }); @@ -304,97 +305,47 @@ public class DateTimeFormatSpecTest { }); entries.add(new Object[]{ - "SIMPLE_DATE_FORMAT|yyyyMMdd", 1, TimeUnit.DAYS, DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, + "SIMPLE_DATE_FORMAT|yyyyMMdd", 1, TimeUnit.MILLISECONDS, DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd", DateTimeZone.UTC }); entries.add(new Object[]{ - "SIMPLE_DATE_FORMAT|yyyyMMdd|IST", 1, TimeUnit.DAYS, DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, + "SIMPLE_DATE_FORMAT|yyyyMMdd|IST", 1, TimeUnit.MILLISECONDS, DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd", DateTimeZone.forTimeZone(TimeZone.getTimeZone("IST")) }); entries.add(new Object[]{ - "SIMPLE_DATE_FORMAT|yyyyMMdd|IST", 1, TimeUnit.DAYS, + "SIMPLE_DATE_FORMAT|yyyyMMdd|IST", 1, TimeUnit.MILLISECONDS, DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd", DateTimeZone.forTimeZone(TimeZone.getTimeZone("IST")) }); entries.add(new Object[]{ - "SIMPLE_DATE_FORMAT|yyyyMMdd|IST", 1, TimeUnit.DAYS, + "SIMPLE_DATE_FORMAT|yyyyMMdd|IST", 1, 
TimeUnit.MILLISECONDS, DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd", DateTimeZone.forTimeZone(TimeZone.getTimeZone("IST")) }); entries.add(new Object[]{ - "SIMPLE_DATE_FORMAT|yyyyMMdd HH", 1, TimeUnit.DAYS, DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, + "SIMPLE_DATE_FORMAT|yyyyMMdd HH", 1, TimeUnit.MILLISECONDS, DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd HH", DateTimeZone.UTC }); entries.add(new Object[]{ - "SIMPLE_DATE_FORMAT|yyyyMMdd HH|dummy", 1, TimeUnit.DAYS, + "SIMPLE_DATE_FORMAT|yyyyMMdd HH|dummy", 1, TimeUnit.MILLISECONDS, DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd HH", DateTimeZone.UTC }); entries.add(new Object[]{ - "SIMPLE_DATE_FORMAT|M/d/yyyy h:mm:ss a", 1, TimeUnit.DAYS, + "SIMPLE_DATE_FORMAT|M/d/yyyy h:mm:ss a", 1, TimeUnit.MILLISECONDS, DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, "M/d/yyyy h:mm:ss a", DateTimeZone.UTC }); entries.add(new Object[]{ - "SIMPLE_DATE_FORMAT|M/d/yyyy h:mm:ss a|Asia/Tokyo", 1, TimeUnit.DAYS, + "SIMPLE_DATE_FORMAT|M/d/yyyy h:mm:ss a|Asia/Tokyo", 1, TimeUnit.MILLISECONDS, DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, "M/d/yyyy h:mm:ss a", DateTimeZone.forTimeZone(TimeZone.getTimeZone("Asia/Tokyo")) }); return entries.toArray(new Object[entries.size()][]); } - - // Test construct format given its components - @Test(dataProvider = "testConstructFormatDataProvider") - public void testConstructFormat(int columnSize, TimeUnit columnUnit, String columnTimeFormat, String pattern, - DateTimeFormatSpec formatExpected1, DateTimeFormatSpec formatExpected2) { - DateTimeFormatSpec formatActual1 = null; - try { - formatActual1 = new DateTimeFormatSpec(columnSize, columnUnit.toString(), columnTimeFormat); - } catch (Exception e) { - // invalid arguments - } - Assert.assertEquals(formatActual1, formatExpected1); - - DateTimeFormatSpec formatActual2 = null; - try { - formatActual2 = new DateTimeFormatSpec(columnSize, columnUnit.toString(), columnTimeFormat, pattern); - } catch 
(Exception e) { - // invalid arguments - } - Assert.assertEquals(formatActual2, formatExpected2); - } - - @DataProvider(name = "testConstructFormatDataProvider") - public Object[][] provideTestConstructFormatData() { - - List<Object[]> entries = new ArrayList<>(); - - entries.add(new Object[]{1, TimeUnit.HOURS, "EPOCH", null, new DateTimeFormatSpec("1:HOURS:EPOCH"), null}); - entries.add(new Object[]{1, TimeUnit.HOURS, "EPOCH", "yyyyMMdd", new DateTimeFormatSpec("1:HOURS:EPOCH"), null}); - entries.add(new Object[]{5, TimeUnit.MINUTES, "EPOCH", null, new DateTimeFormatSpec("5:MINUTES:EPOCH"), null}); - entries.add(new Object[]{0, TimeUnit.HOURS, "EPOCH", null, null, null}); - entries.add(new Object[]{1, null, "EPOCH", null, null, null}); - entries.add(new Object[]{1, TimeUnit.HOURS, null, null, null, null}); - entries.add(new Object[]{1, TimeUnit.HOURS, "DUMMY", "yyyyMMdd", null, null}); - entries.add(new Object[]{ - 1, TimeUnit.HOURS, "SIMPLE_DATE_FORMAT", "yyyyMMdd", null, - new DateTimeFormatSpec("1:HOURS:SIMPLE_DATE_FORMAT:yyyyMMdd") - }); - entries.add(new Object[]{ - 1, TimeUnit.HOURS, "SIMPLE_DATE_FORMAT", "yyyyMMdd tz(America/Los_Angeles)", null, - new DateTimeFormatSpec("1:HOURS:SIMPLE_DATE_FORMAT:yyyyMMdd tz(America/Los_Angeles)") - }); - entries.add(new Object[]{1, TimeUnit.HOURS, "SIMPLE_DATE_FORMAT", null, null, null}); - entries.add(new Object[]{-1, TimeUnit.HOURS, "SIMPLE_DATE_FORMAT", "yyyyMMDD", null, null}); - entries.add(new Object[]{ - 1, TimeUnit.HOURS, "SIMPLE_DATE_FORMAT", "M/d/yyyy h:mm:ss a", null, - new DateTimeFormatSpec("1:HOURS:SIMPLE_DATE_FORMAT:M/d/yyyy h:mm:ss a") - }); - return entries.toArray(new Object[entries.size()][]); - } } diff --git a/pinot-common/src/test/java/org/apache/pinot/common/data/FieldSpecTest.java b/pinot-common/src/test/java/org/apache/pinot/common/data/FieldSpecTest.java index 1d2c92a8a8..0623068b35 100644 --- a/pinot-common/src/test/java/org/apache/pinot/common/data/FieldSpecTest.java +++ 
b/pinot-common/src/test/java/org/apache/pinot/common/data/FieldSpecTest.java @@ -245,7 +245,7 @@ public class FieldSpecTest { boolean exceptionActual = false; try { dateTimeFieldActual = new DateTimeFieldSpec(name, dataType, format, granularity); - } catch (IllegalStateException e) { + } catch (IllegalArgumentException e) { exceptionActual = true; } Assert.assertEquals(exceptionActual, exceptionExpected); diff --git a/pinot-common/src/test/resources/schemaTest.schema b/pinot-common/src/test/resources/schemaTest.schema index d6c5704b2b..443b10e2d6 100644 --- a/pinot-common/src/test/resources/schemaTest.schema +++ b/pinot-common/src/test/resources/schemaTest.schema @@ -93,7 +93,7 @@ "name": "dateTime3", "dataType": "TIMESTAMP", "format": "1:MILLISECONDS:TIMESTAMP", - "granularity": "1:SECOND" + "granularity": "1:SECONDS" } ], "schemaName": "schemaTest" diff --git a/pinot-compatibility-verifier/src/main/java/org/apache/pinot/compat/StreamOp.java b/pinot-compatibility-verifier/src/main/java/org/apache/pinot/compat/StreamOp.java index a50487e05b..ac69226211 100644 --- a/pinot-compatibility-verifier/src/main/java/org/apache/pinot/compat/StreamOp.java +++ b/pinot-compatibility-verifier/src/main/java/org/apache/pinot/compat/StreamOp.java @@ -226,8 +226,7 @@ public class StreamOp extends BaseOp { .sendGetRequest(ControllerRequestURLBuilder.baseUrl(ClusterDescriptor.getInstance().getControllerUrl()) .forSchemaGet(schemaName)); Schema schema = JsonUtils.stringToObject(schemaString, Schema.class); - DateTimeFormatSpec dateTimeFormatSpec = - new DateTimeFormatSpec(schema.getSpecForTimeColumn(timeColumn).getFormat()); + DateTimeFormatSpec dateTimeFormatSpec = schema.getSpecForTimeColumn(timeColumn).getFormatSpec(); try (RecordReader csvRecordReader = RecordReaderFactory .getRecordReader(FileFormat.CSV, localReplacedCSVFile, columnNames, recordReaderConfig)) { diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/util/AutoAddInvertedIndex.java 
b/pinot-controller/src/main/java/org/apache/pinot/controller/util/AutoAddInvertedIndex.java index 89d06479f8..3d57ceb9fb 100644 --- a/pinot-controller/src/main/java/org/apache/pinot/controller/util/AutoAddInvertedIndex.java +++ b/pinot-controller/src/main/java/org/apache/pinot/controller/util/AutoAddInvertedIndex.java @@ -41,7 +41,6 @@ import org.apache.pinot.common.metadata.ZKMetadataProvider; import org.apache.pinot.spi.config.table.IndexingConfig; import org.apache.pinot.spi.config.table.TableConfig; import org.apache.pinot.spi.data.DateTimeFieldSpec; -import org.apache.pinot.spi.data.DateTimeFormatSpec; import org.apache.pinot.spi.data.FieldSpec; import org.apache.pinot.spi.data.Schema; import org.apache.pinot.spi.utils.JsonUtils; @@ -243,7 +242,7 @@ public class AutoAddInvertedIndex { tableNameWithType); continue; } - TimeUnit timeUnit = new DateTimeFormatSpec(dateTimeSpec.getFormat()).getColumnUnit(); + TimeUnit timeUnit = dateTimeSpec.getFormatSpec().getColumnUnit(); if (timeUnit != TimeUnit.DAYS) { LOGGER.warn("Table: {}, time column {] has non-DAYS time unit: {}", timeColumnName, timeUnit); } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/HLRealtimeSegmentDataManager.java b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/HLRealtimeSegmentDataManager.java index 3033508406..b84c449ab5 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/HLRealtimeSegmentDataManager.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/HLRealtimeSegmentDataManager.java @@ -47,7 +47,6 @@ import org.apache.pinot.segment.spi.creator.SegmentVersion; import org.apache.pinot.spi.config.table.IndexingConfig; import org.apache.pinot.spi.config.table.TableConfig; import org.apache.pinot.spi.data.DateTimeFieldSpec; -import org.apache.pinot.spi.data.DateTimeFormatSpec; import org.apache.pinot.spi.data.Schema; import org.apache.pinot.spi.data.readers.GenericRow; import 
org.apache.pinot.spi.metrics.PinotMeter; @@ -121,7 +120,7 @@ public class HLRealtimeSegmentDataManager extends RealtimeSegmentDataManager { _tableNameWithType); DateTimeFieldSpec dateTimeFieldSpec = schema.getSpecForTimeColumn(_timeColumnName); Preconditions.checkNotNull(dateTimeFieldSpec, "Must provide field spec for time column {}", _timeColumnName); - _timeType = new DateTimeFormatSpec(dateTimeFieldSpec.getFormat()).getColumnUnit(); + _timeType = dateTimeFieldSpec.getFormatSpec().getColumnUnit(); List<String> sortedColumns = indexLoadingConfig.getSortedColumns(); if (sortedColumns.isEmpty()) { diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/processing/timehandler/EpochTimeHandler.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/processing/timehandler/EpochTimeHandler.java index ccba8bfc9f..9e6ad822b5 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/segment/processing/timehandler/EpochTimeHandler.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/processing/timehandler/EpochTimeHandler.java @@ -43,7 +43,7 @@ public class EpochTimeHandler implements TimeHandler { long roundBucketMs, long partitionBucketMs) { _timeColumn = fieldSpec.getName(); _dataType = fieldSpec.getDataType(); - _formatSpec = new DateTimeFormatSpec(fieldSpec.getFormat()); + _formatSpec = fieldSpec.getFormatSpec(); _startTimeMs = startTimeMs; _endTimeMs = endTimeMs; _negateWindowFilter = negateWindowFilter; diff --git a/pinot-core/src/test/java/org/apache/pinot/core/util/SchemaUtilsTest.java b/pinot-core/src/test/java/org/apache/pinot/core/util/SchemaUtilsTest.java index e8f29ad725..09dc8928b0 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/util/SchemaUtilsTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/util/SchemaUtilsTest.java @@ -47,7 +47,6 @@ import static org.testng.Assert.assertThrows; * Tests schema validations */ public class SchemaUtilsTest { - private static final String TABLE_NAME = "testTable"; 
private static final String TIME_COLUMN = "timeColumn"; @@ -209,21 +208,21 @@ public class SchemaUtilsTest { public void testValidateTimeFieldSpec() { Schema pinotSchema; // time field spec using same name for incoming and outgoing - pinotSchema = new Schema.SchemaBuilder() - .addTime(new TimeGranularitySpec(DataType.LONG, TimeUnit.MILLISECONDS, "time"), + pinotSchema = + new Schema.SchemaBuilder().addTime(new TimeGranularitySpec(DataType.LONG, TimeUnit.MILLISECONDS, "time"), new TimeGranularitySpec(DataType.INT, TimeUnit.DAYS, "time")).build(); checkValidationFails(pinotSchema); // time field spec using SIMPLE_DATE_FORMAT, not allowed when conversion is needed - pinotSchema = new Schema.SchemaBuilder() - .addTime(new TimeGranularitySpec(DataType.LONG, TimeUnit.MILLISECONDS, "incoming"), + pinotSchema = + new Schema.SchemaBuilder().addTime(new TimeGranularitySpec(DataType.LONG, TimeUnit.MILLISECONDS, "incoming"), new TimeGranularitySpec(DataType.INT, TimeUnit.DAYS, TimeGranularitySpec.TimeFormat.SIMPLE_DATE_FORMAT.toString(), "outgoing")).build(); checkValidationFails(pinotSchema); // valid time field spec - pinotSchema = new Schema.SchemaBuilder() - .addTime(new TimeGranularitySpec(DataType.LONG, TimeUnit.MILLISECONDS, "incoming"), + pinotSchema = + new Schema.SchemaBuilder().addTime(new TimeGranularitySpec(DataType.LONG, TimeUnit.MILLISECONDS, "incoming"), new TimeGranularitySpec(DataType.INT, TimeUnit.DAYS, "outgoing")).build(); SchemaUtils.validate(pinotSchema); } @@ -232,19 +231,20 @@ public class SchemaUtilsTest { public void testValidateDateTimeFieldSpec() { Schema pinotSchema; // valid date time. 
- pinotSchema = new Schema.SchemaBuilder() - .addDateTime("datetime1", FieldSpec.DataType.STRING, "1:DAYS:SIMPLE_DATE_FORMAT:yyyy-MM-dd", "1:DAYS") + pinotSchema = new Schema.SchemaBuilder().addDateTime("datetime1", FieldSpec.DataType.STRING, + "1:DAYS:SIMPLE_DATE_FORMAT:yyyy-MM-dd", "1:DAYS") .addDateTime("datetime2", FieldSpec.DataType.STRING, "1:DAYS:SIMPLE_DATE_FORMAT:yyyy-MM-ww-dd", "1:DAYS") .build(); SchemaUtils.validate(pinotSchema); // date time field spec using SIMPLE_DATE_FORMAT needs to be valid. - assertThrows(IllegalStateException.class, () -> new Schema.SchemaBuilder() - .addDateTime("datetime3", FieldSpec.DataType.STRING, "1:DAYS:SIMPLE_DATE_FORMAT:foo_bar", "1:DAYS").build()); + assertThrows(IllegalArgumentException.class, + () -> new Schema.SchemaBuilder().addDateTime("datetime3", FieldSpec.DataType.STRING, + "1:DAYS:SIMPLE_DATE_FORMAT:foo_bar", "1:DAYS").build()); // date time field spec using SIMPLE_DATE_FORMAT needs to be lexicographical order. - pinotSchema = new Schema.SchemaBuilder() - .addDateTime("datetime4", FieldSpec.DataType.STRING, "1:DAYS:SIMPLE_DATE_FORMAT:M/d/yyyy", "1:DAYS").build(); + pinotSchema = new Schema.SchemaBuilder().addDateTime("datetime4", FieldSpec.DataType.STRING, + "1:DAYS:SIMPLE_DATE_FORMAT:M/d/yyyy", "1:DAYS").build(); checkValidationFails(pinotSchema); } @@ -252,17 +252,17 @@ public class SchemaUtilsTest { public void testValidatePrimaryKeyColumns() { Schema pinotSchema; // non-existing column used as primary key - pinotSchema = new Schema.SchemaBuilder() - .addTime(new TimeGranularitySpec(DataType.LONG, TimeUnit.MILLISECONDS, "incoming"), - new TimeGranularitySpec(DataType.INT, TimeUnit.DAYS, "outgoing")) - .addSingleValueDimension("col", DataType.INT).setPrimaryKeyColumns(Lists.newArrayList("test")).build(); + pinotSchema = + new Schema.SchemaBuilder().addTime(new TimeGranularitySpec(DataType.LONG, TimeUnit.MILLISECONDS, "incoming"), + new TimeGranularitySpec(DataType.INT, TimeUnit.DAYS, "outgoing")) + 
.addSingleValueDimension("col", DataType.INT).setPrimaryKeyColumns(Lists.newArrayList("test")).build(); checkValidationFails(pinotSchema); // valid primary key - pinotSchema = new Schema.SchemaBuilder() - .addTime(new TimeGranularitySpec(DataType.LONG, TimeUnit.MILLISECONDS, "incoming"), - new TimeGranularitySpec(DataType.INT, TimeUnit.DAYS, "outgoing")) - .addSingleValueDimension("col", DataType.INT).setPrimaryKeyColumns(Lists.newArrayList("col")).build(); + pinotSchema = + new Schema.SchemaBuilder().addTime(new TimeGranularitySpec(DataType.LONG, TimeUnit.MILLISECONDS, "incoming"), + new TimeGranularitySpec(DataType.INT, TimeUnit.DAYS, "outgoing")) + .addSingleValueDimension("col", DataType.INT).setPrimaryKeyColumns(Lists.newArrayList("col")).build(); SchemaUtils.validate(pinotSchema); } @@ -298,54 +298,53 @@ public class SchemaUtilsTest { @Test public void testDateTimeFieldSpec() throws IOException { - Schema pinotSchema; - pinotSchema = Schema.fromString( + Schema schema = Schema.fromString( "{\"schemaName\":\"testSchema\"," + "\"dimensionFieldSpecs\":[ {\"name\":\"dim1\",\"dataType\":\"STRING\"}]," + "\"dateTimeFieldSpecs\":[{\"name\":\"dt1\",\"dataType\":\"INT\",\"format\":\"x:HOURS:EPOCH\"," + "\"granularity\":\"1:HOURS\"}]}"); - checkValidationFails(pinotSchema); + checkValidationFails(schema); - pinotSchema = Schema.fromString( + schema = Schema.fromString( "{\"schemaName\":\"testSchema\"," + "\"dimensionFieldSpecs\":[ {\"name\":\"dim1\",\"dataType\":\"STRING\"}]," + "\"dateTimeFieldSpecs\":[{\"name\":\"dt1\",\"dataType\":\"INT\",\"format\":\"1:DUMMY:EPOCH\"," + "\"granularity\":\"1:HOURS\"}]}"); - checkValidationFails(pinotSchema); + checkValidationFails(schema); - pinotSchema = Schema.fromString( + schema = Schema.fromString( "{\"schemaName\":\"testSchema\"," + "\"dimensionFieldSpecs\":[ {\"name\":\"dim1\",\"dataType\":\"STRING\"}]," + "\"dateTimeFieldSpecs\":[{\"name\":\"dt1\",\"dataType\":\"INT\",\"format\":\"1:HOURS:DUMMY\"," + 
"\"granularity\":\"1:HOURS\"}]}"); - checkValidationFails(pinotSchema); + checkValidationFails(schema); - pinotSchema = Schema.fromString( + schema = Schema.fromString( "{\"schemaName\":\"testSchema\"," + "\"dimensionFieldSpecs\":[ {\"name\":\"dim1\",\"dataType\":\"STRING\"}]," + "\"dateTimeFieldSpecs\":[{\"name\":\"dt1\",\"dataType\":\"INT\",\"format\":\"1:HOURS:EPOCH\"," + "\"granularity\":\"x:HOURS\"}]}"); - checkValidationFails(pinotSchema); + checkValidationFails(schema); - pinotSchema = Schema.fromString( + schema = Schema.fromString( "{\"schemaName\":\"testSchema\"," + "\"dimensionFieldSpecs\":[ {\"name\":\"dim1\",\"dataType\":\"STRING\"}]," + "\"dateTimeFieldSpecs\":[{\"name\":\"dt1\",\"dataType\":\"INT\",\"format\":\"1:HOURS:EPOCH\"," + "\"granularity\":\"1:DUMMY\"}]}"); - checkValidationFails(pinotSchema); + checkValidationFails(schema); - pinotSchema = Schema.fromString( + schema = Schema.fromString( "{\"schemaName\":\"testSchema\"," + "\"dimensionFieldSpecs\":[ {\"name\":\"dim1\",\"dataType\":\"STRING\"}]," + "\"dateTimeFieldSpecs\":[{\"name\":\"dt1\",\"dataType\":\"INT\"," + "\"format\":\"1:DAYS:SIMPLE_DATE_FORMAT\",\"granularity\":\"1:DAYS\"}]}"); - checkValidationFails(pinotSchema); + checkValidationFails(schema); - pinotSchema = Schema.fromString( + schema = Schema.fromString( "{\"schemaName\":\"testSchema\"," + "\"dimensionFieldSpecs\":[ {\"name\":\"dim1\",\"dataType\":\"STRING\"}]," + "\"dateTimeFieldSpecs\":[{\"name\":\"dt1\",\"dataType\":\"INT\",\"format\":\"1:HOURS:EPOCH\"," + "\"granularity\":\"1:HOURS\"}]}"); - SchemaUtils.validate(pinotSchema); + SchemaUtils.validate(schema); - pinotSchema = Schema.fromString( + schema = Schema.fromString( "{\"schemaName\":\"testSchema\"," + "\"dimensionFieldSpecs\":[ {\"name\":\"dim1\",\"dataType\":\"STRING\"}]," + "\"dateTimeFieldSpecs\":[{\"name\":\"dt1\",\"dataType\":\"INT\"," + "\"format\":\"1:DAYS:SIMPLE_DATE_FORMAT:yyyyMMdd\",\"granularity\":\"1:DAYS\"}]}"); - SchemaUtils.validate(pinotSchema); + 
SchemaUtils.validate(schema); } /** @@ -394,7 +393,7 @@ public class SchemaUtilsTest { try { SchemaUtils.validate(pinotSchema); Assert.fail("Schema validation should have failed."); - } catch (IllegalStateException e) { + } catch (IllegalArgumentException | IllegalStateException e) { // expected } } diff --git a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-common/src/main/java/org/apache/pinot/plugin/ingestion/batch/common/SegmentGenerationTaskRunner.java b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-common/src/main/java/org/apache/pinot/plugin/ingestion/batch/common/SegmentGenerationTaskRunner.java index 530bf2110c..847b92002d 100644 --- a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-common/src/main/java/org/apache/pinot/plugin/ingestion/batch/common/SegmentGenerationTaskRunner.java +++ b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-common/src/main/java/org/apache/pinot/plugin/ingestion/batch/common/SegmentGenerationTaskRunner.java @@ -156,7 +156,7 @@ public class SegmentGenerationTaskRunner implements Serializable { if (timeColumnName != null) { DateTimeFieldSpec dateTimeFieldSpec = schema.getSpecForTimeColumn(timeColumnName); if (dateTimeFieldSpec != null) { - dateTimeFormatSpec = new DateTimeFormatSpec(dateTimeFieldSpec.getFormat()); + dateTimeFormatSpec = dateTimeFieldSpec.getFormatSpec(); } } return new NormalizedDateSegmentNameGenerator(tableName, segmentNameGeneratorConfigs.get(SEGMENT_NAME_PREFIX), diff --git a/pinot-plugins/pinot-batch-ingestion/v0_deprecated/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mappers/SegmentCreationMapper.java b/pinot-plugins/pinot-batch-ingestion/v0_deprecated/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mappers/SegmentCreationMapper.java index e2232f6891..5f5d510bbb 100644 --- a/pinot-plugins/pinot-batch-ingestion/v0_deprecated/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mappers/SegmentCreationMapper.java +++ 
b/pinot-plugins/pinot-batch-ingestion/v0_deprecated/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mappers/SegmentCreationMapper.java @@ -158,7 +158,7 @@ public class SegmentCreationMapper extends Mapper<LongWritable, Text, LongWritab if (timeColumnName != null) { DateTimeFieldSpec dateTimeFieldSpec = _schema.getSpecForTimeColumn(timeColumnName); if (dateTimeFieldSpec != null) { - dateTimeFormatSpec = new DateTimeFormatSpec(dateTimeFieldSpec.getFormat()); + dateTimeFormatSpec = dateTimeFieldSpec.getFormatSpec(); } } _segmentNameGenerator = diff --git a/pinot-plugins/pinot-batch-ingestion/v0_deprecated/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mappers/SegmentPreprocessingMapper.java b/pinot-plugins/pinot-batch-ingestion/v0_deprecated/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mappers/SegmentPreprocessingMapper.java index aad0e63c5f..408081eb88 100644 --- a/pinot-plugins/pinot-batch-ingestion/v0_deprecated/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mappers/SegmentPreprocessingMapper.java +++ b/pinot-plugins/pinot-batch-ingestion/v0_deprecated/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mappers/SegmentPreprocessingMapper.java @@ -68,15 +68,36 @@ public class SegmentPreprocessingMapper String timeColumnValue = _jobConf.get(InternalConfigConstants.TIME_COLUMN_VALUE); String pushFrequency = _jobConf.get(InternalConfigConstants.SEGMENT_PUSH_FREQUENCY); - String timeType = _jobConf.get(InternalConfigConstants.SEGMENT_TIME_TYPE); - String timeFormat = _jobConf.get(InternalConfigConstants.SEGMENT_TIME_FORMAT); + String timeFormatStr = _jobConf.get(InternalConfigConstants.SEGMENT_TIME_FORMAT); + DateTimeFieldSpec.TimeFormat timeFormat; + try { + timeFormat = DateTimeFieldSpec.TimeFormat.valueOf(timeFormatStr); + } catch (Exception e) { + throw new IllegalArgumentException("Invalid time format: " + timeFormatStr); + } DateTimeFormatSpec dateTimeFormatSpec; - if 
(timeFormat.equals(DateTimeFieldSpec.TimeFormat.EPOCH.toString()) || timeFormat.equals( - DateTimeFieldSpec.TimeFormat.TIMESTAMP.toString())) { - dateTimeFormatSpec = new DateTimeFormatSpec(1, timeType, timeFormat); - } else { - dateTimeFormatSpec = new DateTimeFormatSpec(1, timeType, timeFormat, - _jobConf.get(InternalConfigConstants.SEGMENT_TIME_SDF_PATTERN)); + switch (timeFormat) { + case EPOCH: + String timeTypeStr = _jobConf.get(InternalConfigConstants.SEGMENT_TIME_TYPE); + try { + dateTimeFormatSpec = DateTimeFormatSpec.forEpoch(timeTypeStr); + } catch (Exception e) { + throw new IllegalArgumentException("Invalid time type: " + timeTypeStr, e); + } + break; + case TIMESTAMP: + dateTimeFormatSpec = DateTimeFormatSpec.forTimestamp(); + break; + case SIMPLE_DATE_FORMAT: + String sdfPattern = _jobConf.get(InternalConfigConstants.SEGMENT_TIME_SDF_PATTERN); + try { + dateTimeFormatSpec = DateTimeFormatSpec.forSimpleDateFormat(sdfPattern); + } catch (Exception e) { + throw new IllegalArgumentException("Invalid SDF pattern: " + sdfPattern, e); + } + break; + default: + throw new IllegalStateException("Unsupported time format: " + timeFormat); } _normalizedDateSegmentNameGenerator = new NormalizedDateSegmentNameGenerator(tableName, null, false, "APPEND", pushFrequency, dateTimeFormatSpec, diff --git a/pinot-plugins/pinot-batch-ingestion/v0_deprecated/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/preprocess/DataPreprocessingHelper.java b/pinot-plugins/pinot-batch-ingestion/v0_deprecated/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/preprocess/DataPreprocessingHelper.java index dc51e71e66..a9a0db40d6 100644 --- a/pinot-plugins/pinot-batch-ingestion/v0_deprecated/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/preprocess/DataPreprocessingHelper.java +++ b/pinot-plugins/pinot-batch-ingestion/v0_deprecated/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/preprocess/DataPreprocessingHelper.java @@ -201,7 +201,7 @@ public abstract class 
DataPreprocessingHelper { if (timeColumnName != null) { DateTimeFieldSpec dateTimeFieldSpec = _pinotTableSchema.getSpecForTimeColumn(timeColumnName); if (dateTimeFieldSpec != null) { - DateTimeFormatSpec formatSpec = new DateTimeFormatSpec(dateTimeFieldSpec.getFormat()); + DateTimeFormatSpec formatSpec = dateTimeFieldSpec.getFormatSpec(); job.getConfiguration().set(InternalConfigConstants.SEGMENT_TIME_TYPE, formatSpec.getColumnUnit().toString()); job.getConfiguration() .set(InternalConfigConstants.SEGMENT_TIME_FORMAT, formatSpec.getTimeFormat().toString()); diff --git a/pinot-plugins/pinot-batch-ingestion/v0_deprecated/pinot-spark/src/main/java/org/apache/pinot/spark/jobs/SparkSegmentCreationFunction.java b/pinot-plugins/pinot-batch-ingestion/v0_deprecated/pinot-spark/src/main/java/org/apache/pinot/spark/jobs/SparkSegmentCreationFunction.java index a5c79215b8..0a1791e4df 100644 --- a/pinot-plugins/pinot-batch-ingestion/v0_deprecated/pinot-spark/src/main/java/org/apache/pinot/spark/jobs/SparkSegmentCreationFunction.java +++ b/pinot-plugins/pinot-batch-ingestion/v0_deprecated/pinot-spark/src/main/java/org/apache/pinot/spark/jobs/SparkSegmentCreationFunction.java @@ -125,7 +125,7 @@ public class SparkSegmentCreationFunction implements Serializable { if (timeColumnName != null) { DateTimeFieldSpec dateTimeFieldSpec = _schema.getSpecForTimeColumn(timeColumnName); if (dateTimeFieldSpec != null) { - dateTimeFormatSpec = new DateTimeFormatSpec(dateTimeFieldSpec.getFormat()); + dateTimeFormatSpec = dateTimeFieldSpec.getFormatSpec(); } } _segmentNameGenerator = @@ -150,8 +150,8 @@ public class SparkSegmentCreationFunction implements Serializable { _logger.warn("Deleting existing file: {}", _localStagingDir); FileUtils.forceDelete(_localStagingDir); } - _logger - .info("Making local temporary directories: {}, {}, {}", _localStagingDir, _localInputDir, _localSegmentTarDir); + _logger.info("Making local temporary directories: {}, {}, {}", _localStagingDir, _localInputDir, + 
_localSegmentTarDir); Preconditions.checkState(_localStagingDir.mkdirs()); Preconditions.checkState(_localInputDir.mkdir()); Preconditions.checkState(_localSegmentDir.mkdir()); @@ -250,8 +250,9 @@ public class SparkSegmentCreationFunction implements Serializable { Path hdfsSegmentTarFile = new Path(_hdfsSegmentTarDir, segmentTarFileName); if (_useRelativePath) { - Path relativeOutputPath = SegmentCreationJob.getRelativeOutputPath( - new Path(_jobConf.get(JobConfigConstants.PATH_TO_INPUT)).toUri(), hdfsInputFile.toUri(), _hdfsSegmentTarDir); + Path relativeOutputPath = + SegmentCreationJob.getRelativeOutputPath(new Path(_jobConf.get(JobConfigConstants.PATH_TO_INPUT)).toUri(), + hdfsInputFile.toUri(), _hdfsSegmentTarDir); hdfsSegmentTarFile = new Path(relativeOutputPath, segmentTarFileName); } _logger.info("Copying segment tar file from: {} to: {}", localSegmentTarFile, hdfsSegmentTarFile); diff --git a/pinot-plugins/pinot-input-format/pinot-avro-base/src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroUtils.java b/pinot-plugins/pinot-input-format/pinot-avro-base/src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroUtils.java index bebd903dbc..8c98f57a3f 100644 --- a/pinot-plugins/pinot-input-format/pinot-avro-base/src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroUtils.java +++ b/pinot-plugins/pinot-input-format/pinot-avro-base/src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroUtils.java @@ -34,8 +34,6 @@ import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericRecord; import org.apache.pinot.spi.config.table.ingestion.ComplexTypeConfig; import org.apache.pinot.spi.data.DateTimeFieldSpec; -import org.apache.pinot.spi.data.DateTimeFormatSpec; -import org.apache.pinot.spi.data.DateTimeGranularitySpec; import org.apache.pinot.spi.data.DimensionFieldSpec; import org.apache.pinot.spi.data.FieldSpec; import org.apache.pinot.spi.data.FieldSpec.DataType; @@ -369,9 +367,11 @@ public class AvroUtils { case 
DATE_TIME: Preconditions.checkState(isSingleValueField, "Time field: %s cannot be multi-valued", name); Preconditions.checkNotNull(timeUnit, "Time unit cannot be null"); - pinotSchema.addField(new DateTimeFieldSpec(name, dataType, - new DateTimeFormatSpec(1, timeUnit.toString(), DateTimeFieldSpec.TimeFormat.EPOCH.toString()).getFormat(), - new DateTimeGranularitySpec(1, timeUnit).getGranularity())); + // TODO: Switch to new format after releasing 0.11.0 + // "EPOCH|" + timeUnit.name() + String format = "1:" + timeUnit.name() + ":EPOCH"; + String granularity = "1:" + timeUnit.name(); + pinotSchema.addField(new DateTimeFieldSpec(name, dataType, format, granularity)); break; default: throw new UnsupportedOperationException("Unsupported field type: " + fieldType + " for field: " + name); diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/NullValueTransformer.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/NullValueTransformer.java index 605ce80134..823f449a00 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/NullValueTransformer.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/NullValueTransformer.java @@ -62,8 +62,7 @@ public class NullValueTransformer implements RecordTransformer { schema.getSchemaName()); String defaultTimeString = timeColumnSpec.getDefaultNullValueString(); - String timeFormat = timeColumnSpec.getFormat(); - DateTimeFormatSpec dateTimeFormatSpec = new DateTimeFormatSpec(timeFormat); + DateTimeFormatSpec dateTimeFormatSpec = timeColumnSpec.getFormatSpec(); try { long defaultTimeMs = dateTimeFormatSpec.fromFormatToMillis(defaultTimeString); if (TimeUtils.timeValueInValidRange(defaultTimeMs)) { @@ -78,7 +77,7 @@ public class NullValueTransformer implements RecordTransformer { _defaultNullValues.put(timeColumnName, currentTime); LOGGER.info( "Default time: {} does not comply 
with format: {}, using current time: {} as the default time for table: {}", - defaultTimeString, timeFormat, currentTime, tableConfig.getTableName()); + defaultTimeString, timeColumnSpec.getFormat(), currentTime, tableConfig.getTableName()); } } diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/IngestionUtils.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/IngestionUtils.java index 2953646364..72aadb70cb 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/IngestionUtils.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/IngestionUtils.java @@ -158,7 +158,7 @@ public final class IngestionUtils { if (timeColumnName != null) { DateTimeFieldSpec dateTimeFieldSpec = schema.getSpecForTimeColumn(timeColumnName); if (dateTimeFieldSpec != null) { - dateTimeFormatSpec = new DateTimeFormatSpec(dateTimeFieldSpec.getFormat()); + dateTimeFormatSpec = dateTimeFieldSpec.getFormatSpec(); } } return new NormalizedDateSegmentNameGenerator(rawTableName, batchConfig.getSegmentNamePrefix(), diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SchemaUtils.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SchemaUtils.java index 3b4e5183bd..337531c3e6 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SchemaUtils.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SchemaUtils.java @@ -48,6 +48,7 @@ public class SchemaUtils { // checker to ensure simple date format matches lexicographic ordering. 
private static final Map<Character, Integer> DATETIME_PATTERN_ORDERING = new HashMap<>(); + static { char[] patternOrdering = new char[]{'y', 'M', 'd', 'H', 'm', 's', 'S'}; for (int i = 0; i < patternOrdering.length; i++) { @@ -120,10 +121,10 @@ public class SchemaUtils { } } if (fieldSpec.getFieldType().equals(FieldSpec.FieldType.TIME)) { - validateTimeFieldSpec(fieldSpec); + validateTimeFieldSpec((TimeFieldSpec) fieldSpec); } if (fieldSpec.getFieldType().equals(FieldSpec.FieldType.DATE_TIME)) { - validateDateTimeFieldSpec(fieldSpec); + validateDateTimeFieldSpec((DateTimeFieldSpec) fieldSpec); } } } @@ -132,8 +133,8 @@ public class SchemaUtils { transformedColumns.retainAll(argumentColumns)); if (schema.getPrimaryKeyColumns() != null) { for (String primaryKeyColumn : schema.getPrimaryKeyColumns()) { - Preconditions - .checkState(primaryKeyColumnCandidates.contains(primaryKeyColumn), "The primary key column must exist"); + Preconditions.checkState(primaryKeyColumnCandidates.contains(primaryKeyColumn), + "The primary key column must exist"); } } } @@ -154,8 +155,7 @@ public class SchemaUtils { /** * Checks for valid incoming and outgoing granularity spec in the time field spec */ - private static void validateTimeFieldSpec(FieldSpec fieldSpec) { - TimeFieldSpec timeFieldSpec = (TimeFieldSpec) fieldSpec; + private static void validateTimeFieldSpec(TimeFieldSpec timeFieldSpec) { TimeGranularitySpec incomingGranularitySpec = timeFieldSpec.getIncomingGranularitySpec(); TimeGranularitySpec outgoingGranularitySpec = timeFieldSpec.getOutgoingGranularitySpec(); @@ -167,28 +167,21 @@ public class SchemaUtils { Preconditions.checkState( incomingGranularitySpec.getTimeFormat().equals(TimeGranularitySpec.TimeFormat.EPOCH.toString()) && outgoingGranularitySpec.getTimeFormat().equals(TimeGranularitySpec.TimeFormat.EPOCH.toString()), - "Cannot perform time conversion for time format other than EPOCH. 
TimeFieldSpec: %s", fieldSpec); + "Cannot perform time conversion for time format other than EPOCH. TimeFieldSpec: %s", timeFieldSpec); } } /** * Checks for valid format and granularity string in dateTimeFieldSpec */ - private static void validateDateTimeFieldSpec(FieldSpec fieldSpec) { - DateTimeFieldSpec dateTimeFieldSpec = (DateTimeFieldSpec) fieldSpec; - validateDateTimeFormat(dateTimeFieldSpec.getFormat()); - DateTimeGranularitySpec.validateGranularity(dateTimeFieldSpec.getGranularity()); - } - - private static void validateDateTimeFormat(String format) { - DateTimeFormatSpec dateTimeFormatSpec; + private static void validateDateTimeFieldSpec(DateTimeFieldSpec dateTimeFieldSpec) { + DateTimeFormatSpec formatSpec; try { - dateTimeFormatSpec = new DateTimeFormatSpec(format); + formatSpec = dateTimeFieldSpec.getFormatSpec(); } catch (Exception e) { - throw new IllegalStateException(String.format("invalid datetime format: %s", format), e); + throw new IllegalArgumentException("Invalid format: " + dateTimeFieldSpec.getFormat(), e); } - // validate the format is correct. - String sdfPattern = dateTimeFormatSpec.getSDFPattern(); + String sdfPattern = formatSpec.getSDFPattern(); if (sdfPattern != null) { // must be in "yyyy MM dd HH mm ss SSS" to make sure it is sorted by both lexicographical and datetime order. int[] maxIndexes = new int[]{-1, -1, -1, -1, -1, -1, -1, -1}; @@ -198,11 +191,19 @@ public class SchemaUtils { } // last index doesn't need to be checked. 
for (int idx = 0; idx < maxIndexes.length - 2; idx++) { - Preconditions.checkState(maxIndexes[idx] <= maxIndexes[idx + 1] || maxIndexes[idx + 1] == -1, + Preconditions.checkArgument(maxIndexes[idx] <= maxIndexes[idx + 1] || maxIndexes[idx + 1] == -1, String.format("SIMPLE_DATE_FORMAT pattern %s has to be sorted by both lexicographical and datetime order", sdfPattern)); maxIndexes[idx + 1] = Math.max(maxIndexes[idx + 1], maxIndexes[idx]); } } + + DateTimeGranularitySpec granularitySpec; + try { + granularitySpec = dateTimeFieldSpec.getGranularitySpec(); + } catch (Exception e) { + throw new IllegalArgumentException("Invalid granularity: " + dateTimeFieldSpec.getGranularity(), e); + } + Preconditions.checkNotNull(granularitySpec); } } diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/readers/PinotSegmentUtil.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/readers/PinotSegmentUtil.java index ab73f25e00..cf61964f7c 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/readers/PinotSegmentUtil.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/readers/PinotSegmentUtil.java @@ -33,7 +33,6 @@ import org.apache.pinot.segment.local.segment.creator.impl.SegmentIndexCreationD import org.apache.pinot.segment.spi.creator.SegmentGeneratorConfig; import org.apache.pinot.spi.config.table.TableConfig; import org.apache.pinot.spi.data.DateTimeFieldSpec; -import org.apache.pinot.spi.data.DateTimeFormatSpec; import org.apache.pinot.spi.data.FieldSpec; import org.apache.pinot.spi.data.FieldSpec.DataType; import org.apache.pinot.spi.data.Schema; @@ -111,8 +110,7 @@ public class PinotSegmentUtil { TimeUnit unit = timeFieldSpec.getIncomingGranularitySpec().getTimeType(); return generateTimeValue(random, unit); } else if (fieldSpec instanceof DateTimeFieldSpec) { - DateTimeFieldSpec dateTimeFieldSpec = (DateTimeFieldSpec) fieldSpec; - TimeUnit unit = new 
DateTimeFormatSpec(dateTimeFieldSpec.getFormat()).getColumnUnit(); + TimeUnit unit = ((DateTimeFieldSpec) fieldSpec).getFormatSpec().getColumnUnit(); return generateTimeValue(random, unit); } else { DataType storedType = fieldSpec.getDataType().getStoredType(); diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java index e28d566c86..d81e69e18d 100644 --- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java +++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java @@ -264,7 +264,7 @@ public class SegmentGeneratorConfig implements Serializable { DateTimeFieldSpec dateTimeFieldSpec = schema.getSpecForTimeColumn(timeColumnName); if (dateTimeFieldSpec != null) { setTimeColumnName(dateTimeFieldSpec.getName()); - setDateTimeFormatSpec(new DateTimeFormatSpec(dateTimeFieldSpec.getFormat())); + setDateTimeFormatSpec(dateTimeFieldSpec.getFormatSpec()); } } } diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/name/SegmentNameGeneratorFactory.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/name/SegmentNameGeneratorFactory.java index 0bd4c3c6cc..9fd6c97f35 100644 --- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/name/SegmentNameGeneratorFactory.java +++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/name/SegmentNameGeneratorFactory.java @@ -63,7 +63,7 @@ public class SegmentNameGeneratorFactory { DateTimeFieldSpec dateTimeFieldSpec = schema.getSpecForTimeColumn(timeColumnName); Preconditions.checkNotNull(dateTimeFieldSpec, "Schema does not contain the time column specified in the table config."); - dateTimeFormatSpec = new DateTimeFormatSpec(dateTimeFieldSpec.getFormat()); + dateTimeFormatSpec = dateTimeFieldSpec.getFormatSpec(); } 
return new NormalizedDateSegmentNameGenerator(tableName, prefix, excludeSequenceId, IngestionConfigUtils.getBatchSegmentIngestionType(tableConfig), diff --git a/pinot-segment-spi/src/test/java/org/apache/pinot/segment/spi/creator/name/NormalizedDateSegmentNameGeneratorTest.java b/pinot-segment-spi/src/test/java/org/apache/pinot/segment/spi/creator/name/NormalizedDateSegmentNameGeneratorTest.java index 3316929502..0fef3e6991 100644 --- a/pinot-segment-spi/src/test/java/org/apache/pinot/segment/spi/creator/name/NormalizedDateSegmentNameGeneratorTest.java +++ b/pinot-segment-spi/src/test/java/org/apache/pinot/segment/spi/creator/name/NormalizedDateSegmentNameGeneratorTest.java @@ -35,8 +35,6 @@ public class NormalizedDateSegmentNameGeneratorTest { private static final String MALFORMED_SEGMENT_NAME_POSTFIX = "my\\postfix"; private static final String APPEND_PUSH_TYPE = "APPEND"; private static final String REFRESH_PUSH_TYPE = "REFRESH"; - private static final String EPOCH_TIME_FORMAT = "EPOCH"; - private static final String SIMPLE_DATE_TIME_FORMAT = "SIMPLE_DATE_FORMAT"; private static final String LONG_SIMPLE_DATE_FORMAT = "yyyyMMdd"; private static final String STRING_SIMPLE_DATE_FORMAT = "yyyy-MM-dd"; private static final String STRING_SLASH_DATE_FORMAT = "yyyy/MM/dd"; @@ -139,7 +137,7 @@ public class NormalizedDateSegmentNameGeneratorTest { public void testAppend() { SegmentNameGenerator segmentNameGenerator = new NormalizedDateSegmentNameGenerator(TABLE_NAME, null, false, APPEND_PUSH_TYPE, DAILY_PUSH_FREQUENCY, - new DateTimeFormatSpec(1, TimeUnit.DAYS.toString(), EPOCH_TIME_FORMAT), null); + DateTimeFormatSpec.forEpoch(TimeUnit.DAYS.name()), null); assertEquals(segmentNameGenerator.toString(), "NormalizedDateSegmentNameGenerator: segmentNamePrefix=myTable, appendPushType=true, outputSDF=yyyy-MM-dd, " + "inputTimeUnit=DAYS"); @@ -153,7 +151,7 @@ public class NormalizedDateSegmentNameGeneratorTest { public void testAppendWithSegmentNamePrefix() { 
SegmentNameGenerator segmentNameGenerator = new NormalizedDateSegmentNameGenerator(TABLE_NAME, SEGMENT_NAME_PREFIX, false, APPEND_PUSH_TYPE, - DAILY_PUSH_FREQUENCY, new DateTimeFormatSpec(1, TimeUnit.DAYS.toString(), EPOCH_TIME_FORMAT), null); + DAILY_PUSH_FREQUENCY, DateTimeFormatSpec.forEpoch(TimeUnit.DAYS.name()), null); assertEquals(segmentNameGenerator.toString(), "NormalizedDateSegmentNameGenerator: segmentNamePrefix=myTable_daily, appendPushType=true, " + "outputSDF=yyyy-MM-dd, inputTimeUnit=DAYS"); @@ -167,8 +165,7 @@ public class NormalizedDateSegmentNameGeneratorTest { public void testAppendWithSegmentNamePrefixPostfix() { SegmentNameGenerator segmentNameGenerator = new NormalizedDateSegmentNameGenerator(TABLE_NAME, SEGMENT_NAME_PREFIX, false, APPEND_PUSH_TYPE, - DAILY_PUSH_FREQUENCY, new DateTimeFormatSpec(1, TimeUnit.DAYS.toString(), EPOCH_TIME_FORMAT), - SEGMENT_NAME_POSTFIX); + DAILY_PUSH_FREQUENCY, DateTimeFormatSpec.forEpoch(TimeUnit.DAYS.name()), SEGMENT_NAME_POSTFIX); assertEquals(segmentNameGenerator.toString(), "NormalizedDateSegmentNameGenerator: segmentNamePrefix=myTable_daily, segmentNamePostfix=myPostfix, " + "appendPushType=true, outputSDF=yyyy-MM-dd, inputTimeUnit=DAYS"); @@ -182,7 +179,7 @@ public class NormalizedDateSegmentNameGeneratorTest { public void testHoursTimeType() { SegmentNameGenerator segmentNameGenerator = new NormalizedDateSegmentNameGenerator(TABLE_NAME, null, false, APPEND_PUSH_TYPE, DAILY_PUSH_FREQUENCY, - new DateTimeFormatSpec(1, TimeUnit.HOURS.toString(), EPOCH_TIME_FORMAT), null); + DateTimeFormatSpec.forEpoch(TimeUnit.HOURS.name()), null); assertEquals(segmentNameGenerator.toString(), "NormalizedDateSegmentNameGenerator: segmentNamePrefix=myTable, appendPushType=true, outputSDF=yyyy-MM-dd, " + "inputTimeUnit=HOURS"); @@ -196,8 +193,7 @@ public class NormalizedDateSegmentNameGeneratorTest { public void testLongSimpleDateFormat() { SegmentNameGenerator segmentNameGenerator = new 
NormalizedDateSegmentNameGenerator(TABLE_NAME, null, false, APPEND_PUSH_TYPE, DAILY_PUSH_FREQUENCY, - new DateTimeFormatSpec(1, TimeUnit.DAYS.toString(), SIMPLE_DATE_TIME_FORMAT, LONG_SIMPLE_DATE_FORMAT), - null); + DateTimeFormatSpec.forSimpleDateFormat(LONG_SIMPLE_DATE_FORMAT), null); assertEquals(segmentNameGenerator.toString(), "NormalizedDateSegmentNameGenerator: segmentNamePrefix=myTable, appendPushType=true, outputSDF=yyyy-MM-dd, " + "inputSDF=yyyyMMdd"); @@ -211,8 +207,7 @@ public class NormalizedDateSegmentNameGeneratorTest { public void testStringSimpleDateFormat() { SegmentNameGenerator segmentNameGenerator = new NormalizedDateSegmentNameGenerator(TABLE_NAME, null, false, APPEND_PUSH_TYPE, DAILY_PUSH_FREQUENCY, - new DateTimeFormatSpec(1, TimeUnit.DAYS.toString(), SIMPLE_DATE_TIME_FORMAT, STRING_SIMPLE_DATE_FORMAT), - null); + DateTimeFormatSpec.forSimpleDateFormat(STRING_SIMPLE_DATE_FORMAT), null); assertEquals(segmentNameGenerator.toString(), "NormalizedDateSegmentNameGenerator: segmentNamePrefix=myTable, appendPushType=true, outputSDF=yyyy-MM-dd, " + "inputSDF=yyyy-MM-dd"); @@ -224,26 +219,24 @@ public class NormalizedDateSegmentNameGeneratorTest { @Test public void testMalFormedTableNameAndSegmentNamePrefixPostfix() { + DateTimeFormatSpec dateTimeFormatSpec = DateTimeFormatSpec.forSimpleDateFormat(STRING_SLASH_DATE_FORMAT); try { new NormalizedDateSegmentNameGenerator(MALFORMED_TABLE_NAME, null, false, APPEND_PUSH_TYPE, DAILY_PUSH_FREQUENCY, - new DateTimeFormatSpec(1, TimeUnit.DAYS.toString(), SIMPLE_DATE_TIME_FORMAT, STRING_SLASH_DATE_FORMAT), null); + dateTimeFormatSpec, null); Assert.fail(); } catch (IllegalArgumentException e) { // Expected } try { new NormalizedDateSegmentNameGenerator(TABLE_NAME, MALFORMED_SEGMENT_NAME_PREFIX, false, APPEND_PUSH_TYPE, - DAILY_PUSH_FREQUENCY, - new DateTimeFormatSpec(1, TimeUnit.DAYS.toString(), SIMPLE_DATE_TIME_FORMAT, STRING_SLASH_DATE_FORMAT), null); + DAILY_PUSH_FREQUENCY, dateTimeFormatSpec, null); 
Assert.fail(); } catch (IllegalArgumentException e) { // Expected } try { new NormalizedDateSegmentNameGenerator(TABLE_NAME, SEGMENT_NAME_PREFIX, false, APPEND_PUSH_TYPE, - DAILY_PUSH_FREQUENCY, - new DateTimeFormatSpec(1, TimeUnit.DAYS.toString(), SIMPLE_DATE_TIME_FORMAT, STRING_SLASH_DATE_FORMAT), - MALFORMED_SEGMENT_NAME_POSTFIX); + DAILY_PUSH_FREQUENCY, dateTimeFormatSpec, MALFORMED_SEGMENT_NAME_POSTFIX); Assert.fail(); } catch (IllegalArgumentException e) { // Expected @@ -255,8 +248,7 @@ public class NormalizedDateSegmentNameGeneratorTest { public void testMalFormedDateFormatAndTimeValue() { SegmentNameGenerator segmentNameGenerator = new NormalizedDateSegmentNameGenerator(TABLE_NAME, null, false, APPEND_PUSH_TYPE, DAILY_PUSH_FREQUENCY, - new DateTimeFormatSpec(1, TimeUnit.DAYS.toString(), SIMPLE_DATE_TIME_FORMAT, STRING_SLASH_DATE_FORMAT), - null); + DateTimeFormatSpec.forSimpleDateFormat(STRING_SLASH_DATE_FORMAT), null); assertEquals(segmentNameGenerator.toString(), "NormalizedDateSegmentNameGenerator: segmentNamePrefix=myTable, " + "appendPushType=true, outputSDF=yyyy-MM-dd, inputSDF=yyyy/MM/dd"); assertEquals(segmentNameGenerator.generateSegmentName(INVALID_SEQUENCE_ID, "1970/01/02", "1970/01/04"), @@ -270,7 +262,7 @@ public class NormalizedDateSegmentNameGeneratorTest { public void testHourlyPushFrequency() { SegmentNameGenerator segmentNameGenerator = new NormalizedDateSegmentNameGenerator(TABLE_NAME, null, false, APPEND_PUSH_TYPE, HOURLY_PUSH_FREQUENCY, - new DateTimeFormatSpec(1, TimeUnit.DAYS.toString(), EPOCH_TIME_FORMAT), null); + DateTimeFormatSpec.forEpoch(TimeUnit.DAYS.name()), null); assertEquals(segmentNameGenerator.toString(), "NormalizedDateSegmentNameGenerator: segmentNamePrefix=myTable, appendPushType=true, outputSDF=yyyy-MM-dd-HH," + " inputTimeUnit=DAYS"); diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFieldSpec.java b/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFieldSpec.java index 
400cd7d433..ddb04f70ca 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFieldSpec.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFieldSpec.java @@ -31,6 +31,8 @@ import org.apache.pinot.spi.utils.EqualityUtils; public final class DateTimeFieldSpec extends FieldSpec { private String _format; private String _granularity; + private transient DateTimeFormatSpec _formatSpec; + private transient DateTimeGranularitySpec _granularitySpec; public enum TimeFormat { EPOCH, TIMESTAMP, SIMPLE_DATE_FORMAT @@ -70,17 +72,11 @@ public final class DateTimeFieldSpec extends FieldSpec { */ public DateTimeFieldSpec(String name, DataType dataType, String format, String granularity) { super(name, dataType, true); - Preconditions.checkNotNull(name); - Preconditions.checkNotNull(dataType); - if (Character.isDigit(format.charAt(0))) { - DateTimeFormatSpec.validateFormat(format); - } else { - DateTimeFormatSpec.validatePipeFormat(format); - } - DateTimeGranularitySpec.validateGranularity(granularity); _format = format; _granularity = granularity; + _formatSpec = new DateTimeFormatSpec(format); + _granularitySpec = new DateTimeGranularitySpec(granularity); } /** @@ -116,6 +112,16 @@ public final class DateTimeFieldSpec extends FieldSpec { _format = format; } + @JsonIgnore + public DateTimeFormatSpec getFormatSpec() { + DateTimeFormatSpec formatSpec = _formatSpec; + if (formatSpec == null) { + formatSpec = new DateTimeFormatSpec(_format); + _formatSpec = formatSpec; + } + return formatSpec; + } + public String getGranularity() { return _granularity; } @@ -125,6 +131,16 @@ public final class DateTimeFieldSpec extends FieldSpec { _granularity = granularity; } + @JsonIgnore + public DateTimeGranularitySpec getGranularitySpec() { + DateTimeGranularitySpec granularitySpec = _granularitySpec; + if (granularitySpec == null) { + granularitySpec = new DateTimeGranularitySpec(_granularity); + _granularitySpec = granularitySpec; + } + return granularitySpec; + 
} + @Override public ObjectNode toJsonObject() { ObjectNode jsonObject = super.toJsonObject(); diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatPatternSpec.java b/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatPatternSpec.java index f237270e2a..a6d607d151 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatPatternSpec.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatPatternSpec.java @@ -20,66 +20,95 @@ package org.apache.pinot.spi.data; import com.google.common.base.Preconditions; import java.util.Locale; +import java.util.Objects; import java.util.TimeZone; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.annotation.Nullable; -import org.apache.pinot.spi.utils.EqualityUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.pinot.spi.data.DateTimeFieldSpec.TimeFormat; import org.joda.time.DateTimeZone; import org.joda.time.format.DateTimeFormat; import org.joda.time.format.DateTimeFormatter; public class DateTimeFormatPatternSpec { + public static final DateTimeZone DEFAULT_DATE_TIME_ZONE = DateTimeZone.UTC; + public static final Locale DEFAULT_LOCALE = Locale.ENGLISH; + + public static final DateTimeFormatPatternSpec EPOCH = new DateTimeFormatPatternSpec(TimeFormat.EPOCH); + public static final DateTimeFormatPatternSpec TIMESTAMP = new DateTimeFormatPatternSpec(TimeFormat.TIMESTAMP); /** eg: yyyyMMdd tz(CST) or yyyyMMdd HH tz(GMT+0700) or yyyyMMddHH tz(America/Chicago) **/ private static final Pattern SDF_PATTERN_WITH_TIMEZONE = Pattern.compile("^(.+)( tz[ ]*\\((.+)\\))[ ]*"); private static final int SDF_PATTERN_GROUP = 1; - private static final int TIMEZONE_GROUP = 3; - public static final DateTimeZone DEFAULT_DATETIMEZONE = DateTimeZone.UTC; - public static final Locale DEFAULT_LOCALE = Locale.ENGLISH; + private static final int TIME_ZONE_GROUP = 3; - private final DateTimeFieldSpec.TimeFormat _timeFormat; - private String 
_sdfPattern = null; - private DateTimeZone _dateTimeZone = DEFAULT_DATETIMEZONE; - private transient DateTimeFormatter _dateTimeFormatter; + private final TimeFormat _timeFormat; + private final String _sdfPattern; + private final DateTimeZone _dateTimeZone; + private transient final DateTimeFormatter _dateTimeFormatter; - public DateTimeFormatPatternSpec(String timeFormat) { + public DateTimeFormatPatternSpec(TimeFormat timeFormat) { this(timeFormat, null); } - public DateTimeFormatPatternSpec(String timeFormat, @Nullable String sdfPatternWithTz) { - _timeFormat = DateTimeFieldSpec.TimeFormat.valueOf(timeFormat); - if (_timeFormat.equals(DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT)) { - Preconditions.checkNotNull(sdfPatternWithTz, String.format( - "Must provide simple date format pattern with time format type: %s", timeFormat)); + public DateTimeFormatPatternSpec(TimeFormat timeFormat, @Nullable String sdfPatternWithTz) { + _timeFormat = timeFormat; + if (timeFormat == TimeFormat.SIMPLE_DATE_FORMAT) { + Preconditions.checkArgument(StringUtils.isNotEmpty(sdfPatternWithTz), "Must provide SIMPLE_DATE_FORMAT pattern"); Matcher m = SDF_PATTERN_WITH_TIMEZONE.matcher(sdfPatternWithTz); if (m.find()) { _sdfPattern = m.group(SDF_PATTERN_GROUP).trim(); - String timezoneString = m.group(TIMEZONE_GROUP).trim(); - _dateTimeZone = DateTimeZone.forTimeZone(TimeZone.getTimeZone(timezoneString)); + String timeZone = m.group(TIME_ZONE_GROUP).trim(); + try { + _dateTimeZone = DateTimeZone.forTimeZone(TimeZone.getTimeZone(timeZone)); + } catch (Exception e) { + throw new IllegalArgumentException("Invalid time zone: " + timeZone); + } } else { _sdfPattern = sdfPatternWithTz; + _dateTimeZone = DEFAULT_DATE_TIME_ZONE; + } + try { + _dateTimeFormatter = DateTimeFormat.forPattern(_sdfPattern).withZone(_dateTimeZone).withLocale(DEFAULT_LOCALE); + } catch (Exception e) { + throw new IllegalArgumentException("Invalid SIMPLE_DATE_FORMAT pattern: " + _sdfPattern); } - _dateTimeFormatter = 
DateTimeFormat.forPattern(_sdfPattern).withZone(_dateTimeZone).withLocale(DEFAULT_LOCALE); + } else { + _sdfPattern = null; + _dateTimeZone = DEFAULT_DATE_TIME_ZONE; + _dateTimeFormatter = null; } } - public DateTimeFormatPatternSpec(DateTimeFieldSpec.TimeFormat timeFormat, @Nullable String sdfPattern, - @Nullable String timeZone) { + public DateTimeFormatPatternSpec(TimeFormat timeFormat, @Nullable String sdfPattern, @Nullable String timeZone) { _timeFormat = timeFormat; - if (_timeFormat.equals(DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT)) { + if (_timeFormat == TimeFormat.SIMPLE_DATE_FORMAT) { + Preconditions.checkArgument(StringUtils.isNotEmpty(sdfPattern), "Must provide SIMPLE_DATE_FORMAT pattern"); + _sdfPattern = sdfPattern; if (timeZone != null) { - _dateTimeZone = DateTimeZone.forTimeZone(TimeZone.getTimeZone(timeZone)); + try { + _dateTimeZone = DateTimeZone.forTimeZone(TimeZone.getTimeZone(timeZone)); + } catch (Exception e) { + throw new IllegalArgumentException("Invalid time zone: " + timeZone); + } + } else { + _dateTimeZone = DEFAULT_DATE_TIME_ZONE; } - _dateTimeFormatter = DateTimeFormat.forPattern(sdfPattern). - withZone(_dateTimeZone). 
- withLocale(DEFAULT_LOCALE); - _sdfPattern = sdfPattern; + try { + _dateTimeFormatter = DateTimeFormat.forPattern(_sdfPattern).withZone(_dateTimeZone).withLocale(DEFAULT_LOCALE); + } catch (Exception e) { + throw new IllegalArgumentException("Invalid SIMPLE_DATE_FORMAT pattern: " + _sdfPattern); + } + } else { + _sdfPattern = null; + _dateTimeZone = DEFAULT_DATE_TIME_ZONE; + _dateTimeFormatter = null; } } - public DateTimeFieldSpec.TimeFormat getTimeFormat() { + public TimeFormat getTimeFormat() { return _timeFormat; } @@ -95,54 +124,27 @@ public class DateTimeFormatPatternSpec { return _dateTimeFormatter; } - /** - * Validates the sdf pattern - */ - public static void validateFormat(String sdfPatternWithTz) { - try { - String sdfPattern; - Matcher m = SDF_PATTERN_WITH_TIMEZONE.matcher(sdfPatternWithTz); - if (m.find()) { - sdfPattern = m.group(SDF_PATTERN_GROUP).trim(); - String timezoneString = m.group(TIMEZONE_GROUP).trim(); - DateTimeZone dateTimeZone = DateTimeZone.forTimeZone(TimeZone.getTimeZone(timezoneString)); - DateTimeFormat.forPattern(sdfPattern).withZone(dateTimeZone); - } else { - sdfPattern = sdfPatternWithTz; - DateTimeFormat.forPattern(sdfPattern); - } - } catch (Exception e) { - throw new IllegalStateException("Unsupported simple date format pattern or time zone: " + sdfPatternWithTz); - } - } - @Override public boolean equals(Object o) { - if (EqualityUtils.isSameReference(this, o)) { + if (this == o) { return true; } - - if (EqualityUtils.isNullOrNotSameClass(this, o)) { + if (o == null || getClass() != o.getClass()) { return false; } - DateTimeFormatPatternSpec that = (DateTimeFormatPatternSpec) o; - - return EqualityUtils.isEqual(_timeFormat, that._timeFormat) && EqualityUtils.isEqual(_sdfPattern, that._sdfPattern) - && EqualityUtils.isEqual(_dateTimeZone, that._dateTimeZone); + return _timeFormat == that._timeFormat && Objects.equals(_sdfPattern, that._sdfPattern) && _dateTimeZone.equals( + that._dateTimeZone); } @Override public int 
hashCode() { - int result = EqualityUtils.hashCodeOf(_timeFormat); - result = EqualityUtils.hashCodeOf(result, _sdfPattern); - result = EqualityUtils.hashCodeOf(result, _dateTimeZone); - return result; + return Objects.hash(_timeFormat, _sdfPattern, _dateTimeZone); } @Override public String toString() { return "DateTimeFormatPatternSpec{" + "_timeFormat=" + _timeFormat + ", _sdfPattern='" + _sdfPattern + '\'' - + ", _dateTimeZone=" + _dateTimeZone + ", _dateTimeFormatter=" + _dateTimeFormatter + '}'; + + ", _dateTimeZone=" + _dateTimeZone + '}'; } } diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatSpec.java b/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatSpec.java index 0a1de16214..c99c8a370e 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatSpec.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatSpec.java @@ -18,13 +18,14 @@ */ package org.apache.pinot.spi.data; -import com.google.common.base.Joiner; import com.google.common.base.Preconditions; import java.sql.Timestamp; +import java.util.Objects; import java.util.concurrent.TimeUnit; +import javax.annotation.Nullable; import org.apache.commons.lang3.StringUtils; import org.apache.pinot.spi.data.DateTimeFieldSpec.TimeFormat; -import org.apache.pinot.spi.utils.EqualityUtils; +import org.apache.pinot.spi.utils.StringUtil; import org.apache.pinot.spi.utils.TimestampUtils; import org.joda.time.DateTimeZone; import org.joda.time.format.DateTimeFormatter; @@ -35,113 +36,201 @@ import org.joda.time.format.DateTimeFormatter; */ public class DateTimeFormatSpec { - public static final String NUMBER_REGEX = "[1-9][0-9]*"; - public static final String COLON_SEPARATOR = ":"; - public static final String PIPE_SEPARATOR = "|"; - - /* DateTimeFieldSpec format is of format size:timeUnit:timeformat:pattern tz(timezone) - * tz(timezone) is optional. 
If not specified, UTC timezone is used */ - public static final int FORMAT_SIZE_POSITION = 0; - public static final int FORMAT_UNIT_POSITION = 1; - public static final int FORMAT_TIMEFORMAT_POSITION = 2; - public static final int FORMAT_PATTERN_POSITION = 3; - public static final int MIN_FORMAT_TOKENS = 3; - public static final int MAX_FORMAT_TOKENS = 4; - - public static final int FORMAT_TIMEFORMAT_POSITION_PIPE = 0; - public static final int MIN_FORMAT_TOKENS_PIPE = 1; - public static final int MAX_FORMAT_TOKENS_PIPE = 3; - - // Applicable for SIMPLE_DATE_FORMAT|<timeFormat>(|<timezone>) - public static final int SDF_PATTERN_POSITION = 1; - public static final int SDF_TIMEZONE_POSITION = 2; - - // Applicable for EPOCH|<timeUnit>(|<size>) - public static final int EPOCH_UNIT_POSITION = 1; - public static final int EPOCH_SIZE_POSITION = 2; - - private final String _format; + // Colon format: 'size:timeUnit:timeFormat:pattern tz(timeZone)' + // 'pattern' applies to the 'SIMPLE_DATE_FORMAT' time format + // 'tz(timeZone)' is optional in the 'pattern'. If not specified, UTC timezone is used. 
+ private static final char COLON_SEPARATOR = ':'; + private static final int COLON_FORMAT_SIZE_POSITION = 0; + private static final int COLON_FORMAT_TIME_UNIT_POSITION = 1; + private static final int COLON_FORMAT_TIME_FORMAT_POSITION = 2; + private static final int COLON_FORMAT_PATTERN_POSITION = 3; + private static final int COLON_FORMAT_MIN_TOKENS = 3; + private static final int COLON_FORMAT_MAX_TOKENS = 4; + + // Pipe format: + // - EPOCH|timeUnit(|size) + // - SIMPLE_DATE_FORMAT|pattern(|timeZone) + // - TIMESTAMP + private static final char PIPE_SEPARATOR = '|'; + private static final int PIPE_FORMAT_TIME_FORMAT_POSITION = 0; + private static final int PIPE_FORMAT_TIME_UNIT_POSITION = 1; + private static final int PIPE_FORMAT_SIZE_POSITION = 2; + private static final int PIPE_FORMAT_PATTERN_POSITION = 1; + private static final int PIPE_FORMAT_TIME_ZONE_POSITION = 2; + private static final int PIPE_FORMAT_MIN_TOKENS = 1; + private static final int PIPE_FORMAT_MAX_TOKENS = 3; + + private static final DateTimeFormatSpec TIMESTAMP = + new DateTimeFormatSpec(1, DateTimeFormatUnitSpec.MILLISECONDS, DateTimeFormatPatternSpec.TIMESTAMP); + + // For EPOCH private final int _size; private final DateTimeFormatUnitSpec _unitSpec; + // For SIMPLE_DATE_FORMAT private final DateTimeFormatPatternSpec _patternSpec; public DateTimeFormatSpec(String format) { - _format = format; - if (Character.isDigit(_format.charAt(0))) { - String[] formatTokens = validateFormat(format); - if (formatTokens.length == MAX_FORMAT_TOKENS) { - _patternSpec = new DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION], - formatTokens[FORMAT_PATTERN_POSITION]); - } else { - _patternSpec = new DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION]); + Preconditions.checkArgument(StringUtils.isNotEmpty(format), "Must provide format"); + + if (Character.isDigit(format.charAt(0))) { + // Colon format + + String[] tokens = StringUtil.split(format, COLON_SEPARATOR, 
COLON_FORMAT_MAX_TOKENS); + Preconditions.checkArgument(tokens.length >= COLON_FORMAT_MIN_TOKENS && tokens.length <= COLON_FORMAT_MAX_TOKENS, + "Invalid format: %s, must be of format 'size:timeUnit:timeFormat(:patternWithTz)'", format); + + TimeFormat timeFormat; + try { + timeFormat = TimeFormat.valueOf(tokens[COLON_FORMAT_TIME_FORMAT_POSITION]); + } catch (Exception e) { + throw new IllegalArgumentException( + String.format("Invalid time format: %s in format: %s", tokens[COLON_FORMAT_TIME_FORMAT_POSITION], format)); } - if (_patternSpec.getTimeFormat() == TimeFormat.TIMESTAMP) { - // TIMESTAMP type stores millis since epoch - _size = 1; - _unitSpec = new DateTimeFormatUnitSpec("MILLISECONDS"); - } else { - _size = Integer.parseInt(formatTokens[FORMAT_SIZE_POSITION]); - _unitSpec = new DateTimeFormatUnitSpec(formatTokens[FORMAT_UNIT_POSITION]); + + switch (timeFormat) { + case EPOCH: + String sizeStr = tokens[COLON_FORMAT_SIZE_POSITION]; + try { + _size = Integer.parseInt(sizeStr); + } catch (Exception e) { + throw new IllegalArgumentException(String.format("Invalid size: %s in format: %s", sizeStr, format)); + } + Preconditions.checkArgument(_size > 0, "Invalid size: %s in format: %s, must be positive", _size, format); + String timeUnitStr = tokens[COLON_FORMAT_TIME_UNIT_POSITION]; + try { + _unitSpec = new DateTimeFormatUnitSpec(timeUnitStr); + } catch (Exception e) { + throw new IllegalArgumentException( + String.format("Invalid time unit: %s in format: %s", timeUnitStr, format)); + } + _patternSpec = DateTimeFormatPatternSpec.EPOCH; + break; + case TIMESTAMP: + _size = 1; + _unitSpec = DateTimeFormatUnitSpec.MILLISECONDS; + _patternSpec = DateTimeFormatPatternSpec.TIMESTAMP; + break; + case SIMPLE_DATE_FORMAT: + _size = 1; + _unitSpec = DateTimeFormatUnitSpec.MILLISECONDS; + Preconditions.checkArgument(tokens.length == COLON_FORMAT_MAX_TOKENS, + "Invalid SIMPLE_DATE_FORMAT format: %s, must be of format " + + 
"'<size>:<timeUnit>:SIMPLE_DATE_FORMAT:<patternWithTz>'", format); + String patternStr = tokens[COLON_FORMAT_PATTERN_POSITION]; + try { + _patternSpec = new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, patternStr); + } catch (Exception e) { + throw new IllegalArgumentException( + String.format("Invalid SIMPLE_DATE_FORMAT pattern: %s in format: %s", patternStr, format)); + } + break; + default: + throw new IllegalStateException("Unsupported time format: " + timeFormat); } } else { - String[] formatTokens = validatePipeFormat(format); - if (formatTokens[FORMAT_TIMEFORMAT_POSITION_PIPE].equals(TimeFormat.EPOCH.toString())) { - _patternSpec = new DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION_PIPE]); - _unitSpec = new DateTimeFormatUnitSpec(formatTokens[EPOCH_UNIT_POSITION]); - if (formatTokens.length == MAX_FORMAT_TOKENS_PIPE) { - _size = Integer.parseInt(formatTokens[EPOCH_SIZE_POSITION]); - } else { + // Pipe format + + String[] tokens = StringUtil.split(format, PIPE_SEPARATOR, PIPE_FORMAT_MAX_TOKENS); + Preconditions.checkArgument(tokens.length >= PIPE_FORMAT_MIN_TOKENS && tokens.length <= PIPE_FORMAT_MAX_TOKENS, + "Invalid format: %s, must be of format 'EPOCH|<timeUnit>(|<size>)' or " + + "'SIMPLE_DATE_FORMAT|<pattern>(|<timeZone>)' or 'TIMESTAMP'", format); + + TimeFormat timeFormat; + try { + timeFormat = TimeFormat.valueOf(tokens[PIPE_FORMAT_TIME_FORMAT_POSITION]); + } catch (Exception e) { + throw new IllegalArgumentException( + String.format("Invalid time format: %s in format: %s", tokens[PIPE_FORMAT_TIME_FORMAT_POSITION], format)); + } + + switch (timeFormat) { + case EPOCH: + if (tokens.length > PIPE_FORMAT_SIZE_POSITION) { + try { + _size = Integer.parseInt(tokens[PIPE_FORMAT_SIZE_POSITION]); + } catch (Exception e) { + throw new IllegalArgumentException( + String.format("Invalid size: %s in format: %s", tokens[COLON_FORMAT_SIZE_POSITION], format)); + } + Preconditions.checkArgument(_size > 0, "Invalid size: %s in format: %s, 
must be positive", _size, format); + } else { + _size = 1; + } + Preconditions.checkArgument(tokens.length > PIPE_FORMAT_TIME_UNIT_POSITION, + "Invalid EPOCH format: %s, must be of format 'EPOCH|<timeUnit>(|<size>)'", format); + try { + _unitSpec = new DateTimeFormatUnitSpec(tokens[PIPE_FORMAT_TIME_UNIT_POSITION]); + } catch (Exception e) { + throw new IllegalArgumentException( + String.format("Invalid time unit: %s in format: %s", tokens[PIPE_FORMAT_TIME_UNIT_POSITION], format)); + } + _patternSpec = DateTimeFormatPatternSpec.EPOCH; + break; + case TIMESTAMP: _size = 1; - } - } else if (formatTokens[FORMAT_TIMEFORMAT_POSITION_PIPE].equals(TimeFormat.SIMPLE_DATE_FORMAT.toString())) { - if (formatTokens.length == MAX_FORMAT_TOKENS_PIPE) { - _patternSpec = new DateTimeFormatPatternSpec(TimeFormat.valueOf( - formatTokens[FORMAT_TIMEFORMAT_POSITION_PIPE]), - formatTokens[SDF_PATTERN_POSITION], - formatTokens[SDF_TIMEZONE_POSITION]); - } else { - _patternSpec = new DateTimeFormatPatternSpec(TimeFormat.valueOf( - formatTokens[FORMAT_TIMEFORMAT_POSITION_PIPE]), - formatTokens[SDF_PATTERN_POSITION], null); - } - _unitSpec = new DateTimeFormatUnitSpec(TimeUnit.DAYS.toString()); - _size = 1; - } else { - // Applicable for TIMESTAMP format - _patternSpec = new DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION_PIPE]); - _unitSpec = new DateTimeFormatUnitSpec(TimeUnit.MILLISECONDS.toString()); - _size = 1; + _unitSpec = DateTimeFormatUnitSpec.MILLISECONDS; + _patternSpec = DateTimeFormatPatternSpec.TIMESTAMP; + break; + case SIMPLE_DATE_FORMAT: + _size = 1; + _unitSpec = DateTimeFormatUnitSpec.MILLISECONDS; + Preconditions.checkArgument(tokens.length > PIPE_FORMAT_PATTERN_POSITION, + "Invalid SIMPLE_DATE_FORMAT format: %s, must be of format 'SIMPLE_DATE_FORMAT|<pattern>(|<timeZone>)'", + format); + if (tokens.length > PIPE_FORMAT_TIME_ZONE_POSITION) { + try { + _patternSpec = + new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, 
tokens[PIPE_FORMAT_PATTERN_POSITION], + tokens[PIPE_FORMAT_TIME_ZONE_POSITION]); + } catch (Exception e) { + throw new IllegalArgumentException( + String.format("Invalid SIMPLE_DATE_FORMAT pattern: %s, time zone: %s in format: %s", + tokens[PIPE_FORMAT_PATTERN_POSITION], tokens[PIPE_FORMAT_TIME_ZONE_POSITION], format)); + } + } else { + try { + _patternSpec = + new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, tokens[PIPE_FORMAT_PATTERN_POSITION]); + } catch (Exception e) { + throw new IllegalArgumentException(String.format("Invalid SIMPLE_DATE_FORMAT pattern: %s in format: %s", + tokens[PIPE_FORMAT_PATTERN_POSITION], format)); + } + } + break; + default: + throw new IllegalStateException("Unsupported time format: " + timeFormat); } } } - /** - * Constructs a dateTimeSpec format, given the components of a format - */ - public DateTimeFormatSpec(int columnSize, String columnUnit, String columnTimeFormat) { - _format = Joiner.on(COLON_SEPARATOR).join(columnSize, columnUnit, columnTimeFormat); - validateFormat(_format); + private DateTimeFormatSpec(int size, DateTimeFormatUnitSpec unitSpec, DateTimeFormatPatternSpec patternSpec) { + _size = size; + _unitSpec = unitSpec; + _patternSpec = patternSpec; + } - _size = columnSize; - _unitSpec = new DateTimeFormatUnitSpec(columnUnit); - _patternSpec = new DateTimeFormatPatternSpec(columnTimeFormat); + public static DateTimeFormatSpec forTimestamp() { + return TIMESTAMP; } - /** - * Constructs a dateTimeSpec format, given the components of a format - * @param sdfPattern and tz - */ - public DateTimeFormatSpec(int columnSize, String columnUnit, String columnTimeFormat, String sdfPattern) { - _format = Joiner.on(COLON_SEPARATOR).join(columnSize, columnUnit, columnTimeFormat, sdfPattern); - validateFormat(_format); + public static DateTimeFormatSpec forEpoch(String timeUnit) { + return forEpoch(1, timeUnit); + } + + public static DateTimeFormatSpec forEpoch(int size, String timeUnit) { + 
Preconditions.checkArgument(size > 0, "Invalid size: {}, must be positive", size); + Preconditions.checkArgument(timeUnit != null, "Must provide time unit"); + return new DateTimeFormatSpec(size, new DateTimeFormatUnitSpec(timeUnit), DateTimeFormatPatternSpec.EPOCH); + } - _size = columnSize; - _unitSpec = new DateTimeFormatUnitSpec(columnUnit); - _patternSpec = new DateTimeFormatPatternSpec(columnTimeFormat, sdfPattern); + public static DateTimeFormatSpec forSimpleDateFormat(String patternWithTz) { + return new DateTimeFormatSpec(1, DateTimeFormatUnitSpec.MILLISECONDS, + new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, patternWithTz)); } - public String getFormat() { - return _format; + public static DateTimeFormatSpec forSimpleDateFormat(String pattern, @Nullable String timeZone) { + return new DateTimeFormatSpec(1, DateTimeFormatUnitSpec.MILLISECONDS, + new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, pattern, timeZone)); } public int getColumnSize() { @@ -217,91 +306,26 @@ public class DateTimeFormatSpec { } } - /** - * Validates the format string in the dateTimeFieldSpec - */ - public static String[] validateFormat(String format) { - Preconditions.checkNotNull(format, "Format string in dateTimeFieldSpec must not be null"); - String[] formatTokens = StringUtils.split(format, COLON_SEPARATOR, MAX_FORMAT_TOKENS); - Preconditions.checkState(formatTokens.length >= MIN_FORMAT_TOKENS && formatTokens.length <= MAX_FORMAT_TOKENS, - "Incorrect format: %s. Must be of format 'size:timeunit:timeformat(:pattern)'", format); - Preconditions.checkState(formatTokens[FORMAT_SIZE_POSITION].matches(NUMBER_REGEX), - "Incorrect format size: %s in format: %s. 
Must be of format '[0-9]+:<TimeUnit>:<TimeFormat>(:pattern)'", - formatTokens[FORMAT_SIZE_POSITION], format); - - DateTimeFormatUnitSpec.validateUnitSpec(formatTokens[FORMAT_UNIT_POSITION]); - - if (formatTokens.length == MIN_FORMAT_TOKENS) { - Preconditions.checkState(formatTokens[FORMAT_TIMEFORMAT_POSITION].equals(TimeFormat.EPOCH.toString()) - || formatTokens[FORMAT_TIMEFORMAT_POSITION].equals(TimeFormat.TIMESTAMP.toString()), - "Incorrect format type: %s in format: %s. Must be of '[0-9]+:<TimeUnit>:EPOCH|TIMESTAMP'", - formatTokens[FORMAT_TIMEFORMAT_POSITION], format); - } else { - Preconditions - .checkState(formatTokens[FORMAT_TIMEFORMAT_POSITION].equals(TimeFormat.SIMPLE_DATE_FORMAT.toString()), - "Incorrect format type: %s in format: %s. Must be of '[0-9]+:<TimeUnit>:SIMPLE_DATE_FORMAT:pattern'", - formatTokens[FORMAT_TIMEFORMAT_POSITION], format); - DateTimeFormatPatternSpec.validateFormat(formatTokens[FORMAT_PATTERN_POSITION]); - } - return formatTokens; - } - - /** - * Validates the pipe format string in the dateTimeFieldSpec - */ - public static String[] validatePipeFormat(String format) { - Preconditions.checkNotNull(format, "Format string in dateTimeFieldSpec must not be null"); - String[] formatTokens = StringUtils.split(format, PIPE_SEPARATOR, MAX_FORMAT_TOKENS_PIPE); - Preconditions.checkState(formatTokens.length >= MIN_FORMAT_TOKENS_PIPE - && formatTokens.length <= MAX_FORMAT_TOKENS_PIPE, - "Incorrect format: %s. Must be of the format 'EPOCH|<timeUnit>(|<size>)'" - + " or 'SDF|<timeFormat>(|<timezone>)' or 'TIMESTAMP'"); - if (formatTokens.length == MIN_FORMAT_TOKENS_PIPE) { - Preconditions.checkState(formatTokens[FORMAT_TIMEFORMAT_POSITION_PIPE].equals(TimeFormat.TIMESTAMP.toString()), - "Incorrect format type: %s. 
Must be of TIMESTAMP", formatTokens[FORMAT_TIMEFORMAT_POSITION_PIPE]); - } else { - Preconditions.checkState(formatTokens[FORMAT_SIZE_POSITION].equals(TimeFormat.EPOCH.toString()) - || formatTokens[FORMAT_SIZE_POSITION].equals(TimeFormat.SIMPLE_DATE_FORMAT.toString()), - "Incorrect format %s. Must be of 'EPOCH|<timeUnit>(|<size>)' or" + "'SDF|<timeFormat>(|<timezone>)'"); - - if (formatTokens.length == MAX_FORMAT_TOKENS_PIPE - && formatTokens[FORMAT_SIZE_POSITION].equals(TimeFormat.EPOCH.toString())) { - Preconditions.checkState(formatTokens[EPOCH_SIZE_POSITION].matches(NUMBER_REGEX), - "Incorrect format size: %s in format: %s. Must be of format 'EPOCH|<timeUnit>|[0-9]+'", - formatTokens[EPOCH_SIZE_POSITION], format); - } - } - return formatTokens; - } - @Override public boolean equals(Object o) { - if (EqualityUtils.isSameReference(this, o)) { + if (this == o) { return true; } - - if (EqualityUtils.isNullOrNotSameClass(this, o)) { + if (o == null || getClass() != o.getClass()) { return false; } - DateTimeFormatSpec that = (DateTimeFormatSpec) o; - - return EqualityUtils.isEqual(_size, that._size) && EqualityUtils.isEqual(_format, that._format) && EqualityUtils - .isEqual(_unitSpec, that._unitSpec) && EqualityUtils.isEqual(_patternSpec, that._patternSpec); + return _size == that._size && _unitSpec.equals(that._unitSpec) && _patternSpec.equals(that._patternSpec); } @Override public int hashCode() { - int result = EqualityUtils.hashCodeOf(_format); - result = EqualityUtils.hashCodeOf(result, _size); - result = EqualityUtils.hashCodeOf(result, _unitSpec); - result = EqualityUtils.hashCodeOf(result, _patternSpec); - return result; + return Objects.hash(_size, _unitSpec, _patternSpec); } @Override public String toString() { - return "DateTimeFormatSpec{" + "_format='" + _format + '\'' + ", _size=" + _size + ", _unitSpec=" + _unitSpec - + ", _patternSpec=" + _patternSpec + '}'; + return "DateTimeFormatSpec{" + "_size=" + _size + ", _unitSpec=" + _unitSpec + ", 
_patternSpec=" + _patternSpec + + '}'; } } diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatUnitSpec.java b/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatUnitSpec.java index 56c11e057f..9052f2abc7 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatUnitSpec.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatUnitSpec.java @@ -19,9 +19,9 @@ package org.apache.pinot.spi.data; import com.google.common.base.Preconditions; +import java.util.Objects; import java.util.concurrent.TimeUnit; import org.apache.commons.lang3.EnumUtils; -import org.apache.pinot.spi.utils.EqualityUtils; import org.joda.time.DurationFieldType; import org.joda.time.chrono.ISOChronology; @@ -98,17 +98,24 @@ public class DateTimeFormatUnitSpec { public abstract long fromMillis(long millisSinceEpoch); } - private TimeUnit _timeUnit = null; - private DateTimeTransformUnit _dateTimeTransformUnit = null; + public static final DateTimeFormatUnitSpec MILLISECONDS = new DateTimeFormatUnitSpec(TimeUnit.MILLISECONDS.name()); + + private final TimeUnit _timeUnit; + private final DateTimeTransformUnit _dateTimeTransformUnit; public DateTimeFormatUnitSpec(String unit) { - validateUnitSpec(unit); if (EnumUtils.isValidEnum(TimeUnit.class, unit)) { _timeUnit = TimeUnit.valueOf(unit); + } else { + _timeUnit = null; } if (EnumUtils.isValidEnum(DateTimeTransformUnit.class, unit)) { _dateTimeTransformUnit = DateTimeTransformUnit.valueOf(unit); + } else { + _dateTimeTransformUnit = null; } + Preconditions.checkArgument(_timeUnit != null || _dateTimeTransformUnit != null, + "Unit must belong to enum TimeUnit or DateTimeTransformUnit, got: %s", unit); } public TimeUnit getTimeUnit() { @@ -119,32 +126,20 @@ public class DateTimeFormatUnitSpec { return _dateTimeTransformUnit; } - public static void validateUnitSpec(String unit) { - Preconditions.checkState( - EnumUtils.isValidEnum(TimeUnit.class, unit) || 
EnumUtils.isValidEnum(DateTimeTransformUnit.class, unit), - "Unit: %s must belong to enum TimeUnit or DateTimeTransformUnit", unit); - } - @Override public boolean equals(Object o) { - if (EqualityUtils.isSameReference(this, o)) { + if (this == o) { return true; } - - if (EqualityUtils.isNullOrNotSameClass(this, o)) { + if (o == null || getClass() != o.getClass()) { return false; } - DateTimeFormatUnitSpec that = (DateTimeFormatUnitSpec) o; - - return EqualityUtils.isEqual(_timeUnit, that._timeUnit) && EqualityUtils - .isEqual(_dateTimeTransformUnit, that._dateTimeTransformUnit); + return _timeUnit == that._timeUnit && _dateTimeTransformUnit == that._dateTimeTransformUnit; } @Override public int hashCode() { - int result = EqualityUtils.hashCodeOf(_timeUnit); - result = EqualityUtils.hashCodeOf(result, _dateTimeTransformUnit); - return result; + return Objects.hash(_timeUnit, _dateTimeTransformUnit); } } diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeGranularitySpec.java b/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeGranularitySpec.java index a4077d3a26..c8cd10eb15 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeGranularitySpec.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeGranularitySpec.java @@ -18,28 +18,23 @@ */ package org.apache.pinot.spi.data; -import com.google.common.base.Joiner; import com.google.common.base.Preconditions; +import java.util.Objects; import java.util.concurrent.TimeUnit; -import org.apache.commons.lang3.EnumUtils; -import org.apache.pinot.spi.utils.EqualityUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.pinot.spi.utils.StringUtil; /** * Class to represent granularity from {@link DateTimeFieldSpec} */ public class DateTimeGranularitySpec { + // 'size:timeUnit' + private static final char SEPARATOR = ':'; + private static final int SIZE_POSITION = 0; + private static final int TIME_UNIT_POSITION = 1; + private static final int 
NUM_TOKENS = 2; - public static final String NUMBER_REGEX = "[1-9][0-9]*"; - - public static final String COLON_SEPARATOR = ":"; - - /* DateTimeFieldSpec granularity is of format size:timeUnit */ - public static final int GRANULARITY_SIZE_POSITION = 0; - public static final int GRANULARITY_UNIT_POSITION = 1; - public static final int MAX_GRANULARITY_TOKENS = 2; - - private final String _granularity; private final int _size; private final TimeUnit _timeUnit; @@ -47,25 +42,34 @@ public class DateTimeGranularitySpec { * Constructs a dateTimeGranularitySpec granularity from a string */ public DateTimeGranularitySpec(String granularity) { - validateGranularity(granularity); - _granularity = granularity; - String[] granularityTokens = _granularity.split(COLON_SEPARATOR); - _size = Integer.parseInt(granularityTokens[GRANULARITY_SIZE_POSITION]); - _timeUnit = TimeUnit.valueOf(granularityTokens[GRANULARITY_UNIT_POSITION]); + Preconditions.checkArgument(StringUtils.isNotEmpty(granularity), "Must provide granularity"); + String[] granularityTokens = StringUtil.split(granularity, SEPARATOR, 2); + Preconditions.checkArgument(granularityTokens.length >= NUM_TOKENS, + "Invalid granularity: %s, must be of format 'size:timeUnit", granularity); + try { + _size = Integer.parseInt(granularityTokens[SIZE_POSITION]); + } catch (Exception e) { + throw new IllegalArgumentException( + String.format("Invalid size: %s in granularity: %s", granularityTokens[SIZE_POSITION], granularity)); + } + Preconditions.checkArgument(_size > 0, "Invalid size: %s in granularity: %s, must be positive", _size, granularity); + try { + _timeUnit = TimeUnit.valueOf(granularityTokens[TIME_UNIT_POSITION]); + } catch (Exception e) { + throw new IllegalArgumentException( + String.format("Invalid time unit: %s in granularity: %s", granularityTokens[TIME_UNIT_POSITION], + granularity)); + } } /** * Constructs a dateTimeGranularitySpec granularity given the components of a granularity */ - public 
DateTimeGranularitySpec(int columnSize, TimeUnit columnUnit) { - _granularity = Joiner.on(COLON_SEPARATOR).join(columnSize, columnUnit); - validateGranularity(_granularity); - _size = columnSize; - _timeUnit = columnUnit; - } - - public String getGranularity() { - return _granularity; + public DateTimeGranularitySpec(int size, TimeUnit timeUnit) { + Preconditions.checkArgument(size > 0, "Invalid size: %s, must be positive", size); + Preconditions.checkArgument(timeUnit != null, "Must provide time unit"); + _size = size; + _timeUnit = timeUnit; } public int getSize() { @@ -77,55 +81,31 @@ public class DateTimeGranularitySpec { } /** + * Converts a granularity to millis. * <ul> - * <li>Convert a granularity to millis. - * This method should not do validation of outputGranularity. - * The validation should be handled by caller using {@link #validateGranularity}</li> - * <ul> - * <li>1) granularityToMillis(1:HOURS) = 3600000 (60*60*1000)</li> - * <li>2) granularityToMillis(1:MILLISECONDS) = 1</li> - * <li>3) granularityToMillis(15:MINUTES) = 900000 (15*60*1000)</li> - * </ul> + * <li>1) granularityToMillis(1:HOURS) = 3600000 (60*60*1000)</li> + * <li>2) granularityToMillis(1:MILLISECONDS) = 1</li> + * <li>3) granularityToMillis(15:MINUTES) = 900000 (15*60*1000)</li> * </ul> */ public long granularityToMillis() { return TimeUnit.MILLISECONDS.convert(_size, _timeUnit); } - /** - * Check correctness of granularity of {@link DateTimeFieldSpec} - */ - public static void validateGranularity(String granularity) { - Preconditions.checkNotNull(granularity, "Granularity string in dateTimeFieldSpec must not be null"); - - String[] granularityTokens = granularity.split(COLON_SEPARATOR); - Preconditions.checkState(granularityTokens.length == MAX_GRANULARITY_TOKENS, - "Incorrect granularity: %s. Must be of format 'size:timeunit'", granularity); - Preconditions.checkState(granularityTokens[GRANULARITY_SIZE_POSITION].matches(NUMBER_REGEX), - "Incorrect granularity size: %s. 
Must be of format '[0-9]+:<TimeUnit>'", - granularityTokens[GRANULARITY_SIZE_POSITION]); - Preconditions.checkState(EnumUtils.isValidEnum(TimeUnit.class, granularityTokens[GRANULARITY_UNIT_POSITION]), - "Incorrect granularity size: %s. Must be of format '[0-9]+:<TimeUnit>'", - granularityTokens[GRANULARITY_SIZE_POSITION]); - } - @Override public boolean equals(Object o) { - if (EqualityUtils.isSameReference(this, o)) { + if (this == o) { return true; } - - if (EqualityUtils.isNullOrNotSameClass(this, o)) { + if (o == null || getClass() != o.getClass()) { return false; } - DateTimeGranularitySpec that = (DateTimeGranularitySpec) o; - - return EqualityUtils.isEqual(_granularity, that._granularity); + return _size == that._size && _timeUnit == that._timeUnit; } @Override public int hashCode() { - return EqualityUtils.hashCodeOf(_granularity); + return Objects.hash(_size, _timeUnit); } } diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/data/Schema.java b/pinot-spi/src/main/java/org/apache/pinot/spi/data/Schema.java index 0670f76e27..9c5a3cc42b 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/Schema.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/Schema.java @@ -37,12 +37,12 @@ import java.util.Map; import java.util.Set; import java.util.concurrent.TimeUnit; import javax.annotation.Nullable; -import org.apache.commons.lang.StringUtils; import org.apache.commons.lang3.tuple.Pair; import org.apache.pinot.spi.data.FieldSpec.DataType; import org.apache.pinot.spi.data.FieldSpec.FieldType; import org.apache.pinot.spi.utils.EqualityUtils; import org.apache.pinot.spi.utils.JsonUtils; +import org.apache.pinot.spi.utils.StringUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -751,16 +751,26 @@ public final class Schema implements Serializable { int outgoingTimeSize = outgoingGranularitySpec.getTimeUnitSize(); TimeUnit outgoingTimeUnit = outgoingGranularitySpec.getTimeType(); String outgoingTimeFormat = 
outgoingGranularitySpec.getTimeFormat(); - String[] split = StringUtils.split(outgoingTimeFormat, DateTimeFormatSpec.COLON_SEPARATOR, 2); - DateTimeFormatSpec formatSpec; - if (split[0].equals(DateTimeFieldSpec.TimeFormat.EPOCH.toString())) { - formatSpec = new DateTimeFormatSpec(outgoingTimeSize, outgoingTimeUnit.toString(), split[0]); + String[] split = StringUtil.split(outgoingTimeFormat, ':', 2); + String timeFormat; + if (split[0].equals(DateTimeFieldSpec.TimeFormat.EPOCH.name())) { + timeFormat = outgoingTimeSize + ":" + outgoingTimeUnit.name() + ":EPOCH"; } else { - formatSpec = new DateTimeFormatSpec(outgoingTimeSize, outgoingTimeUnit.toString(), split[0], split[1]); - } - dateTimeFieldSpec.setFormat(formatSpec.getFormat()); + timeFormat = outgoingTimeSize + ":" + outgoingTimeUnit.name() + ":SIMPLE_DATE_FORMAT:" + split[1]; + } + // TODO: Switch to new format after releasing 0.11.0 +// if (split[0].equals(DateTimeFieldSpec.TimeFormat.EPOCH.name())) { +// timeFormat = "EPOCH|" + outgoingTimeUnit.name(); +// if (outgoingTimeSize != 1) { +// timeFormat += "|" + outgoingTimeSize; +// } +// timeFormat = outgoingTimeSize + ":" + outgoingTimeUnit.name() + ":EPOCH"; +// } else { +// timeFormat = "SIMPLE_DATE_FORMAT|" + split[1]; +// } + dateTimeFieldSpec.setFormat(timeFormat); DateTimeGranularitySpec granularitySpec = new DateTimeGranularitySpec(outgoingTimeSize, outgoingTimeUnit); - dateTimeFieldSpec.setGranularity(granularitySpec.getGranularity()); + dateTimeFieldSpec.setGranularity(outgoingTimeSize + ":" + outgoingTimeUnit.name()); if (timeFieldSpec.getTransformFunction() != null) { dateTimeFieldSpec.setTransformFunction(timeFieldSpec.getTransformFunction()); diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JsonUtils.java b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JsonUtils.java index 13b638915d..d236bd76f0 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JsonUtils.java +++ 
b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JsonUtils.java @@ -55,8 +55,6 @@ import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.tuple.Pair; import org.apache.pinot.spi.config.table.ingestion.ComplexTypeConfig; import org.apache.pinot.spi.data.DateTimeFieldSpec; -import org.apache.pinot.spi.data.DateTimeFormatSpec; -import org.apache.pinot.spi.data.DateTimeGranularitySpec; import org.apache.pinot.spi.data.DimensionFieldSpec; import org.apache.pinot.spi.data.FieldSpec; import org.apache.pinot.spi.data.FieldSpec.DataType; @@ -598,9 +596,11 @@ public class JsonUtils { case DATE_TIME: Preconditions.checkState(isSingleValueField, "Time field: %s cannot be multi-valued", name); Preconditions.checkNotNull(timeUnit, "Time unit cannot be null"); - pinotSchema.addField(new DateTimeFieldSpec(name, dataType, - new DateTimeFormatSpec(1, timeUnit.toString(), DateTimeFieldSpec.TimeFormat.EPOCH.toString()).getFormat(), - new DateTimeGranularitySpec(1, timeUnit).getGranularity())); + // TODO: Switch to new format after releasing 0.11.0 + // "EPOCH|" + timeUnit.name() + String format = "1:" + timeUnit.name() + ":EPOCH"; + String granularity = "1:" + timeUnit.name(); + pinotSchema.addField(new DateTimeFieldSpec(name, dataType, format, granularity)); break; default: throw new UnsupportedOperationException("Unsupported field type: " + fieldType + " for field: " + name); diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/StringUtil.java b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/StringUtil.java index 6939a50b13..d7d2d25875 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/StringUtil.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/StringUtil.java @@ -18,6 +18,9 @@ */ package org.apache.pinot.spi.utils; +import java.util.ArrayList; +import java.util.List; +import org.apache.commons.lang.ArrayUtils; import org.apache.commons.lang.StringUtils; @@ -35,6 +38,35 @@ public class StringUtil { return 
StringUtils.join(keys, separator); } + /** + * Splits the given string with the separator, returns an array with the given max length. When max <= 0, no limit is + * applied. + */ + public static String[] split(String str, char separator, int max) { + int length = str.length(); + if (length == 0) { + return ArrayUtils.EMPTY_STRING_ARRAY; + } + if (max == 1) { + return new String[]{str}; + } + List<String> list = new ArrayList<>(max); + int start = 0; + int end = 0; + while (end < length) { + if (str.charAt(end) == separator) { + list.add(str.substring(start, end)); + start = end + 1; + if (list.size() == max - 1) { + break; + } + } + end++; + } + list.add(str.substring(start, length)); + return list.toArray(new String[0]); + } + /** * Sanitizes a string value. * <ul> diff --git a/pinot-spi/src/test/java/org/apache/pinot/spi/data/DateTimeFormatPatternSpecTest.java b/pinot-spi/src/test/java/org/apache/pinot/spi/data/DateTimeFormatPatternSpecTest.java index 3a730f2fc2..7d721c5290 100644 --- a/pinot-spi/src/test/java/org/apache/pinot/spi/data/DateTimeFormatPatternSpecTest.java +++ b/pinot-spi/src/test/java/org/apache/pinot/spi/data/DateTimeFormatPatternSpecTest.java @@ -18,26 +18,72 @@ */ package org.apache.pinot.spi.data; +import java.util.TimeZone; +import org.apache.pinot.spi.data.DateTimeFieldSpec.TimeFormat; +import org.joda.time.DateTimeZone; import org.testng.annotations.Test; +import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertThrows; + public class DateTimeFormatPatternSpecTest { @Test public void testValidateFormat() { - DateTimeFormatPatternSpec.validateFormat("yyyy-MM-dd"); - DateTimeFormatPatternSpec.validateFormat("yyyyMMdd tz(CST)"); - DateTimeFormatPatternSpec.validateFormat("yyyyMMdd HH tz(GMT+0700)"); - DateTimeFormatPatternSpec.validateFormat("yyyyMMddHH tz(America/Chicago)"); - - // Unknown tz is treated as UTC - DateTimeFormatPatternSpec.validateFormat("yyyyMMdd tz(CSEMT)"); - 
DateTimeFormatPatternSpec.validateFormat("yyyyMMdd tz(GMT+5000)"); - DateTimeFormatPatternSpec.validateFormat("yyyyMMddHH tz(HAHA/Chicago)"); - - // invalid chars will throw - assertThrows(IllegalStateException.class, () -> DateTimeFormatPatternSpec.validateFormat("yyyc-MM-dd")); - assertThrows(IllegalStateException.class, () -> DateTimeFormatPatternSpec.validateFormat("yyyy-MM-dd ff(a)")); + DateTimeFormatPatternSpec dateTimeFormatPatternSpec = + new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, "yyyy-MM-dd"); + assertEquals(dateTimeFormatPatternSpec.getSdfPattern(), "yyyy-MM-dd"); + assertEquals(dateTimeFormatPatternSpec.getDateTimeZone(), DateTimeZone.UTC); + assertEquals(dateTimeFormatPatternSpec, + new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, "yyyy-MM-dd", null)); + + dateTimeFormatPatternSpec = new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd tz(CST)"); + assertEquals(dateTimeFormatPatternSpec.getSdfPattern(), "yyyyMMdd"); + assertEquals(dateTimeFormatPatternSpec.getDateTimeZone(), DateTimeZone.forTimeZone(TimeZone.getTimeZone("CST"))); + assertEquals(dateTimeFormatPatternSpec, + new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd", "CST")); + + dateTimeFormatPatternSpec = + new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd HH tz(GMT+0700)"); + assertEquals(dateTimeFormatPatternSpec.getSdfPattern(), "yyyyMMdd HH"); + assertEquals(dateTimeFormatPatternSpec.getDateTimeZone(), + DateTimeZone.forTimeZone(TimeZone.getTimeZone("GMT+0700"))); + assertEquals(dateTimeFormatPatternSpec, + new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd HH", "GMT+0700")); + + dateTimeFormatPatternSpec = + new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMddHH tz(America/Chicago)"); + assertEquals(dateTimeFormatPatternSpec.getSdfPattern(), "yyyyMMddHH"); + assertEquals(dateTimeFormatPatternSpec.getDateTimeZone(), + 
DateTimeZone.forTimeZone(TimeZone.getTimeZone("America/Chicago"))); + assertEquals(dateTimeFormatPatternSpec, + new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMddHH", "America/Chicago")); + + // Unknown time zone is treated as UTC + dateTimeFormatPatternSpec = new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd tz(CSEMT)"); + assertEquals(dateTimeFormatPatternSpec.getSdfPattern(), "yyyyMMdd"); + assertEquals(dateTimeFormatPatternSpec.getDateTimeZone(), DateTimeZone.UTC); + assertEquals(dateTimeFormatPatternSpec, + new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd", "CSEMT")); + + dateTimeFormatPatternSpec = new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd tz(GMT+5000)"); + assertEquals(dateTimeFormatPatternSpec.getSdfPattern(), "yyyyMMdd"); + assertEquals(dateTimeFormatPatternSpec.getDateTimeZone(), DateTimeZone.UTC); + assertEquals(dateTimeFormatPatternSpec, + new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd", "GMT+5000")); + + dateTimeFormatPatternSpec = + new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd tz(HAHA/Chicago)"); + assertEquals(dateTimeFormatPatternSpec.getSdfPattern(), "yyyyMMdd"); + assertEquals(dateTimeFormatPatternSpec.getDateTimeZone(), DateTimeZone.UTC); + assertEquals(dateTimeFormatPatternSpec, + new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd", "HAHA/Chicago")); + + // Invalid pattern + assertThrows(IllegalArgumentException.class, + () -> new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, "yyyc-MM-dd")); + assertThrows(IllegalArgumentException.class, + () -> new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, "yyyy-MM-dd ff(a)")); } } diff --git a/pinot-spi/src/test/java/org/apache/pinot/spi/data/DateTimeFormatSpecTest.java b/pinot-spi/src/test/java/org/apache/pinot/spi/data/DateTimeFormatSpecTest.java index b7feae902e..8922568884 100644 --- 
a/pinot-spi/src/test/java/org/apache/pinot/spi/data/DateTimeFormatSpecTest.java +++ b/pinot-spi/src/test/java/org/apache/pinot/spi/data/DateTimeFormatSpecTest.java @@ -18,24 +18,75 @@ */ package org.apache.pinot.spi.data; +import java.util.TimeZone; +import java.util.concurrent.TimeUnit; +import org.joda.time.DateTimeZone; import org.testng.annotations.Test; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNull; import static org.testng.Assert.assertThrows; public class DateTimeFormatSpecTest { @Test - public void testValidateFormat() { - DateTimeFormatSpec.validateFormat("1:DAYS:EPOCH"); - DateTimeFormatSpec.validateFormat("1:DAYS:TIMESTAMP"); - DateTimeFormatSpec.validateFormat("1:DAYS:SIMPLE_DATE_FORMAT:yyyyMMdd"); - assertThrows(IllegalStateException.class, () -> DateTimeFormatSpec.validateFormat("1:DAY")); - assertThrows(IllegalStateException.class, () -> DateTimeFormatSpec.validateFormat("one:DAYS:EPOCH")); - assertThrows(IllegalStateException.class, () -> DateTimeFormatSpec.validateFormat("1:DAY:EPOCH")); - assertThrows(IllegalStateException.class, () -> DateTimeFormatSpec.validateFormat("1:DAY:EPOCH:yyyyMMdd")); - assertThrows(IllegalStateException.class, () -> DateTimeFormatSpec.validateFormat("1:DAY:TIMESTAMP:yyyyMMdd")); - assertThrows(IllegalStateException.class, - () -> DateTimeFormatSpec.validateFormat("1:DAY:SIMPLE_DATE_FORMAT:yyycMMdd")); + public void testDateTimeFormatSpec() { + DateTimeFormatSpec dateTimeFormatSpec = new DateTimeFormatSpec("5:DAYS:EPOCH"); + assertEquals(dateTimeFormatSpec.getTimeFormat(), DateTimeFieldSpec.TimeFormat.EPOCH); + assertEquals(dateTimeFormatSpec.getColumnSize(), 5); + assertEquals(dateTimeFormatSpec.getColumnUnit(), TimeUnit.DAYS); + assertEquals(dateTimeFormatSpec.getColumnDateTimeTransformUnit(), + DateTimeFormatUnitSpec.DateTimeTransformUnit.DAYS); + assertNull(dateTimeFormatSpec.getSDFPattern()); + + assertEquals(new DateTimeFormatSpec("EPOCH|DAYS|5"), dateTimeFormatSpec); + 
+ dateTimeFormatSpec = new DateTimeFormatSpec("1:DAYS:TIMESTAMP"); + assertEquals(dateTimeFormatSpec.getTimeFormat(), DateTimeFieldSpec.TimeFormat.TIMESTAMP); + assertEquals(dateTimeFormatSpec.getColumnSize(), 1); + assertEquals(dateTimeFormatSpec.getColumnUnit(), TimeUnit.MILLISECONDS); + assertEquals(dateTimeFormatSpec.getColumnDateTimeTransformUnit(), + DateTimeFormatUnitSpec.DateTimeTransformUnit.MILLISECONDS); + assertNull(dateTimeFormatSpec.getSDFPattern()); + + assertEquals(new DateTimeFormatSpec("TIMESTAMP"), dateTimeFormatSpec); + + dateTimeFormatSpec = new DateTimeFormatSpec("1:DAYS:SIMPLE_DATE_FORMAT:yyyyMMdd"); + assertEquals(dateTimeFormatSpec.getTimeFormat(), DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT); + assertEquals(dateTimeFormatSpec.getColumnSize(), 1); + assertEquals(dateTimeFormatSpec.getColumnUnit(), TimeUnit.MILLISECONDS); + assertEquals(dateTimeFormatSpec.getColumnDateTimeTransformUnit(), + DateTimeFormatUnitSpec.DateTimeTransformUnit.MILLISECONDS); + assertEquals(dateTimeFormatSpec.getSDFPattern(), "yyyyMMdd"); + assertEquals(dateTimeFormatSpec.getDateTimezone(), DateTimeZone.UTC); + + assertEquals(new DateTimeFormatSpec("SIMPLE_DATE_FORMAT|yyyyMMdd"), dateTimeFormatSpec); + + dateTimeFormatSpec = new DateTimeFormatSpec("1:DAYS:SIMPLE_DATE_FORMAT:yyyy-MM-dd tz(CST)"); + assertEquals(dateTimeFormatSpec.getTimeFormat(), DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT); + assertEquals(dateTimeFormatSpec.getColumnSize(), 1); + assertEquals(dateTimeFormatSpec.getColumnUnit(), TimeUnit.MILLISECONDS); + assertEquals(dateTimeFormatSpec.getColumnDateTimeTransformUnit(), + DateTimeFormatUnitSpec.DateTimeTransformUnit.MILLISECONDS); + assertEquals(dateTimeFormatSpec.getSDFPattern(), "yyyy-MM-dd"); + assertEquals(dateTimeFormatSpec.getDateTimezone(), DateTimeZone.forTimeZone(TimeZone.getTimeZone("CST"))); + + assertEquals(new DateTimeFormatSpec("SIMPLE_DATE_FORMAT|yyyy-MM-dd|CST"), dateTimeFormatSpec); + + 
assertThrows(IllegalArgumentException.class, () -> new DateTimeFormatSpec("1:DAY")); + assertThrows(IllegalArgumentException.class, () -> new DateTimeFormatSpec("EPOCH")); + + assertThrows(IllegalArgumentException.class, () -> new DateTimeFormatSpec("one:DAYS:EPOCH")); + assertThrows(IllegalArgumentException.class, () -> new DateTimeFormatSpec("EPOCH|DAYS|one")); + + assertThrows(IllegalArgumentException.class, () -> new DateTimeFormatSpec("1:DAY:EPOCH")); + assertThrows(IllegalArgumentException.class, () -> new DateTimeFormatSpec("EPOCH|DAY")); + + assertThrows(IllegalArgumentException.class, () -> new DateTimeFormatSpec("1:DAY:EPOCH:yyyyMMdd")); + assertThrows(IllegalArgumentException.class, () -> new DateTimeFormatSpec("EPOCH|yyyyMMdd")); + + assertThrows(IllegalArgumentException.class, () -> new DateTimeFormatSpec("1:DAY:SIMPLE_DATE_FORMAT:yyycMMdd")); + assertThrows(IllegalArgumentException.class, () -> new DateTimeFormatSpec("SIMPLE_DATE_FORMAT|yyycMMdd")); } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org