This is an automated email from the ASF dual-hosted git repository. snlee pushed a commit to branch release-0.8.0-rc in repository https://gitbox.apache.org/repos/asf/pinot.git
commit a851c2c8b759fd4e63d4864412c286239d4ee3bf Author: Xiaotian (Jackie) Jiang <17555551+jackie-ji...@users.noreply.github.com> AuthorDate: Thu Aug 5 23:59:23 2021 -0700 Treat STRING to BOOLEAN data type change as backward compatible schema change (#7259) Before the native BOOLEAN support, BOOLEAN data type is stored as STRING within the schema. To keep existing schema backward compatible, when the new field spec has BOOLEAN data type and the old field spec has STRING data type, set the new field spec's data type to STRING. --- .../org/apache/pinot/common/data/SchemaTest.java | 48 ++++++++++++++++++++++ .../helix/core/PinotHelixResourceManager.java | 7 ++-- .../java/org/apache/pinot/spi/data/Schema.java | 31 +++++++++++--- 3 files changed, 76 insertions(+), 10 deletions(-) diff --git a/pinot-common/src/test/java/org/apache/pinot/common/data/SchemaTest.java b/pinot-common/src/test/java/org/apache/pinot/common/data/SchemaTest.java index 5f2b612..47b536f 100644 --- a/pinot-common/src/test/java/org/apache/pinot/common/data/SchemaTest.java +++ b/pinot-common/src/test/java/org/apache/pinot/common/data/SchemaTest.java @@ -393,4 +393,52 @@ public class SchemaTest { .addDateTime("dateTime", FieldSpec.DataType.LONG, "1:HOURS:EPOCH", "1:HOURS").build(); Assert.assertTrue(schema6.isBackwardCompatibleWith(oldSchema)); } + + @Test + public void testStringToBooleanSchemaBackwardCompatibility() { + Schema oldSchema = new Schema.SchemaBuilder().addSingleValueDimension("svInt", FieldSpec.DataType.INT) + .addSingleValueDimension("svString", FieldSpec.DataType.STRING) + .addSingleValueDimension("svStringWithDefault", FieldSpec.DataType.STRING, "false").build(); + + // INT to BOOLEAN - incompatible + Schema newSchema = new Schema.SchemaBuilder().addSingleValueDimension("svInt", FieldSpec.DataType.BOOLEAN) + .addSingleValueDimension("svString", FieldSpec.DataType.STRING) + .addSingleValueDimension("svStringWithDefault", FieldSpec.DataType.STRING, "false").build(); + newSchema.updateBooleanFieldsIfNeeded(oldSchema); + Assert.assertFalse(newSchema.isBackwardCompatibleWith(oldSchema)); + + // STRING to BOOLEAN - compatible + newSchema = new Schema.SchemaBuilder().addSingleValueDimension("svInt", FieldSpec.DataType.INT) + .addSingleValueDimension("svString", FieldSpec.DataType.BOOLEAN) + .addSingleValueDimension("svStringWithDefault", FieldSpec.DataType.STRING, "false").build(); + newSchema.updateBooleanFieldsIfNeeded(oldSchema); + Assert.assertTrue(newSchema.isBackwardCompatibleWith(oldSchema)); + Assert.assertEquals(newSchema, oldSchema); + + // STRING with default to BOOLEAN with default - compatible + newSchema = new Schema.SchemaBuilder().addSingleValueDimension("svInt", FieldSpec.DataType.INT) + .addSingleValueDimension("svString", FieldSpec.DataType.STRING) + .addSingleValueDimension("svStringWithDefault", FieldSpec.DataType.BOOLEAN, "false").build(); + newSchema.updateBooleanFieldsIfNeeded(oldSchema); + Assert.assertTrue(newSchema.isBackwardCompatibleWith(oldSchema)); + Assert.assertEquals(newSchema, oldSchema); + + // STRING with default to BOOLEAN without default - incompatible + newSchema = new Schema.SchemaBuilder().addSingleValueDimension("svInt", FieldSpec.DataType.INT) + .addSingleValueDimension("svString", FieldSpec.DataType.STRING) + .addSingleValueDimension("svStringWithDefault", FieldSpec.DataType.BOOLEAN).build(); + newSchema.updateBooleanFieldsIfNeeded(oldSchema); + Assert.assertFalse(newSchema.isBackwardCompatibleWith(oldSchema)); + + // New added BOOLEAN - compatible + newSchema = new Schema.SchemaBuilder().addSingleValueDimension("svInt", FieldSpec.DataType.INT) + .addSingleValueDimension("svString", FieldSpec.DataType.STRING) + .addSingleValueDimension("svStringWithDefault", FieldSpec.DataType.STRING, "false") + .addSingleValueDimension("svBoolean", FieldSpec.DataType.BOOLEAN) + .addSingleValueDimension("svBooleanWithDefault", FieldSpec.DataType.BOOLEAN, true).build(); + newSchema.updateBooleanFieldsIfNeeded(oldSchema); + Assert.assertTrue(newSchema.isBackwardCompatibleWith(oldSchema)); + Assert.assertEquals(newSchema.getFieldSpecFor("svBoolean").getDataType(), FieldSpec.DataType.BOOLEAN); + Assert.assertEquals(newSchema.getFieldSpecFor("svBooleanWithDefault").getDataType(), FieldSpec.DataType.BOOLEAN); + } } diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java index e880875..c52df5e 100644 --- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java +++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java @@ -1066,7 +1066,6 @@ public class PinotHelixResourceManager { public void updateSchema(Schema schema, boolean reload) throws SchemaNotFoundException, SchemaBackwardIncompatibleException, TableNotFoundException { - ZNRecord record = SchemaUtils.toZNRecord(schema); String schemaName = schema.getSchemaName(); Schema oldSchema = ZKMetadataProvider.getSchema(_propertyStore, schemaName); @@ -1074,6 +1073,8 @@ public class PinotHelixResourceManager { throw new SchemaNotFoundException(String.format("Schema %s did not exist.", schemaName)); } + schema.updateBooleanFieldsIfNeeded(oldSchema); + if (schema.equals(oldSchema)) { LOGGER.info("New schema is the same with the existing schema. Not updating schema " + schemaName); return; @@ -1084,9 +1085,7 @@ public class PinotHelixResourceManager { String.format("New schema %s is not backward compatible with the current schema", schemaName)); } - PinotHelixPropertyStoreZnRecordProvider propertyStoreHelper = - PinotHelixPropertyStoreZnRecordProvider.forSchema(_propertyStore); - propertyStoreHelper.set(schemaName, record); + ZKMetadataProvider.setSchema(_propertyStore, schema); if (reload) { LOGGER.info("Reloading tables with name: {}", schemaName); diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/data/Schema.java b/pinot-spi/src/main/java/org/apache/pinot/spi/data/Schema.java index b8ab67b5..2a2a219 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/Schema.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/Schema.java @@ -46,9 +46,6 @@ import org.apache.pinot.spi.utils.JsonUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.apache.pinot.spi.data.FieldSpec.DataType.JSON; -import static org.apache.pinot.spi.data.FieldSpec.DataType.STRING; - /** * The <code>Schema</code> class is defined for each table to describe the details of the table's fields (columns). @@ -216,7 +213,7 @@ public final class Schema implements Serializable { throw new UnsupportedOperationException("Unsupported field type: " + fieldType); } - _hasJSONColumn |= fieldSpec.getDataType().equals(JSON); + _hasJSONColumn |= fieldSpec.getDataType().equals(DataType.JSON); _fieldSpecMap.put(columnName, fieldSpec); } @@ -521,7 +518,8 @@ public final class Schema implements Serializable { */ public SchemaBuilder addSingleValueDimension(String dimensionName, DataType dataType, int maxLength, Object defaultNullValue) { - Preconditions.checkArgument(dataType == STRING, "The maxLength field only applies to STRING field right now"); + Preconditions + .checkArgument(dataType == DataType.STRING, "The maxLength field only applies to STRING field right now"); _schema.addField(new DimensionFieldSpec(dimensionName, dataType, true, maxLength, defaultNullValue)); return this; } @@ -547,7 +545,8 @@ public final class Schema implements Serializable { */ public SchemaBuilder addMultiValueDimension(String dimensionName, DataType dataType, int maxLength, Object defaultNullValue) { - Preconditions.checkArgument(dataType == STRING, "The maxLength field only applies to STRING field right now"); + Preconditions + .checkArgument(dataType == DataType.STRING, "The maxLength field only applies to STRING field right now"); _schema.addField(new DimensionFieldSpec(dimensionName, dataType, false, maxLength, defaultNullValue)); return this; } @@ -658,6 +657,26 @@ public final class Schema implements Serializable { } /** + * Updates fields with BOOLEAN data type to STRING if the data type in the old schema is STRING. + * + * BOOLEAN data type was stored as STRING within the schema before release 0.8.0. In release 0.8.0, we introduced + * native BOOLEAN support and BOOLEAN data type is no longer replaced with STRING. + * To keep the existing schema backward compatible, when the new field spec has BOOLEAN data type and the old field + * spec has STRING data type, set the new field spec's data type to STRING. + */ + public void updateBooleanFieldsIfNeeded(Schema oldSchema) { + for (Map.Entry<String, FieldSpec> entry : _fieldSpecMap.entrySet()) { + FieldSpec fieldSpec = entry.getValue(); + if (fieldSpec.getDataType() == DataType.BOOLEAN) { + FieldSpec oldFieldSpec = oldSchema.getFieldSpecFor(entry.getKey()); + if (oldFieldSpec != null && oldFieldSpec.getDataType() == DataType.STRING) { + fieldSpec.setDataType(DataType.STRING); + } + } + } + } + + /** * Check whether the current schema is backward compatible with oldSchema. * Backward compatibility requires all columns and fieldSpec in oldSchema should be retained. * --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org