Jackie-Jiang commented on code in PR #7358: URL: https://github.com/apache/pinot/pull/7358#discussion_r850814130
########## pinot-plugins/pinot-input-format/pinot-avro-base/src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroRecordExtractor.java: ########## @@ -33,14 +35,19 @@ /** - * Extractor for Avro Records + * Extractor for Avro Records Review Comment: (minor) revert ########## pinot-plugins/pinot-input-format/pinot-avro-base/src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroSchemaUtil.java: ########## @@ -114,4 +137,39 @@ private static ArrayNode convertStringsToJsonArray(String... strings) { } return jsonArray; } + + /** + * Applies the logical type conversion to the given Avro record field. If there isn't a logical + * type for the value then the value is returned unchanged. If there is a logical type associated + * to the field but no Avro conversion is known for the type then the value is returned unchanged. + * + * @param field Avro field spec + * @param value Value of the field + * @return Converted value as per the logical type in the spec, or the unchanged value if a + * logical type or conversion can't be found. + */ + public static Object applyLogicalType(Schema.Field field, Object value) { + if (field == null || field.schema() == null) { + return value; + } + LogicalType logicalType = LogicalTypes.fromSchemaIgnoreInvalid(field.schema()); + if (logicalType == null) { + return value; + } + Conversion<?> conversion = AvroSchemaUtil.findConversionFor(logicalType.getName()); + if (conversion == null) { + return value; + } + return Conversions.convertToLogicalType(value, field.schema(), logicalType, conversion); + } + + static { Review Comment: (minor) Let's put this static block next to the map construction for readability ########## pinot-plugins/pinot-input-format/pinot-avro-base/src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroSchemaUtil.java: ########## @@ -20,18 +20,41 @@ import com.fasterxml.jackson.databind.node.ArrayNode; import com.fasterxml.jackson.databind.node.ObjectNode; +import java.util.HashMap; +import java.util.Map; +import org.apache.avro.Conversion; +import org.apache.avro.Conversions; +import org.apache.avro.LogicalType; +import org.apache.avro.LogicalTypes; import org.apache.avro.Schema; +import org.apache.avro.data.TimeConversions; import org.apache.pinot.spi.data.FieldSpec; import org.apache.pinot.spi.data.FieldSpec.DataType; import org.apache.pinot.spi.utils.JsonUtils; public class AvroSchemaUtil { + /* + * These constants are copied from org.apache.avro.LogicalTypes + */ + private static final String DECIMAL = "decimal"; + private static final String UUID = "uuid"; + private static final String DATE = "date"; + private static final String TIME_MILLIS = "time-millis"; + private static final String TIME_MICROS = "time-micros"; + private static final String TIMESTAMP_MILLIS = "timestamp-millis"; + private static final String TIMESTAMP_MICROS = "timestamp-micros"; + private static final Map<String, Conversion<?>> CONVERSION_MAP = new HashMap<>(); + private AvroSchemaUtil() { } + public static Conversion<?> findConversionFor(String typeName) { + return CONVERSION_MAP.get(typeName); + } + /** - * Returns the data type stored in Pinot that is associated with the given Avro type. + * Returns the data type stored in Pinot that is associated with the given Avro type. Review Comment: (minor) Revert, same for other places (is this auto-formatted?) ########## pinot-plugins/pinot-input-format/pinot-avro-base/src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroRecordExtractor.java: ########## @@ -107,4 +121,25 @@ protected Object convertRecord(Object value) { } return convertedMap; } + + /** + * This method convert any Avro logical-type converted (or not) value to a class supported by + * Pinot {@link GenericRow} + * + * Note that at the moment BigDecimal is converted to Pinot double which may lead to precision loss or may not be + * represented at all. + * Similarly, timestamp microsecond precision is not supported at the moment. These values will get converted to + * millisecond precision. + */ + @Override + protected Object convertSingleValue(Object value) { + if (value instanceof BigDecimal) { + return ((BigDecimal) value).doubleValue(); + } + if (value instanceof Instant) { Review Comment: Seems the `Instant` is always converted from the timestamp type, so we might want to return the `Timestamp` object here. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org