walterddr commented on code in PR #9389: URL: https://github.com/apache/pinot/pull/9389#discussion_r973387843
########## pinot-common/src/main/java/org/apache/pinot/common/request/context/LiteralContext.java: ########## @@ -0,0 +1,126 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.common.request.context; + +import com.google.common.base.Preconditions; +import java.util.Objects; +import javax.annotation.Nullable; +import org.apache.pinot.common.request.Literal; +import org.apache.pinot.spi.data.FieldSpec; + + +/** + * The {@code LiteralContext} class represents a literal in the query. + * <p>This includes both value and type information. We translate thrift literal to this representation in server. + * Currently, only Boolean literal is correctly encoded in thrift and passed in. + * All integers are encoded as LONG in thrift, and the other numerical types are encoded as DOUBLE. + * The remaining types are encoded as STRING. + */ +public class LiteralContext { + // TODO: Support all of the types for sql. 
+ private FieldSpec.DataType _type; + private Object _value; + + private static FieldSpec.DataType convertThriftTypeToDataType(Literal._Fields fields) { + switch (fields) { + case SHORT_VALUE: + case INT_VALUE: + return FieldSpec.DataType.INT; + case LONG_VALUE: + return FieldSpec.DataType.LONG; + case BOOL_VALUE: + return FieldSpec.DataType.BOOLEAN; + case DOUBLE_VALUE: + return FieldSpec.DataType.DOUBLE; + case STRING_VALUE: + return FieldSpec.DataType.STRING; + default: + throw new UnsupportedOperationException("Unsupported literal type:" + fields); + } + } + + private static Class<?> convertDataTypeToJavaType(FieldSpec.DataType dataType) { + switch (dataType) { + case INT: + return Integer.class; + case LONG: + return Long.class; + case BOOLEAN: + return Boolean.class; + case FLOAT: + return Float.class; + case DOUBLE: + return Double.class; + case STRING: + return String.class; + default: + throw new UnsupportedOperationException("Unsupported dataType:" + dataType); + } + } Review Comment: What's the purpose of this? We have PinotDataType for this, right? ########## pinot-common/src/main/java/org/apache/pinot/common/request/context/ExpressionContext.java: ########## @@ -94,7 +113,7 @@ public boolean equals(Object o) { return false; } ExpressionContext that = (ExpressionContext) o; - return _type == that._type && Objects.equals(_value, that._value) && Objects.equals(_function, that._function); + return _type.equals(that._type) && Objects.equals(_identifier, that._identifier) && Objects.equals(_function, that._function) && Objects.equals(_literal, that._literal); Review Comment: This might have a backward-compatibility issue, e.g. if the broker was still storing the literal in `_value` but the server was upgraded to a newer version and stores the literal in the `_literal` field. 
########## pinot-core/src/main/java/org/apache/pinot/core/data/table/TableResizer.java: ########## @@ -124,7 +124,7 @@ public TableResizer(DataSchema dataSchema, QueryContext queryContext) { */ private OrderByValueExtractor getOrderByValueExtractor(ExpressionContext expression) { if (expression.getType() == ExpressionContext.Type.LITERAL) { - return new LiteralExtractor(expression.getLiteral()); + return new LiteralExtractor(expression.getLiteralString()); Review Comment: i think we can change LiteralExtractor to actually extract the literal. all literals we supported now are comparable. thus we can carry the type into it ########## pinot-common/src/main/java/org/apache/pinot/common/request/context/LiteralContext.java: ########## @@ -0,0 +1,126 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.common.request.context; + +import com.google.common.base.Preconditions; +import java.util.Objects; +import javax.annotation.Nullable; +import org.apache.pinot.common.request.Literal; +import org.apache.pinot.spi.data.FieldSpec; + + +/** + * The {@code LiteralContext} class represents a literal in the query. + * <p>This includes both value and type information. 
We translate thrift literal to this representation in server. + * Currently, only Boolean literal is correctly encoded in thrift and passed in. + * All integers are encoded as LONG in thrift, and the other numerical types are encoded as DOUBLE. + * The remaining types are encoded as STRING. + */ +public class LiteralContext { + // TODO: Support all of the types for sql. + private FieldSpec.DataType _type; + private Object _value; + + private static FieldSpec.DataType convertThriftTypeToDataType(Literal._Fields fields) { + switch (fields) { + case SHORT_VALUE: + case INT_VALUE: + return FieldSpec.DataType.INT; + case LONG_VALUE: + return FieldSpec.DataType.LONG; + case BOOL_VALUE: + return FieldSpec.DataType.BOOLEAN; + case DOUBLE_VALUE: + return FieldSpec.DataType.DOUBLE; + case STRING_VALUE: + return FieldSpec.DataType.STRING; + default: + throw new UnsupportedOperationException("Unsupported literal type:" + fields); + } + } + + private static Class<?> convertDataTypeToJavaType(FieldSpec.DataType dataType) { + switch (dataType) { + case INT: + return Integer.class; + case LONG: + return Long.class; + case BOOLEAN: + return Boolean.class; + case FLOAT: + return Float.class; + case DOUBLE: + return Double.class; + case STRING: + return String.class; + default: + throw new UnsupportedOperationException("Unsupported dataType:" + dataType); + } + } Review Comment: IMO we should focus on having Literal store `DataType` and `Value`. 
All conversion logic should live outside of LiteralContext. ########## pinot-common/src/main/java/org/apache/pinot/common/request/context/RequestContextUtils.java: ########## @@ -352,10 +352,10 @@ public static FilterContext getFilter(FunctionContext filterFunction) { } private static String getStringValue(ExpressionContext expressionContext) { - if (expressionContext.getType() != ExpressionContext.Type.LITERAL) { + if(expressionContext.getType() != ExpressionContext.Type.LITERAL){ throw new BadQueryRequestException( "Pinot does not support column or function on the right-hand side of the predicate"); } - return expressionContext.getLiteral(); + return expressionContext.getLiteralString(); } Review Comment: Add a new function ``` getLiteralContext(ExpressionContext expressionContext) { // ... } ``` and make ``` @Deprecated getStringValue() { LiteralContext literalContext = getLiteralContext(...); return literalContext.getDataType().convertToString(literalContext.getValue()); } ``` Later we can migrate all usages of getStringValue() to getLiteralContext(). ########## pinot-common/src/main/java/org/apache/pinot/common/request/context/ExpressionContext.java: ########## @@ -31,41 +33,58 @@ */ public class ExpressionContext { public enum Type { - LITERAL, IDENTIFIER, FUNCTION + LITERAL, IDENTIFIER, FUNCTION, } private final Type _type; - private final String _value; + private final String _identifier; private final FunctionContext _function; + // Only set when the _type is LITERAL + private final LiteralContext _literal; - public static ExpressionContext forLiteral(String literal) { - return new ExpressionContext(Type.LITERAL, literal, null); + public static ExpressionContext forLiteralContext(Literal literal){ + return new ExpressionContext(Type.LITERAL, null, null, new LiteralContext(literal)); + } + + public static ExpressionContext forLiteralContext(FieldSpec.DataType type, Object val){ + return new ExpressionContext(Type.LITERAL, null, null, new LiteralContext(type, 
val)); } public static ExpressionContext forIdentifier(String identifier) { - return new ExpressionContext(Type.IDENTIFIER, identifier, null); + return new ExpressionContext(Type.IDENTIFIER, identifier, null, null); } public static ExpressionContext forFunction(FunctionContext function) { - return new ExpressionContext(Type.FUNCTION, null, function); + return new ExpressionContext(Type.FUNCTION, null, function, null); } - private ExpressionContext(Type type, String value, FunctionContext function) { + private ExpressionContext(Type type, String value, FunctionContext function, LiteralContext literal) { _type = type; - _value = value; + _identifier = value; _function = function; + _literal = literal; + } + + // TODO: Refactor all of the usage for getLiteralString. + @Deprecated + public String getLiteralString() { Review Comment: Hmm, I am all for the rule of not adding anything that is annotated with `@Deprecated`, but I was wondering what the alternative is if we don't add this? ########## pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/AggregationFunctionFactory.java: ########## @@ -101,7 +101,8 @@ public static AggregationFunction getAggregationFunction(FunctionContext functio } else if (numArguments == 2) { // Double arguments percentile (e.g. percentile(foo, 99), percentileTDigest(bar, 95), etc.) where the // second argument is a decimal number from 0.0 to 100.0. - double percentile = parsePercentileToDouble(arguments.get(1).getLiteral()); + // Have to use literal string because we need to cast int to double here. + double percentile = parsePercentileToDouble(arguments.get(1).getLiteralString()); Review Comment: Here and in the rest of the places: can we do implicit casting using PinotDataType instead of using String? 
########## pinot-common/src/main/java/org/apache/pinot/common/utils/request/RequestUtils.java: ########## @@ -114,7 +115,15 @@ public static Expression getLiteralExpression(SqlLiteral node) { literal.setDoubleValue(node.bigDecimalValue().doubleValue()); } } else { - literal.setStringValue(StringUtils.replace(node.toValue(), "''", "'")); + // TODO: Support null literal and other types. + switch (node.getTypeName()) { + case BOOLEAN: + literal.setBoolValue(node.booleanValue()); + break; + default: + literal.setStringValue(StringUtils.replace(node.toValue(), "''", "'")); + break; + } Review Comment: IMO this can be added later, since @Jackie-Jiang might have some ideas regarding bool <-> string casting with or without a dictionary. ########## pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/LiteralTransformFunction.java: ########## @@ -58,21 +61,24 @@ public class LiteralTransformFunction implements TransformFunction { private String[] _stringResult; private byte[][] _bytesResult; - public LiteralTransformFunction(String literal) { - _literal = literal; - _dataType = inferLiteralDataType(literal); - if (_dataType.isNumeric()) { - _bigDecimalLiteral = new BigDecimal(_literal); - } else if (_dataType == DataType.BOOLEAN) { - _bigDecimalLiteral = PinotDataType.BOOLEAN.toBigDecimal(Boolean.valueOf(literal)); - } else if (_dataType == DataType.TIMESTAMP) { - // inferLiteralDataType successfully interpreted the literal as TIMESTAMP. _bigDecimalLiteral is populated and - // assigned to _longLiteral. - _bigDecimalLiteral = PinotDataType.TIMESTAMP.toBigDecimal(Timestamp.valueOf(literal)); + public LiteralTransformFunction(LiteralContext literalContext) { + Preconditions.checkNotNull(literalContext); + _literal = literalContext.getValue() == null ? 
"" : literalContext.getValue().toString(); + if (literalContext.getType() == DataType.BOOLEAN) { + _bigDecimalLiteral = PinotDataType.BOOLEAN.toBigDecimal(literalContext.getValue()); + _dataType = DataType.BOOLEAN; } else { - _bigDecimalLiteral = BigDecimal.ZERO; + _dataType = inferLiteralDataType(_literal); Review Comment: IMO we should not infer anymore, but for now we can keep this and add a TODO: I think this is a hacky way of "EXPRESSING" a literal using String. Keep in mind that we can only express number / floating-point number / varchar / bool in SQL syntax, but that doesn't prevent a user from using something like `CAST(1 AS DECIMAL)` to construct a BIG_DECIMAL type literal context. We should add the rest in the future (but not along with the thrift conversion logic, as they will never be passed in as thrift; they will be optimized out by compileTimeLiteralFunctionInvoker). -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org