Jackie-Jiang commented on code in PR #12763: URL: https://github.com/apache/pinot/pull/12763#discussion_r1610608990
########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java: ########## @@ -571,21 +573,53 @@ private void createDerivedColumnV1Indices(String column, FunctionEvaluator funct int numDocs = _segmentMetadata.getTotalDocs(); Object[] outputValues = new Object[numDocs]; PinotDataType outputValueType = null; + + FieldSpec fieldSpec = _schema.getFieldSpecFor(column); + Object defaultNullValue = fieldSpec.getDefaultNullValue(); + // Just log the first function evaluation error + int functionEvaluateErrorCount = 0; + Exception functionEvalError = null; + Object[] inputValuesWithError = null; + for (int i = 0; i < numDocs; i++) { for (int j = 0; j < numArguments; j++) { inputValues[j] = valueReaders.get(j).getValue(i); } - Object outputValue = functionEvaluator.evaluate(inputValues); - outputValues[i] = outputValue; - if (outputValueType == null) { - Class<?> outputValueClass = outputValue.getClass(); - outputValueType = FunctionUtils.getArgumentType(outputValueClass); - Preconditions.checkState(outputValueType != null, "Unsupported output value class: %s", outputValueClass); + try { + Object outputValue = functionEvaluator.evaluate(inputValues); + outputValues[i] = outputValue; + if (outputValueType == null) { + Class<?> outputValueClass = outputValue.getClass(); + outputValueType = FunctionUtils.getArgumentType(outputValueClass); + Preconditions.checkState(outputValueType != null, "Unsupported output value class: %s", outputValueClass); + } + } catch (Exception e) { + if (!errorOnFailure) { + functionEvaluateErrorCount++; + // Just record one error to log after the loop + if (functionEvalError == null) { + functionEvalError = e; + inputValuesWithError = Arrays.copyOf(inputValues, inputValues.length); + } + outputValues[i] = defaultNullValue; Review Comment: We cannot directly put default value here. Say scalar function returns `TIMESTAMP`, the field is configured as `LONG`, when converting the type, it will expect `Timestamp` value instead of `Long` value. The proper solution is to skip filling the `outputValue` here and do `null` check afterwards. ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java: ########## @@ -571,21 +573,53 @@ private void createDerivedColumnV1Indices(String column, FunctionEvaluator funct int numDocs = _segmentMetadata.getTotalDocs(); Object[] outputValues = new Object[numDocs]; PinotDataType outputValueType = null; + + FieldSpec fieldSpec = _schema.getFieldSpecFor(column); + Object defaultNullValue = fieldSpec.getDefaultNullValue(); + // Just log the first function evaluation error + int functionEvaluateErrorCount = 0; + Exception functionEvalError = null; + Object[] inputValuesWithError = null; + for (int i = 0; i < numDocs; i++) { for (int j = 0; j < numArguments; j++) { inputValues[j] = valueReaders.get(j).getValue(i); } - Object outputValue = functionEvaluator.evaluate(inputValues); - outputValues[i] = outputValue; - if (outputValueType == null) { - Class<?> outputValueClass = outputValue.getClass(); - outputValueType = FunctionUtils.getArgumentType(outputValueClass); - Preconditions.checkState(outputValueType != null, "Unsupported output value class: %s", outputValueClass); + try { + Object outputValue = functionEvaluator.evaluate(inputValues); + outputValues[i] = outputValue; + if (outputValueType == null) { + Class<?> outputValueClass = outputValue.getClass(); + outputValueType = FunctionUtils.getArgumentType(outputValueClass); + Preconditions.checkState(outputValueType != null, "Unsupported output value class: %s", outputValueClass); + } + } catch (Exception e) { + if (!errorOnFailure) { + functionEvaluateErrorCount++; + // Just record one error to log after the loop + if (functionEvalError == null) { + functionEvalError = e; + inputValuesWithError = Arrays.copyOf(inputValues, inputValues.length); + } + outputValues[i] = defaultNullValue; + } else { + throw e; + } } } + if (outputValueType == null) { + // If no output value type is determined, it means all the function evaluations failed and fieldSpec default + // null value is set. + outputValueType = PinotDataType.getPinotDataTypeForIngestion(fieldSpec); + } + + if (functionEvaluateErrorCount > 0) { + LOGGER.warn("Caught {} exceptions while evaluating derived column: {} with function: {} and arguments: {}", + functionEvaluateErrorCount, column, functionEvaluator, inputValuesWithError, functionEvalError); Review Comment: You'll need to use `Arrays.toString(inputValuesWithError)` ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java: ########## @@ -571,21 +573,53 @@ private void createDerivedColumnV1Indices(String column, FunctionEvaluator funct int numDocs = _segmentMetadata.getTotalDocs(); Object[] outputValues = new Object[numDocs]; PinotDataType outputValueType = null; + + FieldSpec fieldSpec = _schema.getFieldSpecFor(column); + Object defaultNullValue = fieldSpec.getDefaultNullValue(); + // Just log the first function evaluation error + int functionEvaluateErrorCount = 0; + Exception functionEvalError = null; + Object[] inputValuesWithError = null; + for (int i = 0; i < numDocs; i++) { for (int j = 0; j < numArguments; j++) { inputValues[j] = valueReaders.get(j).getValue(i); } - Object outputValue = functionEvaluator.evaluate(inputValues); - outputValues[i] = outputValue; - if (outputValueType == null) { - Class<?> outputValueClass = outputValue.getClass(); - outputValueType = FunctionUtils.getArgumentType(outputValueClass); - Preconditions.checkState(outputValueType != null, "Unsupported output value class: %s", outputValueClass); + try { + Object outputValue = functionEvaluator.evaluate(inputValues); + outputValues[i] = outputValue; + if (outputValueType == null) { + Class<?> outputValueClass = outputValue.getClass(); + outputValueType = FunctionUtils.getArgumentType(outputValueClass); + Preconditions.checkState(outputValueType != null, "Unsupported output value class: %s", outputValueClass); + } + } catch (Exception e) { + if (!errorOnFailure) { + functionEvaluateErrorCount++; + // Just record one error to log after the loop + if (functionEvalError == null) { + functionEvalError = e; + inputValuesWithError = Arrays.copyOf(inputValues, inputValues.length); + } + outputValues[i] = defaultNullValue; + } else { + throw e; + } } } + if (outputValueType == null) { + // If no output value type is determined, it means all the function evaluations failed and fieldSpec default + // null value is set. + outputValueType = PinotDataType.getPinotDataTypeForIngestion(fieldSpec); + } + + if (functionEvaluateErrorCount > 0) { + LOGGER.warn("Caught {} exceptions while evaluating derived column: {} with function: {} and arguments: {}", + functionEvaluateErrorCount, column, functionEvaluator, inputValuesWithError, functionEvalError); Review Comment: Probably also mention this is one sampled input values -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org