rohityadav1993 commented on code in PR #11983: URL: https://github.com/apache/pinot/pull/11983#discussion_r1551615323
########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/PartialUpsertHandler.java: ########## @@ -32,76 +31,79 @@ /** * Handler for partial-upsert. + * + * This class is responsible for merging the new record with the previous record. + * It uses the configured merge strategies to merge the columns. If no merge strategy is configured for a column, + * it uses the default merge strategy. + * + * It is also possible to define a custom logic for merging rows by implementing {@link PartialUpsertMerger}. + * If a merger for row is defined then it takes precedence and ignores column mergers. */ public class PartialUpsertHandler { - // _column2Mergers maintains the mapping of merge strategies per columns. - private final Map<String, PartialUpsertMerger> _column2Mergers = new HashMap<>(); - private final PartialUpsertMerger _defaultPartialUpsertMerger; private final List<String> _comparisonColumns; private final List<String> _primaryKeyColumns; + private final PartialUpsertMerger _partialUpsertMerger; - public PartialUpsertHandler(Schema schema, Map<String, UpsertConfig.Strategy> partialUpsertStrategies, - UpsertConfig.Strategy defaultPartialUpsertStrategy, List<String> comparisonColumns) { - _defaultPartialUpsertMerger = PartialUpsertMergerFactory.getMerger(defaultPartialUpsertStrategy); + public PartialUpsertHandler(Schema schema, List<String> comparisonColumns, UpsertConfig upsertConfig) { _comparisonColumns = comparisonColumns; _primaryKeyColumns = schema.getPrimaryKeyColumns(); - for (Map.Entry<String, UpsertConfig.Strategy> entry : partialUpsertStrategies.entrySet()) { - _column2Mergers.put(entry.getKey(), PartialUpsertMergerFactory.getMerger(entry.getValue())); - } + _partialUpsertMerger = + PartialUpsertMergerFactory.getPartialUpsertMerger(_primaryKeyColumns, comparisonColumns, upsertConfig); } - /** - * Merges records and returns the merged record. - * We used a map to indicate all configured fields for partial upsert. 
For these fields - * (1) If the prev value is null, return the new value - * (2) If the prev record is not null, the new value is null, return the prev value. - * (3) If neither values are not null, then merge the value and return. - * For un-configured fields, they are using default override behavior, regardless null values. - * - * For example, overwrite merger will only override the prev value if the new value is not null. - * Null values will override existing values if not configured. They can be ignored by using ignoreMerger. - * - * @param prevRecord wrapper for previous record, which lazily reads column values of previous row and caches for - * re-reads. - * @param newRecord the new consumed record. - */ - public void merge(LazyRow prevRecord, GenericRow newRecord) { - for (String column : prevRecord.getColumnNames()) { - if (!_primaryKeyColumns.contains(column)) { - PartialUpsertMerger merger = _column2Mergers.getOrDefault(column, _defaultPartialUpsertMerger); - // Non-overwrite mergers - // (1) If the value of the previous is null value, skip merging and use the new value - // (2) Else If the value of new value is null, use the previous value (even for comparison columns). - // (3) Else If the column is not a comparison column, we applied the merged value to it. - if (!(merger instanceof OverwriteMerger)) { - Object prevValue = prevRecord.getValue(column); - if (prevValue != null) { - if (newRecord.isNullValue(column)) { - // Note that we intentionally want to overwrite any previous _comparisonColumn value in the case of - // using - // multiple comparison columns. We never apply a merge function to it, rather we just take any/all - // non-null comparison column values from the previous record, and the sole non-null comparison column - // value from the new record. 
- newRecord.putValue(column, prevValue); - newRecord.removeNullValueField(column); - } else if (!_comparisonColumns.contains(column)) { - newRecord.putValue(column, merger.merge(prevValue, newRecord.getValue(column))); - } - } - } else { - // Overwrite mergers. - // (1) If the merge strategy is Overwrite merger and newValue is not null, skip and use the new value - // (2) Otherwise, if previous is not null, init columnReader and use the previous value. - if (newRecord.isNullValue(column)) { - Object prevValue = prevRecord.getValue(column); - if (prevValue != null) { - newRecord.putValue(column, prevValue); - newRecord.removeNullValueField(column); - } - } + public void merge(LazyRow prevRecord, GenericRow newRecord, Map<String, Object> reuseMergerResult) { + reuseMergerResult.clear(); + + // merger current row with previously indexed row + _partialUpsertMerger.merge(prevRecord, newRecord, reuseMergerResult); + + if (_partialUpsertMerger instanceof PartialUpsertColumnarMerger) { + // iterate over all columns in prevRecord and update newRecord with merged values + for (String column : prevRecord.getColumnNames()) { + // no merger to apply on primary key columns + if (_primaryKeyColumns.contains(column) || _comparisonColumns.contains(column)) { + continue; + } + + // use merged column value from result map + if (reuseMergerResult.containsKey(column)) { + Object mergedValue = reuseMergerResult.get(column); + setMergedValue(newRecord, column, mergedValue); + } + } + } else { + // iterate over only merger results and update newRecord with merged values + for (Map.Entry<String, Object> entry : reuseMergerResult.entrySet()) { + // skip if primary key column + String column = entry.getKey(); + if (_primaryKeyColumns.contains(column) || _comparisonColumns.contains(column)) { + continue; } + + Object mergedValue = entry.getValue(); + setMergedValue(newRecord, column, mergedValue); + } + } + + // handle comparison columns + for (String column: _comparisonColumns) { + if 
(newRecord.isNullValue(column) && !prevRecord.isNullValue(column)) { + newRecord.putValue(column, prevRecord.getValue(column)); + newRecord.removeNullValueField(column); } } } + + private void setMergedValue(GenericRow newRecord, String column, Object mergedValue) { + if (mergedValue != null) { + // remove null value field if it was set + newRecord.removeNullValueField(column); + newRecord.putValue(column, mergedValue); + } else { + // if column exists but mapped to a null value then merger result was a null value + newRecord.addNullValueField(column); + newRecord.putValue(column, null); Review Comment: I might not be fully aware of the null value support feature, let me update it with `putDefaultNullValue()`. Thank you. I don't foresee a scenario where the merged value would be null. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org