gortiz commented on code in PR #13303: URL: https://github.com/apache/pinot/pull/13303#discussion_r1669858884
########## pinot-common/src/main/java/org/apache/pinot/common/datablock/DataBlockSerde.java: ########## @@ -0,0 +1,103 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.common.datablock; + +import java.io.IOException; +import java.util.function.LongConsumer; +import javax.annotation.Nullable; +import org.apache.pinot.segment.spi.memory.DataBuffer; + + +/** + * An interface that can be implemented to support different types of data block serialization and deserialization. + * <p> + * It is important to distinguish between the serialization format and the data block raw representation. + * The raw representation is the in-memory representation of the data block and is completely dependent on the + * runtime Pinot version while the serialization format is the format used to write the data block through the network. + * Two Pinot nodes in different versions may represent the same data block in different raw representations but there + * should be at least one common serialization format (defined by the {@link Version} used) that can be used to + * serialize and deserialize the data block between the two nodes. 
+ */ +public interface DataBlockSerde { + + /** + * Serialize the data block into a buffer. + * @param dataBlock The data block to serialize. + * @param firstInt The first integer, which is used to codify the version and type of the data block in a protocol + * defined way. This integer must be written in the first 4 positions of the buffer in BIG_ENDIAN + * order. + */ + DataBuffer serialize(DataBlock.Raw dataBlock, int firstInt) + throws IOException; + + /** + * Serialize the data block into the given output stream. + * + * @param buffer The buffer that contains the data. It will always use {@link java.nio.ByteOrder#BIG_ENDIAN} order. + * @param offset the offset in the buffer where the data starts. The first integer is reserved to store version and + * type and should not be trusted by the implementation. Use the type parameter instead. + * @param type the type of data block. + * @param finalOffsetConsumer A consumer that will be called after the data block is deserialized. The consumer will + * receive the offset where the data block ends. + */ + DataBlock deserialize(DataBuffer buffer, long offset, DataBlock.Type type, @Nullable LongConsumer finalOffsetConsumer) + throws IOException; + + default DataBlock deserialize(DataBuffer buffer, long offset, DataBlock.Type type) + throws IOException { + return deserialize(buffer, offset, type, null); + } + + Version getVersion(); + + /** + * The version used by this implementation. + * <p> + * The version should be incremented whenever the serialization format changes in a way that is not backwards + * compatible in both serialization and deserialization ways. + */ + enum Version { Review Comment: This is here to support new versions. In case we add a new version in the future, we just need to add it to `DataBlockUtils`. The idea is that in the future we are probably going to need to support more than one format. 
Instead of having a single serde class that understands all the formats, this mechanism is designed so that each version has its own serde, which keeps the code simpler. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org