imay commented on a change in pull request #3580: URL: https://github.com/apache/incubator-doris/pull/3580#discussion_r425521114
########## File path: be/src/olap/memory/column_reader.h ########## @@ -0,0 +1,95 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "olap/memory/column.h" + +namespace doris { +namespace memory { + +// Holder for a ColumnBlock +// Although ColumnBlock support reference counting, we avoid using it because +// a reader already hold a proxy to all the ColumnBlocks, it's unnecessary +// and inefficient to inc/dec refcount, so we use this holder instead. +// +// If the underlying column data doesn't need merge-on-read, we can use the +// underlying base's ColumnBlock directly, and _own_cb equals false. +// +// If there are some deltas need to be merged, a new ColumnBlock will be +// created, and _own_cb equals true. +class ColumnBlockHolder { Review comment: Is this thread-safe? Better declare it in the comment. ########## File path: be/src/olap/memory/column.cpp ########## @@ -105,19 +108,102 @@ Status Column::capture_version(uint64_t version, vector<ColumnDelta*>* deltas, return Status::OK(); } -void Column::capture_latest(vector<ColumnDelta*>* deltas) const { +void Column::capture_latest(vector<ColumnDelta*>* deltas, uint64_t* version) const { deltas->reserve(_versions.size() - _base_idx - 1); for (size_t i = _base_idx + 1; i < _versions.size(); i++) { deltas->emplace_back(_versions[i].delta.get()); } + *version = _versions.back().version; } Status Column::read(uint64_t version, std::unique_ptr<ColumnReader>* reader) { - return Status::NotSupported("not supported"); + ColumnType type = schema().type(); + bool nullable = schema().is_nullable(); + vector<ColumnDelta*> deltas; + uint64_t real_version; + RETURN_IF_ERROR(capture_version(version, &deltas, &real_version)); + +#define CREATE_READER(T) \ + if (nullable) { \ + (*reader).reset( \ + new TypedColumnReader<T, true>(this, version, real_version, std::move(deltas))); \ + } else { \ + (*reader).reset( \ + new TypedColumnReader<T, false>(this, version, real_version, std::move(deltas))); \ + } + + switch (type) { + case OLAP_FIELD_TYPE_BOOL: + case OLAP_FIELD_TYPE_TINYINT: + CREATE_READER(int8_t) + break; + case OLAP_FIELD_TYPE_SMALLINT: + CREATE_READER(int16_t) + break; + case OLAP_FIELD_TYPE_INT: + CREATE_READER(int32_t) + break; + case OLAP_FIELD_TYPE_BIGINT: + CREATE_READER(int64_t) + break; + case OLAP_FIELD_TYPE_LARGEINT: + CREATE_READER(int128_t) + break; + case OLAP_FIELD_TYPE_FLOAT: + CREATE_READER(float) + break; + case OLAP_FIELD_TYPE_DOUBLE: + CREATE_READER(double) + break; + default: + return Status::NotSupported("create column reader: type not supported"); + } Review comment: better to add `#undef CREATE_READER` after usage ########## File path: be/src/olap/memory/typed_column_writer.h ########## @@ -0,0 +1,305 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "olap/memory/column.h" +#include "olap/memory/column_writer.h" +#include "olap/memory/typed_column_reader.h" + +namespace doris { +namespace memory { + +// Used to hold temporary nullable update cells in ColumnWriter +template <class T> +class NullableUpdateType { +public: + bool& isnull() { return _isnull; } + T& value() { return _value; } + +private: + bool _isnull; Review comment: default value ########## File path: be/src/olap/memory/column_writer.h ########## @@ -0,0 +1,81 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "olap/memory/common.h" + +namespace doris { +namespace memory { + +class Column; + +// Exclusive writer for column, also with some reader's functionality +// +// Example writer usage: +// scoped_refptr<Column> column; +// std::unique_ptr<ColumnWriter> writer; +// // create writer +// RETURN_IF_ERROR(column->write(&writer).ok()); +// writer->insert(new_rowid, &value, 0); +// writer->update(rowid, &value, 0); +// writer->finalize(new_version); +// // get new column refptr +// // if a COW has been done, column will point to new column object +// // else column will remain the same. +// writer->get_new_column(&column); +class ColumnWriter { +public: + virtual ~ColumnWriter() {} + + // Get cell by rid, caller needs to make sure rid is in valid range + // + // Note: this is the same method as ColumnReader::get + virtual const void* get(const uint32_t rid) const = 0; + + // Borrow a vtable slot to do typed hashcode calculation, mainly used to find + // row by rowkey using hash index. + // + // It's designed to support array operator, so there are two parameters for user + // to pass array start and array index. + // + // Note: this is the same method as ColumnReader::hashcode + virtual uint64_t hashcode(const void* rhs, size_t rhs_idx) const = 0; + + // Check cell equality, mainly used to find row by rowkey using hash index. + // + // Note: this is the same method as ColumnReader::equals + virtual bool equals(const uint32_t rid, const void* rhs, size_t rhs_idx) const = 0; + + virtual string debug_string() const = 0; + + // Insert/append a cell at specified row + virtual Status insert(uint32_t rid, const void* value) = 0; + + // Update a cell at specified row + virtual Status update(uint32_t rid, const void* value) = 0; Review comment: And how about delete operation. ########## File path: be/src/olap/memory/column_writer.h ########## @@ -0,0 +1,81 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "olap/memory/common.h" + +namespace doris { +namespace memory { + +class Column; + +// Exclusive writer for column, also with some reader's functionality +// +// Example writer usage: +// scoped_refptr<Column> column; +// std::unique_ptr<ColumnWriter> writer; +// // create writer +// RETURN_IF_ERROR(column->write(&writer).ok()); +// writer->insert(new_rowid, &value, 0); +// writer->update(rowid, &value, 0); +// writer->finalize(new_version); +// // get new column refptr +// // if a COW has been done, column will point to new column object +// // else column will remain the same. +// writer->get_new_column(&column); +class ColumnWriter { Review comment: Is the class a writer of class or a writer of part of column? In my opinion, column will contain all data for a table "column", and its data is not written at one time. One Writer may update some part of column data. If my understanding is right, I think ColumnBlockWriter/ColumnChunkWriter is a better name. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org