dataroaring commented on code in PR #23681:
URL: https://github.com/apache/doris/pull/23681#discussion_r1311008323


##########
be/src/olap/rowid_conversion.h:
##########
@@ -69,18 +87,93 @@ class RowIdConversion {
         }
     }
 
+    const std::vector<std::vector<std::pair<uint32_t, uint32_t>>>& 
get_rowid_conversion_map()
+            const {
+        return _segments_rowid_map;
+    }
+
+    const std::map<std::pair<RowsetId, uint32_t>, uint32_t>& 
get_src_segment_to_id_map() {
+        return _segment_to_id_map;
+    }
+
+    std::pair<RowsetId, uint32_t> get_segment_by_id(uint32_t id) const {
+        DCHECK_GT(_id_to_segment_map.size(), id);
+        return _id_to_segment_map.at(id);
+    }
+
+    uint32_t get_id_by_segment(const std::pair<RowsetId, uint32_t>& segment) 
const {
+        return _segment_to_id_map.at(segment);
+    }
+
+    Status save_to_file() {

Review Comment:
   Please move this implementation out of the header and into the .cpp file.



##########
be/src/olap/merger.cpp:
##########
@@ -227,6 +227,23 @@ Status Merger::vertical_compact_one_group(
 
     if (reader_params.record_rowids) {
         
stats_output->rowid_conversion->set_dst_rowset_id(dst_rowset_writer->rowset_id());
+        std::stringstream file_path_ss;
+        file_path_ss << tablet->tablet_path() << "/rowid_conversion_" << 
tablet->tablet_id();
+        if (reader_type == ReaderType::READER_BASE_COMPACTION) {
+            file_path_ss << "_base";
+        } else if (reader_type == ReaderType::READER_CUMULATIVE_COMPACTION ||
+                   reader_type == ReaderType::READER_SEGMENT_COMPACTION) {
+            file_path_ss << "_cumu";
+        } else if (reader_type == ReaderType::READER_COLD_DATA_COMPACTION) {
+            file_path_ss << "_cold";
+        } else {
+            DCHECK(false);
+            return Status::InternalError("unknown reader type");
+        }
+        file_path_ss << ".XXXXXX";
+        std::string file_path = file_path_ss.str();
+        LOG(INFO) << "rowid_conversion path: " << file_path;
+        stats_output->rowid_conversion->set_file_name(file_path);

Review Comment:
   This path-building code should be a method of the tablet, named something like `rowid_file_name`.



##########
be/src/olap/rowid_conversion.h:
##########
@@ -69,18 +87,93 @@ class RowIdConversion {
         }
     }
 
+    const std::vector<std::vector<std::pair<uint32_t, uint32_t>>>& 
get_rowid_conversion_map()
+            const {
+        return _segments_rowid_map;
+    }
+
+    const std::map<std::pair<RowsetId, uint32_t>, uint32_t>& 
get_src_segment_to_id_map() {
+        return _segment_to_id_map;
+    }
+
+    std::pair<RowsetId, uint32_t> get_segment_by_id(uint32_t id) const {
+        DCHECK_GT(_id_to_segment_map.size(), id);
+        return _id_to_segment_map.at(id);
+    }
+
+    uint32_t get_id_by_segment(const std::pair<RowsetId, uint32_t>& segment) 
const {
+        return _segment_to_id_map.at(segment);
+    }
+
+    Status save_to_file() {
+        RETURN_IF_ERROR(io::global_local_filesystem()->create_file(_file_name, 
&_file_writer));
+        size_t total_size = 0;
+        total_size += _id_to_segment_map.size() * sizeof(uint64_t);
+        total_size += count * sizeof(uint32_t) * 3;
+        std::string binary(total_size, '\0');
+        char* row_binary = binary.data();
+        size_t offset = 0;
+        size_t segment_index = 0;
+        for (auto segment : _segments_rowid_map) {
+            _id_to_pos_map.emplace(segment_index, offset);
+            uint64_t size = segment.size();
+            memcpy(row_binary + offset, &size, sizeof(size));
+            offset += sizeof(size);
+            uint32_t src_row_index = 0;
+            for (auto iter = segment.begin(); iter != segment.end(); ++iter) {
+                memcpy(row_binary + offset, &src_row_index, 
sizeof(src_row_index));
+                offset += sizeof(src_row_index);
+                memcpy(row_binary + offset, &iter->first, sizeof(iter->first));
+                offset += sizeof(iter->first);
+                memcpy(row_binary + offset, &iter->second, 
sizeof(iter->second));
+                offset += sizeof(iter->second);
+                src_row_index++;
+            }
+            segment_index++;
+        }
+        DCHECK(offset == total_size);
+        RETURN_IF_ERROR(_file_writer->append({row_binary, offset}));
+        RETURN_IF_ERROR(_file_writer->close());
+
+        return Status::OK();
+    }
+
+    Status open_file() {
+        RETURN_IF_ERROR(io::global_local_filesystem()->open_file(_file_name, 
&file_reader));
+        return Status::OK();
+    }
+
+    void clear_segments_rowid_map() { _segments_rowid_map.clear(); }
+
     // get destination RowLocation
     // return non-zero if the src RowLocation does not exist
     int get(const RowLocation& src, RowLocation* dst) const {

Review Comment:
   Please move this implementation out of the header and into the .cpp file.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to