gavinchou commented on code in PR #35473:
URL: https://github.com/apache/doris/pull/35473#discussion_r1616546427


##########
be/src/olap/rowset/segment_v2/inverted_index_writer.cpp:
##########
@@ -566,15 +566,15 @@ class InvertedIndexColumnWriterImpl : public 
InvertedIndexColumnWriter {
                     null_bitmap_out =
                             
std::unique_ptr<lucene::store::IndexOutput>(_dir->createOutput(
                                     
InvertedIndexDescriptor::get_temporary_null_bitmap_file_name()
-                                            .c_str()));
+                                            .data()));

Review Comment:
   `data()` has different meaning with `c_str()`,
   we have to consider what does `createOutput()` need, if it needs a c-string, 
`c_str()` should be used here.



##########
be/src/cloud/cloud_meta_mgr.cpp:
##########
@@ -518,8 +518,7 @@ Status CloudMetaMgr::sync_tablet_rowsets(CloudTablet* 
tablet, bool warmup_delta_
                 rs_meta->init_from_pb(meta_pb);
                 RowsetSharedPtr rowset;
                 // schema is nullptr implies using RowsetMeta.tablet_schema
-                Status s = RowsetFactory::create_rowset(nullptr, 
tablet->tablet_path(), rs_meta,
-                                                        &rowset);
+                Status s = RowsetFactory::create_rowset(nullptr, "", rs_meta, 
&rowset);

Review Comment:
   What does `""` mean here?



##########
be/src/olap/rowset/rowset.h:
##########
@@ -136,6 +136,8 @@ class Rowset : public std::enable_shared_from_this<Rowset> {
 
     bool is_local() const { return _rowset_meta->is_local(); }
 
+    const std::string& tablet_path() const { return _tablet_path; }

Review Comment:
   Add comment, what is a `tablet_path`, also add some examples



##########
be/src/agent/task_worker_pool.cpp:
##########
@@ -1462,16 +1462,20 @@ void push_storage_policy_callback(StorageEngine& 
engine, const TAgentTaskRequest
     const auto& push_storage_policy_req = req.push_storage_policy_req;
     // refresh resource
     for (auto&& param : push_storage_policy_req.resource) {
-        auto existed_resource = get_storage_resource(param.id);
-        if (existed_resource.version >= param.version) {
-            // Stale request, ignore
-            continue;
+        io::RemoteFileSystemSPtr fs;
+        if (auto existed_resource = get_storage_resource(param.id); 
existed_resource) {
+            if (existed_resource->second >= param.version) {
+                // Stale request, ignore
+                continue;
+            }
+
+            fs = std::move(existed_resource->first.fs);
         }
 
         if (param.__isset.s3_storage_param) {
-            update_s3_resource(param, std::move(existed_resource.fs));
+            update_s3_resource(param, std::move(fs));

Review Comment:
   Is is possible and OK that `fs` is nullptr here?



##########
be/src/olap/olap_define.h:
##########
@@ -100,15 +99,14 @@ static const std::string CLONE_PREFIX = "clone";
 static const std::string SPILL_DIR_PREFIX = "spill";
 static const std::string SPILL_GC_DIR_PREFIX = "spill_gc";
 
-// define paths
-static inline std::string remote_tablet_path(int64_t tablet_id) {
-    // data/{tablet_id}
-    return fmt::format("{}/{}", DATA_PREFIX, tablet_id);
+static inline std::string local_segment_path(std::string_view tablet_path,

Review Comment:
   Do we need to add path_version here?



##########
be/src/olap/storage_policy.h:
##########
@@ -62,20 +62,35 @@ std::vector<std::pair<int64_t, int64_t>> 
get_storage_policy_ids();
 
 struct StorageResource {

Review Comment:
   `LocalFileSystem` is also a kind of `StorageResource`, we may take it into 
consideration in the future.



##########
be/src/olap/tablet.cpp:
##########
@@ -1857,20 +1833,19 @@ void 
Tablet::_init_context_common_fields(RowsetWriterContext& context) {
     if (context.rowset_type == ALPHA_ROWSET) {
         context.rowset_type = _engine.default_rowset_type();
     }
-    if (context.fs != nullptr && context.fs->type() != 
io::FileSystemType::LOCAL) {
-        context.rowset_dir = remote_tablet_path(tablet_id());
-    } else {
-        context.rowset_dir = tablet_path();
+
+    if (context.is_local_rowset()) {
+        context.tablet_path = _tablet_path;
     }

Review Comment:
   Why there is no `tablet_path` for remote rowset?
   What does it matter if we set a `tablet_path` even it's a remote rowset?



##########
be/src/olap/storage_policy.cpp:
##########
@@ -123,9 +131,54 @@ std::vector<std::pair<std::string, int64_t>> 
get_storage_resource_ids() {
     res.reserve(s_storage_resource_mgr.map.size());
     std::lock_guard lock(s_storage_resource_mgr.mtx);
     for (auto& [id, resource] : s_storage_resource_mgr.map) {
-        res.emplace_back(id, resource.version);
+        res.emplace_back(id, resource.second);
     }
     return res;
 }
 
+namespace {
+
+[[noreturn]] void exit_at_unknown_path_version(std::string_view resource_id, 
int64_t path_version) {
+    CHECK(false)
+            << "unknown path version, please upgrade BE or drop this storage 
vault. resource_id="
+            << resource_id << " path_version=" << path_version;
+}
+
+} // namespace
+
+std::string StorageResource::remote_segment_path(int64_t tablet_id, 
std::string_view rowset_id,
+                                                 int64_t seg_id) const {
+    switch (path_version) {
+    case 0:
+        return fmt::format("{}/{}/{}_{}.dat", DATA_PREFIX, tablet_id, 
rowset_id, seg_id);
+    default:
+        exit_at_unknown_path_version(fs->id(), path_version);
+    }
+}
+
+std::string StorageResource::remote_segment_path(const RowsetMeta& rowset, 
int64_t seg_id) const {
+    switch (path_version) {
+    case 0:
+        return fmt::format("{}/{}/{}_{}.dat", DATA_PREFIX, rowset.tablet_id(),

Review Comment:
   Ditto.



##########
be/src/olap/storage_policy.cpp:
##########
@@ -123,9 +131,54 @@ std::vector<std::pair<std::string, int64_t>> 
get_storage_resource_ids() {
     res.reserve(s_storage_resource_mgr.map.size());
     std::lock_guard lock(s_storage_resource_mgr.mtx);
     for (auto& [id, resource] : s_storage_resource_mgr.map) {
-        res.emplace_back(id, resource.version);
+        res.emplace_back(id, resource.second);
     }
     return res;
 }
 
+namespace {
+
+[[noreturn]] void exit_at_unknown_path_version(std::string_view resource_id, 
int64_t path_version) {
+    CHECK(false)
+            << "unknown path version, please upgrade BE or drop this storage 
vault. resource_id="
+            << resource_id << " path_version=" << path_version;
+}
+
+} // namespace
+
+std::string StorageResource::remote_segment_path(int64_t tablet_id, 
std::string_view rowset_id,
+                                                 int64_t seg_id) const {
+    switch (path_version) {
+    case 0:
+        return fmt::format("{}/{}/{}_{}.dat", DATA_PREFIX, tablet_id, 
rowset_id, seg_id);

Review Comment:
   Consider making `"{}/{}/{}_{}.dat"` as a constant.



##########
be/src/olap/schema_change.cpp:
##########
@@ -1112,7 +1112,12 @@ Status 
SchemaChangeJob::_convert_historical_rowsets(const SchemaChangeParams& sc
         context.segments_overlap = 
rs_reader->rowset()->rowset_meta()->segments_overlap();
         context.tablet_schema = _new_tablet_schema;
         context.newest_write_timestamp = rs_reader->newest_write_timestamp();
-        context.fs = rs_reader->rowset()->rowset_meta()->fs();
+
+        if (!rs_reader->rowset()->is_local()) {

Review Comment:
   For storage vault mode, is that true `is_local() == false` for all rowsets?



##########
be/src/olap/compaction.cpp:
##########
@@ -581,25 +582,42 @@ Status Compaction::do_inverted_index_compaction() {
     }
 
     // create index_writer to compaction indexes
-    const auto& fs = _output_rowset->rowset_meta()->fs();
-    const auto& tablet_path = _tablet->tablet_path();
+    std::unordered_map<RowsetId, Rowset*> rs_id_to_rowset_map;
+    for (auto&& rs : _input_rowsets) {
+        rs_id_to_rowset_map.emplace(rs->rowset_id(), rs.get());
+    }
 
     // src index dirs
-    // format: rowsetId_segmentId
     std::vector<std::unique_ptr<InvertedIndexFileReader>> 
inverted_index_file_readers(
             src_segment_num);
     for (const auto& m : src_seg_to_id_map) {

Review Comment:
   also "unpack" `m` to avoid `first` and `second`



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to