airborne12 commented on code in PR #27887:
URL: https://github.com/apache/doris/pull/27887#discussion_r1423380763


##########
be/src/olap/rowset/segment_v2/inverted_index_compound_directory.cpp:
##########
@@ -97,6 +97,36 @@ CL_NS(store)::Directory* 
DorisCompoundFileWriter::getDirectory() {
     return directory;
 }
 
+void DorisCompoundFileWriter::sort_files(std::vector<FileInfo>& file_infos) {
+    auto cmp = [](const std::pair<int32_t, std::string>& lhs,
+                  const std::pair<int32_t, std::string>& rhs) { return 
lhs.first < rhs.first; };
+    std::map<std::pair<int32_t, std::string>, std::set<FileInfo>, 
decltype(cmp)> file_maps(cmp);
+    file_maps[{0, "segments"}];
+    file_maps[{1, "fnm"}];
+    file_maps[{2, "tii"}];
+    file_maps[{INT_MAX, "all"}];
+
+    for (size_t i = 0; i < file_infos.size(); i++) {
+        const auto& file_info = file_infos[i];
+        auto it =
+                std::find_if(file_maps.begin(), file_maps.end(), 
[&file_info](const auto& element) {
+                    return file_info.filename.find(element.first.second) != 
std::string::npos;
+                });
+        if (it != file_maps.end()) {
+            it->second.insert(file_info);
+        } else {
+            file_maps[{INT_MAX, "all"}].insert(file_info);
+        }
+    }
+
+    file_infos.clear();
+    for (const auto& map : file_maps) {
+        for (const auto& file : map.second) {
+            file_infos.emplace_back(file);
+        }
+    }
+}
+

Review Comment:
   If we are supposed to order file_infos by segments, fnm, tii, and then the 
other files by size, we could simplify the code like the following:
   
   ```
   void DorisCompoundFileWriter::sort_files(std::vector<FileInfo>& file_infos) 
const {
       auto file_priority = [](const std::string& filename) {
           if (filename.find("segments") != std::string::npos) return 1;
           if (filename.find("fnm") != std::string::npos) return 2;
           if (filename.find("tii") != std::string::npos) return 3;
           return 4; // Other files
       };
   
       std::sort(file_infos.begin(), file_infos.end(),
           [&](const FileInfo& a, const FileInfo& b) {
               int priority_a = file_priority(a.filename);
               int priority_b = file_priority(b.filename);
               if (priority_a != priority_b) return priority_a < priority_b;
               return a.filesize < b.filesize;
           });
   }
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to