freemandealer commented on code in PR #38645:
URL: https://github.com/apache/doris/pull/38645#discussion_r1700034746


##########
be/src/olap/rowset/segment_v2/segment.cpp:
##########
@@ -77,16 +79,45 @@ namespace doris::segment_v2 {
 static bvar::Adder<size_t> g_total_segment_num("doris_total_segment_num");
 class InvertedIndexIterator;
 
+io::UInt128Wrapper file_cache_key_from_path(const std::string& seg_path) {
+    std::string base = seg_path.substr(seg_path.rfind('/') + 1); // tricky: npos + 1 == 0
+    return io::BlockFileCache::hash(base);
+}
+
+std::string file_cache_key_str(const std::string& seg_path) {
+    return file_cache_key_from_path(seg_path).to_string();
+}
+
 Status Segment::open(io::FileSystemSPtr fs, const std::string& path, uint32_t segment_id,
                      RowsetId rowset_id, TabletSchemaSPtr tablet_schema,
                      const io::FileReaderOptions& reader_options,
                      std::shared_ptr<Segment>* output) {
     io::FileReaderSPtr file_reader;
     RETURN_IF_ERROR(fs->open_file(path, &file_reader, &reader_options));
     std::shared_ptr<Segment> segment(new Segment(segment_id, rowset_id, std::move(tablet_schema)));
-    segment->_fs = std::move(fs);
+    segment->_fs = fs;
     segment->_file_reader = std::move(file_reader);
-    RETURN_IF_ERROR(segment->_open());
+    auto st = segment->_open();
+    TEST_INJECTION_POINT_CALLBACK("Segment::open:corruption", &st);
+    if (st.is<ErrorCode::CORRUPTION>() &&
+        reader_options.cache_type == io::FileCachePolicy::FILE_BLOCK_CACHE) {
+        LOG(WARNING) << "bad segment file may be read from file cache, try to read remote source "
+                        "file directly, file path: "
+                     << path << " cache_key: " << file_cache_key_str(path);
+        auto file_key = file_cache_key_from_path(path);
+        auto* file_cache = io::FileCacheFactory::instance()->get_by_path(file_key);
+        file_cache->remove_if_cached(file_key);

Review Comment:
   The corrupted file may not be deleted right away if the cell is not releasable.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to