On Jun 27, 2022, Alexandre Oliva <ol...@adacore.com> wrote:

> (ii) arrange for recursive_directory_iterator to rewind a dir from
> which entries have been _erase()d before returning to the parent dir

Here's an implementation of the above.  I kind of like it; it's far more
elegant than the earlier patch, and if it starts removing stuff from one
subdir, it won't remove stuff from other sibling subdirs before removing
that one subdir, and it won't visit any directory more than once.  I
don't think POSIX imposes any such restriction (AFAICT one could launch
parallel removes for each subdir and still be compliant), but it's less
surprising this way.


libstdc++: auto-rewind dirs for remove_all

On some target systems (e.g. rtems6.0), removing directory components
while iterating over directory entries may cause some of the directory
entries to be skipped, which prevents the removal of the parent
directory from succeeding.

Advancing the iterator before removing a member proved not to be
enough, so I've arranged the directory reading implementation to
implicitly rewind a directory when reaching the end, as long as there
were any entries and they have all been removed.  Rewinding will only
ever take place for users of recursive_directory_iterator::__erase,
and thus of _Dir::do_unlink, and since __erase is a private member
function, it can only be used by the remove_all functions because
they're granted friendship.

Regstrapped on x86_64-linux-gnu, also tested with a cross to
aarch64-rtems6.  Ok to install?


for  libstdc++-v3/ChangeLog

        * src/filesystem/dir-common.h (__gnu_posix::rewinddir):
        Define.
        * src/c++17/fs_dir.cc (fs::_Dir::auto_rewind): New enum.
        (fs::_Dir::rewind): New data member.
        (fs::_Dir::advance): Update rewind, rewinddir if found entries
        have all been removed.
        (fs::_Dir::do_unlink): Drop const.  Update rewind.
        (fs::_Dir::unlink, fs::_Dir::rmdir): Drop const.
---
 libstdc++-v3/src/c++17/fs_dir.cc         |   75 +++++++++++++++++++++++++++++-
 libstdc++-v3/src/filesystem/dir-common.h |    3 +
 2 files changed, 75 insertions(+), 3 deletions(-)

diff --git a/libstdc++-v3/src/c++17/fs_dir.cc b/libstdc++-v3/src/c++17/fs_dir.cc
index 2258399da2587..6f535c95a32fb 100644
--- a/libstdc++-v3/src/c++17/fs_dir.cc
+++ b/libstdc++-v3/src/c++17/fs_dir.cc
@@ -44,6 +44,30 @@ template class 
std::__shared_ptr<fs::recursive_directory_iterator::_Dir_stack>;
 
 struct fs::_Dir : _Dir_base
 {
+  // On some systems, removing a directory entry causes readdir to
+  // skip the subsequent entry.  This state machine enables remove_all
+  // to not miss entries, by arranging for directories to
+  // automatically rewind iff every visited entry got removed, hitting
+  // the end only when no entries are found.  We start with
+  // no_entries, and advance moves from that to found_entry, that
+  // do_unlink changes to removed_entry.  If advance is called again
+  // without unlink, it moves to remaining_entry instead, so that we
+  // will know not to rewind (otherwise we'd visit the same entry
+  // again).  OTOH, if advance doesn't find any entry and the state is
+  // removed_entry, it resets to no_entries and rewinds; at other
+  // states, no rewind takes place.  Skipped entries (dot and dotdot
+  // and permission-denied) do not affect the state machine: they're
+  // skipped every time anyway.  It is envisioned that, with a
+  // reliable detection mechanism for systems that don't need
+  // rewinding, rewind could be initialized to remaining_entry, that
+  // is a final state that prevents rewinding.
+  enum auto_rewind {
+    no_entries,
+    found_entry,
+    removed_entry,
+    remaining_entry
+  };
+
   _Dir(const fs::path& p, bool skip_permission_denied, bool nofollow,
        [[maybe_unused]] bool filename_only, error_code& ec)
   : _Dir_base(p.c_str(), skip_permission_denied, nofollow, ec)
@@ -67,6 +91,21 @@ struct fs::_Dir : _Dir_base
   {
     if (const auto entp = _Dir_base::advance(skip_permission_denied, ec))
       {
+       switch (rewind)
+         {
+         case no_entries:
+         case removed_entry:
+           rewind = found_entry;
+           break;
+         case found_entry:
+           rewind = remaining_entry;
+           break;
+         case remaining_entry:
+           break;
+         default:
+           __builtin_unreachable ();
+           break;
+         }
        auto name = path;
        name /= entp->d_name;
        file_type type = file_type::none;
@@ -80,6 +119,22 @@ struct fs::_Dir : _Dir_base
       }
     else if (!ec)
       {
+       switch (rewind)
+         {
+         case removed_entry:
+           rewind = no_entries;
+           posix::rewinddir(this->dirp);
+           return advance (skip_permission_denied, ec);
+         case found_entry:
+           rewind = remaining_entry;
+           break;
+         case no_entries:
+         case remaining_entry:
+           break;
+         default:
+           __builtin_unreachable ();
+           break;
+         }
        // reached the end
        entry = {};
       }
@@ -144,8 +199,21 @@ struct fs::_Dir : _Dir_base
   }
 
   bool
-  do_unlink(bool is_directory, error_code& ec) const noexcept
+  do_unlink(bool is_directory, error_code& ec) noexcept
   {
+    switch (rewind)
+      {
+      case no_entries:
+      case removed_entry:
+      default:
+       __builtin_unreachable ();
+       break;
+      case found_entry:
+       rewind = removed_entry;
+       break;
+      case remaining_entry:
+       break;
+      }
 #if _GLIBCXX_HAVE_UNLINKAT
     const auto atp = current();
     if (::unlinkat(atp.dir(), atp.path_at_dir(),
@@ -166,16 +234,17 @@ struct fs::_Dir : _Dir_base
 
   // Remove the non-directory that this->entry refers to.
   bool
-  unlink(error_code& ec) const noexcept
+  unlink(error_code& ec) noexcept
   { return do_unlink(/* is_directory*/ false, ec); }
 
   // Remove the directory that this->entry refers to.
   bool
-  rmdir(error_code& ec) const noexcept
+  rmdir(error_code& ec) noexcept
   { return do_unlink(/* is_directory*/ true, ec); }
 
   fs::path             path; // Empty if only using unlinkat with file descr.
   directory_entry      entry;
+  enum auto_rewind     rewind = no_entries;
 };
 
 namespace
diff --git a/libstdc++-v3/src/filesystem/dir-common.h 
b/libstdc++-v3/src/filesystem/dir-common.h
index 228fab55afbcf..7eb39ae74378d 100644
--- a/libstdc++-v3/src/filesystem/dir-common.h
+++ b/libstdc++-v3/src/filesystem/dir-common.h
@@ -55,6 +55,7 @@ using char_type = wchar_t;
 using DIR = ::_WDIR;
 using dirent = _wdirent;
 inline DIR* opendir(const wchar_t* path) { return ::_wopendir(path); }
+inline void rewinddir(DIR* dir) { ::_wrewinddir(dir); }
 inline dirent* readdir(DIR* dir) { return ::_wreaddir(dir); }
 inline int closedir(DIR* dir) { return ::_wclosedir(dir); }
 #elif defined _GLIBCXX_HAVE_DIRENT_H
@@ -64,11 +65,13 @@ typedef struct ::dirent dirent;
 using ::opendir;
 using ::readdir;
 using ::closedir;
+using ::rewinddir;
 #else
 using char_type = char;
 struct dirent { const char* d_name; };
 struct DIR { };
 inline DIR* opendir(const char*) { return nullptr; }
+inline void rewinddir(DIR*) { }
 inline dirent* readdir(DIR*) { return nullptr; }
 inline int closedir(DIR*) { return -1; }
 #undef _GLIBCXX_HAVE_DIRFD


-- 
Alexandre Oliva, happy hacker                https://FSFLA.org/blogs/lxo/
   Free Software Activist                       GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about <https://stallmansupport.org>

Reply via email to