On Thu, 8 May 2025, Lasse Collin wrote:

In double-byte character sets, the trail byte of a two-byte character
can be a backslash. If such a two-byte character was at the end of
the pathname, the trailing backslash was incorrectly removed.

The code still removes only one trailing directory separator and thus
stat("directory//", &st) still incorrectly fails with MSVCRT. This
commit only fixes the DBCS issue.
---
mingw-w64-crt/stdio/__mingw_fix_stat_path.c | 35 ++++++++++++++++++---
1 file changed, 30 insertions(+), 5 deletions(-)

diff --git a/mingw-w64-crt/stdio/__mingw_fix_stat_path.c b/mingw-w64-crt/stdio/__mingw_fix_stat_path.c
index 7614491a9..83ca35730 100644
--- a/mingw-w64-crt/stdio/__mingw_fix_stat_path.c
+++ b/mingw-w64-crt/stdio/__mingw_fix_stat_path.c
@@ -4,10 +4,22 @@
 * No warranty is given; refer to the file DISCLAIMER.PD within this package.
 */

+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN
+#endif
#include <sys/stat.h>
#include <stdlib.h>
+#include <locale.h>
+#include <windows.h>
#include "__mingw_fix_stat.h"

+static const char* next_char (unsigned int cp, const char* p)
+{
+  /* If it is a lead byte, skip the next byte except if it is \0.
+   * If it is \0, it's not a valid DBCS string. */
+  return (IsDBCSLeadByteEx (cp, *p) && p[1] != '\0') ? p + 2 : p + 1;
+}
+
/**
 * Returns _path without trailing slash if any
 *
@@ -20,6 +32,7 @@

char* __mingw_fix_stat_path (const char* _path)
{
+  const unsigned int cp = __mingw_filename_cp ();
  size_t len;
  char *p;

@@ -28,24 +41,27 @@ char* __mingw_fix_stat_path (const char* _path)
  if (_path && *_path) {
    len = strlen (_path);

-    /* Ignore X:\ */
-
+    /* Ignore X:\
+     * No ANSI or OEM code page uses ':' as a trail byte. (The code page 1361
+     * cannot be used as ANSI or OEM code page.) */
    if (len <= 1 || ((len == 2 || len == 3) && _path[1] == ':'))
      return p;

+    const char *r = _path;
+
    /* Check UNC \\abc\<name>\ */
    if ((_path[0] == '\\' || _path[0] == '/')
        && (_path[1] == '\\' || _path[1] == '/'))
      {
-       const char *r = &_path[2];
+       r = &_path[2];
        while (*r != 0 && *r != '\\' && *r != '/')
-         ++r;
+         r = next_char (cp, r);
        if (*r != 0)
          ++r;
        if (*r == 0)
          return p;
        while (*r != 0 && *r != '\\' && *r != '/')
-         ++r;
+         r = next_char (cp, r);
        if (*r != 0)
          ++r;
        if (*r == 0)
@@ -54,6 +70,15 @@ char* __mingw_fix_stat_path (const char* _path)

    if (_path[len - 1] == '/' || _path[len - 1] == '\\')
      {
+       /* Return if the last character is a double-byte character.
+        * Its trail byte could be a '\' which must not be interpreted
+        * as a directory separator. */
+       while (r[1] != '\0') {
+         r = next_char (cp, r);
+         if (*r == '\0')
+           return p;
+       }
+
        p = (char*)malloc (len);

This logic feels rather complicated, and it is not intuitive to prove that it is right and never accesses anything past the end of the buffer, but I think it should be correct and safe.

(I did find and pick up the v2 of this patch, and have it applied to my local branch of this patchset.)

// Martin



_______________________________________________
Mingw-w64-public mailing list
Mingw-w64-public@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/mingw-w64-public

Reply via email to