On Thu, 8 May 2025, Lasse Collin wrote:
In double-byte character sets, the trail byte of a two-byte character
can be a backslash. If such a two-byte character was at the end of
the pathname, the trailing backslash was incorrectly removed.
The code still removes only one trailing directory separator and thus
stat("directory//", &st) still incorrectly fails with MSVCRT. This
commit only fixes the DBCS issue.
---
mingw-w64-crt/stdio/__mingw_fix_stat_path.c | 35 ++++++++++++++++++---
1 file changed, 30 insertions(+), 5 deletions(-)
diff --git a/mingw-w64-crt/stdio/__mingw_fix_stat_path.c
b/mingw-w64-crt/stdio/__mingw_fix_stat_path.c
index 7614491a9..83ca35730 100644
--- a/mingw-w64-crt/stdio/__mingw_fix_stat_path.c
+++ b/mingw-w64-crt/stdio/__mingw_fix_stat_path.c
@@ -4,10 +4,22 @@
* No warranty is given; refer to the file DISCLAIMER.PD within this package.
*/
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN
+#endif
#include <sys/stat.h>
#include <stdlib.h>
+#include <locale.h>
+#include <windows.h>
#include "__mingw_fix_stat.h"
+static const char* next_char (unsigned int cp, const char* p)
+{
+ /* If *p is a lead byte, also skip the byte after it unless that byte is \0.
+ * A lead byte followed by \0 means the string is not a valid DBCS string. */
+ return (IsDBCSLeadByteEx (cp, *p) && p[1] != '\0') ? p + 2 : p + 1;
+}
+
/**
* Returns _path without trailing slash if any
*
@@ -20,6 +32,7 @@
char* __mingw_fix_stat_path (const char* _path)
{
+ const unsigned int cp = __mingw_filename_cp ();
size_t len;
char *p;
@@ -28,24 +41,27 @@ char* __mingw_fix_stat_path (const char* _path)
if (_path && *_path) {
len = strlen (_path);
- /* Ignore X:\ */
-
+ /* Ignore X:\
+ * No ANSI or OEM code page uses ':' as a trail byte. (The code page 1361
+ * cannot be used as ANSI or OEM code page.) */
if (len <= 1 || ((len == 2 || len == 3) && _path[1] == ':'))
return p;
+ const char *r = _path;
+
/* Check UNC \\abc\<name>\ */
if ((_path[0] == '\\' || _path[0] == '/')
&& (_path[1] == '\\' || _path[1] == '/'))
{
- const char *r = &_path[2];
+ r = &_path[2];
while (*r != 0 && *r != '\\' && *r != '/')
- ++r;
+ r = next_char (cp, r);
if (*r != 0)
++r;
if (*r == 0)
return p;
while (*r != 0 && *r != '\\' && *r != '/')
- ++r;
+ r = next_char (cp, r);
if (*r != 0)
++r;
if (*r == 0)
@@ -54,6 +70,15 @@ char* __mingw_fix_stat_path (const char* _path)
if (_path[len - 1] == '/' || _path[len - 1] == '\\')
{
+ /* Return if the last character is a double-byte character.
+ * Its trail byte could be a '\' which must not be interpreted
+ * as a directory separator. */
+ while (r[1] != '\0') {
+ r = next_char (cp, r);
+ if (*r == '\0')
+ return p;
+ }
+
p = (char*)malloc (len);
The logic here feels rather complicated, and it is not intuitive to prove
that it is right and that it cannot access anything past the end of the
buffer, but I think it is correct and safe.
(I did find and pick up the v2 of this patch, and have it applied to my
local branch of this patchset.)
// Martin
_______________________________________________
Mingw-w64-public mailing list
Mingw-w64-public@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/mingw-w64-public