Skip filenames that cannot be converted to the active code page losslessly. If the end of the directory is successfully reached, return NULL with errno = EILSEQ once. If readdir() is called again, errno won't be modified again.
If a multibyte filename is too long to fit into struct dirent.d_name[], do the same as above except use ENAMETOOLONG. If both ENAMETOOLONG and EILSEQ occur, only the last one is remembered, and only one delayed error is reported at the end of the directory. Delaying EILSEQ and ENAMETOOLONG allows applications to still see all other filenames in the directory. EILSEQ could be returned immediately too, but few apps would continue calling readdir() after EILSEQ or ENAMETOOLONG (doing so might not be portable). As a side effect, readdir() no longer crashes if a very long multibyte filename is found. A filename with 255 wide characters can be 255 * 3 = 765 bytes (without the terminating null character) when using the UTF-8 code page. Previously, the multibyte version of readdir() called UCRT's _findnext(), which crashes when it finds a filename that doesn't fit into MAX_PATH (260) bytes. Now the multibyte version of readdir() uses _wfindnext(), which works. The use of fullPath and bufferForFullPath is weird because one could use the array directly like it was done previously. It was done this way to make the following commits neater. Best-fit mapping is a lossy conversion, so the resulting filenames cannot be used to access the correct file. Best-fit mapping might result, for example, in a directory traversal attack if an attacker can control the filenames. 
See: https://devco.re/blog/2025/01/09/worstfit-unveiling-hidden-transformers-in-windows-ansi/ https://worst.fit/ --- mingw-w64-crt/misc/dirent.c | 148 ++++++++++++++++++++++++++------- mingw-w64-headers/crt/dirent.h | 10 ++- 2 files changed, 128 insertions(+), 30 deletions(-) diff --git a/mingw-w64-crt/misc/dirent.c b/mingw-w64-crt/misc/dirent.c index 8bda0b169..b1195fa44 100644 --- a/mingw-w64-crt/misc/dirent.c +++ b/mingw-w64-crt/misc/dirent.c @@ -28,8 +28,6 @@ #include <windows.h> /* for GetFileAttributes */ #include <tchar.h> -#define SUFFIX _T("*") -#define SLASH _T("\\") /* @@ -43,7 +41,8 @@ _topendir (const _TCHAR *szPath) { _TDIR *nd; unsigned int rc; - _TCHAR szFullPath[MAX_PATH]; + wchar_t bufferForFullPath[MAX_PATH]; + wchar_t *fullPath; if (!szPath) { @@ -73,20 +72,58 @@ _topendir (const _TCHAR *szPath) } /* Make an absolute pathname. */ - if (_tfullpath (szFullPath, szPath, MAX_PATH) == NULL) +#ifdef _UNICODE + fullPath = _wfullpath (bufferForFullPath, szPath, MAX_PATH); +#else + { + /* Convert szPath to wide char. */ + wchar_t *wzPath; + int wzPathSize = MultiByteToWideChar (CP_ACP, MB_ERR_INVALID_CHARS, + szPath, -1, NULL, 0); + if (wzPathSize <= 0) + { + errno = EILSEQ; + return NULL; + } + + wzPath = malloc (wzPathSize * sizeof (wchar_t)); + if (wzPath == NULL) + { + errno = ENOMEM; + return NULL; + } + + if (MultiByteToWideChar (CP_ACP, MB_ERR_INVALID_CHARS, + szPath, -1, wzPath, wzPathSize) != wzPathSize) + { + free (wzPath); + errno = EILSEQ; + return NULL; + } + + fullPath = _wfullpath (bufferForFullPath, wzPath, MAX_PATH); + + /* If _wfullpath failed, we need its errno value. */ + int savedErrno = errno; + free (wzPath); + errno = savedErrno; + } +#endif + + if (!fullPath) { /* It was a directory when GetFileAttributes was called but * the absolute pathname could not be created now. Use the - * errno value set by _tfullpath. */ + * errno value set by _wfullpath. 
*/ return NULL; } + size_t fullPathLen = wcslen (fullPath); + /* Allocate enough space to store DIR structure and the complete * directory path given. */ - nd = (_TDIR *) malloc (sizeof (_TDIR) + (_tcslen (szFullPath) - + _tcslen (SLASH) - + _tcslen (SUFFIX) + 1) - * sizeof (_TCHAR)); + nd = (_TDIR *) malloc (sizeof (_TDIR) + (fullPathLen + 2 + 1) + * sizeof (wchar_t)); if (!nd) { @@ -96,18 +133,19 @@ _topendir (const _TCHAR *szPath) } /* Create the search expression. */ - _tcscpy (nd->dd_name, szFullPath); + memcpy (nd->dd_name, fullPath, (fullPathLen + 1) * sizeof (wchar_t)); /* Add on a slash if the path does not end with one. */ - if (nd->dd_name[0] != _T('\0') && - nd->dd_name[_tcslen (nd->dd_name) - 1] != _T('/') && - nd->dd_name[_tcslen (nd->dd_name) - 1] != _T('\\')) + if (fullPathLen > 0 && + nd->dd_name[fullPathLen - 1] != L'/' && + nd->dd_name[fullPathLen - 1] != L'\\') { - _tcscat (nd->dd_name, SLASH); + nd->dd_name[fullPathLen++] = L'\\'; } /* Add on the search pattern */ - _tcscat (nd->dd_name, SUFFIX); + nd->dd_name[fullPathLen++] = L'*'; + nd->dd_name[fullPathLen] = L'\0'; /* Initialize handle to -1 so that a premature closedir doesn't try * to call _findclose on it. */ @@ -116,6 +154,11 @@ _topendir (const _TCHAR *szPath) /* Initialize the status. */ nd->dd_stat = 0; +#ifndef _UNICODE + /* Initially there are no delayed errors. */ + nd->dd_errno = 0; +#endif + /* Initialize the dirent structure. ino and reclen are invalid under * Win32. */ memset (&nd->dd_dir, 0, sizeof (nd->dd_dir)); @@ -148,25 +191,37 @@ _treaddir (_TDIR * dirp) if (dirp->dd_stat < 0) { - /* We have already returned all files in the directory + /* Unless there is a delayed error from skipped filenames, + * we have already returned all files in the directory * (or the structure has an invalid dd_stat). At the end * of the directory, errno must not be modified. */ +#ifndef _UNICODE + /* If filenames were skipped, use the delayed error number. 
+ * Do so only once to prevent an infinite loop in case the + * caller considers the error number non-serious and tries + * to resume reading the directory. */ + if (dirp->dd_errno != 0) + { + errno = dirp->dd_errno; + dirp->dd_errno = 0; + } +#endif return NULL; } else if (dirp->dd_stat == 0) { /* We haven't started the search yet. */ /* Start the search */ - dirp->dd_handle = _tfindfirst (dirp->dd_name, &(dirp->dd_dta)); + dirp->dd_handle = _wfindfirst (dirp->dd_name, &(dirp->dd_dta)); if (dirp->dd_handle == -1) { /* There are no files in the directory or an error occurred. - * _tfindfirst sets errno to ENOENT if the directory is empty + * _wfindfirst sets errno to ENOENT if the directory is empty * but readdir must not do that. * * Note that the interesting value from GetLastError is different - * than with _tfindnext. Here even the entries "." and ".." don't + * than with _wfindnext. Here even the entries "." and ".." don't * exist, so no files can be found. The root directory of an empty * drive is an example. */ DWORD winerr = GetLastError (); @@ -181,15 +236,29 @@ _treaddir (_TDIR * dirp) } else { +#ifndef _UNICODE +again: +#endif /* Get the next search entry. */ - if (_tfindnext (dirp->dd_handle, &(dirp->dd_dta))) + if (_wfindnext (dirp->dd_handle, &(dirp->dd_dta))) { /* We are off the end or otherwise error. - _tfindnext sets errno to ENOENT at the end - of the directory but readdir must not do that. */ + _wfindnext sets errno to ENOENT at the end + of the directory but readdir must not do that. + Because we have read at least one filename, + there might be a delayed error in dd_errno. 
*/ DWORD winerr = GetLastError (); if (winerr != ERROR_NO_MORE_FILES) - finalErrno = errno; + { + finalErrno = errno; + } +#ifndef _UNICODE + else if (dirp->dd_errno != 0) + { + finalErrno = dirp->dd_errno; + dirp->dd_errno = 0; + } +#endif _findclose (dirp->dd_handle); dirp->dd_handle = -1; dirp->dd_stat = -1; @@ -204,11 +273,28 @@ _treaddir (_TDIR * dirp) if (dirp->dd_stat > 0) { - /* Successfully got an entry. Everything about the file is - * already appropriately filled in except the length of the - * file name. */ - dirp->dd_dir.d_namlen = _tcslen (dirp->dd_dta.name); - _tcscpy (dirp->dd_dir.d_name, dirp->dd_dta.name); + /* Successfully got an entry. */ +#ifdef _UNICODE + /* Everything about the file is already appropriately filled in + * except the length of the filename and the name itself. */ + dirp->dd_dir.d_namlen = wcslen (dirp->dd_dta.name); + wcscpy (dirp->dd_dir.d_name, dirp->dd_dta.name); +#else + /* Convert to multibyte. */ + BOOL wasLossy = TRUE; + int convResult = WideCharToMultiByte (CP_ACP, WC_NO_BEST_FIT_CHARS, + dirp->dd_dta.name, -1, + dirp->dd_dir.d_name, + sizeof (dirp->dd_dir.d_name), + NULL, &wasLossy); + if (convResult <= 0 || wasLossy) + { + dirp->dd_errno = (convResult <= 0) ? ENAMETOOLONG : EILSEQ; + goto again; + } + + dirp->dd_dir.d_namlen = (unsigned short) (convResult - 1); +#endif /* It shouldn't be necessary to preserve errno when we return non-NULL. * Do it anyway. 
*/ @@ -272,6 +358,9 @@ _trewinddir (_TDIR * dirp) dirp->dd_handle = -1; dirp->dd_stat = 0; +#ifndef _UNICODE + dirp->dd_errno = 0; +#endif } /* @@ -324,6 +413,9 @@ _tseekdir (_TDIR * dirp, long lPos) } dirp->dd_handle = -1; dirp->dd_stat = -1; +#ifndef _UNICODE + dirp->dd_errno = 0; +#endif } else { diff --git a/mingw-w64-headers/crt/dirent.h b/mingw-w64-headers/crt/dirent.h index 2d7a1b73f..272662d65 100644 --- a/mingw-w64-headers/crt/dirent.h +++ b/mingw-w64-headers/crt/dirent.h @@ -38,7 +38,7 @@ struct dirent typedef struct { /* disk transfer area for this dir */ - struct _finddata_t dd_dta; + struct _wfinddata_t dd_dta; /* dirent struct to return from dir (NOTE: this makes this thread * safe as long as only one thread uses a particular DIR struct at @@ -56,8 +56,14 @@ typedef struct */ int dd_stat; + /* If readdir skips any filenames, this is set to a non-zero error + * number. If dd_errno is non-zero at the end of the directory, + * readdir sets errno = dd_errno, dd_errno = 0, and returns NULL. + * So if readdir is called again, it won't modify errno again. */ + unsigned int dd_errno; + /* given path for dir with search pattern (struct is extended) */ - char dd_name[1]; + wchar_t dd_name[1]; } DIR; DIR* __cdecl __MINGW_NOTHROW opendir (const char*); -- 2.47.1 _______________________________________________ Mingw-w64-public mailing list Mingw-w64-public@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/mingw-w64-public