> From: Mark H Weaver <m...@netris.org> > Cc: Paul Eggert <egg...@cs.ucla.edu>, br...@clisp.org, bug-gnulib@gnu.org > Date: Wed, 07 Nov 2012 22:49:36 -0500 > > Eli Zaretskii <e...@gnu.org> writes: > >> Eli, I see that Bruno asked for a set of Windows-syntax filenames to > >> extend the unit test. Would you be willing to work on that? > > > > I can come up with file names that could be used to test the code, if > > this will help, but I won't have resources to make a complete patch > > for the unit test program(s), sorry. > > That would be very helpful!
Sorry for the delay. I found that the sources changed in gnulib since I patched them, so I went back and redid the changes relative to the current version. You will find below, in addition to the test cases and the test program I used, also patches for both canonicalize-lgpl.c and canonicalize.c. > Hopefully the rest will be an easy job for someone familiar with the > gnulib unit tests, but if no one steps forward, I'll do it. > > > I just hope that whatever I submit in addition to what I already did > > will not be left collecting dust for another year. > > I'll do what I can to prevent that outcome. Hopefully Paul can help us > get your patch committed. Thanks. Looking forward to see this resolved. Here are the test cases I tried and their results. My current directory when running these tests was d:\usr\eli\utils\CURDIR. 'foo.bar' => 'd:\usr\eli\utils\CURDIR/foo.bar' '.\foo.bar' => 'd:\usr\eli\utils\CURDIR/foo.bar' './foo.bar' => 'd:\usr\eli\utils\CURDIR/foo.bar' '..\foo.bar' => 'd:\usr\eli\utils\foo.bar' '../foo.bar' => 'd:\usr\eli\utils\foo.bar' '../CURDIR/foo.bar' => 'd:\usr\eli\utils\CURDIR/foo.bar' '..\CURDIR\foo.bar' => 'd:\usr\eli\utils\CURDIR/foo.bar' '..\CURDIR/foo.bar' => 'd:\usr\eli\utils\CURDIR/foo.bar' '..\CURDIR\foo.bar\' => '(null)' '../CURDIR\foo.bar' => 'd:\usr\eli\utils\CURDIR/foo.bar' 'C:WINDOWS' => '(null)' 'C:system32' => '(null)' 'C:/WINDOWS' => 'C:/WINDOWS' 'C:\WINDOWS' => 'C:/WINDOWS' 'C:\\WINDOWS' => 'C:/WINDOWS' 'C:./WINDOWS' => '(null)' 'C:./system32' => '(null)' 'C:.\WINDOWS' => '(null)' 'C:.\system32' => '(null)' 'C:.' => 'd:\usr\eli\utils\lib' 'C:/' => 'C:/' 'C:\' => 'C:/' The "(null)" results are failures, and they all mean one thing: the patched functions still do not support drive-relative file names. I don't consider it a significant omission, since these names are very rare, and OTOH supporting them would require much more invasive changes in the original gnulib code. So I decided to omit that. Here's the test program I used: /* Before running, type: "cd C:\WINDOWS" at the shell prompt. */ #include <stdio.h> #include <stdlib.h> #include <unistd.h> #include <string.h> extern char * canonicalize_file_name (const char *); int main (void) { const char *test_fname[] = { "foo.bar", ".\\foo.bar", "./foo.bar", "..\\foo.bar", "../foo.bar", "../%s/foo.bar", "..\\%s\\foo.bar", "..\\%s/foo.bar", "..\\%s\\foo.bar\\", "../%s\\foo.bar", "C:WINDOWS", "C:system32", "C:/WINDOWS", "C:\\WINDOWS", "C:\\\\WINDOWS", "C:./WINDOWS", "C:./system32", "C:.\\WINDOWS", "C:.\\system32", "C:.", "C:/", "C:\\" }; int i; char curdir[MAX_PATH], *pc; getcwd (curdir, MAX_PATH); for (pc = curdir + strlen (curdir); !ISSLASH (pc[-1]); pc--) ; for (i = 0; i < sizeof (test_fname) / sizeof (test_fname[0]); i++) { char fname[MAX_PATH]; sprintf (fname, test_fname[i], pc); printf ("'%s' => '%s'\n", fname, canonicalize_file_name (fname)); } return 0; } Here are the patched to the two gnulib source files: --- canonicalize-lgpl.c~0 2012-11-19 07:28:15.584793000 +0200 +++ canonicalize-lgpl.c 2012-11-19 08:31:46.886703100 +0200 @@ -51,6 +51,7 @@ # define __realpath realpath # include "pathmax.h" # include "malloca.h" +# include "dosname.h" # if HAVE_GETCWD # if IN_RELOCWRAPPER /* When building the relocatable program wrapper, use the system's getcwd @@ -131,6 +132,7 @@ const char *start, *end, *rpath_limit; long int path_max; int num_links = 0; + size_t prefix_len; if (name == NULL) { @@ -173,7 +175,11 @@ rpath = resolved; rpath_limit = rpath + path_max; - if (name[0] != '/') + /* This is always zero for Posix hosts, but can be 2 for MS-Windows + and MS-DOS X:/foo/bar file names. */ + prefix_len = FILE_SYSTEM_PREFIX_LEN (name); + + if (!IS_ABSOLUTE_FILE_NAME (name)) { if (!__getcwd (rpath, path_max)) { @@ -184,17 +190,22 @@ } else { - rpath[0] = '/'; - dest = rpath + 1; + dest = rpath; + if (prefix_len) + { + memcpy (rpath, name, prefix_len); + dest += prefix_len; + } + *dest++ = '/'; if (DOUBLE_SLASH_IS_DISTINCT_ROOT) { - if (name[1] == '/' && name[2] != '/') + if (ISSLASH (name[1]) && !ISSLASH (name[2]) && !prefix_len) *dest++ = '/'; *dest = '\0'; } } - for (start = end = name; *start; start = end) + for (start = end = name + prefix_len; *start; start = end) { #ifdef _LIBC struct stat64 st; @@ -204,11 +215,11 @@ int n; /* Skip sequence of multiple path-separators. */ - while (*start == '/') + while (ISSLASH (*start)) ++start; /* Find end of path component. */ - for (end = start; *end && *end != '/'; ++end) + for (end = start; *end && !ISSLASH (*end); ++end) /* Nothing. */; if (end - start == 0) @@ -218,17 +229,18 @@ else if (end - start == 2 && start[0] == '.' && start[1] == '.') { /* Back up to previous component, ignore if at root already. */ - if (dest > rpath + 1) - while ((--dest)[-1] != '/'); - if (DOUBLE_SLASH_IS_DISTINCT_ROOT && dest == rpath + 1 - && *dest == '/' && dest[1] != '/') + if (dest > rpath + prefix_len + 1) + for ( --dest; !ISSLASH (dest[-1]); --dest); + if (DOUBLE_SLASH_IS_DISTINCT_ROOT + && dest == rpath + 1 && !prefix_len + && ISSLASH (*dest) && !ISSLASH (dest[1])) dest++; } else { size_t new_size; - if (dest[-1] != '/') + if (!ISSLASH (dest[-1])) *dest++ = '/'; if (dest + (end - start) >= rpath_limit) @@ -239,7 +251,7 @@ if (resolved) { __set_errno (ENAMETOOLONG); - if (dest > rpath + 1) + if (dest > rpath + prefix_len + 1) dest--; *dest = '\0'; goto error; @@ -329,24 +341,31 @@ memmove (&extra_buf[n], end, len + 1); name = end = memcpy (extra_buf, buf, n); - if (buf[0] == '/') + if (IS_ABSOLUTE_FILE_NAME (buf)) { - dest = rpath + 1; /* It's an absolute symlink */ + size_t pfxlen = FILE_SYSTEM_PREFIX_LEN (buf); + + if (pfxlen) + memcpy (rpath, buf, pfxlen); + dest = rpath + pfxlen; + *dest++ = '/'; /* It's an absolute symlink */ if (DOUBLE_SLASH_IS_DISTINCT_ROOT) { - if (buf[1] == '/' && buf[2] != '/') + if (ISSLASH (buf[1]) && !ISSLASH (buf[2]) && !pfxlen) *dest++ = '/'; *dest = '\0'; } + /* Install the new prefix to be in effect hereafter. */ + prefix_len = pfxlen; } else { /* Back up to previous component, ignore if at root already: */ - if (dest > rpath + 1) - while ((--dest)[-1] != '/'); + if (dest > rpath + prefix_len + 1) + for (--dest; !ISSLASH (dest[-1]); --dest); if (DOUBLE_SLASH_IS_DISTINCT_ROOT && dest == rpath + 1 - && *dest == '/' && dest[1] != '/') + && ISSLASH (*dest) && !ISSLASH (dest[1]) && !prefix_len) dest++; } } @@ -357,10 +376,10 @@ } } } - if (dest > rpath + 1 && dest[-1] == '/') + if (dest > rpath + prefix_len + 1 && ISSLASH (dest[-1])) --dest; - if (DOUBLE_SLASH_IS_DISTINCT_ROOT && dest == rpath + 1 - && *dest == '/' && dest[1] != '/') + if (DOUBLE_SLASH_IS_DISTINCT_ROOT && dest == rpath + 1 && !prefix_len + && ISSLASH (*dest) && !ISSLASH (dest[1])) dest++; *dest = '\0'; --- canonicalize.c~0 2012-10-24 12:32:12.000000000 +0200 +++ canonicalize.c 2012-11-19 08:45:52.450238000 +0200 @@ -31,4 +31,5 @@ #include "xalloc.h" #include "xgetcwd.h" +#include "dosname.h" #define MULTIPLE_BITS_SET(i) (((i) & ((i) - 1)) != 0) @@ -51,4 +73,10 @@ The result is malloc'd. */ +#if ISSLASH('\\') +# define SLASHES "/\\" +#else +# define SLASHES "/" +#endif + char * canonicalize_file_name (const char *name) @@ -101,4 +139,5 @@ int can_flags = can_mode & ~CAN_MODE_MASK; bool logical = can_flags & CAN_NOLINKS; + size_t prefix_len; can_mode &= CAN_MODE_MASK; @@ -122,5 +161,9 @@ } - if (name[0] != '/') + /* This is always zero for Posix hosts, but can be 2 for MS-Windows + and MS-DOS X:/foo/bar file names. */ + prefix_len = FILE_SYSTEM_PREFIX_LEN (name); + + if (!IS_ABSOLUTE_FILE_NAME (name)) { rname = xgetcwd (); @@ -144,9 +187,14 @@ rname = xmalloc (PATH_MAX); rname_limit = rname + PATH_MAX; - rname[0] = '/'; - dest = rname + 1; + dest = rname; + if (prefix_len) + { + memcpy (rname, name, prefix_len); + dest += prefix_len; + } + *dest++ = '/'; if (DOUBLE_SLASH_IS_DISTINCT_ROOT) { - if (name[1] == '/' && name[2] != '/') + if (ISSLASH (name[1]) && !ISSLASH (name[2]) && !prefix_len) *dest++ = '/'; *dest = '\0'; @@ -154,12 +202,12 @@ } - for (start = name; *start; start = end) + for (start = name + prefix_len; *start; start = end) { /* Skip sequence of multiple file name separators. */ - while (*start == '/') + while (ISSLASH (*start)) ++start; /* Find end of component. */ - for (end = start; *end && *end != '/'; ++end) + for (end = start; *end && !ISSLASH (*end); ++end) /* Nothing. */; @@ -171,8 +219,8 @@ { /* Back up to previous component, ignore if at root already. */ - if (dest > rname + 1) - while ((--dest)[-1] != '/'); + if (dest > rname + prefix_len + 1) + for ( --dest; !ISSLASH (dest[-1]); --dest); if (DOUBLE_SLASH_IS_DISTINCT_ROOT && dest == rname + 1 - && *dest == '/' && dest[1] != '/') + && !prefix_len && ISSLASH (*dest) && !ISSLASH (dest[1])) dest++; } @@ -181,5 +229,5 @@ struct stat st; - if (dest[-1] != '/') + if (!ISSLASH (dest[-1])) *dest++ = '/'; @@ -217,5 +265,5 @@ if (can_mode == CAN_ALL_BUT_LAST) { - if (end[strspn (end, "/")] || saved_errno != ENOENT) + if (end[strspn (end, SLASHES)] || saved_errno != ENOENT) goto error; continue; @@ -269,13 +317,20 @@ name = end = memcpy (extra_buf, buf, n); - if (buf[0] == '/') + if (IS_ABSOLUTE_FILE_NAME (buf)) { - dest = rname + 1; /* It's an absolute symlink */ + size_t pfxlen = FILE_SYSTEM_PREFIX_LEN (buf); + + if (pfxlen) + memcpy (rname, buf, pfxlen); + dest = rname + pfxlen; + *dest++ = '/'; /* It's an absolute symlink */ if (DOUBLE_SLASH_IS_DISTINCT_ROOT) { - if (buf[1] == '/' && buf[2] != '/') + if (ISSLASH (buf[1]) && !ISSLASH (buf[2]) && !pfxlen) *dest++ = '/'; *dest = '\0'; } + /* Install the new prefix to be in effect hereafter. */ + prefix_len = pfxlen; } else @@ -283,8 +338,8 @@ /* Back up to previous component, ignore if at root already: */ - if (dest > rname + 1) - while ((--dest)[-1] != '/'); + if (dest > rname + prefix_len + 1) + for (--dest; !ISSLASH (dest[-1]); --dest); if (DOUBLE_SLASH_IS_DISTINCT_ROOT && dest == rname + 1 - && *dest == '/' && dest[1] != '/') + && ISSLASH (*dest) && !ISSLASH (dest[1]) && !prefix_len) dest++; } @@ -302,8 +357,8 @@ } } - if (dest > rname + 1 && dest[-1] == '/') + if (dest > rname + prefix_len + 1 && ISSLASH (dest[-1])) --dest; - if (DOUBLE_SLASH_IS_DISTINCT_ROOT && dest == rname + 1 - && *dest == '/' && dest[1] != '/') + if (DOUBLE_SLASH_IS_DISTINCT_ROOT && dest == rname + 1 && !prefix_len + && ISSLASH (*dest) && !ISSLASH (dest[1])) dest++; *dest = '\0';