https://gcc.gnu.org/g:5e1d530da87a6d2aa7e719744cb278e7e54a6623

commit r15-2063-g5e1d530da87a6d2aa7e719744cb278e7e54a6623
Author: Andrew Burgess <aburg...@redhat.com>
Date:   Sat Feb 10 11:22:13 2024 +0000

    libiberty/buildargv: handle input consisting of only white space
    
    GDB makes use of the libiberty function buildargv for splitting the
    inferior (program being debugged) argument string in the case where
    the inferior is not being started under a shell.
    
    I have recently been working to improve this area of GDB, and noticed
    some unexpected behaviour to the libiberty function buildargv, when
    the input is a string consisting only of white space.
    
    What I observe is that if the input to buildargv is a string
    containing only white space, then buildargv will return an argv list
    containing a single empty argument, e.g.:
    
      char **argv = buildargv (" ");
      assert (*argv[0] == '\0');
      assert (argv[1] == NULL);
    
    We get the same output from buildargv if the input is a single space,
    or multiple spaces.  Other white space characters give the same
    results.
    
    This doesn't seem right to me, and in fact, there appears to be a work
    around for this issue in expandargv where we have this code:
    
      /* If the file is empty or contains only whitespace, buildargv would
         return a single empty argument.  In this context we want no arguments,
         instead.  */
      if (only_whitespace (buffer))
        {
          file_argv = (char **) xmalloc (sizeof (char *));
          file_argv[0] = NULL;
        }
      else
        /* Parse the string.  */
        file_argv = buildargv (buffer);
    
    I think that the correct behaviour in this situation is to return an
    empty argv array, e.g.:
    
      char **argv = buildargv (" ");
      assert (argv[0] == NULL);
    
    And it turns out that this is a trivial change to buildargv.  The diff
    does look big, but this is because I've re-indented a block.  Check
    with 'git diff -b' to see the minimal changes.  I've also removed the
    work around from expandargv.
    
    When testing this sort of thing I normally write the tests first, and
    then fix the code.  In this case test-expandargv.c has sort-of been
    used as a mechanism for testing the buildargv function (expandargv
    does call buildargv most of the time), however, for this particular
    issue the work around in expandargv (mentioned above) masked the
    buildargv bug.
    
    I did consider adding a new test-buildargv.c file, however, this would
    have basically been a copy & paste of test-expandargv.c (with some
    minor changes to call buildargv).  This would be fine now, but feels
    like we would eventually end up with one file not being updated as
    much as the other, and so test coverage would suffer.
    
    Instead, I have added some explicit buildargv testing to the
    test-expandargv.c file, this reuses the test input that is already
    defined for expandargv.
    
    Of course, once I removed the work around from expandargv then we now
    do always call buildargv from expandargv, and so the bug I'm fixing
    would impact both expandargv and buildargv, so maybe the new testing
    is redundant?  I tend to think more testing is always better, so I've
    left it in for now.
    
    2024-07-16  Andrew Burgess  <aburg...@redhat.com>
    
    libiberty/
    
            * argv.c (buildargv): Treat input of only whitespace as an empty
            argument list.
            (expandargv): Remove work around for intput that is only
            whitespace.
            * testsuite/test-expandargv.c: Add new tests 10, 11, and 12.
            Extend testing to call buildargv in more cases.

Diff:
---
 libiberty/argv.c                      | 108 +++++++++++++--------------
 libiberty/testsuite/test-expandargv.c | 136 ++++++++++++++++++++++++++++------
 2 files changed, 166 insertions(+), 78 deletions(-)

diff --git a/libiberty/argv.c b/libiberty/argv.c
index d9d32e59e720..675336273f3a 100644
--- a/libiberty/argv.c
+++ b/libiberty/argv.c
@@ -212,71 +212,74 @@ char **buildargv (const char *input)
              argv[argc] = NULL;
            }
          /* Begin scanning arg */
-         arg = copybuf;
-         while (*input != EOS)
+         if (*input != EOS)
            {
-             if (ISSPACE (*input) && !squote && !dquote && !bsquote)
+             arg = copybuf;
+             while (*input != EOS)
                {
-                 break;
-               }
-             else
-               {
-                 if (bsquote)
-                   {
-                     bsquote = 0;
-                     if (*input != '\n')
-                       *arg++ = *input;
-                   }
-                 else if (*input == '\\'
-                          && !squote
-                          && (!dquote
-                              || strchr ("$`\"\\\n", *(input + 1)) != NULL))
+                 if (ISSPACE (*input) && !squote && !dquote && !bsquote)
                    {
-                     bsquote = 1;
-                   }
-                 else if (squote)
-                   {
-                     if (*input == '\'')
-                       {
-                         squote = 0;
-                       }
-                     else
-                       {
-                         *arg++ = *input;
-                       }
+                     break;
                    }
-                 else if (dquote)
+                 else
                    {
-                     if (*input == '"')
+                     if (bsquote)
                        {
-                         dquote = 0;
+                         bsquote = 0;
+                         if (*input != '\n')
+                           *arg++ = *input;
                        }
-                     else
+                     else if (*input == '\\'
+                              && !squote
+                              && (!dquote
+                                  || strchr ("$`\"\\\n", *(input + 1)) != 
NULL))
                        {
-                         *arg++ = *input;
+                         bsquote = 1;
                        }
-                   }
-                 else
-                   {
-                     if (*input == '\'')
+                     else if (squote)
                        {
-                         squote = 1;
+                         if (*input == '\'')
+                           {
+                             squote = 0;
+                           }
+                         else
+                           {
+                             *arg++ = *input;
+                           }
                        }
-                     else if (*input == '"')
+                     else if (dquote)
                        {
-                         dquote = 1;
+                         if (*input == '"')
+                           {
+                             dquote = 0;
+                           }
+                         else
+                           {
+                             *arg++ = *input;
+                           }
                        }
                      else
                        {
-                         *arg++ = *input;
+                         if (*input == '\'')
+                           {
+                             squote = 1;
+                           }
+                         else if (*input == '"')
+                           {
+                             dquote = 1;
+                           }
+                         else
+                           {
+                             *arg++ = *input;
+                           }
                        }
+                     input++;
                    }
-                 input++;
                }
+             *arg = EOS;
+             argv[argc] = xstrdup (copybuf);
+             argc++;
            }
-         *arg = EOS;
-         argv[argc] = xstrdup (copybuf);
-         argc++;
          argv[argc] = NULL;
 
          consume_whitespace (&input);
@@ -439,17 +442,8 @@ expandargv (int *argcp, char ***argvp)
        }
       /* Add a NUL terminator.  */
       buffer[len] = '\0';
-      /* If the file is empty or contains only whitespace, buildargv would
-        return a single empty argument.  In this context we want no arguments,
-        instead.  */
-      if (only_whitespace (buffer))
-       {
-         file_argv = (char **) xmalloc (sizeof (char *));
-         file_argv[0] = NULL;
-       }
-      else
-       /* Parse the string.  */
-       file_argv = buildargv (buffer);
+      /* Parse the string.  */
+      file_argv = buildargv (buffer);
       /* If *ARGVP is not already dynamically allocated, copy it.  */
       if (*argvp == original_argv)
        *argvp = dupargv (*argvp);
diff --git a/libiberty/testsuite/test-expandargv.c 
b/libiberty/testsuite/test-expandargv.c
index ea1aeb0eda2b..ca7031eaf689 100644
--- a/libiberty/testsuite/test-expandargv.c
+++ b/libiberty/testsuite/test-expandargv.c
@@ -176,6 +176,30 @@ const char *test_data[] = {
   "\\t",
   0,
 
+  /* Test 10 - Mixed white space characters.  */
+  "\t \n \t ",         /* Test 10 data */
+  ARGV0,
+  "@test-expandargv-10.lst",
+  0,
+  ARGV0,
+  0,
+
+  /* Test 11 - Single ' ' character.  */
+  " ",         /* Test 11 data */
+  ARGV0,
+  "@test-expandargv-11.lst",
+  0,
+  ARGV0,
+  0,
+
+  /* Test 12 - Multiple ' ' characters.  */
+  "   ",               /* Test 12 data */
+  ARGV0,
+  "@test-expandargv-12.lst",
+  0,
+  ARGV0,
+  0,
+
   0 /* Test done marker, don't remove. */
 };
 
@@ -265,6 +289,78 @@ erase_test (int test)
     fatal_error (__LINE__, "Failed to erase test file.", errno);
 }
 
+/* compare_argv:
+     TEST is the current test number, and NAME is a short string to identify
+     which libibery function is being tested.  ARGC_A and ARGV_A describe an
+     argument array, and this is compared to ARGC_B and ARGV_B, return 0 if
+     the two arrays match, otherwise return 1.  */
+
+static int
+compare_argv (int test, const char *name, int argc_a, char *argv_a[],
+             int argc_b, char *argv_b[])
+{
+  int failed = 0, k;
+
+  if (argc_a != argc_b)
+    {
+      printf ("FAIL: test-%s-%d.  Argument count didn't match\n", name, test);
+      failed = 1;
+    }
+  /* Compare each of the argv's ... */
+  else
+    for (k = 0; k < argc_a; k++)
+      if (strcmp (argv_a[k], argv_b[k]) != 0)
+       {
+         printf ("FAIL: test-%s-%d. Arguments don't match.\n", name, test);
+         failed = 1;
+         break;
+       }
+
+  if (!failed)
+    printf ("PASS: test-%s-%d.\n", name, test);
+
+  return failed;
+}
+
+/* test_buildargv
+     Test the buildargv function from libiberty.  TEST is the current test
+     number and TEST_INPUT is the string to pass to buildargv (after calling
+     run_replaces on it).  ARGC_AFTER and ARGV_AFTER are the expected
+     results.  Return 0 if the test passes, otherwise return 1.  */
+
+static int
+test_buildargv (int test, const char * test_input, int argc_after,
+               char *argv_after[])
+{
+  char * input, ** argv;
+  size_t len;
+  int argc, failed;
+
+  /* Generate RW copy of data for replaces */
+  len = strlen (test_input);
+  input = malloc (sizeof (char) * (len + 1));
+  if (input == NULL)
+    fatal_error (__LINE__, "Failed to malloc buildargv input buffer.", errno);
+
+  memcpy (input, test_input, sizeof (char) * (len + 1));
+  /* Run all possible replaces */
+  run_replaces (input);
+
+  /* Split INPUT into separate arguments.  */
+  argv = buildargv (input);
+
+  /* Count the arguments we got back.  */
+  argc = 0;
+  while (argv[argc])
+    ++argc;
+
+  failed = compare_argv (test, "buildargv", argc_after, argv_after, argc, 
argv);
+
+  free (input);
+  freeargv (argv);
+
+  return failed;
+}
 
 /* run_tests:
     Run expandargv
@@ -276,12 +372,16 @@ run_tests (const char **test_data)
 {
   int argc_after, argc_before;
   char ** argv_before, ** argv_after;
-  int i, j, k, fails, failed;
+  int i, j, k, fails;
+  const char * input_str;
 
   i = j = fails = 0;
   /* Loop over all the tests */
   while (test_data[j])
     {
+      /* Save original input in case we run a buildargv test.  */
+      input_str = test_data[j];
+
       /* Write test data */
       writeout_test (i, test_data[j++]);
       /* Copy argv before */
@@ -305,29 +405,23 @@ run_tests (const char **test_data)
       for (k = 0; k < argc_after; k++)
         run_replaces (argv_after[k]);
 
+      /* If the test input is just a file to expand then we can also test
+        calling buildargv directly as the expected output is equivalent to
+        calling buildargv on the contents of the file.
+
+        The results of calling buildargv will not include the ARGV0 constant,
+        which is why we pass 'argc_after - 1' and 'argv_after + 1', this skips
+        over the ARGV0 in the expected results.  */
+      if (argc_before == 2)
+       fails += test_buildargv (i, input_str, argc_after - 1, argv_after + 1);
+      else
+       printf ("SKIP: test-buildargv-%d.  This test isn't for buildargv\n", i);
+
       /* Run test: Expand arguments */
       expandargv (&argc_before, &argv_before);
 
-      failed = 0;
-      /* Compare size first */
-      if (argc_before != argc_after)
-        {
-          printf ("FAIL: test-expandargv-%d. Number of arguments don't 
match.\n", i);
-         failed++;
-        }
-      /* Compare each of the argv's ... */
-      else
-        for (k = 0; k < argc_after; k++)
-          if (strcmp (argv_before[k], argv_after[k]) != 0)
-            {
-              printf ("FAIL: test-expandargv-%d. Arguments don't match.\n", i);
-              failed++;
-            }
-
-      if (!failed)
-        printf ("PASS: test-expandargv-%d.\n", i);
-      else
-        fails++;
+      fails += compare_argv (i, "expandargv", argc_before, argv_before,
+                            argc_after, argv_after);
 
       freeargv (argv_before);
       freeargv (argv_after);

Reply via email to