On 28/10/15 18:19, Jim Meyering wrote:
> On Wed, Oct 28, 2015 at 10:30 AM, Pádraig Brady <p...@draigbrady.com> wrote:
>> On 28/10/15 17:01, Jim Meyering wrote:
>>> On Wed, Oct 28, 2015 at 6:18 AM, Pádraig Brady <p...@draigbrady.com> wrote:
>>>> seq 10 | shuf --random-source="blah"$'\r'
>>>
>>> Thank you for pursuing this.
>>> Properly quoting unusual names like those is definitely welcome,
>>
>> Cool. At least with this patch, the quoting is consistent across all utils.
>> I.E. we were already using quote() in most places.
>>
>>> however, in the remaining 99% of use cases, I find the added quotes
>>> to be most unwelcome: at least two extra bytes per line, in addition to
>>> the common hassles with multi-byte rendering.
>>>
>>> What do you think about a mode that quotes only when necessary?
>>
>> What about distinguishing file names which account for many of these?
>> I.E. have quote_name() use "shell-escaped" quoting by default,
>> which would mean easier copy and pasting?
> 
> I like it.

gnulib part attached.

The coreutils part to call into this will be something like:

#define quotef(arg) quotearg_style (shell_escape_quoting_style, arg)

It will also be automatically available as `ls --quoting-style=shell-escape`,
which is useful for giving a concise and unambiguous presentation of file names.

cheers,
Pádraig.
From 308810eeb89d1eb75a5d8326b82be56ef2ef19bc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <p...@draigbrady.com>
Date: Fri, 30 Oct 2015 22:49:16 +0000
Subject: [PATCH] quotearg: add support for $'' shell escaping

* lib/quotearg.h: Add "shell-escape" and "shell-escape-always"
items and descriptions.
* lib/quotearg.c (quotearg_buffer_restyled): Add support for the
above types by quoting like "shell", but using $'...' syntax
for non-printable characters, which should provide unambiguous
printable output for any input.
* tests/test-quotearg-simple.c: Update accordingly.
---
 lib/quotearg.c               | 80 +++++++++++++++++++++++++++++++++++++-------
 lib/quotearg.h               | 31 +++++++++++++++++
 tests/test-quotearg-simple.c | 22 ++++++++++++
 3 files changed, 120 insertions(+), 13 deletions(-)

diff --git a/lib/quotearg.c b/lib/quotearg.c
index 9f91659..77896bf 100644
--- a/lib/quotearg.c
+++ b/lib/quotearg.c
@@ -77,6 +77,8 @@ char const *const quoting_style_args[] =
   "literal",
   "shell",
   "shell-always",
+  "shell-escape",
+  "shell-escape-always",
   "c",
   "c-maybe",
   "escape",
@@ -91,6 +93,8 @@ enum quoting_style const quoting_style_vals[] =
   literal_quoting_style,
   shell_quoting_style,
   shell_always_quoting_style,
+  shell_escape_quoting_style,
+  shell_escape_always_quoting_style,
   c_quoting_style,
   c_maybe_quoting_style,
   escape_quoting_style,
@@ -253,6 +257,7 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize,
   bool backslash_escapes = false;
   bool unibyte_locale = MB_CUR_MAX == 1;
   bool elide_outer_quotes = (flags & QA_ELIDE_OUTER_QUOTES) != 0;
+  bool pending_shell_escape_end = false;
 
 #define STORE(c) \
     do \
@@ -263,6 +268,36 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize,
       } \
     while (0)
 
+#define START_ESC() \
+    do \
+      { \
+        if (elide_outer_quotes) \
+          goto force_outer_quoting_style; \
+        escaping = true; \
+        if (quoting_style == shell_always_quoting_style \
+            && ! pending_shell_escape_end) \
+          { \
+            STORE ('\''); \
+            STORE ('$'); \
+            STORE ('\''); \
+            pending_shell_escape_end = true; \
+          } \
+        STORE ('\\'); \
+      } \
+    while (0)
+
+#define END_ESC() \
+    do \
+      { \
+        if (pending_shell_escape_end && ! escaping) \
+          { \
+            STORE ('\''); \
+            STORE ('\''); \
+            pending_shell_escape_end = false; \
+          } \
+      } \
+    while (0)
+
   switch (quoting_style)
     {
     case c_maybe_quoting_style:
@@ -321,11 +356,18 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize,
       }
       break;
 
+    case shell_escape_quoting_style:
+      backslash_escapes = true;
+      /* Fall through.  */
     case shell_quoting_style:
-      quoting_style = shell_always_quoting_style;
       elide_outer_quotes = true;
       /* Fall through.  */
+    case shell_escape_always_quoting_style:
+      if (!elide_outer_quotes)
+        backslash_escapes = true;
+      /* Fall through.  */
     case shell_always_quoting_style:
+      quoting_style = shell_always_quoting_style;
       if (!elide_outer_quotes)
         STORE ('\'');
       quote_string = "'";
@@ -345,8 +387,10 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize,
       unsigned char c;
       unsigned char esc;
       bool is_right_quote = false;
+      bool escaping = false;
 
       if (backslash_escapes
+          && quoting_style != shell_always_quoting_style
           && quote_string_len
           && (i + quote_string_len
               <= (argsize == SIZE_MAX && 1 < quote_string_len
@@ -367,15 +411,15 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize,
         case '\0':
           if (backslash_escapes)
             {
-              if (elide_outer_quotes)
-                goto force_outer_quoting_style;
-              STORE ('\\');
+              START_ESC ();
               /* If quote_string were to begin with digits, we'd need to
                  test for the end of the arg as well.  However, it's
                  hard to imagine any locale that would use digits in
                  quotes, and set_custom_quoting is documented not to
-                 accept them.  */
-              if (i + 1 < argsize && '0' <= arg[i + 1] && arg[i + 1] <= '9')
+                 accept them.  Use only a single \0 with shell-escape,
+                 as currently digits are not printed within $'...'.  */
+              if (quoting_style != shell_always_quoting_style
+                  && i + 1 < argsize && '0' <= arg[i + 1] && arg[i + 1] <= '9')
                 {
                   STORE ('0');
                   STORE ('0');
@@ -436,6 +480,14 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize,
         case '\t': esc = 't'; goto c_and_shell_escape;
         case '\v': esc = 'v'; goto c_escape;
         case '\\': esc = c;
+          /* Never need to escape '\' in shell case.  */
+          if (quoting_style == shell_always_quoting_style)
+            {
+              if (elide_outer_quotes)
+                goto force_outer_quoting_style;
+              goto store_c;
+            }
+
           /* No need to escape the escape if we are trying to elide
              outer quotes and nothing else is problematic.  */
           if (backslash_escapes && elide_outer_quotes && quote_string_len)
@@ -488,6 +540,7 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize,
               STORE ('\'');
               STORE ('\\');
               STORE ('\'');
+              pending_shell_escape_end = false;
             }
           break;
 
@@ -601,9 +654,7 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize,
                   {
                     if (backslash_escapes && ! printable)
                       {
-                        if (elide_outer_quotes)
-                          goto force_outer_quoting_style;
-                        STORE ('\\');
+                        START_ESC ();
                         STORE ('0' + (c >> 6));
                         STORE ('0' + ((c >> 3) & 7));
                         c = '0' + (c & 7);
@@ -615,6 +666,7 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize,
                       }
                     if (ilim <= i + 1)
                       break;
+                    END_ESC ();
                     STORE (c);
                     c = arg[++i];
                   }
@@ -624,18 +676,18 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize,
           }
         }
 
-      if (! ((backslash_escapes || elide_outer_quotes)
+      if (! (((backslash_escapes && quoting_style != shell_always_quoting_style)
+              || elide_outer_quotes)
              && quote_these_too
              && quote_these_too[c / INT_BITS] >> (c % INT_BITS) & 1)
           && !is_right_quote)
         goto store_c;
 
     store_escape:
-      if (elide_outer_quotes)
-        goto force_outer_quoting_style;
-      STORE ('\\');
+      START_ESC ();
 
     store_c:
+      END_ESC ();
       STORE (c);
     }
 
@@ -654,6 +706,8 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize,
  force_outer_quoting_style:
   /* Don't reuse quote_these_too, since the addition of outer quotes
      sufficiently quotes the specified characters.  */
+  if (quoting_style == shell_always_quoting_style && backslash_escapes)
+    quoting_style = shell_escape_always_quoting_style;
   return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
                                    quoting_style,
                                    flags & ~QA_ELIDE_OUTER_QUOTES, NULL,
diff --git a/lib/quotearg.h b/lib/quotearg.h
index d0ccd8b..2b7c93c 100644
--- a/lib/quotearg.h
+++ b/lib/quotearg.h
@@ -73,6 +73,37 @@ enum quoting_style
     */
     shell_always_quoting_style,
 
+    /* Quote names for the shell if they contain shell metacharacters
+       or other problematic characters (ls --quoting-style=shell-escape).
+       Non-printable characters are quoted using the $'...' syntax,
+       which originated in ksh93, is supported by most shells,
+       and is proposed for inclusion in POSIX.
+
+       quotearg_buffer:
+       "simple", "''$'\\0'' '$'\\t\\n'\\''\"'$'\\033''??/\\'", "a:b"
+       quotearg:
+       "simple", "''$'\\0'' '$'\\t\\n'\\''\"'$'\\033''??/\\'", "a:b"
+       quotearg_colon:
+       "simple", "''$'\\0'' '$'\\t\\n'\\''\"'$'\\033''??/\\'", "'a:b'"
+    */
+    shell_escape_quoting_style,
+
+    /* Quote names for the shell even if they would normally not
+       require quoting (ls --quoting-style=shell-escape-always).
+       Non-printable characters are quoted using the $'...' syntax,
+       which originated in ksh93, is supported by most shells,
+       and is proposed for inclusion in POSIX.  Behaves like
+       shell_escape_quoting_style if QA_ELIDE_OUTER_QUOTES is in effect.
+
+       quotearg_buffer:
+       "'simple'", "''$'\\0'' '$'\\t\\n'\\''\"'$'\\033''??/\\'", "'a:b'"
+       quotearg:
+       "'simple'", "''$'\\0'' '$'\\t\\n'\\''\"'$'\\033''??/\\'", "'a:b'"
+       quotearg_colon:
+       "'simple'", "''$'\\0'' '$'\\t\\n'\\''\"'$'\\033''??/\\'", "'a:b'"
+    */
+    shell_escape_always_quoting_style,
+
     /* Quote names as for a C language string (ls --quoting-style=c).
        Behaves like c_maybe_quoting_style if QA_ELIDE_OUTER_QUOTES is
        in effect.  Split into consecutive strings if
diff --git a/tests/test-quotearg-simple.c b/tests/test-quotearg-simple.c
index b579af7..516e890 100644
--- a/tests/test-quotearg-simple.c
+++ b/tests/test-quotearg-simple.c
@@ -58,6 +58,28 @@ static struct result_groups results_g[] = {
     { "''", "'1'", 3, "'simple'", "' \t\n'\\''\"\033?""?/\\'", "'a:b'",
       "'a\\b'", "'" LQ RQ "'", "'" LQ RQ "'" } },
 
+  /* shell_escape_quoting_style */
+  { { "''", "''$'\\0''1'$'\\0'", 15, "simple",
+      "' '$'\\t\\n'\\''\"'$'\\033''?""?/\\'", "a:b",
+      "'a\\b'", "''$'" LQ_ENC RQ_ENC "'", LQ RQ },
+    { "''", "''$'\\0''1'$'\\0'", 15, "simple",
+      "' '$'\\t\\n'\\''\"'$'\\033''?""?/\\'", "a:b",
+      "'a\\b'", "''$'" LQ_ENC RQ_ENC "'", LQ RQ },
+    { "''", "''$'\\0''1'$'\\0'", 15, "simple",
+      "' '$'\\t\\n'\\''\"'$'\\033''?""?/\\'", "'a:b'",
+      "'a\\b'", "''$'" LQ_ENC RQ_ENC "'", LQ RQ } },
+
+  /* shell_escape_always_quoting_style */
+  { { "''", "''$'\\0''1'$'\\0'", 15, "'simple'",
+      "' '$'\\t\\n'\\''\"'$'\\033''?""?/\\'", "'a:b'",
+      "'a\\b'", "''$'" LQ_ENC RQ_ENC "'", "'" LQ RQ "'" },
+    { "''", "''$'\\0''1'$'\\0'", 15, "'simple'",
+      "' '$'\\t\\n'\\''\"'$'\\033''?""?/\\'", "'a:b'",
+      "'a\\b'", "''$'" LQ_ENC RQ_ENC "'", "'" LQ RQ "'" },
+    { "''", "''$'\\0''1'$'\\0'", 15, "'simple'",
+      "' '$'\\t\\n'\\''\"'$'\\033''?""?/\\'", "'a:b'",
+      "'a\\b'", "''$'" LQ_ENC RQ_ENC "'", "'" LQ RQ "'" } },
+
   /* c_quoting_style */
   { { "\"\"", "\"\\0001\\0\"", 9, "\"simple\"",
       "\" \\t\\n'\\\"\\033?""?/\\\\\"", "\"a:b\"", "\"a\\\\b\"",
-- 
2.5.0

Reply via email to