Hi, I'm looking at adding a "java_quoting_style" to quotearg for use in Bison. After consulting <URI:http://java.sun.com/docs/books/jls/third_edition/html/lexical.html>, I've come up with the attached patch, which isn't complete yet.
Unfortunately, my knowledge of C's (gnulib's?) handling of locales, coding systems, multibyte sequences and wide characters is non-existent, and so I don't know what the result of `quotearg ("äöü")' should look like and what it should depend on. Any comments and suggestions are highly appreciated :-). TIA, Tim
diff --git a/lib/quotearg.c b/lib/quotearg.c index da8ba1e..1c9a873 100644 --- a/lib/quotearg.c +++ b/lib/quotearg.c @@ -72,6 +72,7 @@ char const *const quoting_style_args[] = "escape", "locale", "clocale", + "java", 0 }; @@ -85,7 +86,8 @@ enum quoting_style const quoting_style_vals[] = c_maybe_quoting_style, escape_quoting_style, locale_quoting_style, - clocale_quoting_style + clocale_quoting_style, + java_quoting_style }; /* The default quoting options. */ @@ -295,6 +297,14 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize, elide_outer_quotes = false; break; + case java_quoting_style: + if (!elide_outer_quotes) + STORE ('"'); + backslash_escapes = true; + quote_string = "\""; + quote_string_len = 1; + break; + default: abort (); } @@ -382,13 +392,24 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize, } break; - case '\a': esc = 'a'; goto c_escape; + case '\a': esc = quoting_style == java_quoting_style ? '7' : 'a'; goto c_escape; case '\b': esc = 'b'; goto c_escape; case '\f': esc = 'f'; goto c_escape; case '\n': esc = 'n'; goto c_and_shell_escape; case '\r': esc = 'r'; goto c_and_shell_escape; case '\t': esc = 't'; goto c_and_shell_escape; - case '\v': esc = 'v'; goto c_escape; + + case '\v': + if (quoting_style != java_quoting_style) + { + esc = 'v'; + goto c_escape; + } + STORE ('\\'); + STORE ('1'); + c = '3'; + break; + case '\\': esc = c; /* No need to escape the escape if we are trying to elide outer quotes and nothing else is problematic. */ diff --git a/lib/quotearg.h b/lib/quotearg.h index 2756d76..f2a2064 100644 --- a/lib/quotearg.h +++ b/lib/quotearg.h @@ -161,6 +161,17 @@ enum quoting_style */ clocale_quoting_style, + /* Quote names as for a Java language string. 
+ + quotearg_buffer: + "\"simple\"", "\"\\0 \\t\\n'\\\"\\033??/\\\\\"", "\"a:b\"" + quotearg: + "\"simple\"", "\"\\0 \\t\\n'\\\"\\033??/\\\\\"", "\"a:b\"" + quotearg_colon: + "\"simple\"", "\"\\0 \\t\\n'\\\"\\033??/\\\\\"", "\"a\\:b\"" + */ + java_quoting_style, + /* Like clocale_quoting_style except use the custom quotation marks set by set_custom_quoting. If custom quotation marks are not set, the behavior is undefined. diff --git a/tests/test-quotearg-simple.c b/tests/test-quotearg-simple.c index 4d9c87c..2524046 100644 --- a/tests/test-quotearg-simple.c +++ b/tests/test-quotearg-simple.c @@ -101,7 +101,15 @@ static struct result_groups results_g[] = { "\"" LQ_ENC RQ_ENC "\"", "\"" LQ RQ "\"" }, { "\"\"", "\"\\0001\\0\"", 9, "\"simple\"", "\" \\t\\n'\\\"\\033?""?/\\\\\"", "\"a\\:b\"", "\"a\\\\b\"", - "\"" LQ_ENC RQ_ENC "\"", "\"" LQ RQ "\"" } } + "\"" LQ_ENC RQ_ENC "\"", "\"" LQ RQ "\"" } }, + + /* java_quoting_style */ + { { "\"\"", "\"\\0001\\0\"", 9, "\"simple\"", "\" \\t\\n'\\\"\\033?""?/\\\\\"", "\"a:b\"", + "\"a\\\\b\"", "\"" LQ_ENC RQ_ENC "\"", "FIXME" }, + { "\"\"", "\"\\0001\\0\"", 9, "\"simple\"", "\" \\t\\n'\\\"\\033?""?/\\\\\"", "\"a:b\"", + "\"a\\\\b\"", "\"" LQ_ENC RQ_ENC "\"", "FIXME" }, + { "\"\"", "\"\\0001\\0\"", 9, "\"simple\"", "\" \\t\\n'\\\"\\033?""?/\\\\\"", "\"a\\:b\"", + "\"a\\\\b\"", "\"" LQ_ENC RQ_ENC "\"", "FIXME" } } }; static struct result_groups flag_results[] = { @@ -242,7 +250,7 @@ main (int argc _GL_UNUSED, char *argv[]) testing for quoting of str7, use the ascii_only flag to decide what to expect for the 8-bit data being quoted. */ ASSERT (!isprint ('\033')); - for (i = literal_quoting_style; i <= clocale_quoting_style; i++) + for (i = literal_quoting_style; i <= java_quoting_style; i++) { set_quoting_style (NULL, (enum quoting_style) i); compare_strings (use_quotearg_buffer, &results_g[i].group1, ascii_only);