As of r230285 (b279775faf3c56b554ecd38159b70ea7f2d37e0b; PR driver/67613) the driver provides suggestions for misspelled options.
This works well for some options e.g. $ gcc -static-libfortran test.f95 gcc: error: unrecognized command line option '-static-libfortran'; did you mean '-static-libgfortran'? but as reported in PR driver/69265 it can generate poor suggestions: $ c++ -sanitize=address foo.cc c++: error: unrecognized command line option ‘-sanitize=address’; did you mean ‘-Wframe-address’? The root cause is that the current implementation only considers cl_options[].opt_text, and has no knowledge of the arguments to -fsanitize (and hence doesn't consider the "address" text when computing edit distances). It also fails to consider the alternate ways of spelling options e.g. "-Wno-" vs "-W". The following patch addresses these issues by building a vec of candidates from cl_options[].opt_text, rather than just using the latter. Successfully bootstrapped®rtested on x86_64-pc-linux-gnu; adds 8 PASS results to gcc.sum. OK for trunk in stage 3? gcc/ChangeLog: PR driver/69265 * gcc.c (suggest_option): Move 2nd half of existing implementation into find_closest_string. Build the list of candidates using add_misspelling_candidates. Special-case OPT_fsanitize_ and OPT_fsanitize_recover_, making use of the sanitizer_args array. Clean up the list of candidates, returning a copy of the suggestion. (driver::handle_unrecognized_options): Free the result of suggest_option. * opts-common.c (add_misspelling_candidates): New function. * opts.c (common_handle_option): Rename local "spec" array and make it a global... (sanitizer_args): ...here. * opts.h (sanitizer_args): New array decl. (add_misspelling_candidates): New function decl. * spellcheck.c (find_closest_string): New function. * spellcheck.h (find_closest_string): New function decl. gcc/testsuite/ChangeLog: PR driver/69265 * gcc.dg/spellcheck-options-3.c: New test case. * gcc.dg/spellcheck-options-4.c: New test case. * gcc.dg/spellcheck-options-5.c: New test case. * gcc.dg/spellcheck-options-6.c: New test case. --- gcc/gcc.c | 85 +++++++++++++++++-------- gcc/opts-common.c | 41 ++++++++++++ gcc/opts.c | 97 ++++++++++++++--------------- gcc/opts.h | 11 ++++ gcc/spellcheck.c | 46 ++++++++++++++ gcc/spellcheck.h | 4 ++ gcc/testsuite/gcc.dg/spellcheck-options-3.c | 6 ++ gcc/testsuite/gcc.dg/spellcheck-options-4.c | 6 ++ gcc/testsuite/gcc.dg/spellcheck-options-5.c | 6 ++ gcc/testsuite/gcc.dg/spellcheck-options-6.c | 6 ++ 10 files changed, 232 insertions(+), 76 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/spellcheck-options-3.c create mode 100644 gcc/testsuite/gcc.dg/spellcheck-options-4.c create mode 100644 gcc/testsuite/gcc.dg/spellcheck-options-5.c create mode 100644 gcc/testsuite/gcc.dg/spellcheck-options-6.c diff --git a/gcc/gcc.c b/gcc/gcc.c index 319a073..8dcc356 100644 --- a/gcc/gcc.c +++ b/gcc/gcc.c @@ -7610,39 +7610,71 @@ driver::maybe_putenv_OFFLOAD_TARGETS () const Given an unrecognized option BAD_OPT (without the leading dash), locate the closest reasonable matching option (again, without the - leading dash), or NULL. */ + leading dash), or NULL. -static const char * + If non-NULL, the string is a copy, which must be freed by the caller. */ + +static char * suggest_option (const char *bad_opt) { - const cl_option *best_option = NULL; - edit_distance_t best_distance = MAX_EDIT_DISTANCE; + /* We build a vec of candidates, using add_misspelling_candidates + to add copies of strings, without a leading dash. */ + auto_vec <char *> candidates; for (unsigned int i = 0; i < cl_options_count; i++) { - edit_distance_t dist = levenshtein_distance (bad_opt, - cl_options[i].opt_text + 1); - if (dist < best_distance) + const char *opt_text = cl_options[i].opt_text; + switch (i) { - best_distance = dist; - best_option = &cl_options[i]; + default: + /* For most options, we simply consider the plain option text, + and its various variants. */ + add_misspelling_candidates (&candidates, opt_text); + break; + + case OPT_fsanitize_: + case OPT_fsanitize_recover_: + /* -fsanitize= and -fsanitize-recover= can take + a comma-separated list of arguments. Given that combinations + are supported, we can't add all potential candidates to the + vec, but if we at least add them individually without commas, + we should do a better job e.g. correcting + "-sanitize=address" + to + "-fsanitize=address" + rather than to "-Wframe-address" (PR driver/69265). */ + { + for (int j = 0; sanitizer_args[j].name != NULL; ++j) + { + /* Get one arg at a time e.g. "-fsanitize=address". */ + char *with_arg = concat (opt_text, + sanitizer_args[j].name, + NULL); + /* Add with_arg and all of its variant spellings e.g. + "-fno-sanitize=address" to candidates (albeit without + leading dashes). */ + add_misspelling_candidates (&candidates, with_arg); + free (with_arg); + } + } + break; } } - if (!best_option) - return NULL; + /* "candidates" is now populated. Use it. */ + const char *suggestion + = find_closest_string (bad_opt, + (auto_vec <const char *> *) (&candidates)); - /* If more than half of the letters were misspelled, the suggestion is - likely to be meaningless. */ - if (best_option) - { - unsigned int cutoff = MAX (strlen (bad_opt), - strlen (best_option->opt_text + 1)) / 2; - if (best_distance > cutoff) - return NULL; - } + /* Release the various copies in candidates, making a copy of the + result first (if non-NULL). */ + char *result = suggestion ? xstrdup (suggestion) : NULL; + int i; + char *str; + FOR_EACH_VEC_ELT (candidates, i, str) + free (str); - return best_option->opt_text + 1; + return result; } /* Reject switches that no pass was interested in. */ @@ -7653,11 +7685,14 @@ driver::handle_unrecognized_options () const for (size_t i = 0; (int) i < n_switches; i++) if (! switches[i].validated) { - const char *hint = suggest_option (switches[i].part1); + char *hint = suggest_option (switches[i].part1); if (hint) - error ("unrecognized command line option %<-%s%>;" - " did you mean %<-%s%>?", - switches[i].part1, hint); + { + error ("unrecognized command line option %<-%s%>;" + " did you mean %<-%s%>?", + switches[i].part1, hint); + free (hint); + } else error ("unrecognized command line option %<-%s%>", switches[i].part1); diff --git a/gcc/opts-common.c b/gcc/opts-common.c index 38c8058..a1fdfcc 100644 --- a/gcc/opts-common.c +++ b/gcc/opts-common.c @@ -365,6 +365,47 @@ static const struct option_map option_map[] = { "--no-", NULL, "-f", false, true } }; +/* Helper function for gcc.c's suggest_option, for populating the vec of + suggestions for misspelled options. + + option_map above provides various prefixes for spelling command-line + options, which decode_cmdline_option uses to map spellings of options + to specific options. We want to do the reverse: to find all the ways + that a user could validly spell an option. + + Given valid OPT_TEXT (with a leading dash), add it and all of its valid + variant spellings to CANDIDATES, each without a leading dash. + + For example, given "-Wabi-tag", the following are added to CANDIDATES: + "Wabi-tag" + "Wno-abi-tag" + "-warn-abi-tag" + "-warn-no-abi-tag". + + The added strings must be freed using free. */ + +void +add_misspelling_candidates (auto_vec<char *> *candidates, + const char *opt_text) +{ + gcc_assert (candidates); + gcc_assert (opt_text); + candidates->safe_push (xstrdup (opt_text + 1)); + for (unsigned i = 0; i < ARRAY_SIZE (option_map); i++) + { + const char *opt0 = option_map[i].opt0; + const char *new_prefix = option_map[i].new_prefix; + size_t new_prefix_len = strlen (new_prefix); + + if (strncmp (opt_text, new_prefix, new_prefix_len) == 0) + { + char *alternative = concat (opt0 + 1, opt_text + new_prefix_len, + NULL); + candidates->safe_push (alternative); + } + } +} + /* Decode the switch beginning at ARGV for the language indicated by LANG_MASK (including CL_COMMON and CL_TARGET if applicable), into the structure *DECODED. Returns the number of switches diff --git a/gcc/opts.c b/gcc/opts.c index 2add158..212a5d0 100644 --- a/gcc/opts.c +++ b/gcc/opts.c @@ -1437,6 +1437,46 @@ enable_fdo_optimizations (struct gcc_options *opts, opts->x_flag_tree_loop_distribute_patterns = value; } +const struct sanitizer_arg sanitizer_args[] = { + { "address", SANITIZE_ADDRESS | SANITIZE_USER_ADDRESS, + sizeof "address" - 1 }, + { "kernel-address", SANITIZE_ADDRESS | SANITIZE_KERNEL_ADDRESS, + sizeof "kernel-address" - 1 }, + { "thread", SANITIZE_THREAD, sizeof "thread" - 1 }, + { "leak", SANITIZE_LEAK, sizeof "leak" - 1 }, + { "shift", SANITIZE_SHIFT, sizeof "shift" - 1 }, + { "integer-divide-by-zero", SANITIZE_DIVIDE, + sizeof "integer-divide-by-zero" - 1 }, + { "undefined", SANITIZE_UNDEFINED, sizeof "undefined" - 1 }, + { "unreachable", SANITIZE_UNREACHABLE, + sizeof "unreachable" - 1 }, + { "vla-bound", SANITIZE_VLA, sizeof "vla-bound" - 1 }, + { "return", SANITIZE_RETURN, sizeof "return" - 1 }, + { "null", SANITIZE_NULL, sizeof "null" - 1 }, + { "signed-integer-overflow", SANITIZE_SI_OVERFLOW, + sizeof "signed-integer-overflow" -1 }, + { "bool", SANITIZE_BOOL, sizeof "bool" - 1 }, + { "enum", SANITIZE_ENUM, sizeof "enum" - 1 }, + { "float-divide-by-zero", SANITIZE_FLOAT_DIVIDE, + sizeof "float-divide-by-zero" - 1 }, + { "float-cast-overflow", SANITIZE_FLOAT_CAST, + sizeof "float-cast-overflow" - 1 }, + { "bounds", SANITIZE_BOUNDS, sizeof "bounds" - 1 }, + { "bounds-strict", SANITIZE_BOUNDS | SANITIZE_BOUNDS_STRICT, + sizeof "bounds-strict" - 1 }, + { "alignment", SANITIZE_ALIGNMENT, sizeof "alignment" - 1 }, + { "nonnull-attribute", SANITIZE_NONNULL_ATTRIBUTE, + sizeof "nonnull-attribute" - 1 }, + { "returns-nonnull-attribute", + SANITIZE_RETURNS_NONNULL_ATTRIBUTE, + sizeof "returns-nonnull-attribute" - 1 }, + { "object-size", SANITIZE_OBJECT_SIZE, + sizeof "object-size" - 1 }, + { "vptr", SANITIZE_VPTR, sizeof "vptr" - 1 }, + { "all", ~0, sizeof "all" - 1 }, + { NULL, 0, 0 } +}; + /* Handle target- and language-independent options. Return zero to generate an "unknown option" message. Only options that need extra handling need to be listed here; if you simply want @@ -1638,51 +1678,6 @@ common_handle_option (struct gcc_options *opts, : &opts->x_flag_sanitize_recover; while (*p != 0) { - static const struct - { - const char *const name; - unsigned int flag; - size_t len; - } spec[] = - { - { "address", SANITIZE_ADDRESS | SANITIZE_USER_ADDRESS, - sizeof "address" - 1 }, - { "kernel-address", SANITIZE_ADDRESS | SANITIZE_KERNEL_ADDRESS, - sizeof "kernel-address" - 1 }, - { "thread", SANITIZE_THREAD, sizeof "thread" - 1 }, - { "leak", SANITIZE_LEAK, sizeof "leak" - 1 }, - { "shift", SANITIZE_SHIFT, sizeof "shift" - 1 }, - { "integer-divide-by-zero", SANITIZE_DIVIDE, - sizeof "integer-divide-by-zero" - 1 }, - { "undefined", SANITIZE_UNDEFINED, sizeof "undefined" - 1 }, - { "unreachable", SANITIZE_UNREACHABLE, - sizeof "unreachable" - 1 }, - { "vla-bound", SANITIZE_VLA, sizeof "vla-bound" - 1 }, - { "return", SANITIZE_RETURN, sizeof "return" - 1 }, - { "null", SANITIZE_NULL, sizeof "null" - 1 }, - { "signed-integer-overflow", SANITIZE_SI_OVERFLOW, - sizeof "signed-integer-overflow" -1 }, - { "bool", SANITIZE_BOOL, sizeof "bool" - 1 }, - { "enum", SANITIZE_ENUM, sizeof "enum" - 1 }, - { "float-divide-by-zero", SANITIZE_FLOAT_DIVIDE, - sizeof "float-divide-by-zero" - 1 }, - { "float-cast-overflow", SANITIZE_FLOAT_CAST, - sizeof "float-cast-overflow" - 1 }, - { "bounds", SANITIZE_BOUNDS, sizeof "bounds" - 1 }, - { "bounds-strict", SANITIZE_BOUNDS | SANITIZE_BOUNDS_STRICT, - sizeof "bounds-strict" - 1 }, - { "alignment", SANITIZE_ALIGNMENT, sizeof "alignment" - 1 }, - { "nonnull-attribute", SANITIZE_NONNULL_ATTRIBUTE, - sizeof "nonnull-attribute" - 1 }, - { "returns-nonnull-attribute", - SANITIZE_RETURNS_NONNULL_ATTRIBUTE, - sizeof "returns-nonnull-attribute" - 1 }, - { "object-size", SANITIZE_OBJECT_SIZE, - sizeof "object-size" - 1 }, - { "vptr", SANITIZE_VPTR, sizeof "vptr" - 1 }, - { "all", ~0, sizeof "all" - 1 }, - { NULL, 0, 0 } - }; const char *comma; size_t len, i; bool found = false; @@ -1699,12 +1694,12 @@ common_handle_option (struct gcc_options *opts, } /* Check to see if the string matches an option class name. */ - for (i = 0; spec[i].name != NULL; ++i) - if (len == spec[i].len - && memcmp (p, spec[i].name, len) == 0) + for (i = 0; sanitizer_args[i].name != NULL; ++i) + if (len == sanitizer_args[i].len + && memcmp (p, sanitizer_args[i].name, len) == 0) { /* Handle both -fsanitize and -fno-sanitize cases. */ - if (value && spec[i].flag == ~0U) + if (value && sanitizer_args[i].flag == ~0U) { if (code == OPT_fsanitize_) error_at (loc, "-fsanitize=all option is not valid"); @@ -1713,9 +1708,9 @@ common_handle_option (struct gcc_options *opts, | SANITIZE_LEAK); } else if (value) - *flag |= spec[i].flag; + *flag |= sanitizer_args[i].flag; else - *flag &= ~spec[i].flag; + *flag &= ~sanitizer_args[i].flag; found = true; break; } diff --git a/gcc/opts.h b/gcc/opts.h index 7e48dbe..10fd9b2 100644 --- a/gcc/opts.h +++ b/gcc/opts.h @@ -402,4 +402,15 @@ extern void set_struct_debug_option (struct gcc_options *opts, const char *value); extern bool opt_enum_arg_to_value (size_t opt_index, const char *arg, int *value, unsigned int lang_mask); + +extern const struct sanitizer_arg +{ + const char *const name; + unsigned int flag; + size_t len; +} sanitizer_args[]; + +extern void add_misspelling_candidates (auto_vec<char *> *candidates, + const char *base_option); + #endif diff --git a/gcc/spellcheck.c b/gcc/spellcheck.c index e641a56..be540c0 100644 --- a/gcc/spellcheck.c +++ b/gcc/spellcheck.c @@ -119,3 +119,49 @@ levenshtein_distance (const char *s, const char *t) { return levenshtein_distance (s, strlen (s), t, strlen (t)); } + +/* Given TARGET, a non-NULL string, and CANDIDATES, a non-NULL ptr to + an autovec of non-NULL strings, determine which element within + CANDIDATES has the lowest edit distance to TARGET. If there are + multiple elements with the same minimal distance, the first in the + vector wins. + + If more than half of the letters were misspelled, the suggestion is + likely to be meaningless, so return NULL for this case. */ + +const char * +find_closest_string (const char *target, + const auto_vec<const char *> *candidates) +{ + gcc_assert (target); + gcc_assert (candidates); + + int i; + const char *candidate; + const char *best_candidate = NULL; + edit_distance_t best_distance = MAX_EDIT_DISTANCE; + size_t len_target = strlen (target); + FOR_EACH_VEC_ELT (*candidates, i, candidate) + { + gcc_assert (candidate); + edit_distance_t dist + = levenshtein_distance (target, len_target, + candidate, strlen (candidate)); + if (dist < best_distance) + { + best_distance = dist; + best_candidate = candidate; + } + } + + /* If more than half of the letters were misspelled, the suggestion is + likely to be meaningless. */ + if (best_candidate) + { + unsigned int cutoff = MAX (len_target, strlen (best_candidate)) / 2; + if (best_distance > cutoff) + return NULL; + } + + return best_candidate; +} diff --git a/gcc/spellcheck.h b/gcc/spellcheck.h index 4c662a7..040c33e 100644 --- a/gcc/spellcheck.h +++ b/gcc/spellcheck.h @@ -31,6 +31,10 @@ levenshtein_distance (const char *s, int len_s, extern edit_distance_t levenshtein_distance (const char *s, const char *t); +extern const char * +find_closest_string (const char *target, + const auto_vec<const char *> *candidates); + /* spellcheck-tree.c */ extern edit_distance_t diff --git a/gcc/testsuite/gcc.dg/spellcheck-options-3.c b/gcc/testsuite/gcc.dg/spellcheck-options-3.c new file mode 100644 index 0000000..4133df9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/spellcheck-options-3.c @@ -0,0 +1,6 @@ +/* Verify that we provide simple suggestions for the arguments of + "-fsanitize=" when it is misspelled (PR driver/69265). */ + +/* { dg-do compile } */ +/* { dg-options "-sanitize=address" } */ +/* { dg-error "unrecognized command line option '-sanitize=address'; did you mean '-fsanitize=address'?" "" { target *-*-* } 0 } */ diff --git a/gcc/testsuite/gcc.dg/spellcheck-options-4.c b/gcc/testsuite/gcc.dg/spellcheck-options-4.c new file mode 100644 index 0000000..252376f --- /dev/null +++ b/gcc/testsuite/gcc.dg/spellcheck-options-4.c @@ -0,0 +1,6 @@ +/* Verify that we provide simple suggestions for the arguments of + "-fsanitize-recover=" when it is misspelled (PR driver/69265). */ + +/* { dg-do compile } */ +/* { dg-options "-sanitize-recover=integer-divide-by-0" } */ +/* { dg-error "unrecognized command line option '-sanitize-recover=integer-divide-by-0'; did you mean '-fsanitize-recover=integer-divide-by-zero'?" "" { target *-*-* } 0 } */ diff --git a/gcc/testsuite/gcc.dg/spellcheck-options-5.c b/gcc/testsuite/gcc.dg/spellcheck-options-5.c new file mode 100644 index 0000000..9a02bb7 --- /dev/null +++ b/gcc/testsuite/gcc.dg/spellcheck-options-5.c @@ -0,0 +1,6 @@ +/* Verify that we provide suggestions (with arguments) for the "-fno-" + variant of "-fsanitize=" when it is misspelled (PR driver/69265). */ + +/* { dg-do compile } */ +/* { dg-options "-no-sanitize=all" } */ +/* { dg-error "unrecognized command line option '-no-sanitize=all'; did you mean '-fno-sanitize=all'?" "" { target *-*-* } 0 } */ diff --git a/gcc/testsuite/gcc.dg/spellcheck-options-6.c b/gcc/testsuite/gcc.dg/spellcheck-options-6.c new file mode 100644 index 0000000..4d6bf0d --- /dev/null +++ b/gcc/testsuite/gcc.dg/spellcheck-options-6.c @@ -0,0 +1,6 @@ +/* Verify that we can generate a suggestion of "--warn-no-abi-tag" + from c.opt's "Wabi-tag" (PR driver/69265). */ + +/* { dg-do compile } */ +/* { dg-options "-fwarn-no-abi-tag" } */ +/* { dg-error "unrecognized command line option '-fwarn-no-abi-tag'; did you mean '--warn-no-abi-tag'?" "" { target *-*-* } 0 } */ -- 1.8.5.3