On Thu, 2016-07-14 at 16:50 +0200, Jakub Jelinek wrote:
> Hi!
> 
> As mentioned in the PR, anticipated decls should be ignored from
> fuzzy
> lookups, unless the corresponding decl is declared first.

I wrote a patch for this, similar to yours, but with a slightly
different approach for handling builtins.

I think it's reasonable to offer suggestions for misspellings of e.g.
"snprintf" even if the pertinent header hasn't been included, since it
will help guide a novice developer to the missing header.  For example,
if the user has:

void
test_7 (int i, int j)
{
  int buffer[100];
  snprint (buffer, 100, "%i of %i", i, j);
}

...then they get:

test.c: In function ‘test_7’:
test.c:5:3: warning: implicit declaration of function ‘snprint’; did
you mean ‘snprintf’? [-Wimplicit-function-declaration]
   snprint (buffer, 100, "%i of %i", i, j);
   ^~~~~~~
   snprintf

...and on correcting it, they get the "you forgot <stdio.h>" warning:

test.c: In function ‘test_7’:
test.c:5:3: warning: implicit declaration of function ‘snprintf’ [
-Wimplicit-function-declaration]
   snprintf (buffer, 100, "%i of %i", i, j);
   ^~~~~~~~
test.c:5:3: warning: incompatible implicit declaration of built-in
function ‘snprintf’
test.c:5:3: note: include ‘<stdio.h>’ or provide a declaration of
‘snprintf’

(and more errors, it's missing a &size argument).


What I find surprising is the suggestion of "carg"; I've seen this a
few times as a suggestion.   I confess to having to look up carg; IMHO
"carg" is much less likely to be a useful suggestion than, say,
"printf".

Attached is the patch I wrote (not tested yet, and no ChangeLog,
sorry), which keeps the anticipated decls, but uses a crude heuristic
to reject short ones.

As noted in the patch, a more sophisticated approach might be to
analyze a large corpus of real-world code and see how often each
builtin is used, and filter out the less-frequently-used ones.

But I felt a simple "reject any builtin name with length <= 4" was good
enough as a first iteration.

Thoughts?

> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> 2016-07-13  Jakub Jelinek  <ja...@redhat.com>
> 
>       PR c/71858
>       * c-decl.c (lookup_name_fuzzy): Ignore binding->invisible.
> 
>       * gcc.dg/spellcheck-identifiers.c (snprintf): Declare.
>       * gcc.dg/spellcheck-identifiers-2.c: New test.
>       * gcc.dg/diagnostic-token-ranges.c (nanl): Declare.
>       * c-c++-common/attributes-1.c: Adjust dg-prune-output.
> 
> --- gcc/c/c-decl.c.jj 2016-06-24 12:59:22.000000000 +0200
> +++ gcc/c/c-decl.c    2016-07-13 22:40:23.410658411 +0200
> @@ -4021,7 +4021,7 @@ lookup_name_fuzzy (tree name, enum looku
>    for (c_scope *scope = current_scope; scope; scope = scope->outer)
>      for (c_binding *binding = scope->bindings; binding; binding =
> binding->prev)
>        {
> -     if (!binding->id)
> +     if (!binding->id || binding->invisible)
>         continue;
>       /* Don't use bindings from implicitly declared functions,
>          as they were likely misspellings themselves.  */
> --- gcc/testsuite/gcc.dg/spellcheck-identifiers.c.jj  2016-06
> -24 12:59:12.000000000 +0200
> +++ gcc/testsuite/gcc.dg/spellcheck-identifiers.c     2016-07-14
> 10:03:36.147466813 +0200
> @@ -121,7 +121,7 @@ test_6 (enum foo f)
>      }
>  }
>  
> -/* Verify that we offer names of builtins as suggestions.  */
> +int snprintf (char *, __SIZE_TYPE__, const char *, ...);
>  void
>  test_7 (int i, int j)
> --- gcc/testsuite/gcc.dg/spellcheck-identifiers-2.c.jj        2016-07
> -14 09:44:16.351537449 +0200
> +++ gcc/testsuite/gcc.dg/spellcheck-identifiers-2.c   2016-07-14
> 10:02:21.965426567 +0200
> @@ -0,0 +1,33 @@
> +/* PR c/71858 */
> +/* Make sure anticipated builtins are not considered before they are
> declared.  */
> +/* { dg-do compile } */
> +/* { dg-options "-Wimplicit-function-declaration -fdiagnostics-show
> -caret" } */
> +
> +int sscafn (const char *, const char *, ...);
> +
> +int
> +test_1 (const char *p)
> +{
> +  int i;
> +  return ssacnf (p, "%d", &i); /* { dg-warning "10: implicit
> declaration of function .ssacnf.; did you mean .sscafn.?" } */
> +  /* { dg-begin-multiline-output "" }
> +   return ssacnf (p, "%d", &i);
> +          ^~~~~~
> +          sscafn
> +   { dg-end-multiline-output "" } */
> +}
> +
> +int scafn (const char *, ...);
> +int scanf (const char *, ...);
> +
> +int
> +test_2 (void)
> +{
> +  int i;
> +  return sacnf ("%d", &i); /* { dg-warning "10: implicit declaration
> of function .sacnf.; did you mean .scanf.?" } */
> +  /* { dg-begin-multiline-output "" }
> +   return sacnf ("%d", &i);
> +          ^~~~~
> +          scanf
> +   { dg-end-multiline-output "" } */
> +}
> --- gcc/testsuite/gcc.dg/diagnostic-token-ranges.c.jj 2016-06
> -24 12:59:12.000000000 +0200
> +++ gcc/testsuite/gcc.dg/diagnostic-token-ranges.c    2016-07-14
> 11:06:23.013803011 +0200
> @@ -2,6 +2,8 @@
>  
>  /* Verify that various diagnostics show source code ranges.  */
>  
> +long double nanl (const char *);
> +
>  /* These ones merely use token ranges; they don't use tree ranges. 
>  */
>  
>  void undeclared_identifier (void)
> --- gcc/testsuite/c-c++-common/attributes-1.c.jj      2016-06-23
> 14:31:57.000000000 +0200
> +++ gcc/testsuite/c-c++-common/attributes-1.c 2016-07-14
> 14:51:34.871006659 +0200
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-prune-output "undeclared here \\(not in a function\\); did
> you mean .carg..|\[^\n\r\]* was not declared in this scope" } */
> +/* { dg-prune-output "undeclared here \\(not in a function\\); did
> you mean .char..|\[^\n\r\]* was not declared in this scope" } */
>  
>  void* my_calloc(unsigned, unsigned)
> __attribute__((alloc_size(1,bar))); /* { dg-warning "outside range" }
> */
>  void* my_realloc(void*, unsigned) __attribute__((alloc_size(bar)));
> /* { dg-warning "outside range" } */
> 
>       Jakub
From 7f3cd32fd4338db7b8b8f09b16429975c5907cb6 Mon Sep 17 00:00:00 2001
From: David Malcolm <dmalc...@redhat.com>
Date: Wed, 13 Jul 2016 13:40:22 -0400
Subject: [PATCH] FIXME: PR c/71858 - Don't always offer names of builtins as
 misspellings

---
 gcc/c-family/c-common.h                       |  3 +++
 gcc/c/c-decl.c                                | 25 ++++++++++++++++++++++++-
 gcc/testsuite/gcc.dg/spellcheck-identifiers.c | 15 +++++++++++++++
 3 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index 44d98d1..32e68d0 100644
--- a/gcc/c-family/c-common.h
+++ b/gcc/c-family/c-common.h
@@ -994,6 +994,9 @@ enum lookup_name_fuzzy_kind {
   /* Names of types.  */
   FUZZY_LOOKUP_TYPENAME,
 
+  /* Names of functions.  */
+  FUZZY_LOOKUP_FUNCTION_NAME,
+
   /* Any name.  */
   FUZZY_LOOKUP_NAME
 };
diff --git a/gcc/c/c-decl.c b/gcc/c/c-decl.c
index dc14485..de03ad4 100644
--- a/gcc/c/c-decl.c
+++ b/gcc/c/c-decl.c
@@ -3090,7 +3090,7 @@ implicit_decl_warning (location_t loc, tree id, tree olddecl)
       bool warned;
       const char *hint = NULL;
       if (!olddecl)
-	hint = lookup_name_fuzzy (id, FUZZY_LOOKUP_NAME);
+	hint = lookup_name_fuzzy (id, FUZZY_LOOKUP_FUNCTION_NAME);
 
       if (flag_isoc99)
 	if (hint)
@@ -3984,6 +3984,9 @@ lookup_name_fuzzy (tree name, enum lookup_name_fuzzy_kind kind)
       {
 	if (!binding->id)
 	  continue;
+
+	gcc_assert (TREE_CODE (binding->id) == IDENTIFIER_NODE);
+
 	/* Don't use bindings from implicitly declared functions,
 	   as they were likely misspellings themselves.  */
 	if (TREE_CODE (binding->decl) == FUNCTION_DECL)
@@ -3992,6 +3995,26 @@ lookup_name_fuzzy (tree name, enum lookup_name_fuzzy_kind kind)
 	if (kind == FUZZY_LOOKUP_TYPENAME)
 	  if (TREE_CODE (binding->decl) != TYPE_DECL)
 	    continue;
+	if (kind == FUZZY_LOOKUP_FUNCTION_NAME)
+	  if (TREE_CODE (binding->decl) != FUNCTION_DECL)
+	    continue;
+
+	/* There are numerous builtin functions with short names, such as
+	   "carg" which are prone to being suggested, and are generally
+	   unhelpful as suggestions. (PR c/71858)
+	   Avoid suggesting them by imposing a minimum length on builtin
+	   functions as suggestions.
+	   A minimum length of 5 is rather arbitrary, but it allows for
+	   "fopen", but prevents "carg".  A more sophisticated approach
+	   might be to analyze how often each builtin is used in real
+	   code, and filter out the less-frequently-used ones.  */
+	const int builtin_length_threshold = 5;
+	if (TREE_CODE (binding->decl) == FUNCTION_DECL)
+	  if (DECL_IS_BUILTIN (binding->decl))
+	    if (IDENTIFIER_LENGTH (binding->id) < builtin_length_threshold)
+	      continue;
+
+	/* Passed all tests; consider this as a suggestion.  */
 	bm.consider (binding->id);
       }
 
diff --git a/gcc/testsuite/gcc.dg/spellcheck-identifiers.c b/gcc/testsuite/gcc.dg/spellcheck-identifiers.c
index 22a12d0..2111499 100644
--- a/gcc/testsuite/gcc.dg/spellcheck-identifiers.c
+++ b/gcc/testsuite/gcc.dg/spellcheck-identifiers.c
@@ -134,3 +134,18 @@ test_7 (int i, int j)
    snprintf
    { dg-end-multiline-output "" } */
 }
+
+/* PR c/71858 - Don't always offer names of builtins as misspellings;
+   in particular, don't offer "carg" as a suggestion for "bar".  */
+
+int
+test_8 (void)
+{
+  return bar ();
+  /* { dg-bogus "did you mean" "" { target *-*-* } 144 } */
+  /* { dg-warning "implicit declaration" "" { target *-*-* } 144 } */
+  /* { dg-begin-multiline-output "" }
+   return bar ();
+          ^~~
+   { dg-end-multiline-output "" } */
+}
-- 
1.8.5.3

Reply via email to