[PATCH 2/6] detect unterminated const arrays in strlen calls (PR 86552)

The attached changes implement the detection of past-the-end reads
by strlen due to unterminated arguments.
PR tree-optimization/86552 - missing warning for reading past the end

gcc/ChangeLog:

	* builtins.c (warn_string_no_nul): New function.
	(expand_builtin_strlen): Warn for unterminated arrays.
	(fold_builtin_strlen): Add argument.  Warn for unterminated arrays.
	(fold_builtin_1): Adjust call to fold_builtin_strlen.
	* builtins.h (warn_string_no_nul): New function.

gcc/testsuite/ChangeLog:

	* gcc.dg/warn-strlen-no-nul.c: New test.

diff --git a/gcc/builtins.c b/gcc/builtins.c
index a7aa4b2..78ced93 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -150,7 +150,7 @@ static tree stabilize_va_list_loc (location_t, tree, int);
 static rtx expand_builtin_expect (tree, rtx);
 static tree fold_builtin_constant_p (tree);
 static tree fold_builtin_classify_type (tree);
-static tree fold_builtin_strlen (location_t, tree, tree);
+static tree fold_builtin_strlen (location_t, tree, tree, tree);
 static tree fold_builtin_inf (location_t, tree, int);
 static tree rewrite_call_expr (location_t, tree, int, tree, int, ...);
 static bool validate_arg (const_tree, enum tree_code code);
@@ -550,6 +550,36 @@ string_length (const void *ptr, unsigned eltsize, unsigned maxelts)
   return n;
 }
 
+/* For a call expression EXP to a function that expects a string argument,
+   issue a diagnostic due to it being a called with an argument NONSTR
+   that is a character array with no terminating NUL.  */
+
+void
+warn_string_no_nul (location_t loc, tree exp, tree fndecl, tree nonstr)
+{
+  loc = expansion_point_location_if_in_system_header (loc);
+
+  bool warned;
+  if (exp)
+    {
+      if (!fndecl)
+	fndecl = get_callee_fndecl (exp);
+      warned = warning_at (loc, OPT_Wstringop_overflow_,
+			   "%K%qD argument missing terminating nul",
+			   exp, fndecl);
+    }
+  else
+    {
+      gcc_assert (fndecl);
+      warned = warning_at (loc, OPT_Wstringop_overflow_,
+			   "%qD argument missing terminating nul",
+			   fndecl);
+    }
+
+  if (warned && DECL_P (nonstr))
+    inform (DECL_SOURCE_LOCATION (nonstr), "referenced argument declared here");
+}
+
 /* Compute the length of a null-terminated character string or wide
    character string handling character sizes of 1, 2, and 4 bytes.
    TREE_STRING_LENGTH is not the right way because it evaluates to
@@ -2874,7 +2904,6 @@ expand_builtin_strlen (tree exp, rtx target,
 
   struct expand_operand ops[4];
   rtx pat;
-  tree len;
   tree src = CALL_EXPR_ARG (exp, 0);
   rtx src_reg;
   rtx_insn *before_strlen;
@@ -2883,20 +2912,39 @@ expand_builtin_strlen (tree exp, rtx target,
   unsigned int align;
 
   /* If the length can be computed at compile-time, return it.  */
-  len = c_strlen (src, 0);
+  tree array;
+  tree len = c_strlen (src, 0, &array);
   if (len)
-    return expand_expr (len, target, target_mode, EXPAND_NORMAL);
+    {
+      if (array)
+	{
+	  /* Array refers to the non-nul terminated constant array
+	     whose length is attempted to be computed.  */
+	  warn_string_no_nul (EXPR_LOCATION (exp), exp, NULL_TREE, array);
+	  return NULL_RTX;
+	}
+      return expand_expr (len, target, target_mode, EXPAND_NORMAL);
+    }
 
   /* If the length can be computed at compile-time and is constant
      integer, but there are side-effects in src, evaluate
      src for side-effects, then return len.
      E.g. x = strlen (i++ ? "xfoo" + 1 : "bar");
      can be optimized into: i++; x = 3;  */
-  len = c_strlen (src, 1);
-  if (len && TREE_CODE (len) == INTEGER_CST)
+  len = c_strlen (src, 1, &array);
+  if (len)
     {
-      expand_expr (src, const0_rtx, VOIDmode, EXPAND_NORMAL);
-      return expand_expr (len, target, target_mode, EXPAND_NORMAL);
+      if (array)
+	{
+	  warn_string_no_nul (EXPR_LOCATION (exp), exp, NULL_TREE, array);
+	  return NULL_RTX;
+	}
+
+      if (TREE_CODE (len) == INTEGER_CST)
+	{
+	  expand_expr (src, const0_rtx, VOIDmode, EXPAND_NORMAL);
+	  return expand_expr (len, target, target_mode, EXPAND_NORMAL);
+	}
     }
 
   align = get_pointer_alignment (src) / BITS_PER_UNIT;
@@ -8339,19 +8387,30 @@ fold_builtin_classify_type (tree arg)
   return build_int_cst (integer_type_node, type_to_class (TREE_TYPE (arg)));
 }
 
-/* Fold a call to __builtin_strlen with argument ARG.  */
+/* Fold a strlen call to FNDECL of TYPE, and with argument ARG.  */
 
 static tree
-fold_builtin_strlen (location_t loc, tree type, tree arg)
+fold_builtin_strlen (location_t loc, tree fndecl, tree type, tree arg)
 {
   if (!validate_arg (arg, POINTER_TYPE))
     return NULL_TREE;
   else
     {
-      tree len = c_strlen (arg, 0);
-
+      tree nonstr = NULL_TREE;
+      tree len = c_strlen (arg, 0, &nonstr);
       if (len)
-	return fold_convert_loc (loc, type, len);
+	{
+	  if (loc == UNKNOWN_LOCATION && EXPR_HAS_LOCATION (arg))
+	    loc = EXPR_LOCATION (arg);
+
+	  /* To avoid warning multiple times about unterminated
+	     arrays only warn if its length has been determined
+	     and is being folded to a constant.  */
+	  if (nonstr)
+	    warn_string_no_nul (loc, NULL_TREE, fndecl, nonstr);
+
+	  return fold_convert_loc (loc, type, len);
+	}
 
       return NULL_TREE;
     }
@@ -9259,7 +9318,7 @@ fold_builtin_1 (location_t loc, tree fndecl, tree arg0)
       return fold_builtin_classify_type (arg0);
 
     case BUILT_IN_STRLEN:
-      return fold_builtin_strlen (loc, type, arg0);
+      return fold_builtin_strlen (loc, fndecl, type, arg0);
 
     CASE_FLT_FN (BUILT_IN_FABS):
     CASE_FLT_FN_FLOATN_NX (BUILT_IN_FABS):
diff --git a/gcc/builtins.h b/gcc/builtins.h
index 27e6959..73b0b0b 100644
--- a/gcc/builtins.h
+++ b/gcc/builtins.h
@@ -103,7 +103,7 @@ extern bool target_char_cst_p (tree t, char *p);
 
 extern internal_fn associated_internal_fn (tree);
 extern internal_fn replacement_internal_fn (gcall *);
-
+extern void warn_string_no_nul (location_t, tree, tree, tree);
 extern tree max_object_size ();
 
 #endif /* GCC_BUILTINS_H */
diff --git a/gcc/testsuite/gcc.dg/warn-strlen-no-nul.c b/gcc/testsuite/gcc.dg/warn-strlen-no-nul.c
new file mode 100644
index 0000000..c2b0438
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/warn-strlen-no-nul.c
@@ -0,0 +1,304 @@
+/* PR tree-optimization/86552 - missing warning for reading past the end
+   of non-string arrays
+   { dg-do compile }
+   { dg-options "-O2 -Wall -ftrack-macro-expansion=0" } */
+
+extern __SIZE_TYPE__ strlen (const char*);
+
+const char a[5] = "12345";   /* { dg-message "declared here" } */
+
+int v0 = 0;
+int v1 = 1;
+
+void sink (int, ...);
+
+#define CONCAT(a, b)   a ## b
+#define CAT(a, b)      CONCAT(a, b)
+
+#define T(str)						\
+  __attribute__ ((noipa))				\
+  void CAT (test_, __LINE__) (void) {			\
+    int i0 = 0, i1 = i0 + 1, i2 = i1 + 1, i3 = i2 + 1;	\
+    sink (strlen (str), i0, i1, i2, i3);		\
+  } typedef void dummy_type
+
+T (a);                /* { dg-warning "argument missing terminating nul" }  */
+T (&a[0]);            /* { dg-warning "nul" }  */
+T (&a[0] + 1);        /* { dg-warning "nul" }  */
+T (&a[1]);            /* { dg-warning "nul" }  */
+T (&a[v0]);           /* { dg-warning "nul" }  */
+T (&a[v0] + 1);       /* { dg-warning "nul" }  */
+
+
+const char b[][5] = { /* { dg-message "declared here" } */
+  "12", "123", "1234", "54321"
+};
+
+T (b[0]);
+T (b[1]);
+T (b[2]);
+T (b[3]);             /* { dg-warning "nul" }  */
+
+T (b[i0]);
+T (b[i1]);
+T (b[i2]);
+T (b[i3]);            /* { dg-warning "nul" }  */
+
+T (b[v0]);
+
+T (&b[i2][i1]);
+T (&b[i2][i1] + i1);
+T (&b[i2][v0]);
+T (&b[i2][i1] + v0);
+
+T (&b[2][1]);
+T (&b[2][1] + i1);
+T (&b[2][i0]);
+T (&b[2][1] + i0);
+
+T (&b[2][1]);
+T (&b[2][1] + v0);
+T (&b[2][v0]);
+
+T (&b[3][1]);           /* { dg-warning "nul" }  */
+T (&b[3][1] + 1);       /* { dg-warning "nul" }  */
+T (&b[3][1] + i1);      /* { dg-warning "nul" }  */
+T (&b[3][v0]);          /* { dg-warning "nul" }  */
+T (&b[3][1] + v0);      /* { dg-warning "nul" }  */
+T (&b[3][v0] + v1);     /* { dg-warning "nul" }  */
+
+T (&b[i3][i1]);         /* { dg-warning "nul" }  */
+T (&b[i3][i1] + 1);     /* { dg-warning "nul" }  */
+T (&b[i3][i1] + i1);    /* { dg-warning "nul" }  */
+T (&b[i3][v0]);         /* { dg-warning "nul" "pr86919" { xfail *-*-* } }  */
+T (&b[i3][i1] + v0);    /* { dg-warning "nul" "pr86919" { xfail *-*-* } }  */
+T (&b[i3][v0] + v1);    /* { dg-warning "nul" "pr86919" { xfail *-*-* } }  */
+
+T (v0 ? "" : b[0]);
+T (v0 ? "" : b[1]);
+T (v0 ? "" : b[2]);
+T (v0 ? "" : b[3]);               /* { dg-warning "nul" }  */
+T (v0 ? b[0] : "");
+T (v0 ? b[1] : "");
+T (v0 ? b[2] : "");
+T (v0 ? b[3] : "");               /* { dg-warning "nul" }  */
+
+T (v0 ? "" : b[i0]);
+T (v0 ? "" : b[i1]);
+T (v0 ? "" : b[i2]);
+/* The following is diagnosed but the warning location is wrong
+   (the PRE pass loses it).  */
+T (v0 ? "" : b[i3]);              /* { dg-warning "nul" "pr?????" { xfail *-*-* } }  */
+T (v0 ? b[i0] : "");
+T (v0 ? b[i1] : "");
+T (v0 ? b[i2] : "");
+T (v0 ? b[i3] : "");              /* { dg-warning "nul" "pr?????" { xfail *-*-* } }  */
+
+T (v0 ? "1234" : b[3]);           /* { dg-warning "nul" }  */
+T (v0 ? "1234" : b[i3]);          /* { dg-warning "nul" "pr?????" { xfail *-*-* } }  */
+T (v0 ? b[3] : "1234");           /* { dg-warning "nul" }  */
+T (v0 ? b[i3] : "1234");          /* { dg-warning "nul" "pr?????" { xfail *-*-* } }  */
+
+T (v0 ? a : b[3]);                /* { dg-warning "nul" }  */
+T (v0 ? b[0] : b[2]);
+T (v0 ? b[2] : b[3]);             /* { dg-warning "nul" }  */
+T (v0 ? b[3] : b[2]);             /* { dg-warning "nul" }  */
+
+T (v0 ? a : b[i3]);               /* { dg-warning "nul" "pr?????" { xfail *-*-* } }  */
+T (v0 ? b[i0] : b[i2]);
+T (v0 ? b[i2] : b[i3]);           /* { dg-warning "nul" "pr?????" { xfail *-*-* } }  */
+T (v0 ? b[i3] : b[i2]);           /* { dg-warning "nul" "pr?????" { xfail *-*-* } }  */
+
+T (v0 ? b[0] : &b[3][0] + 1);     /* { dg-warning "nul" }  */
+T (v0 ? b[0] : &b[3][0] + i1);    /* { dg-warning "nul" }  */
+T (v0 ? b[1] : &b[3][1] + v0);    /* { dg-warning "nul" }  */
+
+T (v0 ? b[i0] : &b[i3][i0] + i1);    /* { dg-warning "nul" }  */
+T (v0 ? b[i0] : &b[i3][i0] + i1);    /* { dg-warning "nul" }  */
+T (v0 ? b[i1] : &b[i3][i1] + v0);    /* { dg-warning "nul" "pr?????" { xfail *-*-* } }  */
+
+/* It's possible to detect the missing nul in the following two
+   expressions but GCC doesn't do it yet.  */
+T (v0 ? &b[3][1] + v0 : b[2]);    /* { dg-warning "nul" "bug" { xfail *-*-* } }  */
+T (v0 ? &b[3][v0] : &b[3][v1]);   /* { dg-warning "nul" "bug" { xfail *-*-* } }  */
+
+
+struct A { char a[5], b[5]; };
+
+const struct A s = { "1234", "12345" };
+
+T (s.a);
+T (&s.a[0]);
+T (&s.a[0] + 1);
+T (&s.a[0] + v0);
+T (&s.a[1]);
+T (&s.a[1] + 1);
+T (&s.a[1] + v0);
+
+T (&s.a[i0]);
+T (&s.a[i0] + i1);
+T (&s.a[i0] + v0);
+T (&s.a[i1]);
+T (&s.a[i1] + i1);
+T (&s.a[i1] + v0);
+
+T (s.b);              /* { dg-warning "nul" }  */
+T (&s.b[0]);          /* { dg-warning "nul" }  */
+T (&s.b[0] + 1);      /* { dg-warning "nul" }  */
+T (&s.b[0] + v0);     /* { dg-warning "nul" }  */
+T (&s.b[1]);          /* { dg-warning "nul" }  */
+T (&s.b[1] + 1);      /* { dg-warning "nul" }  */
+T (&s.b[1] + i0);     /* { dg-warning "nul" }  */
+T (&s.b[1] + v0);     /* { dg-warning "nul" }  */
+
+T (&s.b[i0]);         /* { dg-warning "nul" }  */
+T (&s.b[i0] + i1);    /* { dg-warning "nul" }  */
+T (&s.b[i0] + v0);    /* { dg-warning "nul" "pr86919" { xfail *-*-* } }  */
+T (&s.b[i1]);         /* { dg-warning "nul" }  */
+T (&s.b[i1] + i1);    /* { dg-warning "nul" }  */
+T (&s.b[i1] + v0);    /* { dg-warning "nul" "pr86919" { xfail *-*-* } }  */
+
+struct B { struct A a[2]; };
+
+const struct B ba[] = {
+  { { { "123", "12345" }, { "12345", "123" } } },
+  { { { "12345", "123" }, { "123", "12345" } } },
+  { { { "1", "12" },      { "123", "1234" } } },
+  { { { "123", "1234" },  { "12345", "12" } } }
+};
+
+T (ba[0].a[0].a);
+T (&ba[0].a[0].a[0]);
+T (&ba[0].a[0].a[0] + 1);
+T (&ba[0].a[0].a[0] + v0);
+T (&ba[0].a[0].a[1]);
+T (&ba[0].a[0].a[1] + 1);
+T (&ba[0].a[0].a[1] + v0);
+
+T (ba[0].a[0].b);           /* { dg-warning "nul" }  */
+T (&ba[0].a[0].b[0]);       /* { dg-warning "nul" }  */
+T (&ba[0].a[0].b[0] + 1);   /* { dg-warning "nul" }  */
+T (&ba[0].a[0].b[0] + v0);  /* { dg-warning "nul" }  */
+T (&ba[0].a[0].b[1]);       /* { dg-warning "nul" }  */
+T (&ba[0].a[0].b[1] + 1);   /* { dg-warning "nul" }  */
+T (&ba[0].a[0].b[1] + v0);  /* { dg-warning "nul" }  */
+
+T (ba[0].a[1].a);           /* { dg-warning "nul" }  */
+T (&ba[0].a[1].a[0]);       /* { dg-warning "nul" }  */
+T (&ba[0].a[1].a[0] + 1);   /* { dg-warning "nul" }  */
+T (&ba[0].a[1].a[0] + v0);  /* { dg-warning "nul" }  */
+T (&ba[0].a[1].a[1]);       /* { dg-warning "nul" }  */
+T (&ba[0].a[1].a[1] + 1);   /* { dg-warning "nul" }  */
+T (&ba[0].a[1].a[1] + v0);  /* { dg-warning "nul" }  */
+
+T (ba[0].a[1].b);
+T (&ba[0].a[1].b[0]);
+T (&ba[0].a[1].b[0] + 1);
+T (&ba[0].a[1].b[0] + v0);
+T (&ba[0].a[1].b[1]);
+T (&ba[0].a[1].b[1] + 1);
+T (&ba[0].a[1].b[1] + v0);
+
+
+T (ba[1].a[0].a);           /* { dg-warning "nul" }  */
+T (&ba[1].a[0].a[0]);       /* { dg-warning "nul" }  */
+T (&ba[1].a[0].a[0] + 1);   /* { dg-warning "nul" }  */
+T (&ba[1].a[0].a[0] + v0);  /* { dg-warning "nul" }  */
+T (&ba[1].a[0].a[1]);       /* { dg-warning "nul" }  */
+T (&ba[1].a[0].a[1] + 1);   /* { dg-warning "nul" }  */
+T (&ba[1].a[0].a[1] + v0);  /* { dg-warning "nul" }  */
+
+T (ba[1].a[0].b);
+T (&ba[1].a[0].b[0]);
+T (&ba[1].a[0].b[0] + 1);
+T (&ba[1].a[0].b[0] + v0);
+T (&ba[1].a[0].b[1]);
+T (&ba[1].a[0].b[1] + 1);
+T (&ba[1].a[0].b[1] + v0);
+
+T (ba[1].a[1].a);
+T (&ba[1].a[1].a[0]);
+T (&ba[1].a[1].a[0] + 1);
+T (&ba[1].a[1].a[0] + v0);
+T (&ba[1].a[1].a[1]);
+T (&ba[1].a[1].a[1] + 1);
+T (&ba[1].a[1].a[1] + v0);
+
+T (ba[1].a[1].b);           /* { dg-warning "nul" }  */
+T (&ba[1].a[1].b[0]);       /* { dg-warning "nul" }  */
+T (&ba[1].a[1].b[0] + 1);   /* { dg-warning "nul" }  */
+T (&ba[1].a[1].b[0] + v0);  /* { dg-warning "nul" }  */
+T (&ba[1].a[1].b[1]);       /* { dg-warning "nul" }  */
+T (&ba[1].a[1].b[1] + 1);   /* { dg-warning "nul" }  */
+T (&ba[1].a[1].b[1] + v0);  /* { dg-warning "nul" }  */
+
+
+T (ba[2].a[0].a);
+T (&ba[2].a[0].a[0]);
+T (&ba[2].a[0].a[0] + 1);
+T (&ba[2].a[0].a[0] + v0);
+T (&ba[2].a[0].a[1]);
+T (&ba[2].a[0].a[1] + 1);
+T (&ba[2].a[0].a[1] + v0);
+
+T (ba[2].a[0].b);
+T (&ba[2].a[0].b[0]);
+T (&ba[2].a[0].b[0] + 1);
+T (&ba[2].a[0].b[0] + v0);
+T (&ba[2].a[0].b[1]);
+T (&ba[2].a[0].b[1] + 1);
+T (&ba[2].a[0].b[1] + v0);
+
+T (ba[2].a[1].a);
+T (&ba[2].a[1].a[0]);
+T (&ba[2].a[1].a[0] + 1);
+T (&ba[2].a[1].a[0] + v0);
+T (&ba[2].a[1].a[1]);
+T (&ba[2].a[1].a[1] + 1);
+T (&ba[2].a[1].a[1] + v0);
+
+
+T (ba[3].a[0].a);
+T (&ba[3].a[0].a[0]);
+T (&ba[3].a[0].a[0] + 1);
+T (&ba[3].a[0].a[0] + v0);
+T (&ba[3].a[0].a[1]);
+T (&ba[3].a[0].a[1] + 1);
+T (&ba[3].a[0].a[1] + v0);
+
+T (ba[3].a[0].b);
+T (&ba[3].a[0].b[0]);
+T (&ba[3].a[0].b[0] + 1);
+T (&ba[3].a[0].b[0] + v0);
+T (&ba[3].a[0].b[1]);
+T (&ba[3].a[0].b[1] + 1);
+T (&ba[3].a[0].b[1] + v0);
+
+T (ba[3].a[1].a);           /* { dg-warning "nul" }  */
+T (&ba[3].a[1].a[0]);	    /* { dg-warning "nul" }  */
+T (&ba[3].a[1].a[0] + 1);   /* { dg-warning "nul" }  */
+T (&ba[3].a[1].a[0] + v0);  /* { dg-warning "nul" }  */
+T (&ba[3].a[1].a[1]);	    /* { dg-warning "nul" }  */
+T (&ba[3].a[1].a[1] + 1);   /* { dg-warning "nul" }  */
+T (&ba[3].a[1].a[1] + v0);  /* { dg-warning "nul" }  */
+
+T (ba[3].a[1].b);
+T (&ba[3].a[1].b[0]);	
+T (&ba[3].a[1].b[0] + 1);
+T (&ba[3].a[1].b[0] + v0);
+T (&ba[3].a[1].b[1]);	
+T (&ba[3].a[1].b[1] + 1);
+T (&ba[3].a[1].b[1] + v0);
+
+
+T (v0 ? ba[0].a[0].a : ba[0].a[0].b);           /* { dg-warning "nul" }  */
+T (v0 ? ba[0].a[0].a : ba[0].a[0].b);           /* { dg-warning "nul" }  */
+
+T (v0 ? &ba[0].a[0].a[0] : &ba[3].a[1].a[0]);   /* { dg-warning "nul" }  */
+T (v0 ? &ba[3].a[1].a[1] :  ba[0].a[0].a);      /* { dg-warning "nul" }  */
+
+T (v0 ? ba[0].a[0].a : ba[0].a[1].b);
+T (v0 ? ba[0].a[1].b : ba[0].a[0].a);
+
+/* Prune out warnings with no location (pr?????).
+   { dg-prune-output "cc1:" } */

Reply via email to