[PATCH] relax lower bound for infinite arguments in gimple-ssa-sprinf.c (PR 86274)

2018-07-03 Thread Martin Sebor

In computing the size of expected output for non-constant floating
arguments the sprintf pass doesn't consider the possibility that
the argument value may be not finite (i.e., it can be infinity or
NaN).  Infinities and NaNs are formatted as "inf" or "infinity"
and "nan".  As a result, any floating directive can produce as
few bytes on output as three for an non-finite argument, when
the least amount directives such as %f produce for finite
arguments is 8.

The attached patch adjusts the floating point code to correctly
reflect the lower bound.

Martin
PR tree-optimization/86274 - SEGFAULT when logging std::to_string(NAN)

gcc/ChangeLog:

	PR tree-optimization/86274
	* gimple-ssa-sprintf.c (fmtresult::type_max_digits): Verify
	precondition.
	(format_floating): Correct handling of infinities and NaNs.

gcc/testsuite/ChangeLog:

	PR tree-optimization/86274
	* gcc.dg/tree-ssa/builtin-sprintf-9.c: New test.
	* gcc.dg/tree-ssa/builtin-sprintf-warn-1.c: Adjust.
	* gcc.dg/tree-ssa/builtin-sprintf-warn-10.c: Same.
	* gcc.dg/tree-ssa/builtin-sprintf-warn-15.c: Same.
	* gcc.dg/tree-ssa/builtin-sprintf-warn-7.c: Same.
	* gcc.dg/tree-ssa/builtin-sprintf.c: Same.
	* gcc.dg/tree-ssa/pr83198.c: Same.

Index: gcc/gimple-ssa-sprintf.c
===
--- gcc/gimple-ssa-sprintf.c	(revision 262312)
+++ gcc/gimple-ssa-sprintf.c	(working copy)
@@ -781,15 +781,19 @@ unsigned
 fmtresult::type_max_digits (tree type, int base)
 {
   unsigned prec = TYPE_PRECISION (type);
-  if (base == 8)
-return (prec + 2) / 3;
+  switch (base)
+{
+case 8:
+  return (prec + 2) / 3;
+case 10:
+  /* Decimal approximation: yields 3, 5, 10, and 20 for precision
+	 of 8, 16, 32, and 64 bits.  */
+  return prec * 301 / 1000 + 1;
+case 16:
+  return prec / 4;
+}
 
-  if (base == 16)
-return prec / 4;
-
-  /* Decimal approximation: yields 3, 5, 10, and 20 for precision
- of 8, 16, 32, and 64 bits.  */
-  return prec * 301 / 1000 + 1;
+  gcc_unreachable ();
 }
 
 static bool
@@ -1759,6 +1763,11 @@ format_floating (const directive &dir, const HOST_
   unsigned flagmin = (1 /* for the first digit */
 		  + (dir.get_flag ('+') | dir.get_flag (' ')));
 
+  /* The minimum is 3 for "inf" and "nan" for all specifiers, plus 1
+ for the plus sign/space with the '+' and ' ' flags, respectively,
+ unless reduced below.  */
+  res.range.min = 2 + flagmin;
+
   /* When the pound flag is set the decimal point is included in output
  regardless of precision.  Whether or not a decimal point is included
  otherwise depends on the specification and precision.  */
@@ -1775,14 +1784,13 @@ format_floating (const directive &dir, const HOST_
 	else if (dir.prec[0] > 0)
 	  minprec = dir.prec[0] + !radix /* decimal point */;
 
-	res.range.min = (2 /* 0x */
-			 + flagmin
-			 + radix
-			 + minprec
-			 + 3 /* p+0 */);
+	res.range.likely = (2 /* 0x */
+			+ flagmin
+			+ radix
+			+ minprec
+			+ 3 /* p+0 */);
 
 	res.range.max = format_floating_max (type, 'a', prec[1]);
-	res.range.likely = res.range.min;
 
 	/* The unlikely maximum accounts for the longest multibyte
 	   decimal point character.  */
@@ -1800,15 +1808,14 @@ format_floating (const directive &dir, const HOST_
 	   non-zero, decimal point.  */
 	HOST_WIDE_INT minprec = prec[0] ? prec[0] + !radix : 0;
 
-	/* The minimum output is "[-+]1.234567e+00" regardless
+	/* The likely minimum output is "[-+]1.234567e+00" regardless
 	   of the value of the actual argument.  */
-	res.range.min = (flagmin
-			 + radix
-			 + minprec
-			 + 2 /* e+ */ + 2);
+	res.range.likely = (flagmin
+			+ radix
+			+ minprec
+			+ 2 /* e+ */ + 2);
 
 	res.range.max = format_floating_max (type, 'e', prec[1]);
-	res.range.likely = res.range.min;
 
 	/* The unlikely maximum accounts for the longest multibyte
 	   decimal point character.  */
@@ -1827,12 +1834,15 @@ format_floating (const directive &dir, const HOST_
 	   decimal point.  */
 	HOST_WIDE_INT minprec = prec[0] ? prec[0] + !radix : 0;
 
-	/* The lower bound when precision isn't specified is 8 bytes
-	   ("1.23456" since precision is taken to be 6).  When precision
-	   is zero, the lower bound is 1 byte (e.g., "1").  Otherwise,
-	   when precision is greater than zero, then the lower bound
-	   is 2 plus precision (plus flags).  */
-	res.range.min = flagmin + radix + minprec;
+	/* For finite numbers (i.e., not infinity or NaN) the lower bound
+	   when precision isn't specified is 8 bytes ("1.23456" since
+	   precision is taken to be 6).  When precision is zero, the lower
+	   bound is 1 byte (e.g., "1").  Otherwise, when precision is greater
+	   than zero, then the lower bound is 2 plus precision (plus flags).
+	   But in all cases, the lower bound is no greater than 3.  */
+	unsigned HOST_WIDE_INT min = flagmin + radix + minprec;
+	if (min < res.range.min)
+	  res.range.min = min;
 
 	/* Compute the upper bound for -TYPE_MAX.  */
 	

[PATCH] have pretty printer include NaN representation

2018-07-03 Thread Martin Sebor

The pretty-printer formats NaNs simply as Nan, even though
there is much more to a NaN than than that.  At the very
least, one might like to know if the NaN is signaling or
quiet, negative or positive.  If it's not in a canonical
form, one might also be interested in the significand
and exponent parts.  The attached patch enhances
the pretty printer to include all these details in
its detailed output.

Tested by bootstrapping & regtesting on x86_64-linux.

Martin
gcc/ChangeLog:

	* print-tree.c (print_real_cst): New function.
	(print_node_brief): Call it.
	(print_node): Ditto.

Index: gcc/print-tree.c
===
--- gcc/print-tree.c	(revision 262312)
+++ gcc/print-tree.c	(working copy)
@@ -52,6 +52,71 @@ dump_addr (FILE *file, const char *prefix, const v
 fprintf (file, "%s" HOST_PTR_PRINTF, prefix, addr);
 }
 
+/* Print to FILE a NODE representing a REAL_CST constant, including
+   Infinity and NaN.  Be verbose when BFRIEF is false.  */
+
+static void
+print_real_cst (FILE *file, const_tree node, bool brief)
+{
+  if (TREE_OVERFLOW (node))
+fprintf (file, " overflow");
+
+  REAL_VALUE_TYPE d = TREE_REAL_CST (node);
+  if (REAL_VALUE_ISINF (d))
+fprintf (file,  REAL_VALUE_NEGATIVE (d) ? " -Inf" : " Inf");
+  else if (REAL_VALUE_ISNAN (d))
+{
+  /* Print a NaN in the format [-][Q|S]NaN[(significand[exponent])]
+	 where significand is a hexadecimal string that starts with
+	 the 0x prefix followed by 0 if the number is not canonical
+	 and a non-zero digit if it is, and exponent is decimal.  */
+  unsigned start = 0;
+  const char *psig = (const char *) d.sig;
+  for (unsigned i = 0; i != sizeof d.sig; ++i)
+	if (psig[i])
+	  {
+	start = i;
+	break;
+	  }
+
+  fprintf (file, " %s%sNaN", d.sign ? "-" : "",
+	   d.signalling ? "S" : "Q");
+
+  if (brief)
+	return;
+
+  if (start)
+	fprintf (file, "(0x%s", d.canonical ? "" : "0");
+  else if (d.uexp)
+	fprintf (file, "(%s", d.canonical ? "" : "0");
+  else if (!d.canonical)
+	{
+	  fprintf (file, "(0)");
+	  return;
+	}
+
+  if (psig[start])
+	{
+	  for (unsigned i = start; i != sizeof d.sig; ++i)
+	if (i == start)
+	  fprintf (file, "%x", psig[i]);
+	else
+	  fprintf (file, "%02x", psig[i]);
+	}
+
+  if (d.uexp)
+	fprintf (file, "%se%u)", psig[start] ? "," : "", d.uexp);
+  else if (psig[start])
+	fputc (')', file);
+}
+  else
+{
+  char string[64];
+  real_to_decimal (string, &d, sizeof (string), 0, 1);
+  fprintf (file, " %s", string);
+}
+}
+
 /* Print a node in brief fashion, with just the code, address and name.  */
 
 void
@@ -121,24 +186,7 @@ print_node_brief (FILE *file, const char *prefix,
   print_dec (wi::to_wide (node), file, TYPE_SIGN (TREE_TYPE (node)));
 }
   if (TREE_CODE (node) == REAL_CST)
-{
-  REAL_VALUE_TYPE d;
-
-  if (TREE_OVERFLOW (node))
-	fprintf (file, " overflow");
-
-  d = TREE_REAL_CST (node);
-  if (REAL_VALUE_ISINF (d))
-	fprintf (file,  REAL_VALUE_NEGATIVE (d) ? " -Inf" : " Inf");
-  else if (REAL_VALUE_ISNAN (d))
-	fprintf (file, " Nan");
-  else
-	{
-	  char string[60];
-	  real_to_decimal (string, &d, sizeof (string), 0, 1);
-	  fprintf (file, " %s", string);
-	}
-}
+print_real_cst (file, node, true);
   if (TREE_CODE (node) == FIXED_CST)
 {
   FIXED_VALUE_TYPE f;
@@ -730,24 +778,7 @@ print_node (FILE *file, const char *prefix, tree n
 	  break;
 
 	case REAL_CST:
-	  {
-	REAL_VALUE_TYPE d;
-
-	if (TREE_OVERFLOW (node))
-	  fprintf (file, " overflow");
-
-	d = TREE_REAL_CST (node);
-	if (REAL_VALUE_ISINF (d))
-	  fprintf (file,  REAL_VALUE_NEGATIVE (d) ? " -Inf" : " Inf");
-	else if (REAL_VALUE_ISNAN (d))
-	  fprintf (file, " Nan");
-	else
-	  {
-		char string[64];
-		real_to_decimal (string, &d, sizeof (string), 0, 1);
-		fprintf (file, " %s", string);
-	  }
-	  }
+	  print_real_cst (file, node, false);
 	  break;
 
 	case FIXED_CST:


PING [PATCH] specify large command line option arguments (PR 82063)

2018-07-03 Thread Martin Sebor

Ping: https://gcc.gnu.org/ml/gcc-patches/2018-06/msg01509.html

On 06/24/2018 03:05 PM, Martin Sebor wrote:

Storing integer command line option arguments in type int
limits options such as -Wlarger-than= or -Walloca-larger-than
to at most INT_MAX (see bug 71905).  Larger values wrap around
zero.  The value zero is considered to disable the option,
making it impossible to specify a zero limit.

To get around these limitations, the -Walloc-size-larger-than=
option accepts a string argument that it then parses itself
and interprets as HOST_WIDE_INT.  The option also accepts byte
size suffixes like KB, MB, GiB, etc. to make it convenient to
specify very large limits.

The int limitation is obviously less than ideal in a 64-bit
world.  The treatment of zero as a toggle is just a minor wart.
The special treatment to make it work for just a single option
makes option handling inconsistent.  It should be possible for
any option that takes an integer argument to use the same logic.

The attached patch enhances GCC option processing to do that.
It changes the storage type of option arguments from int to
HOST_WIDE_INT and extends the existing (although undocumented)
option property Host_Wide_Int to specify wide option arguments.
It also introduces the ByteSize property for options for which
specifying the byte-size suffix makes sense.

To make it possible to consider zero as a meaningful argument
value rather than a flag indicating that an option is disabled
the patch also adds a CLVC_SIZE enumerator to the cl_var_type
enumeration, and modifies how options of the kind are handled.

Warning options that take large byte-size arguments can be
disabled by specifying a value equal to or greater than
HOST_WIDE_INT_M1U.  For convenience, aliases in the form of
-Wno-xxx-larger-than have been provided for all the affected
options.

In the patch all the existing -larger-than options are set
to PTRDIFF_MAX.  This makes them effectively enabled, but
because the setting is exceedingly permissive, and because
some of the existing warnings are already set to the same
value and some other checks detect and reject such exceedingly
large values with errors, this change shouldn't noticeably
affect what constructs are diagnosed.

Although all the options are set to PTRDIFF_MAX, I think it
would make sense to consider setting some of them lower, say
to PTRDIFF_MAX / 2.  I'd like to propose that in a followup
patch.

To minimize observable changes the -Walloca-larger-than and
-Wvla-larger-than warnings required more extensive work to
make of the new mechanism because of the "unbounded" argument
handling (the warnings trigger for arguments that are not
visibly constrained), and because of the zero handling
(the warnings also trigger


Martin





Re: [PATCH] relax lower bound for infinite arguments in gimple-ssa-sprinf.c (PR 86274)

2018-07-03 Thread Martin Sebor

Committed to trunk in r86274.  Jakub/Richard, can you please
also review and approve the corresponding fix for the release
branches?

Martin

On 07/03/2018 06:32 PM, Jeff Law wrote:

On 07/03/2018 04:50 PM, Martin Sebor wrote:

In computing the size of expected output for non-constant floating
arguments the sprintf pass doesn't consider the possibility that
the argument value may be not finite (i.e., it can be infinity or
NaN).  Infinities and NaNs are formatted as "inf" or "infinity"
and "nan".  As a result, any floating directive can produce as
few bytes on output as three for an non-finite argument, when
the least amount directives such as %f produce for finite
arguments is 8.

The attached patch adjusts the floating point code to correctly
reflect the lower bound.

Martin

gcc-86274.diff


PR tree-optimization/86274 - SEGFAULT when logging std::to_string(NAN)

gcc/ChangeLog:

PR tree-optimization/86274
* gimple-ssa-sprintf.c (fmtresult::type_max_digits): Verify
precondition.
(format_floating): Correct handling of infinities and NaNs.

gcc/testsuite/ChangeLog:

PR tree-optimization/86274
* gcc.dg/tree-ssa/builtin-sprintf-9.c: New test.
* gcc.dg/tree-ssa/builtin-sprintf-warn-1.c: Adjust.
* gcc.dg/tree-ssa/builtin-sprintf-warn-10.c: Same.
* gcc.dg/tree-ssa/builtin-sprintf-warn-15.c: Same.
* gcc.dg/tree-ssa/builtin-sprintf-warn-7.c: Same.
* gcc.dg/tree-ssa/builtin-sprintf.c: Same.
* gcc.dg/tree-ssa/pr83198.c: Same.

OK
jeff





Re: [RFC PATCH] diagnose built-in declarations without prototype (PR 83656)

2018-07-04 Thread Martin Sebor

On 07/03/2018 08:33 PM, Jeff Law wrote:

On 06/29/2018 09:54 AM, Martin Sebor wrote:


All the warnings I have seen are because of declarations like
the one in the test below that checks for the presence of symbol
sin in the library:

  char sin ();
  int main () { return sin (); }

GCC has warned for this code by default since at least 4.1 so if
it is, in fact, a problem it has been with us for over a decade.

There's a comment in the test that explains that the char return
type is deliberate to prevent GCC from treating the function as
a built-in.  (That, of course, relies on undefined behavior
because names of extern library functions are reserved and
conforming compilers could simply avoid emitting the call or
replace it with a trap.)

As you noted, by doing this the test can verify if there's a sin()
function in the library or not, regardless of whether or not the
compiler has a builtin for sin().

I wonder if stepping forward to a more modern version of autoconf is
going to help here and if we should be feeding them updates to make this
kind of stuff less pervasive, at least in standard autoconf tests.


That would make sense to me.  The tests should not rely on
undefined behavior.  They should declare standard functions with
the right prototypes.  IMO, for GCC and compatible compilers they
should disable built-in expansion instead via -fno-builtin.  For
all other compilers, they could store the address of each function
in a (perhaps volatile) pointer and use it to make the call instead.

I think the problem is they can't rely on the compiler having the
-fno-builtin flag.  Of course they're relying on other compilers having
the same kind of behavior as GCC WRT which is possibly worse.

I guess there's a reason why they didn't extract the set of symbols from
the library. :-)



But since the number of warnings here hasn't changed, the ones
in GCC logs predate my changes.  So updating the tests seems
like an improvement to consider independently of the patch.

Agreed.  I'm still wary of proceeding given the general concerns about
configure tests.  It's good that GCC's configury bits aren't affected,
but I'm not sure we can generalize a whole lot from that.


So what's the next step?  I'm open to relaxing the warning
so it only triggers with -Wall or -Wextra and not by default
if that's considered necessary.

At the same time, the instances of the warning we have seen
have all been issued for the configure tests for years and
we have not seen any new instances of it as a result of
this change, so the concern that the patch might lead to some
more while at the same time accepting the ones we know about
doesn't make sense to me.

Any new warning that's enabled by default has the potential
to trigger for invalid configure tests or any other such code.
I would expect tests deliberately written to rely on undefined
behavior to be prepared for compiler warnings, so I'm not sure
I understand the basis for the concern in this case.  I also
don't recall the concern being raised for newly added warnings
in the past that were enabled by default and I'm not sure I see
what makes this one different.

Martin


Re: [PATCH] relax lower bound for infinite arguments in gimple-ssa-sprinf.c (PR 86274)

2018-07-04 Thread Martin Sebor

On 07/04/2018 08:04 AM, Christophe Lyon wrote:

Hi,

On Wed, 4 Jul 2018 at 09:26, Richard Biener  wrote:


On Tue, 3 Jul 2018, Martin Sebor wrote:


Committed to trunk in r86274.  Jakub/Richard, can you please
also review and approve the corresponding fix for the release
branches?


If it is a regression and the patch was approved for trunk it is
automatically OK for release branches without further review unless
you think it is too invasive.


Will do.  Thanks for clarifying.




I've noticed a failure with this new test (on arm and aarch64, but
I've seen other targets are affected on gcc-testresults):
gcc.dg/tree-ssa/builtin-sprintf-9.c: dump file does not exist
UNRESOLVED: gcc.dg/tree-ssa/builtin-sprintf-9.c scan-tree-dump-times
optimized" "call_made_in_true_branch_" 6"

The test is compiled with -fdump-tree-optimized


Thanks.  There was a typo in the dg-final directive in the test
and I missed the UNRESOLVED result among all the others that
have recently started showing up.  I fixed the typo in r262419
and raised bug 86405 for the unresolved tests.

Martin



[PATCH] use TYPE_SIZE instead of TYPE_DOMAIN to compute array size (PR 86400)

2018-07-04 Thread Martin Sebor

A change of mine to the strlen pass assumes that the strlen
argument points to an object of the correct type and does
not correctly handle GIMPLE where the argument has the wrong
type such as in:

  extern char a[1][2];
  n = strlen (*a);

where the strlen pass actually sees

  n = strlen (a);

The attached patch corrects the code to use TYPE_SIZE to
determine the size of the array argument rather than using
TYPE_DOMAIN.

Tested on x86_64-linux.

Martin

PR tree-optimization/86400 - set::set::set

Re: [PATCH][Middle-end]3rd patch of PR78809

2018-07-05 Thread Martin Sebor

On 07/05/2018 09:46 AM, Qing Zhao wrote:

Hi,

I have sent two emails with the updated patches on 7/3:

https://gcc.gnu.org/ml/gcc-patches/2018-07/msg00065.html
https://gcc.gnu.org/ml/gcc-patches/2018-07/msg00070.html

however, these 2 emails  were not successfully forwarded to the 
gcc-patches@gcc.gnu.org mailing list.

So, I am sending the same email again in this one, hopefully this time it can 
go through.
Qing


Thanks for taking care to issue the warnings (and thanks to
Jeff for pointing it out)!

One of the basic design principles that I myself have
accidentally violated in the past is that warning options
should not impact the emitted object code.  I don't think
your patch actually does introduce this dependency by having
the codegen depend on the result of check_access() -- I'm
pretty sure the function is designed to do the validation
irrespective of warning options and return based on
the result of the validation and not based on whether
a warning was issued.  But the choice of the variable name,
no_overflow_warn, suggests that it does, in fact, have this
effect.  So I would suggest to rename the variable and add
a test that verifies that this dependency does not exist.

Beyond that, an enhancement to this optimization that might
be worth considering is inlining even non-constant calls
with array arguments whose size is no greater than the limit.
As in:

  extern char a[4], *b;

  int n = strcmp (a, b);

Because strcmp arguments are required to be nul-terminated
strings, a's length above must be at most 3.  This is analogous
to similar optimizations GCC performs, such as folding to zero
calls to strlen() with one-element arrays.

Martin



Hi, Jeff,

thanks a lot for your review and comments.

I have addressed your comments,updated the patch, retested on both
aarch64 and x86.

The major changes in this version compared to the previous version are:

1. in routine expand_builtin_memcmp:
* move the inlining transformation AFTER the warning is issues for
-Wstringop-overflow;
* only apply inlining when there is No warning is issued.
2. in the testsuite, add a new testcase strcmpopt_6.c for this case.
3. update comments to:
* capitalize the first word.
* capitalize all the arguments.

NOTE, the routine expand_builtin_strcmp and expand_builtin_strncmp are not 
changed.
the reason is:  there is NO overflow checking for these two routines currently.
if we need overflow checking for these two routines, I think that a separate 
patch is needed.
if this is needed, let me know, I can work on this separate patch for issuing 
warning for strcmp/strncmp when
-Wstringop-overflow is specified.

The new patch is as following, please take a look at it.

thanks.

Qing

gcc/ChangeLog

+2018-07-02  Qing Zhao  
+
+   PR middle-end/78809
+   * builtins.c (expand_builtin_memcmp): Inline the calls first
+   when result_eq is false.
+   (expand_builtin_strcmp): Inline the calls first.
+   (expand_builtin_strncmp): Likewise.
+   (inline_string_cmp): New routine. Expand a string compare
+   call by using a sequence of char comparison.
+   (inline_expand_builtin_string_cmp): New routine. Inline expansion
+   a call to str(n)cmp/memcmp.
+   * doc/invoke.texi (--param builtin-string-cmp-inline-length): New 
option.
+   * params.def (BUILTIN_STRING_CMP_INLINE_LENGTH): New.
+

gcc/testsuite/ChangeLog

+2018-07-02  Qing Zhao  
+
+   PR middle-end/78809
+   * gcc.dg/strcmpopt_5.c: New test.
+   * gcc.dg/strcmpopt_6.c: New test.
+






On Jun 28, 2018, at 12:10 AM, Jeff Law  wrote:


So this generally looks pretty good.  THe biggest technical concern is
making sure we're doing the right thing WRT issuing warnings.  You can
tackle that problem by deferring inlining to a later point after
warnings have been issued or by verifying that your routines do not
inline in cases where warnings will be issued.  It may be worth adding
testcases for these issues.

There's a large number of comments that need capitalization fixes.

Given there was no measured runtime performance impact, but slight
improvements on codesize for values <= 3, let's go ahead with that as
the default.

Can you address the issues above and repost for final review?

Thanks,
jeff






[PATCH] fold strlen() of aggregate members (PR 77357)

2018-07-05 Thread Martin Sebor

GCC folds accesses to members of constant aggregates except
for character arrays/strings.  For example, the strlen() call
below is not folded:

  const char a[][4] = { "1", "12" };

  int f (void) { retturn strlen (a[1]); }

The attached change set enhances the string_constant() function
to make it possible to extract string constants from aggregate
initializers (CONSTRUCTORS).

The initial solution was much simpler but as is often the case,
MEM_REF made it fail to fold things like:

  int f (void) { retturn strlen (a[1] + 1); }

Handling those made the project a bit more interesting and
the final solution somewhat more involved.

To handle offsets into aggregate string members the patch also
extends the fold_ctor_reference() function to extract entire
string array initializers even if the offset points past
the beginning of the string and even though the size and
exact type of the reference are not known (there isn't enough
information in a MEM_REF to determine that).

Tested along with the patch for PR 86415 on x86_64-linux.

Martin
PR middle-end/77357 - strlen of constant strings not folded

gcc/ChangeLog:

	* builtins.c (c_strlen): Avoid out-of-bounds warnings when
	accessing implicitly initialized array elements.
	* expr.c (string_constant): Handle string initializers of
	character arrays within aggregates.
	* gimple-fold.c (fold_array_ctor_reference): Add argument.
	Store element offset.  As a special case, handle zero size.
	(fold_nonarray_ctor_reference): Same.
	(fold_ctor_reference): Add argument.  Store subobject offset.
	* gimple-fold.h (fold_ctor_reference): Add argument.

gcc/testsuite/ChangeLog:

	PR middle-end/77357
	* gcc.dg/strlenopt-49.c: New test.
	* gcc.dg/strlenopt-50.c: New test.

diff --git a/gcc/builtins.c b/gcc/builtins.c
index 91658e8..2f9d5d7 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -602,8 +602,15 @@ c_strlen (tree src, int only_value)
 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (src;
 
   /* Set MAXELTS to sizeof (SRC) / sizeof (*SRC) - 1, the maximum possible
- length of SRC.  */
-  unsigned maxelts = TREE_STRING_LENGTH (src) / eltsize - 1;
+ length of SRC.  Prefer TYPE_SIZE() to TREE_STRING_LENGTH() if possible
+ in case the latter is less than the size of the array.  */
+  HOST_WIDE_INT maxelts = TREE_STRING_LENGTH (src);
+  tree type = TREE_TYPE (src);
+  if (tree size = TYPE_SIZE_UNIT (type))
+if (tree_fits_shwi_p (size))
+  maxelts = tree_to_uhwi (size);
+
+  maxelts = maxelts / eltsize - 1;
 
   /* PTR can point to the byte representation of any string type, including
  char* and wchar_t*.  */
diff --git a/gcc/expr.c b/gcc/expr.c
index 56751df..be3ab93 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -54,11 +54,13 @@ along with GCC; see the file COPYING3.  If not see
 #include "reload.h"
 #include "langhooks.h"
 #include "common/common-target.h"
+#include "tree-dfa.h"
 #include "tree-ssa-live.h"
 #include "tree-outof-ssa.h"
 #include "tree-ssa-address.h"
 #include "builtins.h"
 #include "ccmp.h"
+#include "gimple-fold.h"
 #include "rtx-vector-builder.h"
 
 
@@ -11274,54 +11276,20 @@ is_aligning_offset (const_tree offset, const_tree exp)
 tree
 string_constant (tree arg, tree *ptr_offset)
 {
-  tree array, offset, lower_bound;
+  tree array;
   STRIP_NOPS (arg);
 
+  poly_int64 base_off = 0;
+
   if (TREE_CODE (arg) == ADDR_EXPR)
 {
-  if (TREE_CODE (TREE_OPERAND (arg, 0)) == STRING_CST)
-	{
-	  *ptr_offset = size_zero_node;
-	  return TREE_OPERAND (arg, 0);
-	}
-  else if (TREE_CODE (TREE_OPERAND (arg, 0)) == VAR_DECL)
-	{
-	  array = TREE_OPERAND (arg, 0);
-	  offset = size_zero_node;
-	}
-  else if (TREE_CODE (TREE_OPERAND (arg, 0)) == ARRAY_REF)
-	{
-	  array = TREE_OPERAND (TREE_OPERAND (arg, 0), 0);
-	  offset = TREE_OPERAND (TREE_OPERAND (arg, 0), 1);
-	  if (TREE_CODE (array) != STRING_CST && !VAR_P (array))
-	return 0;
-
-	  /* Check if the array has a nonzero lower bound.  */
-	  lower_bound = array_ref_low_bound (TREE_OPERAND (arg, 0));
-	  if (!integer_zerop (lower_bound))
-	{
-	  /* If the offset and base aren't both constants, return 0.  */
-	  if (TREE_CODE (lower_bound) != INTEGER_CST)
-	return 0;
-	  if (TREE_CODE (offset) != INTEGER_CST)
-		return 0;
-	  /* Adjust offset by the lower bound.  */
-	  offset = size_diffop (fold_convert (sizetype, offset),
-fold_convert (sizetype, lower_bound));
-	}
-	}
-  else if (TREE_CODE (TREE_OPERAND (arg, 0)) == MEM_REF)
-	{
-	  array = TREE_OPERAND (TREE_OPERAND (arg, 0), 0);
-	  offset = TREE_OPERAND (TREE_OPERAND (arg, 0), 1);
-	  if (TREE_CODE (array) != ADDR_EXPR)
-	return 0;
-	  array = TREE_OPERAND (array, 0);
-	  if (TREE_CODE (array) != STRING_CST && !VAR_P (array))
-	return 0;
-	}
-  else
-	return 0;
+  arg = TREE_OPERAND (arg, 0);
+  array = get_addr_base_and_unit_offset (arg, &base_off);
+  if (!array
+	  || (TREE_CODE (array) != VAR_DECL
+	  && TREE_CODE (array) !=

[PATCH] fold strlen() of substrings within strings (PR 86415)

2018-07-05 Thread Martin Sebor

GCC folds strlen() calls to empty substrings within arrays
whose elements are explicitly initialized with NULs but
fails to do the same for elements that are zeroed out
implicitly.  For example:

  const char a[7] = "123\000\000\000";
  int f (void)
  {
return strlen (a + 5);   // folded
  }

but

  const char b[7] = "123";
  int g (void)
  {
return strlen (b + 5);   // not folded
  }

This is because the c_getstr() function only considers
the leading TREE_STRING_LENGTH() number of elements of
an array and not also the remaining elements up the full
size of the array the string may be stored in.

The attached patch enhances the function to also consider
those elements.  If there are more elements in the array
than TREE_STRING_LENGTH() evaluates to they must be all
NULs because the array is constant and initialized.

Tested along with the patch for PR 77357 on x86_64-linux.

Martin
PR tree-optimization/86415 - strlen() not folded for substrings within constant arrays

gcc/ChangeLog:

	PR tree-optimization/86415
	* fold-const.c (c_getstr): Handle substrings.

gcc/testsuite/ChangeLog:

	PR tree-optimization/86415
	* gcc.dg/strlenopt-48.c: New test.

diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 8476c22..1729348 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -14567,14 +14567,13 @@ fold_build_pointer_plus_hwi_loc (location_t loc, tree ptr, HOST_WIDE_INT off)
 			  ptr, size_int (off));
 }
 
-/* Return a char pointer for a C string if it is a string constant
-   or sum of string constant and integer constant.  We only support
-   string constants properly terminated with '\0' character.
-   If STRLEN is a valid pointer, length (including terminating character)
-   of returned string is stored to the argument.  */
+/* Return a char pointer for a C string if SRC refers to a NUL
+   terminated string constant or a NUL-terminated substring at
+   some offset within one.  If STRLEN is non-null, store
+   the length of the returned string in *STRLEN.  */
 
 const char *
-c_getstr (tree src, unsigned HOST_WIDE_INT *strlen)
+c_getstr (tree src, unsigned HOST_WIDE_INT *strlen /* = NULL */)
 {
   tree offset_node;
 
@@ -14594,18 +14593,37 @@ c_getstr (tree src, unsigned HOST_WIDE_INT *strlen)
 	offset = tree_to_uhwi (offset_node);
 }
 
+  /* STRING_LENGTH is the size of the string literal, including any
+ embedded NULs.  STRING_SIZE is the size of the array the string
+ literal is stored in.  */
   unsigned HOST_WIDE_INT string_length = TREE_STRING_LENGTH (src);
+  unsigned HOST_WIDE_INT string_size = string_length;
+  tree type = TREE_TYPE (src);
+  if (tree size = TYPE_SIZE_UNIT (type))
+if (tree_fits_shwi_p (size))
+  string_size = tree_to_uhwi (size);
+
   const char *string = TREE_STRING_POINTER (src);
 
-  /* Support only properly null-terminated strings.  */
+  /* Support only properly null-terminated strings but handle
+ consecutive strings within the same array, such as the six
+ substrings in "1\0002\0003".  */
   if (string_length == 0
   || string[string_length - 1] != '\0'
-  || offset >= string_length)
+  || offset >= string_size)
 return NULL;
 
   if (strlen)
-*strlen = string_length - offset;
-  return string + offset;
+{
+  /* Compute and store the length of the substring at OFFSET.
+	 All offsets past the initial length refer to null strings.  */
+  if (offset <= string_length)
+	*strlen = string_length - offset;
+  else
+	*strlen = 0;
+}
+
+  return offset <= string_length ? string + offset : "";
 }
 
 /* Given a tree T, compute which bits in T may be nonzero.  */
diff --git a/gcc/testsuite/gcc.dg/strlenopt-48.c b/gcc/testsuite/gcc.dg/strlenopt-48.c
new file mode 100644
index 000..1d1d368
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/strlenopt-48.c
@@ -0,0 +1,116 @@
+/* PR tree-optimization/86415 - strlen() not folded for substrings
+   within constant arrays
+   { dg-do compile }
+   { dg-options "-O2 -Wall -fdump-tree-gimple -fdump-tree-ccp" } */
+
+#include "strlenopt.h"
+
+#define CONCAT(x, y) x ## y
+#define CAT(x, y) CONCAT (x, y)
+#define FAILNAME(name) CAT (call_ ## name ##_on_line_, __LINE__)
+
+#define FAIL(name) do {\
+extern void FAILNAME (name) (void);		\
+FAILNAME (name)();\
+  } while (0)
+
+/* Macro to emit a call to funcation named
+ call_in_true_branch_not_eliminated_on_line_NNN()
+   for each call that's expected to be eliminated.  The dg-final
+   scan-tree-dump-time directive at the bottom of the test verifies
+   that no such call appears in output.  */
+#define ELIM(expr) \
+  if (!(expr)) FAIL (in_true_branch_not_eliminated); else (void)0
+
+#define T(s, n) ELIM (strlen (s) == n)
+
+/*  1
+	 0 1  23 4  567 8  901234  */
+#define STR "1\00012\000123\0001234\0"
+
+const char a[]   = STR;
+const char b[20] = STR;
+
+void test_literal (void)
+{
+  /* Verify that strlen() of substrings within a string literal are
+ correctly folded.

[PATCH] fold strings as long as the array they're stored in (PR 86428)

2018-07-07 Thread Martin Sebor

While working on other string folding improvements (PR 77357)
I came across another distinct case where GCC could do better:
it doesn't consider as candidates strings that have as many
elements as the size of the array they are stored in, even if
their length is within the bounds of the array.  For instance,
only the first strlen() call below is folded even though
the arguments to both are valid NUL-terminated strings.

   const char a[4] = "123";
   int f (void)
   {
 return strlen (a);   // folded
   }

   const char b[4] = "123\0";
   int g (void)
   {
 return strlen (b);   // not folded
   }

The attached tiny patch adjusts the the string_constant() function
to recognize as valid string constants all those whose length (as
returned by strlen()) is less than the size of the array they are
stored in.

Tested on x86_64-linux.

Testing of an earlier version of the patch exposed what I think
is a deficiency in the (very) early strlen folding: c_strlen()
folds expressions of the form strlen(A + I) to sizeof(A) - I when
A is an array of known size and I a non-const variable.  This not
only prevents -Warray-bounds from warning on invalid indices but
also defeats other, possibly more profitable optimizations based
on the range of the result of the strlen() call.  The logs show
that the code dates at least as far back as 1992.  With VRP and
other data flow based optimizations I think we can do better
today.  I opened bug 86434 to improve things.

Martin

PR tree-optimization/86428 - strlen of const array initialized with a string of the same length not folded

gcc/ChangeLog:

	PR tree-optimization/86428
	* expr.c (string_constant): Handle string literals of
	length up to the size of the array they are stored in.

gcc/testsuite/ChangeLog:

	PR tree-optimization/86428
	* gcc.dg/strlenopt-49.c: New test.
	* gcc.c-torture/execute/builtins/strlen-3.c: Adjust.

Index: gcc/expr.c
===
--- gcc/expr.c	(revision 262437)
+++ gcc/expr.c	(working copy)
@@ -11358,7 +11358,6 @@ string_constant (tree arg, tree *ptr_offset)
 }
   else if (VAR_P (array) || TREE_CODE (array) == CONST_DECL)
 {
-  int length;
   tree init = ctor_for_folding (array);
 
   /* Variables initialized to string literals can be handled too.  */
@@ -11367,22 +11366,25 @@ string_constant (tree arg, tree *ptr_offset)
 	  || TREE_CODE (init) != STRING_CST)
 	return 0;
 
-  /* Avoid const char foo[4] = "abcde";  */
-  if (DECL_SIZE_UNIT (array) == NULL_TREE
-	  || TREE_CODE (DECL_SIZE_UNIT (array)) != INTEGER_CST
-	  || (length = TREE_STRING_LENGTH (init)) <= 0
-	  || compare_tree_int (DECL_SIZE_UNIT (array), length) < 0)
-	return 0;
+  tree array_size = DECL_SIZE_UNIT (array);
+  if (!array_size || TREE_CODE (array_size) != INTEGER_CST)
+	return NULL_TREE;
 
-  /* If variable is bigger than the string literal, OFFSET must be constant
-	 and inside of the bounds of the string literal.  */
-  offset = fold_convert (sizetype, offset);
-  if (compare_tree_int (DECL_SIZE_UNIT (array), length) > 0
-	  && (! tree_fits_uhwi_p (offset)
-	  || compare_tree_int (offset, length) >= 0))
-	return 0;
+  /* Avoid returning a string that doesn't fit in the array
+	 it is stored in, like
+	   const char a[4] = "abcde";
+	 but do handle those that fit even if they have excess
+	 initializers, such as in
+	   const char a[4] = "abc\000\000";
+	 The excess elements contribute to TREE_STRING_LENGTH()
+	 but not to strlen().  */
+  unsigned HOST_WIDE_INT length = strlen (TREE_STRING_POINTER (init));
+  if (compare_tree_int (array_size, length + 1) < 0)
+	return NULL_TREE;
 
-  *ptr_offset = offset;
+  /* Callers should verify that OFFSET is within the bounds
+	 of the array and warn for out-of-bounds offsets.  */
+  *ptr_offset = fold_convert (sizetype, offset);
   return init;
 }
 
Index: gcc/testsuite/gcc.c-torture/execute/builtins/strlen-3.c
===
--- gcc/testsuite/gcc.c-torture/execute/builtins/strlen-3.c	(revision 262437)
+++ gcc/testsuite/gcc.c-torture/execute/builtins/strlen-3.c	(working copy)
@@ -2,8 +2,11 @@
 
Test strlen on const variables initialized to string literals.
 
-   Written by Jakub Jelinek, 9/14/2004.  */
+   Written by Jakub Jelinek, 9/14/2004.
 
+   { dg-do compile }
+   { dg-options "-O2 -Wall -fdump-tree-optimized" } */
+
 extern void abort (void);
 extern __SIZE_TYPE__ strlen (const char *);
 extern char *strcpy (char *, const char *);
@@ -10,7 +13,6 @@ extern char *strcpy (char *, const char *);
 static const char bar[] = "Hello, World!";
 static const char baz[] = "hello, world?";
 static const char larger[20] = "short string";
-extern int inside_main;
 
 int l1 = 1;
 int x = 6;
@@ -59,12 +61,10 @@ main_test(void)
   if (strlen (&larger[10]) != 2)
 abort ();
 
-  inside_main = 0;
-  /* This will result in strlen call, because larger

Re: [PATCH] fold strlen() of aggregate members (PR 77357)

2018-07-07 Thread Martin Sebor

On 07/06/2018 09:52 AM, Richard Biener wrote:

On Fri, Jul 6, 2018 at 1:54 AM Martin Sebor  wrote:


GCC folds accesses to members of constant aggregates except
for character arrays/strings.  For example, the strlen() call
below is not folded:

   const char a[][4] = { "1", "12" };

   int f (void) { retturn strlen (a[1]); }

The attached change set enhances the string_constant() function
to make it possible to extract string constants from aggregate
initializers (CONSTRUCTORS).

The initial solution was much simpler but as is often the case,
MEM_REF made it fail to fold things like:

   int f (void) { retturn strlen (a[1] + 1); }

Handling those made the project a bit more interesting and
the final solution somewhat more involved.

To handle offsets into aggregate string members the patch also
extends the fold_ctor_reference() function to extract entire
string array initializers even if the offset points past
the beginning of the string and even though the size and
exact type of the reference are not known (there isn't enough
information in a MEM_REF to determine that).

Tested along with the patch for PR 86415 on x86_64-linux.


+  if (TREE_CODE (init) == CONSTRUCTOR)
+   {
+ tree type;
+ if (TREE_CODE (arg) == ARRAY_REF
+ || TREE_CODE (arg) == MEM_REF)
+   type = TREE_TYPE (arg);
+ else if (TREE_CODE (arg) == COMPONENT_REF)
+   {
+ tree field = TREE_OPERAND (arg, 1);
+ type = TREE_TYPE (field);
+   }
+ else
+   return NULL_TREE;

what's wrong with just

type = TREE_TYPE (field);


In response to your comment below abut size I simplified things
further so determining the type a priori is no longer necessary.


?

+ base_off *= BITS_PER_UNIT;

poly_uint64 isn't enough for "bits", with wide-int you'd use offset_int,
for poly you'd then use poly_offset?


Okay, I tried to avoid the overflow.  (Converting between all
these flavors of wide int types is a monumental PITA.)



You extend fold_ctor_reference to treat size == 0 specially but then
bother to compute a size here - that looks unneeded?


Yes, well spotted, thanks!  I simplified the code so this isn't
necessary, and neither is the type.



While the offset of the reference determines the first field in the
CONSTRUCTOR, how do you know the access doesn't touch
adjacent ones?  STRING_CSTs do not have to be '\0' terminated,
so consider

  char x[2][4] = { "abcd", "abcd" };

and MEM[&x] with a char[8] type?  memcpy "inlining" will create
such MEMs for example.


The code is only used to find string constants in initializer
expressions where I don't think the size of the access comes
into play.  If a memcpy() call results in a MEM_REF[char[8],
&x, 8] that's fine.  It's a valid reference and we can still
get the underlying character sequence (which is represented
as two STRING_CSTs with the two string literals).  I might
be missing the point of your question.



@@ -6554,8 +6577,16 @@ fold_nonarray_ctor_reference (tree type, tree ctor,
   tree byte_offset = DECL_FIELD_OFFSET (cfield);
   tree field_offset = DECL_FIELD_BIT_OFFSET (cfield);
   tree field_size = DECL_SIZE (cfield);
-  offset_int bitoffset;
-  offset_int bitoffset_end, access_end;
+
+  if (!field_size && TREE_CODE (cval) == STRING_CST)
+   {
+ /* Determine the size of the flexible array member from
+the size of the string initializer provided for it.  */
+ unsigned HOST_WIDE_INT len = TREE_STRING_LENGTH (cval);
+ tree eltype = TREE_TYPE (TREE_TYPE (cval));
+ len *= tree_to_uhwi (TYPE_SIZE (eltype));
+ field_size = build_int_cst (size_type_node, len);
+   }

Why does this only apply to STRING_CST initializers and not CONSTRUCTORS,
say, for

struct S { int i; int a[]; } s = { 1, { 2, 3, 4, 5, 6 } };


I can't think of a use for it.  Do you have something in mind?



?  And why not use simply

  field_size = TYPE_SIZE (TREE_TYPE (cval));

like you do in c_strlen?


Yes, that's simpler, thanks.



Otherwise looks reasonable.


Attached is an updated patch.  I also enhanced the handling
of non-constant indices.  They were already handled before
to a smaller extent.  (There may be other opportunities
here.)

Martin
PR middle-end/77357 - strlen of constant strings not folded

gcc/ChangeLog:

	* builtins.c (c_strlen): Avoid out-of-bounds warnings when
	accessing implicitly initialized array elements.
	* expr.c (string_constant): Handle string initializers of
	character arrays within aggregates.
	* gimple-fold.c (fold_array_ctor_reference): Add argument.
	Store element offset.  As a special case, handle zero size.
	(fold_nonarray_ctor_reference): Same.
	(fold_ctor_reference): Add argument.  Store subobject offset.
	* gimple-fold.h (fold_ctor_reference

Re: [PATCH][Middle-end]3rd patch of PR78809

2018-07-09 Thread Martin Sebor

On 07/09/2018 01:28 PM, Qing Zhao wrote:

Hi, Martin,

thanks a lot for your comments.


On Jul 5, 2018, at 11:30 AM, Martin Sebor  wrote:

One of the basic design principles that I myself have
accidentally violated in the past is that warning options
should not impact the emitted object code.  I don't think
your patch actually does introduce this dependency by having
the codegen depend on the result of check_access() -- I'm
pretty sure the function is designed to do the validation
irrespective of warning options and return based on
the result of the validation and not based on whether
a warning was issued.  But the choice of the variable name,
no_overflow_warn, suggests that it does, in fact, have this
effect.  So I would suggest to rename the variable and add
a test that verifies that this dependency does not exist.


I agree that warning options should not impact the emitted object code.
and my current change seems violate this principle:

in the following change:

+  bool no_overflow_warn = true;

   /* Diagnose calls where the specified length exceeds the size of either
  object.  */
   if (warn_stringop_overflow)
 {
   tree size = compute_objsize (arg1, 0);
-  if (check_access (exp, /*dst=*/NULL_TREE, /*src=*/NULL_TREE, len,
-   /*maxread=*/NULL_TREE, size, /*objsize=*/NULL_TREE))
+  no_overflow_warn = check_access (exp, /*dst=*/NULL_TREE, 
/*src=*/NULL_TREE,
+  len, /*maxread=*/NULL_TREE, size,
+  /*objsize=*/NULL_TREE);
+  if (no_overflow_warn)
{
  size = compute_objsize (arg2, 0);
- check_access (exp, /*dst=*/NULL_TREE, /*src=*/NULL_TREE, len,
-   /*maxread=*/NULL_TREE, size, /*objsize=*/NULL_TREE);
+ no_overflow_warn = check_access (exp, /*dst=*/NULL_TREE, 
/*src=*/NULL_TREE,
+  len,  /*maxread=*/NULL_TREE, size,
+  /*objsize=*/NULL_TREE);
}
 }

+  /* Due to the performance benefit, always inline the calls first
+ when result_eq is false.  */
+  rtx result = NULL_RTX;
+
+  if (!result_eq && fcode != BUILT_IN_BCMP && no_overflow_warn)
+{
+  result = inline_expand_builtin_string_cmp (exp, target, true);
+  if (result)

The variable no_overflow_warn DEPENDs on the warning option 
warn_stringop_overflow, and this
variable is used to control the code generation.  such behavior seems violate 
the above mentioned
principle.

However, this is not a problem that can be easily fixed based on the the 
current design, which has the following issues as my
understanding:

1. the routine check_access issues warnings by default, then it seems 
necessary to guard the call
to this routine with the warning option;
2. then the returned value of the routine check_access has to depend on 
the warning option.

in order to fix the current problem I have, an approach is to rewrite the 
routine check_access to guard the issue warning inside
the routine with the warning option passed as an additional parameter.

let me know anything I am missing so far.


check_access() calls warning_at() to issue warnings, and that
function only issues warnings if they are enabled, so the guard
isn't necessary to make it work this way.


Beyond that, an enhancement to this optimization that might
be worth considering is inlining even non-constant calls
with array arguments whose size is no greater than the limit.
As in:

 extern char a[4], *b;

 int n = strcmp (a, b);

Because strcmp arguments are required to be nul-terminated
strings, a's length above must be at most 3.  This is analogous
to similar optimizations GCC performs, such as folding to zero
calls to strlen() with one-element arrays.


Yes, I agree that this will be another good enhancement to the strcmp inlining.

however, it’s not easy to be integrated with my current patch.  The major issue 
is:

 The inlined code for the strcmp call without string constant will be 
different than the inlined code for the
strcmp call with string constant,  then:

1. the default value for the threshold that control the maximum length 
of the string length for inlining will
be different than the one for the strcmp call with string constant,  more 
experiments need to be run and a new parameter
need to be added to control this;
2. the inlined transformed code will be different than the current one.

based on the above, I’d like to open a new PR to record this new enhancement 
and finish it with a new patch later.

what’s your opinion on this?


I'm not sure I see the issues above as problems and I would expect
the non-constant optimization to naturally handle the constant case
as well.  But if you prefer it that way, implementing the non-constant
optimization in a separate step sounds reasonable to me.  It's your
call.

Martin


Re: [PATCH] add support for strnlen (PR 81384)

2018-07-09 Thread Martin Sebor

On 07/09/2018 01:51 PM, Jeff Law wrote:

On 07/09/2018 08:36 AM, Aldy Hernandez wrote:

   { dg-do run }
   { do-options "-O2 -fno-tree-strlen" }  */

 I don't think this is doing anything.

If you look at the test run you can see that -fno-tree-strlen is never
passed (I think you actually mean -fno-optimize-strlen for that
matter).  Also, the builtins.exp harness runs your test for an
assortment of other flags, not just -O2.

This test is failing on my range branch for -Og, because
expand_builtin_strnlen() needs range info:

+  wide_int min, max;
+  enum value_range_type rng = get_range_info (bound, &min, &max);
+  if (rng != VR_RANGE)
+return NULL_RTX;

but interestingly enough, it seems to be calculated in the sprintf
pass as part of the DOM walk:

  /* First record ranges generated by this statement.  */
  evrp_range_analyzer.record_ranges_from_stmt (stmt, false);

It feels wrong that the sprintf warning pass is generating range info
that you may later depend on at rtl expansion time (and for a totally
unrelated thing-- strlen expansion).

We have a fair amount of expansion code these days that will use
globally valid range information if it's been computed.




I don't know if this is just a quirk of builtins.exp calling your test
with flags you didn't intend, but the inconsistency could cause
problems in the future.  Errr, or my present ;-).

Would it be too much to ask for you to either fix the flags being
passed down to the test, or better yet, find some non-sprintf
dependent way of calculating range info earlier?

I think the general issue here is if we do something like
-O <-fblahblahblah> -Wsprintf-blah

Where  is whatever aggregate of -f options are need to
disable any optimizer that generates range information.

In that case the sprintf warning pass becomes the only pass that
generates ranges.  So whether or not we run the sprintf warning pass
would affect the generated code.  And I think that's really the bigger
issue here -- warnings should affect code generation.

The question is what to do about it.  It probably wouldn't be too
terrible to have clients of the EVRP analyzer to specify if any
discovered ranges should be mirrored into the global range information.

Optimization passes would ask for the mirroring, the sprintf pass or any
other warning pass would not ask for mirroring.

Thoughts?


The sprintf pass doesn't just emit warnings -- it also performs
the sprintf optimizations, and the two functions are independent
of one another.  I'm pretty sure there are tests to verify that
it's so.

The sprintf optimization include replacing constant calls with
their results and setting ranges for others.  As you mentioned,
the sprintf pass isn't the only one in GCC that does the latter.
The strlen pass does as well, and so does gimple-fold.c.  There
is also a call to set_range_info() in tree-vect-loop-manip.c
though I'm not familiar with that one yet.

Martin


Re: [PATCH] add support for strnlen (PR 81384)

2018-07-09 Thread Martin Sebor

On 07/09/2018 08:36 AM, Aldy Hernandez wrote:

   { dg-do run }
   { do-options "-O2 -fno-tree-strlen" }  */

 I don't think this is doing anything.

If you look at the test run you can see that -fno-tree-strlen is never
passed (I think you actually mean -fno-optimize-strlen for that
matter).  Also, the builtins.exp harness runs your test for an
assortment of other flags, not just -O2.


I didn't know the harness ignores dg-options specified in these
tests.  That's surprising and feels like a bug in the harness
not to complain about it.  The purpose of the test is to verify
that the strnlen expansion in builtins.c does the right thing
and it deliberately tries to disable the earlier strlen
optimizations to make sure the expansion in builtins.c is fully
exercised.  By not pointing out my mistake the harness effectively
let me commit a change without making sure it's thoroughly tested
(I tested it manually before committing the patch but things could
regress without us noticing).  I'll look into fixing this somehow.



This test is failing on my range branch for -Og, because
expand_builtin_strnlen() needs range info:

+  wide_int min, max;
+  enum value_range_type rng = get_range_info (bound, &min, &max);
+  if (rng != VR_RANGE)
+return NULL_RTX;

but interestingly enough, it seems to be calculated in the sprintf
pass as part of the DOM walk:

  /* First record ranges generated by this statement.  */
  evrp_range_analyzer.record_ranges_from_stmt (stmt, false);

It feels wrong that the sprintf warning pass is generating range info
that you may later depend on at rtl expansion time (and for a totally
unrelated thing-- strlen expansion).


Any pass that records ranges for statements will have this
effect.  The sprintf pass seems to be the first one to make
use of this utility (and it's not just a warning pass but also
an optimization pass) but it would be a shame to put it off
limits to warning-only passes only because it happens to set
ranges.



I don't know if this is just a quirk of builtins.exp calling your test
with flags you didn't intend, but the inconsistency could cause
problems in the future.  Errr, or my present ;-).

Would it be too much to ask for you to either fix the flags being
passed down to the test, or better yet, find some non-sprintf
dependent way of calculating range info earlier?


At the time I wrote the test I didn't realize the statement
range info was being computed only in the sprintf pass --
I thought it was done as "a basic service for the greater
good" by VRP.  It seems that it should be such a service.

Let me look into tweaking the test.

Martin



Aldy
On Mon, Jun 18, 2018 at 6:35 PM Martin Sebor  wrote:


On 06/12/2018 03:11 PM, Jeff Law wrote:

On 06/05/2018 03:43 PM, Martin Sebor wrote:

The attached patch adds basic support for handling strnlen
as a built-in function.  It touches the strlen pass where
it folds constant results of the function, and builtins.c
to add simple support for expanding strnlen calls with known
results.  It also changes calls.c to detect excessive bounds
to the function and unsafe calls with arguments declared
attribute nonstring.

A side-effect of the strlen change I should call out is that
strlen() calls to all zero-length arrays that aren't considered
flexible array members (i.e., internal members or non-members)
are folded into zero.  No warning is issued for such invalid
uses of zero-length arrays but based on the responses to my
question Re: aliasing between internal zero-length-arrays and
other members(*) it sounds like one would be appropriate.
I will see about adding one in a separate patch.

Martin

[*] https://gcc.gnu.org/ml/gcc/2018-06/msg00046.html

gcc-81384.diff


PR tree-optimization/81384 - built-in form of strnlen missing

gcc/ChangeLog:

 PR tree-optimization/81384
 * builtin-types.def (BT_FN_SIZE_CONST_STRING_SIZE): New.
 * builtins.c (expand_builtin_strnlen): New function.
 (expand_builtin): Call it.
 (fold_builtin_n): Avoid setting TREE_NO_WARNING.
 * builtins.def (BUILT_IN_STRNLEN): New.
 * calls.c (maybe_warn_nonstring_arg): Handle BUILT_IN_STRNLEN.
 Warn for bounds in excess of maximum object size.
 * tree-ssa-strlen.c (maybe_set_strlen_range): Return tree representing
 single-value ranges.  Handle strnlen.
 (handle_builtin_strlen): Handle strnlen.
 (strlen_check_and_optimize_stmt): Same.

gcc/testsuite/ChangeLog:

 PR tree-optimization/81384
 * gcc.c-torture/execute/builtins/lib/strnlen.c: New test.
 * gcc.c-torture/execute/builtins/strnlen-lib.c: New test.
 * gcc.c-torture/execute/builtins/strnlen.c: New test.
 * gcc.dg/attr-nonstring-2.c: New test.
 * gcc.dg/attr-nonstring-3.c: New test.
 * gcc.dg/attr-nonstring-4.c: New test.
 * gcc.dg/strlenopt-44.c: New test.
 * gcc.dg/strlenopt.h (strnlen):  Declare.

diff --git a/gcc/builtin-types.def b/g

Re: [PATCH] fold strlen() of aggregate members (PR 77357)

2018-07-09 Thread Martin Sebor

On 07/09/2018 06:40 AM, Richard Biener wrote:

On Sun, Jul 8, 2018 at 4:56 AM Martin Sebor  wrote:


On 07/06/2018 09:52 AM, Richard Biener wrote:

On Fri, Jul 6, 2018 at 1:54 AM Martin Sebor  wrote:


GCC folds accesses to members of constant aggregates except
for character arrays/strings.  For example, the strlen() call
below is not folded:

   const char a[][4] = { "1", "12" };

   int f (void) { retturn strlen (a[1]); }

The attached change set enhances the string_constant() function
to make it possible to extract string constants from aggregate
initializers (CONSTRUCTORS).

The initial solution was much simpler but as is often the case,
MEM_REF made it fail to fold things like:

   int f (void) { retturn strlen (a[1] + 1); }

Handling those made the project a bit more interesting and
the final solution somewhat more involved.

To handle offsets into aggregate string members the patch also
extends the fold_ctor_reference() function to extract entire
string array initializers even if the offset points past
the beginning of the string and even though the size and
exact type of the reference are not known (there isn't enough
information in a MEM_REF to determine that).

Tested along with the patch for PR 86415 on x86_64-linux.


+  if (TREE_CODE (init) == CONSTRUCTOR)
+   {
+ tree type;
+ if (TREE_CODE (arg) == ARRAY_REF
+ || TREE_CODE (arg) == MEM_REF)
+   type = TREE_TYPE (arg);
+ else if (TREE_CODE (arg) == COMPONENT_REF)
+   {
+ tree field = TREE_OPERAND (arg, 1);
+ type = TREE_TYPE (field);
+   }
+ else
+   return NULL_TREE;

what's wrong with just

type = TREE_TYPE (field);


In response to your comment below abut size I simplified things
further so determining the type a priori is no longer necessary.


?

+ base_off *= BITS_PER_UNIT;

poly_uint64 isn't enough for "bits", with wide-int you'd use offset_int,
for poly you'd then use poly_offset?


Okay, I tried to avoid the overflow.  (Converting between all
these flavors of wide int types is a monumental PITA.)



You extend fold_ctor_reference to treat size == 0 specially but then
bother to compute a size here - that looks unneeded?


Yes, well spotted, thanks!  I simplified the code so this isn't
necessary, and neither is the type.



While the offset of the reference determines the first field in the
CONSTRUCTOR, how do you know the access doesn't touch
adjacent ones?  STRING_CSTs do not have to be '\0' terminated,
so consider

  char x[2][4] = { "abcd", "abcd" };

and MEM[&x] with a char[8] type?  memcpy "inlining" will create
such MEMs for example.


The code is only used to find string constants in initializer
expressions where I don't think the size of the access comes
into play.  If a memcpy() call results in a MEM_REF[char[8],
&x, 8] that's fine.  It's a valid reference and we can still
get the underlying character sequence (which is represented
as two STRING_CSTs with the two string literals).  I might
be missing the point of your question.


Maybe irrelevant for strlen folding depending on what you do
for missing '\0' termination.



@@ -6554,8 +6577,16 @@ fold_nonarray_ctor_reference (tree type, tree ctor,
   tree byte_offset = DECL_FIELD_OFFSET (cfield);
   tree field_offset = DECL_FIELD_BIT_OFFSET (cfield);
   tree field_size = DECL_SIZE (cfield);
-  offset_int bitoffset;
-  offset_int bitoffset_end, access_end;
+
+  if (!field_size && TREE_CODE (cval) == STRING_CST)
+   {
+ /* Determine the size of the flexible array member from
+the size of the string initializer provided for it.  */
+ unsigned HOST_WIDE_INT len = TREE_STRING_LENGTH (cval);
+ tree eltype = TREE_TYPE (TREE_TYPE (cval));
+ len *= tree_to_uhwi (TYPE_SIZE (eltype));
+ field_size = build_int_cst (size_type_node, len);
+   }

Why does this only apply to STRING_CST initializers and not CONSTRUCTORS,
say, for

struct S { int i; int a[]; } s = { 1, { 2, 3, 4, 5, 6 } };


I can't think of a use for it.  Do you have something in mind?


Well, you basically implemented a get-CONSTRUCTOR-elt-at-offset
which is useful in other parts of the compiler.  So I don't see why
it shouldn't work for general flex-arrays.



?  And why not use simply

  field_size = TYPE_SIZE (TREE_TYPE (cval));

like you do in c_strlen?


Yes, that's simpler, thanks.



Otherwise looks reasonable.


Attached is an updated patch.  I also enhanced the handling
of non-constant indices.  They were already handled before
to a smaller extent.  (There may be other opportunities
here.)


Please don't do functional changes to a patch in review, without
exactly pointing out the change.  It makes review inefficent for me.

Looks like it might be th

PING #2 [PATCH] specify large command line option arguments (PR 82063)

2018-07-09 Thread Martin Sebor

Ping #2: https://gcc.gnu.org/ml/gcc-patches/2018-06/msg01509.html

On 07/03/2018 08:12 PM, Martin Sebor wrote:

Ping: https://gcc.gnu.org/ml/gcc-patches/2018-06/msg01509.html

On 06/24/2018 03:05 PM, Martin Sebor wrote:

Storing integer command line option arguments in type int
limits options such as -Wlarger-than= or -Walloca-larger-than
to at most INT_MAX (see bug 71905).  Larger values wrap around
zero.  The value zero is considered to disable the option,
making it impossible to specify a zero limit.

To get around these limitations, the -Walloc-size-larger-than=
option accepts a string argument that it then parses itself
and interprets as HOST_WIDE_INT.  The option also accepts byte
size suffixes like KB, MB, GiB, etc. to make it convenient to
specify very large limits.

The int limitation is obviously less than ideal in a 64-bit
world.  The treatment of zero as a toggle is just a minor wart.
The special treatment to make it work for just a single option
makes option handling inconsistent.  It should be possible for
any option that takes an integer argument to use the same logic.

The attached patch enhances GCC option processing to do that.
It changes the storage type of option arguments from int to
HOST_WIDE_INT and extends the existing (although undocumented)
option property Host_Wide_Int to specify wide option arguments.
It also introduces the ByteSize property for options for which
specifying the byte-size suffix makes sense.

To make it possible to consider zero as a meaningful argument
value rather than a flag indicating that an option is disabled
the patch also adds a CLVC_SIZE enumerator to the cl_var_type
enumeration, and modifies how options of the kind are handled.

Warning options that take large byte-size arguments can be
disabled by specifying a value equal to or greater than
HOST_WIDE_INT_M1U.  For convenience, aliases in the form of
-Wno-xxx-larger-than have been provided for all the affected
options.

In the patch all the existing -larger-than options are set
to PTRDIFF_MAX.  This makes them effectively enabled, but
because the setting is exceedingly permissive, and because
some of the existing warnings are already set to the same
value and some other checks detect and reject such exceedingly
large values with errors, this change shouldn't noticeably
affect what constructs are diagnosed.

Although all the options are set to PTRDIFF_MAX, I think it
would make sense to consider setting some of them lower, say
to PTRDIFF_MAX / 2.  I'd like to propose that in a followup
patch.

To minimize observable changes the -Walloca-larger-than and
-Wvla-larger-than warnings required more extensive work to
make of the new mechanism because of the "unbounded" argument
handling (the warnings trigger for arguments that are not
visibly constrained), and because of the zero handling
(the warnings also trigger


Martin







Re: [PATCH] add support for strnlen (PR 81384)

2018-07-10 Thread Martin Sebor

On 07/10/2018 01:12 AM, Richard Biener wrote:

On Mon, Jul 9, 2018 at 11:26 PM Martin Sebor  wrote:


On 07/09/2018 08:36 AM, Aldy Hernandez wrote:

   { dg-do run }
   { do-options "-O2 -fno-tree-strlen" }  */

 I don't think this is doing anything.

If you look at the test run you can see that -fno-tree-strlen is never
passed (I think you actually mean -fno-optimize-strlen for that
matter).  Also, the builtins.exp harness runs your test for an
assortment of other flags, not just -O2.


I didn't know the harness ignores dg-options specified in these
tests.  That's surprising and feels like a bug in the harness
not to complain about it.  The purpose of the test is to verify
that the strnlen expansion in builtins.c does the right thing
and it deliberately tries to disable the earlier strlen
optimizations to make sure the expansion in builtins.c is fully
exercised.  By not pointing out my mistake the harness effectively
let me commit a change without making sure it's thoroughly tested
(I tested it manually before committing the patch but things could
regress without us noticing).  I'll look into fixing this somehow.



This test is failing on my range branch for -Og, because
expand_builtin_strnlen() needs range info:

+  wide_int min, max;
+  enum value_range_type rng = get_range_info (bound, &min, &max);
+  if (rng != VR_RANGE)
+return NULL_RTX;

but interestingly enough, it seems to be calculated in the sprintf
pass as part of the DOM walk:

  /* First record ranges generated by this statement.  */
  evrp_range_analyzer.record_ranges_from_stmt (stmt, false);

It feels wrong that the sprintf warning pass is generating range info
that you may later depend on at rtl expansion time (and for a totally
unrelated thing-- strlen expansion).


Any pass that records ranges for statements will have this
effect.  The sprintf pass seems to be the first one to make
use of this utility (and it's not just a warning pass but also
an optimization pass) but it would be a shame to put it off
limits to warning-only passes only because it happens to set
ranges.


As you noted elsewhere warning options shouldn't change code-generation.
This means that ranges may not be set to the IL in case they are only
computed when a warning option is enabled.


I agree.  That's also why I think it should be a basic service
rather than a side-effect of tree traversal, either one done
to implement a particular optimization, or one done by a warning.

The main reason for the work Jeff put in to extracting it out
of EVRP, if I recall correctly, was to expose more accurate
range information to warning passes.  Would setting statement
ranges make sense as part of EVRP (or some other similar pass)?
If not, the only way to conform to the policy that I can think
of is to have warning-only  passes that need it make
the traversal and call record_ranges regardless of whether or
not the warning itself is enabled.  That would seem needlessly
inefficient, even if the code implementing the warning itself
were disabled.

Martin


Re: [PATCH] add support for strnlen (PR 81384)

2018-07-10 Thread Martin Sebor

On 07/10/2018 08:34 AM, Jeff Law wrote:

On 07/10/2018 08:25 AM, Richard Biener wrote:

On Tue, Jul 10, 2018 at 4:10 PM Martin Sebor  wrote:


On 07/10/2018 01:12 AM, Richard Biener wrote:

On Mon, Jul 9, 2018 at 11:26 PM Martin Sebor  wrote:


On 07/09/2018 08:36 AM, Aldy Hernandez wrote:

   { dg-do run }
   { do-options "-O2 -fno-tree-strlen" }  */

 I don't think this is doing anything.

If you look at the test run you can see that -fno-tree-strlen is never
passed (I think you actually mean -fno-optimize-strlen for that
matter).  Also, the builtins.exp harness runs your test for an
assortment of other flags, not just -O2.


I didn't know the harness ignores dg-options specified in these
tests.  That's surprising and feels like a bug in the harness
not to complain about it.  The purpose of the test is to verify
that the strnlen expansion in builtins.c does the right thing
and it deliberately tries to disable the earlier strlen
optimizations to make sure the expansion in builtins.c is fully
exercised.  By not pointing out my mistake the harness effectively
let me commit a change without making sure it's thoroughly tested
(I tested it manually before committing the patch but things could
regress without us noticing).  I'll look into fixing this somehow.



This test is failing on my range branch for -Og, because
expand_builtin_strnlen() needs range info:

+  wide_int min, max;
+  enum value_range_type rng = get_range_info (bound, &min, &max);
+  if (rng != VR_RANGE)
+return NULL_RTX;

but interestingly enough, it seems to be calculated in the sprintf
pass as part of the DOM walk:

  /* First record ranges generated by this statement.  */
  evrp_range_analyzer.record_ranges_from_stmt (stmt, false);

It feels wrong that the sprintf warning pass is generating range info
that you may later depend on at rtl expansion time (and for a totally
unrelated thing-- strlen expansion).


Any pass that records ranges for statements will have this
effect.  The sprintf pass seems to be the first one to make
use of this utility (and it's not just a warning pass but also
an optimization pass) but it would be a shame to put it off
limits to warning-only passes only because it happens to set
ranges.


As you noted elsewhere warning options shouldn't change code-generation.
This means that ranges may not be set to the IL in case they are only
computed when a warning option is enabled.


I agree.  That's also why I think it should be a basic service
rather than a side-effect of tree traversal, either one done
to implement a particular optimization, or one done by a warning.

The main reason for the work Jeff put in to extracting it out
of EVRP, if I recall correctly, was to expose more accurate
range information to warning passes.  Would setting statement
ranges make sense as part of EVRP (or some other similar pass)?
If not, the only way to conform to the policy that I can think
of is to have warning-only  passes that need it make
the traversal and call record_ranges regardless of whether or
not the warning itself is enabled.  That would seem needlessly
inefficient, even if the code implementing the warning itself
were disabled.


Well, simply not set range-info on SSA names but use the
lattice values?  Should be easy to key that to a flag.

Right.  That was essentially my suggestion.  For a client that uses EVRP
analysis to drive warnings, they do not mirror results into the global
range info we attach to SSA_NAMEs.  An optimization pass which utilizes
EVRP can (of course) set the global range info.

THe sprintf bits are a bit tricky as it's a mix of warning and
optimization, but I think there's enough separation that we can arrange
to do the right thing.

Since I introduced EVRP into the sprintf bits, I'm happy to own fixing
this up.  I just wanted to get general agreement on the approach.


I'm not sure I understand what about the sprintf pass needs
changing since that the warning is independent of the optimization.
The gate function makes the intent clear:

pass_sprintf_length::gate (function *)
{
  /* Run the pass iff -Warn-format-overflow or -Warn-format-truncation
 is specified and either not optimizing and the pass is being invoked
 early, or when optimizing and the pass is being invoked during
 optimization (i.e., "late").  */
  return ((warn_format_overflow > 0
   || warn_format_trunc > 0
   || flag_printf_return_value)
  && (optimize > 0) == fold_return_value);
}

The only other use of the warn_format_overflow and
warn_format_trunc variables is to set the warn_level variable,
and that one is only used to affect the LIKELY counter which
is used for warnings only but not for optimization.

Am I missing something?

Martin


Re: [PATCH][Middle-end]3rd patch of PR78809

2018-07-10 Thread Martin Sebor

On 07/10/2018 09:14 AM, Qing Zhao wrote:



On Jul 9, 2018, at 3:25 PM, Martin Sebor  wrote:

check_access() calls warning_at() to issue warnings, and that
function only issues warnings if they are enabled, so the guard
isn't necessary to make it work this way.


Okay I see.

then, in the current code: (for routine expand_builtin_memcmp)

  /* Diagnose calls where the specified length exceeds the size of either
 object.  */
  if (warn_stringop_overflow)
{
  tree size = compute_objsize (arg1, 0);
  if (check_access (exp, /*dst=*/NULL_TREE, /*src=*/NULL_TREE, len,
/*maxread=*/NULL_TREE, size, /*objsize=*/NULL_TREE))
{
  size = compute_objsize (arg2, 0);
  check_access (exp, /*dst=*/NULL_TREE, /*src=*/NULL_TREE, len,
/*maxread=*/NULL_TREE, size, /*objsize=*/NULL_TREE);
}
}

Is the above condition on variable warn_stringop_overflow unnecessary?
all the warnings inside check_access are controlled by OPT_Wstringop_overflow_.

can I safely delete the above condition if (warn_stringop_overflow)?


I think the check above is only there to avoid the overhead
of the two calls to compute_objsize and check_access.  There
are a few more like it in other functions in the file and
they all should be safe to remove, but also safe to keep.
(Some of them might make it easy to inadvertently introduce
a dependency between the warning option and an optimization
so that's something to consider.)


Beyond that, an enhancement to this optimization that might
be worth considering is inlining even non-constant calls
with array arguments whose size is no greater than the limit.
As in:

extern char a[4], *b;

int n = strcmp (a, b);

Because strcmp arguments are required to be nul-terminated
strings, a's length above must be at most 3.  This is analogous
to similar optimizations GCC performs, such as folding to zero
calls to strlen() with one-element arrays.


Yes, I agree that this will be another good enhancement to the strcmp inlining.

however, it’s not easy to be integrated with my current patch.  The major issue 
is:

 The inlined code for the strcmp call without string constant will be 
different than the inlined code for the
strcmp call with string constant,  then:

1. the default value for the threshold that control the maximum length 
of the string length for inlining will
be different than the one for the strcmp call with string constant,  more 
experiments need to be run and a new parameter
need to be added to control this;
2. the inlined transformed code will be different than the current one.

based on the above, I’d like to open a new PR to record this new enhancement 
and finish it with a new patch later.

what’s your opinion on this?


I'm not sure I see the issues above as problems and I would expect
the non-constant optimization to naturally handle the constant case
as well.  But if you prefer it that way, implementing the non-constant
optimization in a separate step sounds reasonable to me.  It's your
call.


the inlined code for call to strcmp with constant string will only have one 
load instruction for each byte, but for call to strcmp
without constant string, there will be  two load instructions for each byte.  
So, the run time performance impact will be different.
we need separate default values of the maximum length of the string to enable 
the transformation.


You're right, that's true for builtins.c where all we have to
work with is arrays with unknown contents and string literals.
The strlen pass, on the other hand, has access to the lengths
of even unknown strings.  That suggests that an even better
place for the optimization might be the strlen pass where
the folding could happen earlier and at a higher level, which
might even obviate having to worry about the constant vs non-
constant handling.


I will create a PR on this and add a new patch after this one.


Sure, one step at a time makes sense.  I don't think there is
any harm in having the optimization in two places: builtins.c
and strlen.

Martin



thanks.

Qing





Re: [PATCH] fold strlen() of aggregate members (PR 77357)

2018-07-11 Thread Martin Sebor

On 07/11/2018 07:50 AM, Andre Vieira (lists) wrote:

On 11/07/18 11:00, Andre Vieira (lists) wrote:

On 09/07/18 22:44, Martin Sebor wrote:

On 07/09/2018 06:40 AM, Richard Biener wrote:

On Sun, Jul 8, 2018 at 4:56 AM Martin Sebor  wrote:


On 07/06/2018 09:52 AM, Richard Biener wrote:

On Fri, Jul 6, 2018 at 1:54 AM Martin Sebor  wrote:


GCC folds accesses to members of constant aggregates except
for character arrays/strings.  For example, the strlen() call
below is not folded:

   const char a[][4] = { "1", "12" };

   int f (void) { retturn strlen (a[1]); }

The attached change set enhances the string_constant() function
to make it possible to extract string constants from aggregate
initializers (CONSTRUCTORS).

The initial solution was much simpler but as is often the case,
MEM_REF made it fail to fold things like:

   int f (void) { retturn strlen (a[1] + 1); }

Handling those made the project a bit more interesting and
the final solution somewhat more involved.

To handle offsets into aggregate string members the patch also
extends the fold_ctor_reference() function to extract entire
string array initializers even if the offset points past
the beginning of the string and even though the size and
exact type of the reference are not known (there isn't enough
information in a MEM_REF to determine that).

Tested along with the patch for PR 86415 on x86_64-linux.


+  if (TREE_CODE (init) == CONSTRUCTOR)
+   {
+ tree type;
+ if (TREE_CODE (arg) == ARRAY_REF
+ || TREE_CODE (arg) == MEM_REF)
+   type = TREE_TYPE (arg);
+ else if (TREE_CODE (arg) == COMPONENT_REF)
+   {
+ tree field = TREE_OPERAND (arg, 1);
+ type = TREE_TYPE (field);
+   }
+ else
+   return NULL_TREE;

what's wrong with just

type = TREE_TYPE (field);


In response to your comment below abut size I simplified things
further so determining the type a priori is no longer necessary.


?

+ base_off *= BITS_PER_UNIT;

poly_uint64 isn't enough for "bits", with wide-int you'd use
offset_int,
for poly you'd then use poly_offset?


Okay, I tried to avoid the overflow.  (Converting between all
these flavors of wide int types is a monumental PITA.)



You extend fold_ctor_reference to treat size == 0 specially but then
bother to compute a size here - that looks unneeded?


Yes, well spotted, thanks!  I simplified the code so this isn't
necessary, and neither is the type.



While the offset of the reference determines the first field in the
CONSTRUCTOR, how do you know the access doesn't touch
adjacent ones?  STRING_CSTs do not have to be '\0' terminated,
so consider

  char x[2][4] = { "abcd", "abcd" };

and MEM[&x] with a char[8] type?  memcpy "inlining" will create
such MEMs for example.


The code is only used to find string constants in initializer
expressions where I don't think the size of the access comes
into play.  If a memcpy() call results in a MEM_REF[char[8],
&x, 8] that's fine.  It's a valid reference and we can still
get the underlying character sequence (which is represented
as two STRING_CSTs with the two string literals).  I might
be missing the point of your question.


Maybe irrelevant for strlen folding depending on what you do
for missing '\0' termination.



@@ -6554,8 +6577,16 @@ fold_nonarray_ctor_reference (tree type, tree
ctor,
   tree byte_offset = DECL_FIELD_OFFSET (cfield);
   tree field_offset = DECL_FIELD_BIT_OFFSET (cfield);
   tree field_size = DECL_SIZE (cfield);
-  offset_int bitoffset;
-  offset_int bitoffset_end, access_end;
+
+  if (!field_size && TREE_CODE (cval) == STRING_CST)
+   {
+ /* Determine the size of the flexible array member from
+the size of the string initializer provided for it.  */
+ unsigned HOST_WIDE_INT len = TREE_STRING_LENGTH (cval);
+ tree eltype = TREE_TYPE (TREE_TYPE (cval));
+ len *= tree_to_uhwi (TYPE_SIZE (eltype));
+ field_size = build_int_cst (size_type_node, len);
+   }

Why does this only apply to STRING_CST initializers and not
CONSTRUCTORS,
say, for

struct S { int i; int a[]; } s = { 1, { 2, 3, 4, 5, 6 } };


I can't think of a use for it.  Do you have something in mind?


Well, you basically implemented a get-CONSTRUCTOR-elt-at-offset
which is useful in other parts of the compiler.  So I don't see why
it shouldn't work for general flex-arrays.



?  And why not use simply

  field_size = TYPE_SIZE (TREE_TYPE (cval));

like you do in c_strlen?


Yes, that's simpler, thanks.



Otherwise looks reasonable.


Attached is an updated patch.  I also enhanced the handling
of non-constant indices.  They were already handled before
to a smaller extent.  (There may be other opportunities
here.)


Please don't do fu

[PATCH] reject conflicting attributes before calling handlers (PR 86453)

2018-07-11 Thread Martin Sebor

The attached change set adjusts the attribute exclusion code
to detect and reject incompatible attributes before attribute
handlers are called to have a chance to make changes despite
the exclusions.  The handlers are not run when a conflict is
found.

Tested on x86_64-linux.  I expected the fallout to be bigger
but only a handful of tests needed adjusting and the changes
all look like clear improvements.  I.e., conflicting attributes
that diagnosed as being ignored really are being ignored as one
would expect.

Martin
PR c/86453 - error: type variant differs by TYPE_PACKED in free_lang_data since r255469

gcc/ChangeLog:

	PR c/86453
	* attribs.c (decl_attributes): Reject conflicting attributes before
	calling attribute handlers.

gcc/testsuite/ChangeLog:

	PR c/86453
	* c-c++-common/Wattributes.c: Adjust.
	* gcc.dg/Wattributes-10.c: New test.
	* g++.dg/Wattributes-3.C: Adjust.
	* g++.dg/lto/pr86453_0.C: New test.
	* gcc.dg/Wattributes-6.c: Adjust.
	* gcc.dg/pr18079.c: Adjust.
	* gcc.dg/torture/pr42363.c: Adjust.

Index: gcc/attribs.c
===
--- gcc/attribs.c	(revision 262542)
+++ gcc/attribs.c	(working copy)
@@ -672,6 +672,35 @@ decl_attributes (tree *node, tree attributes, int
 
   bool no_add_attrs = false;
 
+  /* Check for exclusions with other attributes on the current
+	 declation as well as the last declaration of the same
+	 symbol already processed (if one exists).  Detect and
+	 reject incompatible attributes.  */
+  bool built_in = flags & ATTR_FLAG_BUILT_IN;
+  if (spec->exclude
+	  && (flag_checking || !built_in))
+	{
+	  /* Always check attributes on user-defined functions.
+	 Check them on built-ins only when -fchecking is set.
+	 Ignore __builtin_unreachable -- it's both const and
+	 noreturn.  */
+
+	  if (!built_in
+	  || !DECL_P (*anode)
+	  || (DECL_FUNCTION_CODE (*anode) != BUILT_IN_UNREACHABLE
+		  && (DECL_FUNCTION_CODE (*anode)
+		  != BUILT_IN_UBSAN_HANDLE_BUILTIN_UNREACHABLE)))
+	{
+	  bool no_add = diag_attr_exclusions (last_decl, *anode, name, spec);
+	  if (!no_add && anode != node)
+		no_add = diag_attr_exclusions (last_decl, *node, name, spec);
+	  no_add_attrs |= no_add;
+	}
+	}
+
+  if (no_add_attrs)
+	continue;
+
   if (spec->handler != NULL)
 	{
 	  int cxx11_flag =
@@ -695,33 +724,6 @@ decl_attributes (tree *node, tree attributes, int
 	returned_attrs = chainon (ret, returned_attrs);
 	}
 
-  /* If the attribute was successfully handled on its own and is
-	 about to be added check for exclusions with other attributes
-	 on the current declation as well as the last declaration of
-	 the same symbol already processed (if one exists).  */
-  bool built_in = flags & ATTR_FLAG_BUILT_IN;
-  if (spec->exclude
-	  && !no_add_attrs
-	  && (flag_checking || !built_in))
-	{
-	  /* Always check attributes on user-defined functions.
-	 Check them on built-ins only when -fchecking is set.
-	 Ignore __builtin_unreachable -- it's both const and
-	 noreturn.  */
-
-	  if (!built_in
-	  || !DECL_P (*anode)
-	  || (DECL_FUNCTION_CODE (*anode) != BUILT_IN_UNREACHABLE
-		  && (DECL_FUNCTION_CODE (*anode)
-		  != BUILT_IN_UBSAN_HANDLE_BUILTIN_UNREACHABLE)))
-	{
-	  bool no_add = diag_attr_exclusions (last_decl, *anode, name, spec);
-	  if (!no_add && anode != node)
-		no_add = diag_attr_exclusions (last_decl, *node, name, spec);
-	  no_add_attrs |= no_add;
-	}
-	}
-
   /* Layout the decl in case anything changed.  */
   if (spec->type_required && DECL_P (*node)
 	  && (VAR_P (*node)
Index: gcc/testsuite/c-c++-common/Wattributes.c
===
--- gcc/testsuite/c-c++-common/Wattributes.c	(revision 262542)
+++ gcc/testsuite/c-c++-common/Wattributes.c	(working copy)
@@ -39,13 +39,13 @@ PackedPacked { int i; };
aligned and packed on a function declaration.  */
 
 void ATTR ((aligned (8), packed))
-faligned8_1 (void);   /* { dg-warning ".packed. attribute ignored" } */
+faligned8_1 (void);   /* { dg-warning "ignoring attribute .packed. because it conflicts with attribute .aligned." } */
 
 void ATTR ((aligned (8)))
-faligned8_2 (void);   /* { dg-message "previous declaration here" "" { xfail *-*-* } } */
+faligned8_2 (void);   /* { dg-message "previous declaration here" } */
 
 void ATTR ((packed))
-faligned8_2 (void);   /* { dg-warning ".packed. attribute ignored" } */
+faligned8_2 (void);   /* { dg-warning "ignoring attribute .packed. because it conflicts with attribute .aligned." } */
 
 /* Exercise the handling of the mutually exclusive attributes
always_inline and noinline (in that order).  */
Index: gcc/testsuite/g++.dg/Wattributes-3.C
===
--- gcc/testsuite/g++.dg/Wattributes-3.C	(revision 262542)
+++ gcc/testsuite/g++.dg/Wa

Re: [PATCH] reject conflicting attributes before calling handlers (PR 86453)

2018-07-13 Thread Martin Sebor

On 07/13/2018 02:53 AM, Christophe Lyon wrote:

Hi,

On Thu, 12 Jul 2018 at 00:04, Martin Sebor  wrote:


The attached change set adjusts the attribute exclusion code
to detect and reject incompatible attributes before attribute
handlers are called to have a chance to make changes despite
the exclusions.  The handlers are not run when a conflict is
found.

Tested on x86_64-linux.  I expected the fallout to be bigger
but only a handful of tests needed adjusting and the changes
all look like clear improvements.  I.e., conflicting attributes
that diagnosed as being ignored really are being ignored as one
would expect.



Since you committed this patch (r262596), I've noticed regressions on
aarch64/arm:
g++.dg/warn/pr86453.C  -std=c++11  (test for warnings, line 4)
g++.dg/warn/pr86453.C  -std=c++11 (test for excess errors)
g++.dg/warn/pr86453.C  -std=c++14  (test for warnings, line 4)
g++.dg/warn/pr86453.C  -std=c++14 (test for excess errors)
g++.dg/warn/pr86453.C  -std=c++98  (test for warnings, line 4)
g++.dg/warn/pr86453.C  -std=c++98 (test for excess errors)

The log says:
Excess errors:
/gcc/testsuite/g++.dg/warn/pr86453.C:4:44: warning: ignoring attribute
'packed' because it conflicts with attribute 'aligned' [-Wattributes]

Isn't there the same message on x86_64?


There was.  The test above was added between the time I tested
my patch and the time I committed it.  I adjusted it yesterday
via r262609 so the failure should be gone.

Martin


Re: [PATCH] restore -Warray-bounds for string literals (PR 83776)

2018-07-13 Thread Martin Sebor

+/* Checks one MEM_REF in REF, located at LOCATION, for out-of-bounds
+   references to string constants.  If VRP can determine that the array
+   subscript is a constant, check if it is outside valid range.
+   If the array subscript is a RANGE, warn if it is non-overlapping
+   with valid range.
+   IGNORE_OFF_BY_ONE is true if the MEM_REF is inside an ADDR_EXPR.  */

This function doesn't have IGNORE_OFF_BY_ONE as a parameter.  Drop it
from the comment.


In the latest update (yet to be posted) the function uses it.


+  /* Determine the offsets and increment OFFRANGE for the bounds of each.  */
+  while (TREE_CODE (arg) == SSA_NAME)
+{
+  gimple *def = SSA_NAME_DEF_STMT (arg);
+  if (!is_gimple_assign (def))
+   {
+ if (tree var = SSA_NAME_VAR (arg))
+   arg = var;
+ break;
+   }

What's the point of looking at the underlying SSA_NAME_VAR here? I can't
see how that's ever helpful.  You'll always exit the loop at this point
which does something like

if (TREE_CODE (arg) == ADDR_EXPR)
  {
 do something interesting
  }
else
  return;

ISTM that any time you dig into SSA_NAME_VAR (arg) what you're going to
get back is some kind of _DECL node -- I'm not aware of a case where
you're going to get back an ADDR_EXPR.


I have removed the code.  It was a vestige of something that
didn't work out and I didn't notice.


+
+  tree_code code = gimple_assign_rhs_code (def);
+  if (code == POINTER_PLUS_EXPR)
+   {
+ arg = gimple_assign_rhs1 (def);
+ varoff = gimple_assign_rhs2 (def);
+   }
+  else if (code == ASSERT_EXPR)
+   {
+ arg = TREE_OPERAND (gimple_assign_rhs1 (def), 0);
+ continue;
+   }
+  else
+   return;
+
+  if (TREE_CODE (varoff) != SSA_NAME)
+   break;
+
+  vr = get_value_range (varoff);
+  if (!vr || vr->type == VR_UNDEFINED || !vr->min || !vr->max)
+   break;

Doesn't this let VR_ANTI_RANGE through?  Why not instead

  if (!vr || vr->type != VR_RANGE || !vr->min || !vr->max)

?  I'm having trouble convincing myself the subsequent code will DTRT
for an anti-range.


The anti-range code adds PTRDIFF_MIN and PTRDIFF_MAX to
the offset (that's what ARRBOUNDS is initially set to, until
we have found the array that's being dereferenced).

Because of the loop bailing for an anti-range could be too
early (the subsequent iterations might compensate for
the conservative anti-range handling and find a bug in
offsets added later).  It's unlikely but so are all bugs
so I try to handle even corner cases.


+
+  if (TREE_CODE (vr->min) != INTEGER_CST
+  || TREE_CODE (vr->max) != INTEGER_CST)
+break;
+
+  offset_int ofr[] = {
+   wi::to_offset (fold_convert (ptrdiff_type_node, vr->min)),
+   wi::to_offset (fold_convert (ptrdiff_type_node, vr->max))
+  };
+
+  if (vr->type == VR_RANGE)
+   {
+ if (ofr[0] < ofr[1])
+   {
+ offrange[0] += ofr[0];
+ offrange[1] += ofr[1];
+   }
+ else
+   {
+ offrange[0] += strbounds[0];
+ offrange[1] += strbounds[1];
+   }

When can the ELSE clause above happen for a VR_RANGE?


For a maximum in excess of PTRDIFF_MAX and a non-negative
minimum that's less than that.


+  /* At level 2 check also intermediate offsets.  */
+  int i = 0;
+  if (extrema[i] < -strbounds[1]
+  || extrema[i = 1] > strbounds[1] + eltsize)
+{
+  HOST_WIDE_INT tmpidx = extrema[i].to_shwi () / eltsize.to_shwi ();
+
+  warning_at (location, OPT_Warray_bounds,
+ "intermediate array offset %wi is outside array bounds "
+ "of %qT",
+ tmpidx,  strtype);
+  TREE_NO_WARNING (ref) = 1;
+}
+}

This seems ill-advised.  All that matters is the actual index used in
the dereference.  Intermediate values (ie, address computations) along
the way are uninteresting -- we may form an address out of the bounds of
the array as an intermediate computation.  But the actual memory
reference will be within the range.


The idea is to help detect bugs that cannot be detected otherwise.
Take the example below:

  void g (int i)
  {
if (i < 1 || 2 < i)
  i = 1;

const char *p1 = "12" + i;
const char *p2 = p1 + i;

extern int x, y;

x = p2[-4];   // #1: only valid if p2 is out of bounds
y = p2[0];// #2 therefore this must be out of bounds
  }

For the dereference at #1 to be valid i must be 2 (i.e.,
the upper bound of its range) and so p2 is therefore out
of bounds.

We may be able to compensate for it and compute the right
address at #1 but if the out-of-bounds value is used in
another dereference that makes a different assumption
(such as #2) one of the two is definitely wrong.  It might
be possible to do something smarter and determine if the
pointer really is used this way but I didn't want to
complicate the things too much so I put the logic under
level 2.

I will

Re: [PATCH] restore -Warray-bounds for string literals (PR 83776)

2018-07-13 Thread Martin Sebor

On 05/02/2018 01:22 AM, Richard Biener wrote:

On Fri, Jan 26, 2018 at 3:16 AM, Martin Sebor  wrote:

PR tree-optimization/83776 - [6/7/8 Regression] missing
-Warray-bounds indexing past the end of a string literal,
identified a not-so-recent improvement to constant propagation
as the reason for GCC no longer being able to detect out-of-
bounds accesses to string literals.  The root cause is that
the change caused accesses to strings to be transformed into
MEM_REFs that the -Warray-bounds checker isn't prepared to
handle.  A simple example is:

  int h (void)
  {
const char *p = "1234";
return p[16];   // missing -Warray-bounds
  }

To fix the regression the attached patch extends the array bounds
checker to handle the small subset of MEM_REF expressions that
refer to string literals but stops of short of doing more than
that.  There are outstanding gaps in the detection that the patch
intentionally doesn't handle.  They are either caused by other
regressions (PR 84047) or by other latent bugs/limitations, or
by limitations in the approach I took to try to keep the patch
simple.  I hope to address some of those in a follow-up patch
for GCC 9.


+  gimple *def = SSA_NAME_DEF_STMT (arg);
+  if (!is_gimple_assign (def))
+   {
+ if (tree var = SSA_NAME_VAR (arg))
+   arg = var;

this is never correct.  Whether sth has an underlying VAR_DECL or not
is irrelevant.  It looks like you'll always take the

+  else
+return;

path then anyways.  So why obfuscate the code this way?


I have removed the code.  It was a vestige of something that
didn't pan out and I didn't notice.



+  offset_int ofr[] = {
+   wi::to_offset (fold_convert (ptrdiff_type_node, vr->min)),
+   wi::to_offset (fold_convert (ptrdiff_type_node, vr->max))
+  };

huh.  Do you maybe want to use widest_int for ofr[]?  What's
wrong with wi::to_offset (vr->min)?  Building another intermediate
tree node here looks just bogus.


I need to convert size_type indices to signed to keep their sign
if it's negative and include it in warnings.  I've moved the code
into a conditional where it's used to minimize the cost but other
than that I don't know how else to convert it.



What are you trying to do in this loop anyways?


The loop It determines the range of the final index/offset for
a series of POINTER_PLUS_EXPRs.  It handles cases like this:

  int g (int i, int j, int k)
  {
if (i < 1) i = 1;
if (j < 1) j = 1;
if (k < 1) k = 1;

const char *p0 = "123";
const char *p1 = p0 + i;
const char *p2 = p1 + j;
const char *p3 = p2 + k;

// p2[3] and p3[1] are out of bounds
return p0[4] + p1[3] + p2[2] + p3[1];
  }


I suppose
look at

  p_1 = &obj[i_2];  // already bounds-checked, but with ignore_off_by_one
  ... = MEM[p_1 + CST];

?  But then

+  if (TREE_CODE (varoff) != SSA_NAME)
+   break;

you should at least handle INTEGER_CSTs here?


It's already handled (and set in CSTOFF).  There should be no
more constant offsets after that (I haven't come across any.)



+  if (!vr || vr->type == VR_UNDEFINED || !vr->min || !vr->max)
+   break;

please use positive tests here, VR_RANGE || VR_ANTI_RANGE.  As usual
the anti-range handling looks odd.  Please add comments so we can follow
what you were thinking when writing range merging code.  Even better if you
can stick to use existing code rather than always re-inventing the wheel...


The anti-range handling is to conservatively add
[-MAXOBJSIZE -1, MAXOBJSIZE] to OFFRANGE.  I've added comments
to make it clear.  I'd be more than happy to reuse existing
code if I knew where to find it (if it exists).  It sure would
save me lots of work to have a library of utility functions to
call instead of rolling my own code each time.



I think I commented on earlier variants but this doesn't seem to resemble
any of them.


I've reworked the patch (sorry) to also handle arrays.  For GCC
9 it seems I might as well do both in one go.

Attached is an updated patch with these changes.

Martin
PR tree-optimization/84047 - missing -Warray-bounds on an out-of-bounds index into an array
PR tree-optimization/83776 - missing -Warray-bounds indexing past the end of a string literal

gcc/ChangeLog:

	PR tree-optimization/84047
	PR tree-optimization/83776
	* tree-vrp.c (vrp_prop::check_mem_ref): New function.
	(check_array_bounds): Call it.
	* /gcc/tree-sra.c (get_access_for_expr): Fail for out-of-bounds
	array offsets.

gcc/testsuite/ChangeLog:

	PR tree-optimization/83776
	PR tree-optimization/84047
	* gcc.dg/Warray-bounds-29.c: New test.
	* gcc.dg/Warray-bounds-30.c: New test.
	* gcc.dg/Warray-bounds-31.c: New test.
	* gcc.dg/Warray-bounds-32.c: New test.

diff --git a/gcc/testsuite/gcc.dg/Warray-bounds-29.c b/gcc/testsuite/gcc.dg/Warray-bounds-29.c
new file mode 100644
index 000..72c5d1c
-

Re: [PATCH] Fix middle-end/86528

2018-07-16 Thread Martin Sebor

On 07/16/2018 09:36 AM, Bernd Edlinger wrote:

Hi,

this fixes PR middle-end/86528.


Bootstrapped and reg-tested on x86_64-pc-linux-gnu.
Is it OK for trunk?


Thanks -- the string_constant change also fixes bug 86532.

There is another problem in the subsequent handling of
the CONSTRUCTOR -- it ignores the non-constant VARIDX.  Fixing
that is easy but unfortunately prevents many strlen calls that
could be folded from folding (and makes the newly added tests
fail). For instance:

  static const char a[2][3] = { "1", "12" };
  if (strlen (&a[1][i]) > 2)
abort ();

That needs some other changes to handle.  Let me take care of
that.

Martin


PING #3 [PATCH] specify large command line option arguments (PR 82063)

2018-07-16 Thread Martin Sebor

Ping #3: https://gcc.gnu.org/ml/gcc-patches/2018-06/msg01509.html

On 07/09/2018 09:13 PM, Martin Sebor wrote:

Ping #2: https://gcc.gnu.org/ml/gcc-patches/2018-06/msg01509.html

On 07/03/2018 08:12 PM, Martin Sebor wrote:

Ping: https://gcc.gnu.org/ml/gcc-patches/2018-06/msg01509.html

On 06/24/2018 03:05 PM, Martin Sebor wrote:

Storing integer command line option arguments in type int
limits options such as -Wlarger-than= or -Walloca-larger-than
to at most INT_MAX (see bug 71905).  Larger values wrap around
zero.  The value zero is considered to disable the option,
making it impossible to specify a zero limit.

To get around these limitations, the -Walloc-size-larger-than=
option accepts a string argument that it then parses itself
and interprets as HOST_WIDE_INT.  The option also accepts byte
size suffixes like KB, MB, GiB, etc. to make it convenient to
specify very large limits.

The int limitation is obviously less than ideal in a 64-bit
world.  The treatment of zero as a toggle is just a minor wart.
The special treatment to make it work for just a single option
makes option handling inconsistent.  It should be possible for
any option that takes an integer argument to use the same logic.

The attached patch enhances GCC option processing to do that.
It changes the storage type of option arguments from int to
HOST_WIDE_INT and extends the existing (although undocumented)
option property Host_Wide_Int to specify wide option arguments.
It also introduces the ByteSize property for options for which
specifying the byte-size suffix makes sense.

To make it possible to consider zero as a meaningful argument
value rather than a flag indicating that an option is disabled
the patch also adds a CLVC_SIZE enumerator to the cl_var_type
enumeration, and modifies how options of the kind are handled.

Warning options that take large byte-size arguments can be
disabled by specifying a value equal to or greater than
HOST_WIDE_INT_M1U.  For convenience, aliases in the form of
-Wno-xxx-larger-than have been provided for all the affected
options.

In the patch all the existing -larger-than options are set
to PTRDIFF_MAX.  This makes them effectively enabled, but
because the setting is exceedingly permissive, and because
some of the existing warnings are already set to the same
value and some other checks detect and reject such exceedingly
large values with errors, this change shouldn't noticeably
affect what constructs are diagnosed.

Although all the options are set to PTRDIFF_MAX, I think it
would make sense to consider setting some of them lower, say
to PTRDIFF_MAX / 2.  I'd like to propose that in a followup
patch.

To minimize observable changes the -Walloca-larger-than and
-Wvla-larger-than warnings required more extensive work to
make of the new mechanism because of the "unbounded" argument
handling (the warnings trigger for arguments that are not
visibly constrained), and because of the zero handling
(the warnings also trigger


Martin









[PATCH] fix a couple of bugs in const string folding (PR 86532)

2018-07-17 Thread Martin Sebor

My enhancement to extract constant strings out of complex
aggregates committed last week introduced a couple of bugs in
dealing with non-constant indices and offsets.  One of the bugs
was fixed earlier today (PR 86528) but another one remains.  It
causes strlen (among other functions) to incorrectly fold
expressions involving a non-constant index into an array of
strings by treating the index the same as a non-consatnt
offset into it.

The non-constant index should either prevent the folding, or it
needs to handle it differently from an offset.

The attached patch takes the conservative approach of avoiding
the folding in this case.  The remaining changes deal with
the fallout from the fix.

Tested on x86_64-linux.

Martin

PR tree-optimization/86532 - Wrong code due to a wrong strlen folding starting with r262522

gcc/ChangeLog:

	PR tree-optimization/86532
	* builtins.c (c_strlen): Correct handling of non-constant offsets.	
	(check_access): Be prepared for non-constant length ranges.
	* expr.c (string_constant): Only handle the minor non-constant
	array index.

gcc/testsuite/ChangeLog:

	PR tree-optimization/86532
	* gcc.c-torture/execute/strlen-2.c: New test.

Index: gcc/builtins.c
===
--- gcc/builtins.c	(revision 262764)
+++ gcc/builtins.c	(working copy)
@@ -517,7 +517,7 @@ get_pointer_alignment (tree exp)
   return align;
 }
 
-/* Return the number of non-zero elements in the sequence
+/* Return the number of leading non-zero elements in the sequence
[ PTR, PTR + MAXELTS ) where each element's size is ELTSIZE bytes.
ELTSIZE must be a power of 2 less than 8.  Used by c_strlen.  */
 
@@ -605,15 +605,22 @@ c_strlen (tree src, int only_value)
 
   /* Set MAXELTS to sizeof (SRC) / sizeof (*SRC) - 1, the maximum possible
  length of SRC.  Prefer TYPE_SIZE() to TREE_STRING_LENGTH() if possible
- in case the latter is less than the size of the array.  */
-  HOST_WIDE_INT maxelts = TREE_STRING_LENGTH (src);
+ in case the latter is less than the size of the array, such as when
+ SRC refers to a short string literal used to initialize a large array.
+ In that case, the elements of the array after the terminating NUL are
+ all NUL.  */
+  HOST_WIDE_INT strelts = TREE_STRING_LENGTH (src);
+  strelts = strelts / eltsize - 1;
+
+  HOST_WIDE_INT maxelts = strelts;
   tree type = TREE_TYPE (src);
   if (tree size = TYPE_SIZE_UNIT (type))
 if (tree_fits_shwi_p (size))
-  maxelts = tree_to_uhwi (size);
+  {
+	maxelts = tree_to_uhwi (size);
+	maxelts = maxelts / eltsize - 1;
+  }
 
-  maxelts = maxelts / eltsize - 1;
-
   /* PTR can point to the byte representation of any string type, including
  char* and wchar_t*.  */
   const char *ptr = TREE_STRING_POINTER (src);
@@ -620,10 +627,12 @@ c_strlen (tree src, int only_value)
 
   if (byteoff && TREE_CODE (byteoff) != INTEGER_CST)
 {
-  /* If the string has an internal zero byte (e.g., "foo\0bar"), we can't
-	 compute the offset to the following null if we don't know where to
+  /* If the string has an internal NUL character followed by any
+	 non-NUL characters (e.g., "foo\0bar"), we can't compute
+	 the offset to the following NUL if we don't know where to
 	 start searching for it.  */
-  if (string_length (ptr, eltsize, maxelts) < maxelts)
+  unsigned len = string_length (ptr, eltsize, strelts);
+  if (len < strelts)
 	{
 	  /* Return when an embedded null character is found.  */
 	  return NULL_TREE;
@@ -633,12 +642,18 @@ c_strlen (tree src, int only_value)
 	return ssize_int (0);
 
   /* We don't know the starting offset, but we do know that the string
-	 has no internal zero bytes.  We can assume that the offset falls
-	 within the bounds of the string; otherwise, the programmer deserves
-	 what he gets.  Subtract the offset from the length of the string,
-	 and return that.  This would perhaps not be valid if we were dealing
-	 with named arrays in addition to literal string constants.  */
-  return size_diffop_loc (loc, size_int (maxelts * eltsize), byteoff);
+	 has no internal zero bytes.  If the offset falls within the bounds
+	 of the string subtract the offset from the length of the string,
+	 and return that.  Otherwise the length is zero.  Take care to
+	 use SAVE_EXPR in case the OFFSET has side-effects.  */
+  tree offsave = fold_build1_loc (loc, SAVE_EXPR,
+  TREE_TYPE (byteoff), byteoff);
+  tree condexp = fold_build2_loc (loc, LE_EXPR, boolean_type_node, offsave,
+  build_int_cst (size_type_node,
+		 len * eltsize));
+  tree lenexp = size_diffop_loc (loc, size_int (strelts * eltsize), offsave);
+  return fold_build3_loc (loc, COND_EXPR, size_type_node, condexp, lenexp,
+			  build_zero_cst (size_type_node));
 }
 
   /* Offset from the beginning of the string in elements.  */
@@ -3192,15 +3207,13 @@ check_access (tree exp, tree, tree, tree dstwrite,
   if (dstw

Re: [PATCH] fix a couple of bugs in const string folding (PR 86532)

2018-07-17 Thread Martin Sebor

On 07/17/2018 09:38 AM, Jeff Law wrote:

On 07/17/2018 09:19 AM, Martin Sebor wrote:

My enhancement to extract constant strings out of complex
aggregates committed last week introduced a couple of bugs in
dealing with non-constant indices and offsets.  One of the bugs
was fixed earlier today (PR 86528) but another one remains.  It
causes strlen (among other functions) to incorrectly fold
expressions involving a non-constant index into an array of
strings by treating the index the same as a non-consatnt
offset into it.

The non-constant index should either prevent the folding, or it
needs to handle it differently from an offset.

The attached patch takes the conservative approach of avoiding
the folding in this case.  The remaining changes deal with
the fallout from the fix.

Tested on x86_64-linux.

Martin


gcc-86532.diff


PR tree-optimization/86532 - Wrong code due to a wrong strlen folding starting 
with r262522

gcc/ChangeLog:

PR tree-optimization/86532
* builtins.c (c_strlen): Correct handling of non-constant offsets.  
(check_access): Be prepared for non-constant length ranges.
* expr.c (string_constant): Only handle the minor non-constant
array index.

gcc/testsuite/ChangeLog:

PR tree-optimization/86532
* gcc.c-torture/execute/strlen-2.c: New test.


[ ... ]


Index: gcc/expr.c
===
--- gcc/expr.c  (revision 262764)
+++ gcc/expr.c  (working copy)



@@ -11343,16 +11356,15 @@ string_constant (tree arg, tree *ptr_offset)
 {
   if (TREE_CODE (TREE_TYPE (array)) != ARRAY_TYPE)
return NULL_TREE;
-  if (tree eltsize = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (array
-   {
- /* Add the scaled variable index to the constant offset.  */
- tree eltoff = fold_build2 (MULT_EXPR, TREE_TYPE (offset),
-fold_convert (sizetype, varidx),
-eltsize);
- offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, eltoff);
-   }
-  else
-   return NULL_TREE;
+
+  while (TREE_CODE (chartype) != INTEGER_TYPE)
+   chartype = TREE_TYPE (chartype);

This is a bit concerning.  First under what conditions is chartype not
going to be an INTEGER_TYPE?  And under what conditions will extracting
its type ultimately lead to something that is an INTEGER_TYPE?


chartype is usually (maybe even always) pointer type here:

  const char a[] = "123";
  extern int i;
  n = strlen (&a[i]);

Martin



backporting fix for 85602 to GCC 8

2018-07-17 Thread Martin Sebor

If there are no objections I'd like to backport the solution
for PR 85602 to avoid a class of unnecessary warnings for
safe uses of nonstring arrays.  With the release coming up
later this week I'll go ahead and commit the patch tomorrow.

https://gcc.gnu.org/viewcvs/gcc?view=revision&revision=261718

Thanks
Martin


Re: [PATCH] fix a couple of bugs in const string folding (PR 86532)

2018-07-17 Thread Martin Sebor

The attached update takes care of a couple of problems pointed
out by Bernd Edlinger in his comments on the bug.  The ICE he
mentioned in comment #20 was due mixing sizetype, ssizetype,
and size_type_node in c_strlen().  AFAICS, some of it predates
the patch but my changes made it worse and also managed trigger
it.

On 07/17/2018 09:19 AM, Martin Sebor wrote:

My enhancement to extract constant strings out of complex
aggregates committed last week introduced a couple of bugs in
dealing with non-constant indices and offsets.  One of the bugs
was fixed earlier today (PR 86528) but another one remains.  It
causes strlen (among other functions) to incorrectly fold
expressions involving a non-constant index into an array of
strings by treating the index the same as a non-consatnt
offset into it.

The non-constant index should either prevent the folding, or it
needs to handle it differently from an offset.

The attached patch takes the conservative approach of avoiding
the folding in this case.  The remaining changes deal with
the fallout from the fix.

Tested on x86_64-linux.

Martin



PR tree-optimization/86532 - Wrong code due to a wrong strlen folding starting with r262522

gcc/ChangeLog:

	PR tree-optimization/86532
	* builtins.c (c_strlen): Correct handling of non-constant offsets.	
	(check_access): Be prepared for non-constant length ranges.
	* expr.c (string_constant): Only handle the minor non-constant
	array index.

gcc/testsuite/ChangeLog:

	PR tree-optimization/86532
	* gcc.c-torture/execute/strlen-2.c: New test.
	* gcc.c-torture/execute/strlen-3.c: New test.

diff --git a/gcc/builtins.c b/gcc/builtins.c
index c069d66..03cf012 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -517,7 +517,7 @@ get_pointer_alignment (tree exp)
   return align;
 }
 
-/* Return the number of non-zero elements in the sequence
+/* Return the number of leading non-zero elements in the sequence
[ PTR, PTR + MAXELTS ) where each element's size is ELTSIZE bytes.
ELTSIZE must be a power of 2 less than 8.  Used by c_strlen.  */
 
@@ -605,14 +605,21 @@ c_strlen (tree src, int only_value)
 
   /* Set MAXELTS to sizeof (SRC) / sizeof (*SRC) - 1, the maximum possible
  length of SRC.  Prefer TYPE_SIZE() to TREE_STRING_LENGTH() if possible
- in case the latter is less than the size of the array.  */
-  HOST_WIDE_INT maxelts = TREE_STRING_LENGTH (src);
+ in case the latter is less than the size of the array, such as when
+ SRC refers to a short string literal used to initialize a large array.
+ In that case, the elements of the array after the terminating NUL are
+ all NUL.  */
+  HOST_WIDE_INT strelts = TREE_STRING_LENGTH (src);
+  strelts = strelts / eltsize - 1;
+
+  HOST_WIDE_INT maxelts = strelts;
   tree type = TREE_TYPE (src);
   if (tree size = TYPE_SIZE_UNIT (type))
 if (tree_fits_shwi_p (size))
-  maxelts = tree_to_uhwi (size);
-
-  maxelts = maxelts / eltsize - 1;
+  {
+	maxelts = tree_to_uhwi (size);
+	maxelts = maxelts / eltsize - 1;
+  }
 
   /* PTR can point to the byte representation of any string type, including
  char* and wchar_t*.  */
@@ -620,10 +627,12 @@ c_strlen (tree src, int only_value)
 
   if (byteoff && TREE_CODE (byteoff) != INTEGER_CST)
 {
-  /* If the string has an internal zero byte (e.g., "foo\0bar"), we can't
-	 compute the offset to the following null if we don't know where to
+  /* If the string has an internal NUL character followed by any
+	 non-NUL characters (e.g., "foo\0bar"), we can't compute
+	 the offset to the following NUL if we don't know where to
 	 start searching for it.  */
-  if (string_length (ptr, eltsize, maxelts) < maxelts)
+  unsigned len = string_length (ptr, eltsize, strelts);
+  if (len < strelts)
 	{
 	  /* Return when an embedded null character is found.  */
 	  return NULL_TREE;
@@ -633,12 +642,17 @@ c_strlen (tree src, int only_value)
 	return ssize_int (0);
 
   /* We don't know the starting offset, but we do know that the string
-	 has no internal zero bytes.  We can assume that the offset falls
-	 within the bounds of the string; otherwise, the programmer deserves
-	 what he gets.  Subtract the offset from the length of the string,
-	 and return that.  This would perhaps not be valid if we were dealing
-	 with named arrays in addition to literal string constants.  */
-  return size_diffop_loc (loc, size_int (maxelts * eltsize), byteoff);
+	 has no internal zero bytes.  If the offset falls within the bounds
+	 of the string subtract the offset from the length of the string,
+	 and return that.  Otherwise the length is zero.  Take care to
+	 use SAVE_EXPR in case the OFFSET has side-effects.  */
+  tree offsave = TREE_SIDE_EFFECTS (byteoff) ? save_expr (byteoff) : byteoff;
+  offsave = fold_convert (ssizetype, offsave);
+  tree condexp = fold_build2_loc (loc, LE_EXPR, boolean_type_node, offsave,
+   

Re: [PATCH] fix a couple of bugs in const string folding (PR 86532)

2018-07-18 Thread Martin Sebor

On 07/18/2018 02:31 AM, Richard Biener wrote:

On Tue, 17 Jul 2018, Martin Sebor wrote:


The attached update takes care of a couple of problems pointed
out by Bernd Edlinger in his comments on the bug.  The ICE he
mentioned in comment #20 was due mixing sizetype, ssizetype,
and size_type_node in c_strlen().  AFAICS, some of it predates
the patch but my changes made it worse and also managed trigger
it.


+has no internal zero bytes.  If the offset falls within the
bounds
+of the string subtract the offset from the length of the string,
+and return that.  Otherwise the length is zero.  Take care to
+use SAVE_EXPR in case the OFFSET has side-effects.  */
+  tree offsave = TREE_SIDE_EFFECTS (byteoff) ? save_expr (byteoff) :
byteoff;
+  offsave = fold_convert (ssizetype, offsave);
+  tree condexp = fold_build2_loc (loc, LE_EXPR, boolean_type_node,
offsave,
+ build_int_cst (ssizetype, len *
eltsize));
+  tree lenexp = size_diffop_loc (loc, ssize_int (strelts * eltsize),
offsave);
+  return fold_build3_loc (loc, COND_EXPR, ssizetype, condexp, lenexp,
+ build_zero_cst (ssizetype));

in what case are you expecting to return an actual COND_EXRP and
why is that useful?


It's necessary to correctly handle strings with multiple trailing
nuls, like in:

  const char a[8] = "123";
  int f (int i)
  {
return strlen (a + i);
  }

If (i <= 3) then the length is i.  If it's greater than 3 then
the length is zero.  I'd expect such strings to be quite common,
even pervasive, in the case of multidimensional arrays or arrays
of structs with array members.  (Probably less so in plain one-
dimensional arrays like the one above.)


You return a signed value but bother to
guard it so it is never less than zero.  Why?  Why not simply
return the difference as you did before but with the side-effects
properly handled?


Hopefully the above answers this question (if there's a way
to do it in a more straightforward way please let me know).

FWIW, as I said in bug 86434, I think this folding is premature
and prevents other optimizations that I suspect would be more
profitable.  I'm only preserving it here for now but at some
point I hope we can agree to defer it until later when more
information about the offset is known and when it will
benefit other optimizations.  I read your comments on the bug
and I'll see if it's possible to have it both ways.

Martin




On 07/17/2018 09:19 AM, Martin Sebor wrote:

My enhancement to extract constant strings out of complex
aggregates committed last week introduced a couple of bugs in
dealing with non-constant indices and offsets.  One of the bugs
was fixed earlier today (PR 86528) but another one remains.  It
causes strlen (among other functions) to incorrectly fold
expressions involving a non-constant index into an array of
strings by treating the index the same as a non-consatnt
offset into it.

The non-constant index should either prevent the folding, or it
needs to handle it differently from an offset.

The attached patch takes the conservative approach of avoiding
the folding in this case.  The remaining changes deal with
the fallout from the fix.

Tested on x86_64-linux.

Martin










Re: [PATCH 1/4] Clean up of new format of -falign-FOO.

2018-07-18 Thread Martin Sebor

On 07/04/2018 04:23 AM, marxin wrote:


gcc/ChangeLog:

2018-07-11  Martin Liska  

* align.h: New file.


Martin,

I'm seeing lots of warnings for this file:

/ssd/src/gcc/svn/gcc/align.h:53:32: warning: extended initializer lists 
only available with -std=c++11 or -std=gnu++11


The code that triggers them is:

+struct align_flags
+{
+  /* Default constructor.  */
+  align_flags (int log0 = 0, int maxskip0 = 0, int log1 = 0, int 
maxskip1 = 0)

+  {
+levels[0] = {log0, maxskip0};
+levels[1] = {log1, maxskip1};
+normalize ();
+  }

This form of assignment isn't valid in C++ 98.

Thanks
Martin


Re: backporting fix for 85602 to GCC 8

2018-07-18 Thread Martin Sebor

On 07/18/2018 09:09 AM, Franz Sirl wrote:

Am 2018-07-18 um 01:50 schrieb Martin Sebor:

If there are no objections I'd like to backport the solution
for PR 85602 to avoid a class of unnecessary warnings for
safe uses of nonstring arrays.  With the release coming up
later this week I'll go ahead and commit the patch tomorrow.

https://gcc.gnu.org/viewcvs/gcc?view=revision&revision=261718


Hi Martin,

and please remember the follow-up fix

https://gcc.gnu.org/viewcvs/gcc?view=revision&revision=261751


I've committed the 85602 changes to 8-branch.  I also updated
the manual to mention that -Wstringop-truncation is enabled by
-Wall (thanks).  The rest seems out of scope so I'll look into
it for trunk.

Martin



The patch for PR 85602 makes the extended and enabled-by-Wall string
warnings (which I like!) complete. There's a warning for the majority of
cases and for the char-array-without-NUL cases there is the nonstring
attribute describing it nicely, much better than to turn off the warning
around such code.
I know that probably not too many codebases will be affected, but for
anyone affected the nonstring attribute is a much better way to avoid
the warnings than to turn it off (and if they turn off the warnings for
gcc-8 they often won't turn it on again for gcc-9+).
The nonstring attribute is also the documented way to silence the warnings.

BTW, while re-reading the documentation I noticed some minor omissions,
I attached a patch (untested). Feel free to commit it (I have no access)
if you think it's correct.

Franz.


2018-07-12  Franz Sirl  

* invoke.texi (Wstringop-overflow, Wstringop-truncation):
Mention enabling via -Wall.
(Wall): Add -Wstringop-overflow02 and -Wstringop-truncation.






Re: [PATCH] fix a couple of bugs in const string folding (PR 86532)

2018-07-18 Thread Martin Sebor

+  while (TREE_CODE (chartype) != INTEGER_TYPE)
+chartype = TREE_TYPE (chartype);

This is a bit concerning.  First under what conditions is chartype not
going to be an INTEGER_TYPE?  And under what conditions will extracting
its type ultimately lead to something that is an INTEGER_TYPE?


chartype is usually (maybe even always) pointer type here:

  const char a[] = "123";
  extern int i;
  n = strlen (&a[i]);


But your hunch was correct that the loop isn't safe because
the element type need not be an integer (I didn't know/forgot
that the function is called for non-strings too).  The loop
should be replaced by:

  while (TREE_CODE (chartype) == ARRAY_TYPE
 || TREE_CODE (chartype) == POINTER_TYPE)
chartype = TREE_TYPE (chartype);

 if (TREE_CODE (chartype) != INTEGER_TYPE)
return NULL;

I will update the patch before committing.

FWIW, it seems like it would be useful to extend the function to
non-string arguments.  That way it would be able to return array
initializers in cases like this:

  const struct A { int a[2]; }
a = { { 1, 2 } },
b = { { 1, 2 } };

  int f (void)
  {
return __builtin_memcmp (&a, &b, sizeof a);
  }

which would in turn make it possible to fold the result of
such calls analogously to strlen or strcmp.

Martin


Re: [PATCH, obvious?] Some minor nits in string folding functions

2018-07-19 Thread Martin Sebor

On 07/19/2018 12:04 PM, Bernd Edlinger wrote:

Hi,


this fixes a few minor nits, which I spotted while
looking at the string folding functions:


Please hold off until the patch for bug 86532 has been reviewed,
approved, and committed.  I'm making changes in this area,
partly to address some of your comments on it, including some
of the same ones you are making here.  It doesn't help for you
to be making other changes to the same code at the same time.

Thanks
Martin



string_constant: Remove impossible check: TREE_CODE (arg)
can't be COMPONENT_REF and MEM_REF at the same time.

c_strlen: maxelts is (signed) HOST_WIDE_INT, therefore
use tree_to_shwi.

c_getstr: tree_to_uhwi needs to be protected by
tree_fits_uhwi_p.

BTW: c_getstr uses string_constant which appears to be
able to extract wide char string initializers, but c_getstr
does not seem to be prepared for wide char strings:

   else if (string[string_length - 1] != '\0')
 {
   /* Support only properly NUL-terminated strings but handle
  consecutive strings within the same array, such as the six
  substrings in "1\0002\0003".  */
   return NULL;
 }


Bootstrapped and reg-tested on x86_64-pc-linux-gnu.
Is it OK for trunk?


Thanks
Bernd.





Re: [PATCH] fix a couple of bugs in const string folding (PR 86532)

2018-07-19 Thread Martin Sebor

On 07/19/2018 01:17 AM, Richard Biener wrote:

On Wed, 18 Jul 2018, Martin Sebor wrote:


+  while (TREE_CODE (chartype) != INTEGER_TYPE)
+chartype = TREE_TYPE (chartype);

This is a bit concerning.  First under what conditions is chartype not
going to be an INTEGER_TYPE?  And under what conditions will extracting
its type ultimately lead to something that is an INTEGER_TYPE?


chartype is usually (maybe even always) pointer type here:

  const char a[] = "123";
  extern int i;
  n = strlen (&a[i]);


But your hunch was correct that the loop isn't safe because
the element type need not be an integer (I didn't know/forgot
that the function is called for non-strings too).  The loop
should be replaced by:

  while (TREE_CODE (chartype) == ARRAY_TYPE
 || TREE_CODE (chartype) == POINTER_TYPE)
chartype = TREE_TYPE (chartype);


As this function may be called "late" you need to cope with
the middle-end ignoring type changes and thus happily
passing int *** directly rather than (char *) of that.

Also doesn't the above yield int for int *[]?


I don't think it ever gets this far for either a pointer to
an array of int, or for an array of pointers to int.  So for
something like the following the function fails earlier:

  const int* const a[2] = { ... };
  const char* (const *p)[2] = &a;

  int f (void)
  {
return __builtin_memcmp (*p, "12345678", 8);
  }

(Assuming this is what you were asking about.)


I guess you really want

   if (POINTER_TYPE_P (chartype))
 chartype = TREE_TYPE (chartype);
   while (TREE_CODE (chartype) == ARRAY_TYPE)
 chartype = TREE_TYPE (chartype);

?


That seems to work too.  Attached is an update with this tweak.
The update also addresses some of Bernd's comments: it removes
the pointless second test in:

if (TREE_CODE (type) == ARRAY_TYPE
&& TREE_CODE (type) != INTEGER_TYPE)

the unused assignment to chartype in:

   else if (DECL_P (arg))
 {
   array = arg;
   chartype = TREE_TYPE (arg);
 }

and calls string_constant() instead of strnlen() to compute
the length of a generic string.

Other improvements  are possible in this area but they are
orthogonal to the bug I'm trying to fix so I'll post separate
patches for some of those.

Martin
PR tree-optimization/86532 - Wrong code due to a wrong strlen folding starting with r262522

gcc/ChangeLog:

	PR tree-optimization/86532
	* builtins.h (string_length): Declare.
	* builtins.c (c_strlen): Correct handling of non-constant offsets.	
	(check_access): Be prepared for non-constant length ranges.
	(string_length): Make extern.
	* expr.c (string_constant): Only handle the minor non-constant
	array index.  Use string_constant to compute the length of
	a generic string constant.

gcc/testsuite/ChangeLog:

	PR tree-optimization/86532
	* gcc.c-torture/execute/strlen-2.c: New test.
	* gcc.c-torture/execute/strlen-3.c: New test.

diff --git a/gcc/builtins.c b/gcc/builtins.c
index c069d66..ceb477d 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -517,11 +517,11 @@ get_pointer_alignment (tree exp)
   return align;
 }
 
-/* Return the number of non-zero elements in the sequence
+/* Return the number of leading non-zero elements in the sequence
[ PTR, PTR + MAXELTS ) where each element's size is ELTSIZE bytes.
ELTSIZE must be a power of 2 less than 8.  Used by c_strlen.  */
 
-static unsigned
+unsigned
 string_length (const void *ptr, unsigned eltsize, unsigned maxelts)
 {
   gcc_checking_assert (eltsize == 1 || eltsize == 2 || eltsize == 4);
@@ -605,14 +605,21 @@ c_strlen (tree src, int only_value)
 
   /* Set MAXELTS to sizeof (SRC) / sizeof (*SRC) - 1, the maximum possible
  length of SRC.  Prefer TYPE_SIZE() to TREE_STRING_LENGTH() if possible
- in case the latter is less than the size of the array.  */
-  HOST_WIDE_INT maxelts = TREE_STRING_LENGTH (src);
+ in case the latter is less than the size of the array, such as when
+ SRC refers to a short string literal used to initialize a large array.
+ In that case, the elements of the array after the terminating NUL are
+ all NUL.  */
+  HOST_WIDE_INT strelts = TREE_STRING_LENGTH (src);
+  strelts = strelts / eltsize - 1;
+
+  HOST_WIDE_INT maxelts = strelts;
   tree type = TREE_TYPE (src);
   if (tree size = TYPE_SIZE_UNIT (type))
 if (tree_fits_shwi_p (size))
-  maxelts = tree_to_uhwi (size);
-
-  maxelts = maxelts / eltsize - 1;
+  {
+	maxelts = tree_to_uhwi (size);
+	maxelts = maxelts / eltsize - 1;
+  }
 
   /* PTR can point to the byte representation of any string type, including
  char* and wchar_t*.  */
@@ -620,10 +627,12 @@ c_strlen (tree src, int only_value)
 
   if (byteoff && TREE_CODE (byteoff) != INTEGER_CST)
 {
-  /* If the string has an internal zero byte (e.g., "foo\0bar"), we can't
-	 compute the offset to the following null if we don

Re: [PATCH] fix a couple of bugs in const string folding (PR 86532)

2018-07-19 Thread Martin Sebor

On 07/19/2018 12:19 AM, Bernd Edlinger wrote:

  if (TREE_CODE (idx) != INTEGER_CST
  && TREE_CODE (argtype) == POINTER_TYPE)
{
  /* From a pointer (but not array) argument extract the variable
 index to prevent get_addr_base_and_unit_offset() from failing
 due to it.  Use it later to compute the non-constant offset
 into the string and return it to the caller.  */
  varidx = idx;
  ref = TREE_OPERAND (arg, 0);

  tree type = TREE_TYPE (arg);
  if (TREE_CODE (type) == ARRAY_TYPE
  && TREE_CODE (type) != INTEGER_TYPE)
return NULL_TREE;
}

the condition TREE_CODE(type) == ARRAY_TYPE
&& TREE_CODE (type) != INTEGER_TYPE looks funny.
Check for ARRAY_TYPE should imply != INTEGER_TYPE.


Yes, that other test was superfluous.  I've removed it in
the updated patch I just posted.



  else if (DECL_P (arg))
{
  array = arg;
  chartype = TREE_TYPE (arg);
}

chartype is only used in the if (varidx) block, but that is always zero
in this case.


True.  Chartype could be left uninitialized.  I did it mostly
out of an abundance of caution (I don't like leaving variables
with such big scope uninitialized).  In the latest update
I instead initialize chartype to null.



  while (TREE_CODE (chartype) == ARRAY_TYPE
 || TREE_CODE (chartype) == POINTER_TYPE)
chartype = TREE_TYPE (chartype);

you multiply sizeof(chartype) with varidx but you should probably
use the type of the  TREE_OPERAND (arg, 0); above instead.


The offset returned to the caller is relative to the character
array so varidx must also be the index into the same array.
Otherwise it cannot be used.  The difference is this:

  const char a[][4] = { "12", "123" };

  int x = strlen (&a[0][i]);   // use i as the offset
  int y = strlen (&a[i][0]);   // bail



this is not in the patch, but I dont like it at all, because it compares the
size of a single initializer against the full size of the array.  But it should
be the innermost enclosing array:

  tree array_size = DECL_SIZE_UNIT (array);
  if (!array_size || TREE_CODE (array_size) != INTEGER_CST)
return NULL_TREE;

  /* Avoid returning a string that doesn't fit in the array
 it is stored in, like
 const char a[4] = "abcde";
 but do handle those that fit even if they have excess
 initializers, such as in
 const char a[4] = "abc\000\000";
 The excess elements contribute to TREE_STRING_LENGTH()
 but not to strlen().  */
  unsigned HOST_WIDE_INT length
= strnlen (TREE_STRING_POINTER (init), TREE_STRING_LENGTH (init));
  if (compare_tree_int (array_size, length + 1) < 0)
return NULL_TREE;


The use of strnlen here isn't right for wide strings and needs
to be replaced with a call to string_length() from builtins.c.
I've made that change in the updated patch.

The remaining concern is orthogonal to the changes to fix
the wrong code.  As I mentioned, I opened a few bugs to
improve things in this area: 86434, 86572, 86552.  As
the first step I'm about to post a solution for 86552.



consider the following test case:
$ cat part.c
const char a[2][3][8] = { { "a", "bb", "ccc"},
  { "", "e", "ff" } };

int main ()
{
  int n = __builtin_strlen (&a[0][1][0]);

  if (n == 30)
__builtin_abort ();
}


With my patch for pr86552 GCC prints:

warning: initializer-string for array of chars is too long
 const char a[2][3][8] = { { "a", "bb", "ccc"},
  ^~~~
note: (near initialization for ‘a[0][1]’)
In function ‘main’:
warning: ‘__builtin_strlen’ argument missing terminating nul 
[-Wstringop-overflow=]

   int n = __builtin_strlen (&a[0][1][0]);
   ^~~~
note: referenced argument declared here
 const char a[2][3][8] = { { "a", "bb", "ccc"},

We can discuss what value, if any, might be more appropriate
to fold the length to in these undefined cases but  I would
prefer to have that discussion separately from this review.

Martin


Re: [PATCH] fix a couple of bugs in const string folding (PR 86532)

2018-07-19 Thread Martin Sebor

On 07/19/2018 07:23 AM, Bernd Edlinger wrote:

@@ -633,12 +642,17 @@ c_strlen (tree src, int only_value)
return ssize_int (0);

   /* We don't know the starting offset, but we do know that the string
-has no internal zero bytes.  We can assume that the offset falls
-within the bounds of the string; otherwise, the programmer deserves
-what he gets.  Subtract the offset from the length of the string,
-and return that.  This would perhaps not be valid if we were dealing
-with named arrays in addition to literal string constants.  */
-  return size_diffop_loc (loc, size_int (maxelts * eltsize), byteoff);
+has no internal zero bytes.  If the offset falls within the bounds
+of the string subtract the offset from the length of the string,
+and return that.  Otherwise the length is zero.  Take care to
+use SAVE_EXPR in case the OFFSET has side-effects.  */
+  tree offsave = TREE_SIDE_EFFECTS (byteoff) ? save_expr (byteoff) : 
byteoff;
+  offsave = fold_convert (ssizetype, offsave);
+  tree condexp = fold_build2_loc (loc, LE_EXPR, boolean_type_node, offsave,
+ build_int_cst (ssizetype, len * eltsize));
+  tree lenexp = size_diffop_loc (loc, ssize_int (strelts * eltsize), 
offsave);
+  return fold_build3_loc (loc, COND_EXPR, ssizetype, condexp, lenexp,
+ build_zero_cst (ssizetype));



This computes the number of bytes.
c_strlen is supposed to return number of (wide) characters:


You're right.  I guess that must mean the function isn't used
for wide character strings.  The original code also computes
a byte offset and so does the new expression.  I have no
problem changing it, but if feels like a change that should
be made independently of this bug fix.



/* Compute the length of a null-terminated character string or wide
character string handling character sizes of 1, 2, and 4 bytes.
TREE_STRING_LENGTH is not the right way because it evaluates to
the size of the character array in bytes (as opposed to characters)
and because it can contain a zero byte in the middle.



@@ -11343,16 +11356,15 @@ string_constant (tree arg, tree *ptr_offset)
 {
   if (TREE_CODE (TREE_TYPE (array)) != ARRAY_TYPE)
return NULL_TREE;
-  if (tree eltsize = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (array
-   {
- /* Add the scaled variable index to the constant offset.  */
- tree eltoff = fold_build2 (MULT_EXPR, TREE_TYPE (offset),
-fold_convert (sizetype, varidx),
-eltsize);
- offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, eltoff);
-   }
-  else
-   return NULL_TREE;
+
+  while (TREE_CODE (chartype) != INTEGER_TYPE)
+   chartype = TREE_TYPE (chartype);
+
+  /* Set the non-constant offset to the non-constant index scaled
+by the size of the character type.  */
+  offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset),
+   fold_convert (sizetype, varidx),
+   TYPE_SIZE_UNIT (chartype));


here you fix the computation for wide character strings,
but I see no test cases with wide character stings.


The change above corrects the offset when eltsize refers to
the size of an array rather than its character elements.
It wasn't made to fix how wide strings are handled.  I don't
know if the function is ever called for them in a way that
matters (there are no wide character built-ins).  But it's
something to look into.



But down here you use a non-wide character function on a
wide character string:

   /* Avoid returning a string that doesn't fit in the array
  it is stored in, like
  const char a[4] = "abcde";
  but do handle those that fit even if they have excess
  initializers, such as in
  const char a[4] = "abc\000\000";
  The excess elements contribute to TREE_STRING_LENGTH()
  but not to strlen().  */
   unsigned HOST_WIDE_INT length
 = strnlen (TREE_STRING_POINTER (init), TREE_STRING_LENGTH (init));


Actually I begin to wonder, if all this wide character stuff is
really so common that we have to optimize it.


I've replaced the strnlen() call with string_length() in
the updated patch.  The code is entered for wchar_t (and
other) strings but since there are no wide character
built-ns in GCC I suspect that optimizations that apply
to them that don't also apply to other arrays are few
and far between.

At the same time, as wide characters (beyond wchar_t) are
increasingly becoming used in software I wouldn't want to
preclude future optimizations from making use of those that
benefit narrow strings.  In any event, that too is
a discussion to have independently of this bug fix.

Martin



[PATCH] warn for strlen of arrays with missing nul (PR 86552)

2018-07-19 Thread Martin Sebor

In the discussion of my patch for pr86532 Bernd noted that
GCC silently accepts constant character arrays with no
terminating nul as arguments to strlen (and other string
functions).

The attached patch is a first step in detecting these kinds
of bugs in strlen calls by issuing -Wstringop-overflow.
The next step is to modify all other handlers of built-in
functions to detect the same problem (not part of this patch).
Yet another step is to detect these problems in arguments
initialized using the non-string form:

  const char a[] = { 'a', 'b', 'c' };

This patch is meant to apply on top of the one for bug 86532
(I tested it with an earlier version of that patch so there
is code in the context that does not appear in the latest
version of the other diff).

Martin

PR tree-optimization/86552 - missing warning for reading past the end of non-string arrays

gcc/ChangeLog:

	PR tree-optimization/86552
	* builtins.c (warn_string_no_nul): New function.
	(string_length): Add argument and use it.
	(c_strlen): Same.
	(expand_builtin_strlen): Detect missing nul.
	(fold_builtin_1): Adjust.
	* builtins.h (c_strlen): Add argument.
	* expr.c (string_constant): Add arguments.  Detect missing nul
	terminator and outermost declaration it's missing in.
	* expr.h (string_constant): Add argument.
	* fold-const.c (c_getstr): Revert test.

gcc/testsuite/ChangeLog:

	PR tree-optimization/86552
	* gcc.dg/warn-string-no-nul.c: New test.

diff --git a/gcc/builtins.c b/gcc/builtins.c
index 03cf012..9885c4b 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -150,7 +150,7 @@ static tree stabilize_va_list_loc (location_t, tree, int);
 static rtx expand_builtin_expect (tree, rtx);
 static tree fold_builtin_constant_p (tree);
 static tree fold_builtin_classify_type (tree);
-static tree fold_builtin_strlen (location_t, tree, tree);
+static tree fold_builtin_strlen (location_t, tree, tree, tree);
 static tree fold_builtin_inf (location_t, tree, int);
 static tree rewrite_call_expr (location_t, tree, int, tree, int, ...);
 static bool validate_arg (const_tree, enum tree_code code);
@@ -550,6 +550,36 @@ string_length (const void *ptr, unsigned eltsize, unsigned maxelts)
   return n;
 }
 
+/* For a call expression EXP to a function that expects a string argument,
+   issue a diagnostic due to it being a called with an argument NONSTR
+   that is a character array with no terminating NUL.  */
+
+static void
+warn_string_no_nul (location_t loc, tree exp, tree fndecl, tree nonstr)
+{
+  loc = expansion_point_location_if_in_system_header (loc);
+
+  bool warned;
+  if (exp)
+{
+  if (!fndecl)
+	fndecl = get_callee_fndecl (exp);
+  warned = warning_at (loc, OPT_Wstringop_overflow_,
+			   "%K%qD argument missing terminating nul",
+			   exp, fndecl);
+}
+  else
+{
+  gcc_assert (fndecl);
+  warned = warning_at (loc, OPT_Wstringop_overflow_,
+			   "%qD argument missing terminating nul",
+			   fndecl);
+}
+
+  if (warned && DECL_P (nonstr))
+inform (DECL_SOURCE_LOCATION (nonstr), "referenced argument declared here");
+}
+
 /* Compute the length of a null-terminated character string or wide
character string handling character sizes of 1, 2, and 4 bytes.
TREE_STRING_LENGTH is not the right way because it evaluates to
@@ -567,13 +597,17 @@ string_length (const void *ptr, unsigned eltsize, unsigned maxelts)
accesses.  Note that this implies the result is not going to be emitted
into the instruction stream.
 
+   When ARR is non-null and the string is not properly nul-terminated,
+   set *ARR to the declaration of the outermost constant object whose
+   initializer (or one of its elements) is not nul-terminated.
+
The value returned is of type `ssizetype'.
 
Unfortunately, string_constant can't access the values of const char
arrays with initializers, so neither can we do so here.  */
 
 tree
-c_strlen (tree src, int only_value)
+c_strlen (tree src, int only_value, tree *arr /* = NULL */)
 {
   STRIP_NOPS (src);
   if (TREE_CODE (src) == COND_EXPR
@@ -581,24 +615,31 @@ c_strlen (tree src, int only_value)
 {
   tree len1, len2;
 
-  len1 = c_strlen (TREE_OPERAND (src, 1), only_value);
-  len2 = c_strlen (TREE_OPERAND (src, 2), only_value);
+  len1 = c_strlen (TREE_OPERAND (src, 1), only_value, arr);
+  len2 = c_strlen (TREE_OPERAND (src, 2), only_value, arr);
   if (tree_int_cst_equal (len1, len2))
 	return len1;
 }
 
   if (TREE_CODE (src) == COMPOUND_EXPR
   && (only_value || !TREE_SIDE_EFFECTS (TREE_OPERAND (src, 0
-return c_strlen (TREE_OPERAND (src, 1), only_value);
+return c_strlen (TREE_OPERAND (src, 1), only_value, arr);
 
   location_t loc = EXPR_LOC_OR_LOC (src, input_location);
 
   /* Offset from the beginning of the string in bytes.  */
   tree byteoff;
-  src = string_constant (src, &byteoff);
-  if (src == 0)
+  /* Set if array is nul-terminated, false otherwise.  */
+  bool nulterm;
+  src = string_constant (src, &byteoff, &nu

commited: avoid extended initializer lists warnings

2018-07-19 Thread Martin Sebor

I've checked in the patch below as r262892 to avoid the many
warnings the new code was causing with GCC 6:

/ssd/src/gcc/svn/gcc/align.h:53:32: warning: extended initializer lists 
only available with -std=c++11 or -std=gnu++11


Martin

Index: gcc/ChangeLog
===
--- gcc/ChangeLog   (revision 262891)
+++ gcc/ChangeLog   (working copy)
@@ -1,3 +1,7 @@
+2018-07-19  Martin Sebor  
+
+   * align.h (align_flags): Use member initialization.
+
 2018-07-19  David Malcolm  

* Makefile.in (OBJS): Add optinfo.o.
Index: gcc/align.h
===
--- gcc/align.h (revision 262891)
+++ gcc/align.h (working copy)
@@ -50,8 +50,10 @@ struct align_flags
   /* Default constructor.  */
   align_flags (int log0 = 0, int maxskip0 = 0, int log1 = 0, int 
maxskip1 = 0)

   {
-levels[0] = {log0, maxskip0};
-levels[1] = {log1, maxskip1};
+levels[0].log = log0;
+levels[0].maxskip = maxskip0;
+levels[1].log = log1;
+levels[1].maxskip = maxskip1;
 normalize ();
   }

t


Re: [PATCH] fix a couple of bugs in const string folding (PR 86532)

2018-07-19 Thread Martin Sebor

Here's one more update with tweaks addressing a couple more
of Bernd's comments:

1) correct the use of TREE_STRING_LENGTH() where a number of
array elements is expected and not bytes
2) set CHARTYPE as soon as it's first determined rather than
trying to extract it again later

On 07/19/2018 01:49 PM, Martin Sebor wrote:

On 07/19/2018 01:17 AM, Richard Biener wrote:

On Wed, 18 Jul 2018, Martin Sebor wrote:


+  while (TREE_CODE (chartype) != INTEGER_TYPE)
+chartype = TREE_TYPE (chartype);

This is a bit concerning.  First under what conditions is chartype not
going to be an INTEGER_TYPE?  And under what conditions will
extracting
its type ultimately lead to something that is an INTEGER_TYPE?


chartype is usually (maybe even always) pointer type here:

  const char a[] = "123";
  extern int i;
  n = strlen (&a[i]);


But your hunch was correct that the loop isn't safe because
the element type need not be an integer (I didn't know/forgot
that the function is called for non-strings too).  The loop
should be replaced by:

  while (TREE_CODE (chartype) == ARRAY_TYPE
 || TREE_CODE (chartype) == POINTER_TYPE)
chartype = TREE_TYPE (chartype);


As this function may be called "late" you need to cope with
the middle-end ignoring type changes and thus happily
passing int *** directly rather than (char *) of that.

Also doesn't the above yield int for int *[]?


I don't think it ever gets this far for either a pointer to
an array of int, or for an array of pointers to int.  So for
something like the following the function fails earlier:

  const int* const a[2] = { ... };
  const char* (const *p)[2] = &a;

  int f (void)
  {
return __builtin_memcmp (*p, "12345678", 8);
  }

(Assuming this is what you were asking about.)


I guess you really want

   if (POINTER_TYPE_P (chartype))
 chartype = TREE_TYPE (chartype);
   while (TREE_CODE (chartype) == ARRAY_TYPE)
 chartype = TREE_TYPE (chartype);

?


That seems to work too.  Attached is an update with this tweak.
The update also addresses some of Bernd's comments: it removes
the pointless second test in:

if (TREE_CODE (type) == ARRAY_TYPE
&& TREE_CODE (type) != INTEGER_TYPE)

the unused assignment to chartype in:

   else if (DECL_P (arg))
 {
   array = arg;
   chartype = TREE_TYPE (arg);
 }

and calls string_constant() instead of strnlen() to compute
the length of a generic string.

Other improvements  are possible in this area but they are
orthogonal to the bug I'm trying to fix so I'll post separate
patches for some of those.

Martin


PR tree-optimization/86532 - Wrong code due to a wrong strlen folding starting with r262522

gcc/ChangeLog:

	PR tree-optimization/86532
	* builtins.h (string_length): Declare.
	* builtins.c (c_strlen): Correct handling of non-constant offsets.	
	(check_access): Be prepared for non-constant length ranges.
	(string_length): Make extern.
	* expr.c (string_constant): Only handle the minor non-constant
	array index.  Use string_constant to compute the length of
	a generic string constant.

gcc/testsuite/ChangeLog:

	PR tree-optimization/86532
	* gcc.c-torture/execute/strlen-2.c: New test.
	* gcc.c-torture/execute/strlen-3.c: New test.

diff --git a/gcc/builtins.c b/gcc/builtins.c
index c069d66..ceb477d 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -517,11 +517,11 @@ get_pointer_alignment (tree exp)
   return align;
 }
 
-/* Return the number of non-zero elements in the sequence
+/* Return the number of leading non-zero elements in the sequence
[ PTR, PTR + MAXELTS ) where each element's size is ELTSIZE bytes.
ELTSIZE must be a power of 2 less than 8.  Used by c_strlen.  */
 
-static unsigned
+unsigned
 string_length (const void *ptr, unsigned eltsize, unsigned maxelts)
 {
   gcc_checking_assert (eltsize == 1 || eltsize == 2 || eltsize == 4);
@@ -605,14 +605,21 @@ c_strlen (tree src, int only_value)
 
   /* Set MAXELTS to sizeof (SRC) / sizeof (*SRC) - 1, the maximum possible
  length of SRC.  Prefer TYPE_SIZE() to TREE_STRING_LENGTH() if possible
- in case the latter is less than the size of the array.  */
-  HOST_WIDE_INT maxelts = TREE_STRING_LENGTH (src);
+ in case the latter is less than the size of the array, such as when
+ SRC refers to a short string literal used to initialize a large array.
+ In that case, the elements of the array after the terminating NUL are
+ all NUL.  */
+  HOST_WIDE_INT strelts = TREE_STRING_LENGTH (src);
+  strelts = strelts / eltsize - 1;
+
+  HOST_WIDE_INT maxelts = strelts;
   tree type = TREE_TYPE (src);
   if (tree size = TYPE_SIZE_UNIT (type))
 if (tree_fits_shwi_p (size))
-  maxelts = tree_to_uhwi (size);
-
-  maxelts = maxelts / eltsize - 1;
+  {
+	maxelts = tree_to_uhwi (size);
+	maxelts = maxelts / eltsize - 1;
+  }
 
   /* PTR can point to the byte representation of any string t

Re: [PATCH] fix a couple of bugs in const string folding (PR 86532)

2018-07-19 Thread Martin Sebor

On 07/19/2018 02:45 PM, Bernd Edlinger wrote:

@@ -11413,8 +11429,10 @@ string_constant (tree arg, tree *ptr_offset)
 const char a[4] = "abc\000\000";
 The excess elements contribute to TREE_STRING_LENGTH()
 but not to strlen().  */
-  unsigned HOST_WIDE_INT length
-= strnlen (TREE_STRING_POINTER (init), TREE_STRING_LENGTH (init));
+  unsigned HOST_WIDE_INT charsize
+= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (init;
+  unsigned HOST_WIDE_INT length = TREE_STRING_LENGTH (init);
+  length = string_length (TREE_STRING_POINTER (init), charsize, length);
  if (compare_tree_int (array_size, length + 1) < 0)
return NULL_TREE;


But TREE_STRING_LENGTH is the length in bytes including NUL-termination.
then length is passed to string_length with expects it in units of charsize.
and returns number of characters.
then compare_tree_int compares array_size which is in units of bytes,
but not the size of the innermost enclosing array.


Whoops.  I forgot TREE_STRING_LENGTH(s) is really sizeof(s),
not the length or size in characters.  Not the most fortunate
name.  The update to this patch I sent corrects this mistake.
The subsequent patch I sent to implement the warning whose
absence you lamented for non-nul-terminated strings tightens
up the bound on the array size.


I really don't see why we need to support wide characters especially
when there is no reasonable test coverage, and no usable wstrlen
builtin first.


I'm not sure what special support are you talking about.
The string_constant function handles all kinds of strings
and the change above doesn't affect that.  The purpose of
the check above is to conservatively fail for constant
arrays with excess initializers such as in the comment:

const char a[4] = "abc\000\000";

Defining such strings is undefined, as is using out-of-bounds
offsets into such things, so a permissive check isn't inherently
wrong, nor would it be incorrect to remove it and accept and
fold them.  But neither is essential to the fix for this bug.

If you are you referring to the handling of non-const offsets
here, those are already handled in c_strlen() for a subset of
cases (plain ARRAY_REFs).  This change just makes it consistent
for all references, including those to multi-dimensinal arrays
and other aggregates.  (I noticed it while some of my tests
for the fix were failing.)

Martin



Re: [PATCH] restore -Warray-bounds for string literals (PR 83776)

2018-07-19 Thread Martin Sebor

On 07/19/2018 03:51 PM, Jeff Law wrote:

On 07/13/2018 05:45 PM, Martin Sebor wrote:


+  offset_int ofr[] = {
+   wi::to_offset (fold_convert (ptrdiff_type_node, vr->min)),
+   wi::to_offset (fold_convert (ptrdiff_type_node, vr->max))
+  };

huh.  Do you maybe want to use widest_int for ofr[]?  What's
wrong with wi::to_offset (vr->min)?  Building another intermediate
tree node here looks just bogus.


I need to convert size_type indices to signed to keep their sign
if it's negative and include it in warnings.  I've moved the code
into a conditional where it's used to minimize the cost but other
than that I don't know how else to convert it.



What are you trying to do in this loop anyways?


The loop It determines the range of the final index/offset for
a series of POINTER_PLUS_EXPRs.  It handles cases like this:

  int g (int i, int j, int k)
  {
if (i < 1) i = 1;
if (j < 1) j = 1;
if (k < 1) k = 1;

const char *p0 = "123";
const char *p1 = p0 + i;
const char *p2 = p1 + j;
const char *p3 = p2 + k;

// p2[3] and p3[1] are out of bounds
return p0[4] + p1[3] + p2[2] + p3[1];
  }


I suppose
look at

  p_1 = &obj[i_2];  // already bounds-checked, but with ignore_off_by_one
  ... = MEM[p_1 + CST];

?  But then

+  if (TREE_CODE (varoff) != SSA_NAME)
+   break;

you should at least handle INTEGER_CSTs here?


It's already handled (and set in CSTOFF).  There should be no
more constant offsets after that (I haven't come across any.)



+  if (!vr || vr->type == VR_UNDEFINED || !vr->min || !vr->max)
+   break;

please use positive tests here, VR_RANGE || VR_ANTI_RANGE.  As usual
the anti-range handling looks odd.  Please add comments so we can follow
what you were thinking when writing range merging code.  Even better if you

can stick to use existing code rather than always re-inventing the wheel...



The anti-range handling is to conservatively add
[-MAXOBJSIZE -1, MAXOBJSIZE] to OFFRANGE.  I've added comments
to make it clear.  I'd be more than happy to reuse existing
code if I knew where to find it (if it exists).  It sure would
save me lots of work to have a library of utility functions to
call instead of rolling my own code each time.

Finding stuff is never easy :(  GCC's just gotten so big it's virtually
impossible for anyone to know all the APIs.

The suggestion I'd have would be to (when possible) factor this stuff
into routines you can reuse.  We (as a whole) have this tendency to
open-code all kinds of things rather than factoring the code into
reusable routines.

In addition to increasing the probability that you'll be able to reuse
code later, just reading a new function's header tends to make me (as a
reviewer) internally ask if there's already a routine we should be using
instead.  When it's open-coded it's significantly harder to spot those
cases (at least for me).






I think I commented on earlier variants but this doesn't seem to resemble
any of them.


I've reworked the patch (sorry) to also handle arrays.  For GCC
9 it seems I might as well do both in one go.

Attached is an updated patch with these changes.

Martin

gcc-83776.diff


PR tree-optimization/84047 - missing -Warray-bounds on an out-of-bounds index 
into an array
PR tree-optimization/83776 - missing -Warray-bounds indexing past the end of a 
string literal

gcc/ChangeLog:

PR tree-optimization/84047
PR tree-optimization/83776
* tree-vrp.c (vrp_prop::check_mem_ref): New function.
(check_array_bounds): Call it.
* /gcc/tree-sra.c (get_access_for_expr): Fail for out-of-bounds
array offsets.

gcc/testsuite/ChangeLog:

PR tree-optimization/83776
PR tree-optimization/84047
* gcc.dg/Warray-bounds-29.c: New test.
* gcc.dg/Warray-bounds-30.c: New test.
* gcc.dg/Warray-bounds-31.c: New test.
* gcc.dg/Warray-bounds-32.c: New test.




diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
index 3e30f6b..8221a06 100644
--- a/gcc/tree-sra.c
+++ b/gcc/tree-sra.c
@@ -3110,6 +3110,19 @@ get_access_for_expr (tree expr)
   || !DECL_P (base))
 return NULL;

+  /* Fail for out-of-bounds array offsets.  */
+  tree basetype = TREE_TYPE (base);
+  if (TREE_CODE (basetype) == ARRAY_TYPE)
+{
+  if (offset < 0)
+   return NULL;
+
+  if (tree size = DECL_SIZE (base))
+   if (tree_fits_uhwi_p (size)
+   && tree_to_uhwi (size) < (unsigned HOST_WIDE_INT) offset)
+ return NULL;
+}
+
   if (!bitmap_bit_p (candidate_bitmap, DECL_UID (base)))
 return NULL;

So I'm a bit curious about this hunk.  Did we end up creating an access
structure that walked off the end of the array?  Presumably  you
suppressing SRA at this point so that you can see the array access later
and give a suitable warning.  Right?



Re: [PATCH] restore -Warray-bounds for string literals (PR 83776)

2018-07-20 Thread Martin Sebor

On 07/20/2018 05:34 AM, Rainer Orth wrote:

Hi Martin,


On 07/19/2018 03:51 PM, Jeff Law wrote:

On 07/13/2018 05:45 PM, Martin Sebor wrote:


+  offset_int ofr[] = {
+   wi::to_offset (fold_convert (ptrdiff_type_node, vr->min)),
+   wi::to_offset (fold_convert (ptrdiff_type_node, vr->max))
+  };

huh.  Do you maybe want to use widest_int for ofr[]?  What's
wrong with wi::to_offset (vr->min)?  Building another intermediate
tree node here looks just bogus.


I need to convert size_type indices to signed to keep their sign
if it's negative and include it in warnings.  I've moved the code
into a conditional where it's used to minimize the cost but other
than that I don't know how else to convert it.



What are you trying to do in this loop anyways?


The loop It determines the range of the final index/offset for
a series of POINTER_PLUS_EXPRs.  It handles cases like this:

  int g (int i, int j, int k)
  {
if (i < 1) i = 1;
if (j < 1) j = 1;
if (k < 1) k = 1;

const char *p0 = "123";
const char *p1 = p0 + i;
const char *p2 = p1 + j;
const char *p3 = p2 + k;

// p2[3] and p3[1] are out of bounds
return p0[4] + p1[3] + p2[2] + p3[1];
  }


I suppose
look at

  p_1 = &obj[i_2];  // already bounds-checked, but with ignore_off_by_one
  ... = MEM[p_1 + CST];

?  But then

+  if (TREE_CODE (varoff) != SSA_NAME)
+   break;

you should at least handle INTEGER_CSTs here?


It's already handled (and set in CSTOFF).  There should be no
more constant offsets after that (I haven't come across any.)



+  if (!vr || vr->type == VR_UNDEFINED || !vr->min || !vr->max)
+   break;

please use positive tests here, VR_RANGE || VR_ANTI_RANGE.  As usual
the anti-range handling looks odd.  Please add comments so we can follow
what you were thinking when writing range merging code.  Even better if you

can stick to use existing code rather than always re-inventing the wheel...



The anti-range handling is to conservatively add
[-MAXOBJSIZE -1, MAXOBJSIZE] to OFFRANGE.  I've added comments
to make it clear.  I'd be more than happy to reuse existing
code if I knew where to find it (if it exists).  It sure would
save me lots of work to have a library of utility functions to
call instead of rolling my own code each time.

Finding stuff is never easy :(  GCC's just gotten so big it's virtually
impossible for anyone to know all the APIs.

The suggestion I'd have would be to (when possible) factor this stuff
into routines you can reuse.  We (as a whole) have this tendency to
open-code all kinds of things rather than factoring the code into
reusable routines.

In addition to increasing the probability that you'll be able to reuse
code later, just reading a new function's header tends to make me (as a
reviewer) internally ask if there's already a routine we should be using
instead.  When it's open-coded it's significantly harder to spot those
cases (at least for me).






I think I commented on earlier variants but this doesn't seem to resemble
any of them.


I've reworked the patch (sorry) to also handle arrays.  For GCC
9 it seems I might as well do both in one go.

Attached is an updated patch with these changes.

Martin

gcc-83776.diff


PR tree-optimization/84047 - missing -Warray-bounds on an out-of-bounds
index into an array
PR tree-optimization/83776 - missing -Warray-bounds indexing past the
end of a string literal

gcc/ChangeLog:

PR tree-optimization/84047
PR tree-optimization/83776
* tree-vrp.c (vrp_prop::check_mem_ref): New function.
(check_array_bounds): Call it.
* /gcc/tree-sra.c (get_access_for_expr): Fail for out-of-bounds
array offsets.

gcc/testsuite/ChangeLog:

PR tree-optimization/83776
PR tree-optimization/84047
* gcc.dg/Warray-bounds-29.c: New test.
* gcc.dg/Warray-bounds-30.c: New test.
* gcc.dg/Warray-bounds-31.c: New test.
* gcc.dg/Warray-bounds-32.c: New test.




diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
index 3e30f6b..8221a06 100644
--- a/gcc/tree-sra.c
+++ b/gcc/tree-sra.c
@@ -3110,6 +3110,19 @@ get_access_for_expr (tree expr)
   || !DECL_P (base))
 return NULL;

+  /* Fail for out-of-bounds array offsets.  */
+  tree basetype = TREE_TYPE (base);
+  if (TREE_CODE (basetype) == ARRAY_TYPE)
+{
+  if (offset < 0)
+   return NULL;
+
+  if (tree size = DECL_SIZE (base))
+   if (tree_fits_uhwi_p (size)
+   && tree_to_uhwi (size) < (unsigned HOST_WIDE_INT) offset)
+ return NULL;
+}
+
   if (!bitmap_bit_p (candidate_bitmap, DECL_UID (base)))
 return NULL;

So I'm a bit curious about this hunk.  Did we end up creating an access
structure that walked off the end of the array?  Presumably  you
suppressing SRA at this point so that you can see the arra

Re: [PATCH] specify large command line option arguments (PR 82063)

2018-07-20 Thread Martin Sebor

On 07/19/2018 04:31 PM, Jeff Law wrote:

On 06/24/2018 03:05 PM, Martin Sebor wrote:

Storing integer command line option arguments in type int
limits options such as -Wlarger-than= or -Walloca-larger-than
to at most INT_MAX (see bug 71905).  Larger values wrap around
zero.  The value zero is considered to disable the option,
making it impossible to specify a zero limit.

To get around these limitations, the -Walloc-size-larger-than=
option accepts a string argument that it then parses itself
and interprets as HOST_WIDE_INT.  The option also accepts byte
size suffixes like KB, MB, GiB, etc. to make it convenient to
specify very large limits.

The int limitation is obviously less than ideal in a 64-bit
world.  The treatment of zero as a toggle is just a minor wart.
The special treatment to make it work for just a single option
makes option handling inconsistent.  It should be possible for
any option that takes an integer argument to use the same logic.

The attached patch enhances GCC option processing to do that.
It changes the storage type of option arguments from int to
HOST_WIDE_INT and extends the existing (although undocumented)
option property Host_Wide_Int to specify wide option arguments.
It also introduces the ByteSize property for options for which
specifying the byte-size suffix makes sense.

To make it possible to consider zero as a meaningful argument
value rather than a flag indicating that an option is disabled
the patch also adds a CLVC_SIZE enumerator to the cl_var_type
enumeration, and modifies how options of the kind are handled.

Warning options that take large byte-size arguments can be
disabled by specifying a value equal to or greater than
HOST_WIDE_INT_M1U.  For convenience, aliases in the form of
-Wno-xxx-larger-than have been provided for all the affected
options.

In the patch all the existing -larger-than options are set
to PTRDIFF_MAX.  This makes them effectively enabled, but
because the setting is exceedingly permissive, and because
some of the existing warnings are already set to the same
value and some other checks detect and reject such exceedingly
large values with errors, this change shouldn't noticeably
affect what constructs are diagnosed.

Although all the options are set to PTRDIFF_MAX, I think it
would make sense to consider setting some of them lower, say
to PTRDIFF_MAX / 2.  I'd like to propose that in a followup
patch.

To minimize observable changes the -Walloca-larger-than and
-Wvla-larger-than warnings required more extensive work to
make of the new mechanism because of the "unbounded" argument
handling (the warnings trigger for arguments that are not
visibly constrained), and because of the zero handling
(the warnings also trigger


Martin


gcc-82063.diff


PR middle-end/82063 - issues with arguments enabled by -Wall

gcc/ada/ChangeLog:

PR middle-end/82063
* gcc-interface/misc.c (gnat_handle_option): Change function argument
to HOST_WIDE_INT.

gcc/brig/ChangeLog:
* brig/brig-lang.c (brig_langhook_handle_option): Change function
argument to HOST_WIDE_INT.

gcc/c-family/ChangeLog:

PR middle-end/82063
* c-common.h (c_common_handle_option): Change function argument
to HOST_WIDE_INT.
* c-opts.c (c_common_init_options): Same.
(c_common_handle_option): Same.  Remove special handling of
OPT_Walloca_larger_than_ and OPT_Wvla_larger_than_.
* c.opt (-Walloc-size-larger-than, -Walloca-larger-than): Change
options to take a HOST_WIDE_INT argument and accept a byte-size
suffix.  Initialize.
(-Wvla-larger-than): Same.
(-Wno-alloc-size-larger-than, -Wno-alloca-larger-than): New.
(-Wno-vla-larger-than): Same.


gcc/fortran/ChangeLog:

PR middle-end/82063
* gfortran.h (gfc_handle_option): Change function argument
to HOST_WIDE_INT.
* options.c (gfc_handle_option): Same.

gcc/go/ChangeLog:

PR middle-end/82063
* go-lang.c (go_langhook_handle_option): Change function argument
to HOST_WIDE_INT.

gcc/lto/ChangeLog:

PR middle-end/82063
* lto-lang.c (lto_handle_option): Change function argument
to HOST_WIDE_INT.

gcc/testsuite/ChangeLog:

PR middle-end/82063
* gcc.dg/Walloc-size-larger-than-16.c: Adjust.
* gcc.dg/Walloca-larger-than.c: New test.
* gcc.dg/Wframe-larger-than-2.c: New test.
* gcc.dg/Wlarger-than3.c: New test.
* gcc.dg/Wvla-larger-than-3.c: New test.

gcc/ChangeLog:

PR middle-end/82063
* builtins.c (expand_builtin_alloca): Adjust.
* calls.c (alloc_max_size): Simplify.
* cgraphunit.c (cgraph_node::expand): Adjust.
* common.opt (larger_than_size, warn_frame_larger_than): Remove
variables.
(frame_larger_than_size): Same.
(-Wframe-larger-than, -Wlarger-than, -Wstack-usage): Change options
to take a HOST_W

committed: remove redundant -Wall from -Warray-bounds (PR 82063)

2018-07-20 Thread Martin Sebor

As the last observation in PR 82063 Jim points out that

  Both -Warray-bounds and -Warray-bounds= are listed in the c.opt
  file as being enabled by -Wall, but they are the same option,
  and it causes this one option to be processed twice in the
  C_handle_option_auto function in the generated options.c file.
  It gets set to the same value twice, so it does work as intended,
  but this is wasteful.

I have removed the redundant -Wall from the first option and
committed the change as obvious in r262912.

Martin


Re: [PATCH] fix a couple of bugs in const string folding (PR 86532)

2018-07-20 Thread Martin Sebor

On 07/20/2018 08:51 AM, Bernd Edlinger wrote:

Martin,

when I look at tree-ssa-forwprop.c:

   str1 = string_constant (src1, &off1);
   if (str1 == NULL_TREE)
 break;
   if (!tree_fits_uhwi_p (off1)
   || compare_tree_int (off1, TREE_STRING_LENGTH (str1) - 1) > 0
   || compare_tree_int (len1, TREE_STRING_LENGTH (str1)
  - tree_to_uhwi (off1)) > 0
   || TREE_CODE (TREE_TYPE (str1)) != ARRAY_TYPE
   || TYPE_MODE (TREE_TYPE (TREE_TYPE (str1)))
  != TYPE_MODE (char_type_node))
 break;

I don't think it is a good idea to let string_constant return
strings which have not necessarily valid content after the first
NUL byte.  It looks like the content has to be valid up to
TREE_STRING_LENGTH.


I'm not sure I understand when this could happen.  From the patch
below it sounds like you are concerned about cases like:

  const char a[4] = "abc\000\000";

where the STRING_CST is bigger than the array.  But the string
constant has valid content here up to TREE_STRING_LENGTH.  Am
I missing something?  (A test case would be helpful.)

In any case, unless the concern is specifically related to
this bug fix let's discuss it separately so I can fix this
bug.  I'm not opposed to making further changes here (in
fact, I have one in the queue and two others that I raised
in Bugzilla in response to our discussion so far), I just
want to avoid mission creep.

Martin



So may I suggest to do something like the following
(diff to your last patch):

--- expr.c.jj   2018-07-20 10:51:51.983605588 +0200
+++ expr.c  2018-07-20 15:07:29.769423479 +0200
@@ -11277,6 +11277,7 @@ tree
  string_constant (tree arg, tree *ptr_offset)
  {
tree array;
+  tree array_size;
STRIP_NOPS (arg);

/* Non-constant index into the character array in an ARRAY_REF
@@ -11295,9 +11296,11 @@ string_constant (tree arg, tree *ptr_off

arg = TREE_OPERAND (arg, 0);
tree ref = arg;
+  tree reftype = TREE_TYPE (ref);
if (TREE_CODE (arg) == ARRAY_REF)
{
  tree idx = TREE_OPERAND (arg, 1);
+ reftype = TREE_TYPE (TREE_OPERAND (arg, 0));
  if (TREE_CODE (idx) != INTEGER_CST
  && TREE_CODE (argtype) == POINTER_TYPE)
{
@@ -11313,6 +11316,11 @@ string_constant (tree arg, tree *ptr_off
return NULL_TREE;
}
}
+  if (TREE_CODE (reftype) != ARRAY_TYPE)
+   return NULL_TREE;
+  while (TREE_CODE (TREE_TYPE (reftype)) == ARRAY_TYPE)
+   reftype = TREE_TYPE (reftype);
+  array_size = TYPE_SIZE_UNIT (reftype);
array = get_addr_base_and_unit_offset (ref, &base_off);
if (!array
  || (TREE_CODE (array) != VAR_DECL
@@ -11345,7 +11353,10 @@ string_constant (tree arg, tree *ptr_off
return NULL_TREE;
  }
else if (DECL_P (arg))
-array = arg;
+{
+  array = arg;
+  array_size = DECL_SIZE_UNIT (array);
+}
else
  return NULL_TREE;

@@ -11410,24 +11421,18 @@ string_constant (tree arg, tree *ptr_off
if (!init || TREE_CODE (init) != STRING_CST)
  return NULL_TREE;

-  tree array_size = DECL_SIZE_UNIT (array);
if (!array_size || TREE_CODE (array_size) != INTEGER_CST)
  return NULL_TREE;

/* Avoid returning a string that doesn't fit in the array
- it is stored in, like
+ it is stored in, like:
   const char a[4] = "abcde";
- but do handle those that fit even if they have excess
- initializers, such as in
- const char a[4] = "abc\000\000";
- The excess elements contribute to TREE_STRING_LENGTH()
- but not to strlen().  */
-  unsigned HOST_WIDE_INT charsize
-= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (init;
+ ... or:
+ const char a[4] = "abc\000";
+ ... because some callers access the string up to the limit
+ imposed by TREE_STRING_LENGTH.  */
unsigned HOST_WIDE_INT length = TREE_STRING_LENGTH (init);
-  length = string_length (TREE_STRING_POINTER (init), charsize,
- length / charsize);
-  if (compare_tree_int (array_size, length + 1) < 0)
+  if (compare_tree_int (array_size, length) < 0)
  return NULL_TREE;

*ptr_offset = offset;


Considering Richard's last comment, I don't know if TYPE_SIZE_UNIT
of the ARRAY_TYPE is the correct way to get the size of the
innermost arrayhere, but I think we will need it.



Bernd.





Re: [PATCH] fix a couple of bugs in const string folding (PR 86532)

2018-07-20 Thread Martin Sebor

On 07/20/2018 03:11 PM, Bernd Edlinger wrote:

I think I have found a valid test case where the latest patch
does generate invalid code:


This is due to another bug in string_constant() that's latent
on trunk but that's exposed by the patch.  Trunk only "works"
because of a bug/limitation in c_strlen() that the patch fixes
or removes.

The underlying root cause is the handling of POINTER_PLUS
expressions in string_constant().  The original code (before
the handling of aggregates was added) just dealt with string
constants.  The new code does much more but doesn't get it
quite right in these cases.

It shouldn't be too difficult to fix, but as valuable as
the testing you are doing is, I'd really prefer to focus
on one problem at a time and make incremental progress.
It will make it easier to track down and fix regressions
than lumping multiple fixes into a larger patch.

Martin



$ cat part.c
static const char a[3][8] = { "1234", "12345", "123456" };

int main ()
{
   volatile int i = 1;
   int n = __builtin_strlen (*(&a[1]+i));

   if (n != 6)
 __builtin_abort ();
}
$ gcc part.c -fdump-tree-original
$ ./a.out
Aborted (core dumped)
$ cat part.c.004t.original

;; Function main (null)
;; enabled by -tree-original


{
   volatile int i = 1;
   int n = (ssizetype) (SAVE_EXPR <(long unsigned int) i * 8>) <= 5 ? (int) (5 - 
(unsigned int) (SAVE_EXPR <(long unsigned int) i * 8>)) : 0;

 volatile int i = 1;
 int n = (ssizetype) (SAVE_EXPR <(long unsigned int) i * 8>) <= 5 ? (int) (5 - 
(unsigned int) (SAVE_EXPR <(long unsigned int) i * 8>)) : 0;
   if (n != 6)
 {
   __builtin_abort ();
 }
}
return 0;


string_constant is called with arg = (const char *) (&a[1] + (sizetype) ((long 
unsigned int) i * 8))




Re: [PATCH] specify large command line option arguments (PR 82063)

2018-07-22 Thread Martin Sebor

On 07/21/2018 06:42 PM, H.J. Lu wrote:

On Fri, Jul 20, 2018 at 1:57 PM, Martin Sebor  wrote:

On 07/19/2018 04:31 PM, Jeff Law wrote:


On 06/24/2018 03:05 PM, Martin Sebor wrote:


Storing integer command line option arguments in type int
limits options such as -Wlarger-than= or -Walloca-larger-than
to at most INT_MAX (see bug 71905).  Larger values wrap around
zero.  The value zero is considered to disable the option,
making it impossible to specify a zero limit.

To get around these limitations, the -Walloc-size-larger-than=
option accepts a string argument that it then parses itself
and interprets as HOST_WIDE_INT.  The option also accepts byte
size suffixes like KB, MB, GiB, etc. to make it convenient to
specify very large limits.

The int limitation is obviously less than ideal in a 64-bit
world.  The treatment of zero as a toggle is just a minor wart.
The special treatment to make it work for just a single option
makes option handling inconsistent.  It should be possible for
any option that takes an integer argument to use the same logic.

The attached patch enhances GCC option processing to do that.
It changes the storage type of option arguments from int to
HOST_WIDE_INT and extends the existing (although undocumented)
option property Host_Wide_Int to specify wide option arguments.
It also introduces the ByteSize property for options for which
specifying the byte-size suffix makes sense.

To make it possible to consider zero as a meaningful argument
value rather than a flag indicating that an option is disabled
the patch also adds a CLVC_SIZE enumerator to the cl_var_type
enumeration, and modifies how options of the kind are handled.

Warning options that take large byte-size arguments can be
disabled by specifying a value equal to or greater than
HOST_WIDE_INT_M1U.  For convenience, aliases in the form of
-Wno-xxx-larger-than have been provided for all the affected
options.

In the patch all the existing -larger-than options are set
to PTRDIFF_MAX.  This makes them effectively enabled, but
because the setting is exceedingly permissive, and because
some of the existing warnings are already set to the same
value and some other checks detect and reject such exceedingly
large values with errors, this change shouldn't noticeably
affect what constructs are diagnosed.

Although all the options are set to PTRDIFF_MAX, I think it
would make sense to consider setting some of them lower, say
to PTRDIFF_MAX / 2.  I'd like to propose that in a followup
patch.

To minimize observable changes the -Walloca-larger-than and
-Wvla-larger-than warnings required more extensive work to
make of the new mechanism because of the "unbounded" argument
handling (the warnings trigger for arguments that are not
visibly constrained), and because of the zero handling
(the warnings also trigger


Martin


gcc-82063.diff


PR middle-end/82063 - issues with arguments enabled by -Wall

gcc/ada/ChangeLog:

PR middle-end/82063
* gcc-interface/misc.c (gnat_handle_option): Change function
argument
to HOST_WIDE_INT.

gcc/brig/ChangeLog:
* brig/brig-lang.c (brig_langhook_handle_option): Change function
argument to HOST_WIDE_INT.

gcc/c-family/ChangeLog:

PR middle-end/82063
* c-common.h (c_common_handle_option): Change function argument
to HOST_WIDE_INT.
* c-opts.c (c_common_init_options): Same.
(c_common_handle_option): Same.  Remove special handling of
OPT_Walloca_larger_than_ and OPT_Wvla_larger_than_.
* c.opt (-Walloc-size-larger-than, -Walloca-larger-than): Change
options to take a HOST_WIDE_INT argument and accept a byte-size
suffix.  Initialize.
(-Wvla-larger-than): Same.
(-Wno-alloc-size-larger-than, -Wno-alloca-larger-than): New.
(-Wno-vla-larger-than): Same.


gcc/fortran/ChangeLog:

PR middle-end/82063
* gfortran.h (gfc_handle_option): Change function argument
to HOST_WIDE_INT.
* options.c (gfc_handle_option): Same.

gcc/go/ChangeLog:

PR middle-end/82063
* go-lang.c (go_langhook_handle_option): Change function argument
to HOST_WIDE_INT.

gcc/lto/ChangeLog:

PR middle-end/82063
* lto-lang.c (lto_handle_option): Change function argument
to HOST_WIDE_INT.

gcc/testsuite/ChangeLog:

PR middle-end/82063
* gcc.dg/Walloc-size-larger-than-16.c: Adjust.
* gcc.dg/Walloca-larger-than.c: New test.
* gcc.dg/Wframe-larger-than-2.c: New test.
* gcc.dg/Wlarger-than3.c: New test.
* gcc.dg/Wvla-larger-than-3.c: New test.

gcc/ChangeLog:

PR middle-end/82063
* builtins.c (expand_builtin_alloca): Adjust.
* calls.c (alloc_max_size): Simplify.
* cgraphunit.c (cgraph_node::expand): Adjust.
* common.opt (larger_than_size, warn_frame_larger_than): Remove
variables.
(frame_larger_than_size): Same.

Re: [PATCH] fix a couple of bugs in const string folding (PR 86532)

2018-07-22 Thread Martin Sebor

On 07/22/2018 02:00 AM, Bernd Edlinger wrote:

On 07/21/18 00:15, Martin Sebor wrote:

On 07/20/2018 08:51 AM, Bernd Edlinger wrote:

Martin,

when I look at tree-ssa-forwprop.c:

   str1 = string_constant (src1, &off1);
   if (str1 == NULL_TREE)
 break;
   if (!tree_fits_uhwi_p (off1)
   || compare_tree_int (off1, TREE_STRING_LENGTH (str1) - 1) > 0
   || compare_tree_int (len1, TREE_STRING_LENGTH (str1)
  - tree_to_uhwi (off1)) > 0
   || TREE_CODE (TREE_TYPE (str1)) != ARRAY_TYPE
   || TYPE_MODE (TREE_TYPE (TREE_TYPE (str1)))
  != TYPE_MODE (char_type_node))
 break;

I don't think it is a good idea to let string_constant return
strings which have not necessarily valid content after the first
NUL byte.  It looks like the content has to be valid up to
TREE_STRING_LENGTH.


I'm not sure I understand when this could happen.  From the patch
below it sounds like you are concerned about cases like:

   const char a[4] = "abc\000\000";

where the STRING_CST is bigger than the array.  But the string
constant has valid content here up to TREE_STRING_LENGTH.  Am
I missing something?  (A test case would be helpful.)



No, I mean something like:

$ cat y.c
const char a[2][3] = { "1234", "xyz" };
char b[6];

int main ()
{
   __builtin_memcpy(b, a, 4);
   __builtin_memset(b + 4, 'a', 2);
   __builtin_printf("%.6s\n", b);
}
$ gcc y.c
y.c:1:24: warning: initializer-string for array of chars is too long
  const char a[2][3] = { "1234", "xyz" };
 ^~
y.c:1:24: note: (near initialization for 'a[0]')
$ ./a.out
1234aa

but expected would be "123xaa".


Hmm.  I assumed this was undefined in C but after double
checking I'm not sure.  If it's in fact valid and the excess
elements are required to be ignored I'll of course fix it in
a subsequent patch.  Let me find out.


In any case, unless the concern is specifically related to
this bug fix let's discuss it separately so I can fix this
bug.  I'm not opposed to making further changes here (in
fact, I have one in the queue and two others that I raised
in Bugzilla in response to our discussion so far), I just
want to avoid mission creep.



I think when you touch a function you should fix it completely
or restrict it to the subset that is known to be safe, and not
leave lots of already known bugs behind.


I find it preferable to open a new bug for separate problems
and tackle each on its own.  That makes it possible to track
separate bugs independently, make sure each fix is adequately
tested, and backport patches to release branches as necessary.
Wholesale changes tend to make this more difficult.

If the code in your test case is in fact well-defined then
it's especially important that a test case be added for it
because none exists yet, and opening a separate bug makes
sure this happens.

Martin


Re: [PATCH] fix a couple of bugs in const string folding (PR 86532)

2018-07-22 Thread Martin Sebor

On 07/22/2018 03:03 AM, Bernd Edlinger wrote:

On 07/21/18 01:58, Martin Sebor wrote:

On 07/20/2018 03:11 PM, Bernd Edlinger wrote:

I think I have found a valid test case where the latest patch
does generate invalid code:


This is due to another bug in string_constant() that's latent
on trunk but that's exposed by the patch.  Trunk only "works"
because of a bug/limitation in c_strlen() that the patch fixes
or removes.



I am not sure if that falls under the definition of "latent" bug.
A latent bug would be something unexpected in a completely different
area of the compiler that is triggered by an improved optimization
or a fixed bug elsewhere.


The underlying root cause is the handling of POINTER_PLUS
expressions in string_constant().  The original code (before
the handling of aggregates was added) just dealt with string
constants.  The new code does much more but doesn't get it
quite right in these cases.



I think you should be much more careful with the expressions
you evaluate in string_constant.  For instance when you look
at get_addr_base_and_unit_offset, you'll see it walks through
all kinds of type casts, VIEW_CONVERT_EXPR and MEM_REF with
nonzero displacement.  When you see something like that do
not fold that on the assumption that the source code does not
have undefined behavior.

So I'd say you should add a check that there is no type
cast in the expression you parse.  If that is the case,
your optimization will certainly be wrong.


There are no casts here.  The pointer to array case is just
something I didn't think of, that's all.  The bug is in not
rejecting those.  As I explained, the original code handled
just string literals and constant character arrays.  Current
trunk deals with all kinds of constants and doesn't get all
the constraints right.

Martin


Re: [PATCH] fix a couple of bugs in const string folding (PR 86532)

2018-07-22 Thread Martin Sebor

On 07/22/2018 04:47 PM, Martin Sebor wrote:

On 07/22/2018 02:00 AM, Bernd Edlinger wrote:

On 07/21/18 00:15, Martin Sebor wrote:

On 07/20/2018 08:51 AM, Bernd Edlinger wrote:

Martin,

when I look at tree-ssa-forwprop.c:

   str1 = string_constant (src1, &off1);
   if (str1 == NULL_TREE)
 break;
   if (!tree_fits_uhwi_p (off1)
   || compare_tree_int (off1, TREE_STRING_LENGTH
(str1) - 1) > 0
   || compare_tree_int (len1, TREE_STRING_LENGTH (str1)
  - tree_to_uhwi (off1))
> 0
   || TREE_CODE (TREE_TYPE (str1)) != ARRAY_TYPE
   || TYPE_MODE (TREE_TYPE (TREE_TYPE (str1)))
  != TYPE_MODE (char_type_node))
 break;

I don't think it is a good idea to let string_constant return
strings which have not necessarily valid content after the first
NUL byte.  It looks like the content has to be valid up to
TREE_STRING_LENGTH.


I'm not sure I understand when this could happen.  From the patch
below it sounds like you are concerned about cases like:

   const char a[4] = "abc\000\000";

where the STRING_CST is bigger than the array.  But the string
constant has valid content here up to TREE_STRING_LENGTH.  Am
I missing something?  (A test case would be helpful.)



No, I mean something like:

$ cat y.c
const char a[2][3] = { "1234", "xyz" };
char b[6];

int main ()
{
   __builtin_memcpy(b, a, 4);
   __builtin_memset(b + 4, 'a', 2);
   __builtin_printf("%.6s\n", b);
}
$ gcc y.c
y.c:1:24: warning: initializer-string for array of chars is too long
  const char a[2][3] = { "1234", "xyz" };
 ^~
y.c:1:24: note: (near initialization for 'a[0]')
$ ./a.out
1234aa

but expected would be "123xaa".


Hmm.  I assumed this was undefined in C but after double
checking I'm not sure.  If it's in fact valid and the excess
elements are required to be ignored I'll of course fix it in
a subsequent patch.  Let me find out.


The WG14 convener confirmed that this is indeed undefined.
The words that apply to this case (as well as all the others)
are in 6.7.9, p2:

  No initializer shall attempt to provide a value for an object
  not contained within the entity being initialized.

If GCC wants to make this well-defined and guarantee that
the excess elements are stripped (I'm not opposed to it)
we need to treat it should be treated as an enhancement
request, so the feature can be documented and properly
tested (I'm not opposed to it but I'm not going to champion
it either.)  Otherwise, I see no reason for a change.

Martin


Re: [PATCH] specify large command line option arguments (PR 82063)

2018-07-23 Thread Martin Sebor

On 07/23/2018 02:18 AM, Jakub Jelinek wrote:

On Sun, Jul 22, 2018 at 04:43:17PM -0600, Martin Sebor wrote:

OK with the nit fixed.  IF you need to update the doc changes as a
result of the -faligned-* changes, those are pre-approved.



I had to adjust a few more tests and make a couple of minor
tweaks as I noticed a coupled of test failures that I had
previously missed but the result was didn't show any more
regressions on x86_64.

Committed in r262910.



This caused:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86624

This may impact all 32-bit hosts.


r262923 should have fixed it.


You haven't posted the patch you've committed, nobody has reviewed it and it
caused quite a lot regressions:
make check-gcc check-c++-all RUNTESTFLAGS="--target_board=unix\{-m32,-m64\} 
dg.exp='*alloc* pr42611.c'"
One can revert his own patch or parts of it, but for doing something
completely different you need to seek approval.


I committed the fix quickly to unblock those whose bootstrap was
failing due the ILP32 warning.  That seems like the right thing
to do, certainly preferable to reverting a large patch due to
a trivial oversight.

The fix doesn't do anything different than what the original
approved solution does.  It simply applies the same approach
used elsewhere to this instance of the warning.

There are outstanding test failures on ILP32 due to a bug or
limitation in the original approach (I raised bug 86631 for
those yesterday and I will look into those this week).



The patch makes very different behavior by default between 32-bit and 64-bit
targets, that is certainly undesirable.


The difference is unintentional (see bug 86631).



And as mentioned in the PR, I'm still arguing we should not make these
warnings on by default (even -Wall/-Wextra is questionable, but by
default?);


Warnings about excessive allocation (of any kind) have been
enabled by default for a couple of releases now under
-Walloc-size-larger-than=.  As I explained in the patch
description, my goal was not to change the limit under which
they trigger, at least not initially, or the logic that controls
the conditions under which they are issued.

The instance of the warning that showed up in the ILP32 build
is not intended to be enabled by default.  (It complains when
the pass is unable to determine whether an alloca call is
bounded).  There either were no tests that exercise it in
the LP64 test suite so I missed adding the same condition for
it as for the other instances of it that are not meant to be
enabled by default.  The patch I committed yesterday simply
adds that condition.

Martin


Re: [PATCH] fix a couple of bugs in const string folding (PR 86532)

2018-07-23 Thread Martin Sebor

On 07/23/2018 02:05 AM, Jakub Jelinek wrote:

On Sun, Jul 22, 2018 at 04:47:45PM -0600, Martin Sebor wrote:

No, I mean something like:

$ cat y.c
const char a[2][3] = { "1234", "xyz" };
char b[6];

int main ()
{
   __builtin_memcpy(b, a, 4);
   __builtin_memset(b + 4, 'a', 2);
   __builtin_printf("%.6s\n", b);
}
$ gcc y.c
y.c:1:24: warning: initializer-string for array of chars is too long
  const char a[2][3] = { "1234", "xyz" };
 ^~
y.c:1:24: note: (near initialization for 'a[0]')
$ ./a.out
1234aa

but expected would be "123xaa".


Hmm.  I assumed this was undefined in C but after double
checking I'm not sure.  If it's in fact valid and the excess
elements are required to be ignored I'll of course fix it in
a subsequent patch.  Let me find out.


If we just warn about the initializer and treat it some way, an optimization
should not change how the initializer is treated.
The memcpy and memset themselves must be valid and they should just copy
whatever is in the initializer without optimizations.


The calls are valid and the initializer doesn't change with or
without optimization.  The concern is that the string_constant
folds this case and returns the whole initializer rather than
taking care to avoid folding it at all, or returning just
the leading portion(*).

Since the code is undefined (and since there is a warning for
it that's enabled by default) it shouldn't matter what happens
in this case.  But if it's thought to be preferable to do either
of the other two (avoid folding or returning what fits, if
the caller asks for a non-nul terminated array) I'm fine making
that change.

It seems to me that the excessive characters should be stripped
by the front-end.  That way the middle-end won't have to worry
about what to do with apparently contradictory data.

Martin

[*] The initial (non-nul terminated) portion of the string
would only be returned to callers that explicitly request it
and are prepared handle it -- it's not on trunk yet but it is
implemented in the followup patch for bug 86552 as a mechanism
to detect uses of non-nul terminated arrays in string functions.


[PATCH] include more detail in -Warray-bounds (PR 86650)

2018-07-23 Thread Martin Sebor

(David, I'm hoping your your help here.  Please see the end.)

While looking into a recent -Warray-bounds instance in Glibc
involving inlining of large functions it became apparent that
GCC could do a better job of pinpointing the source of
the problem.

The attached patch makes a few adjustments to both
the pretty printer infrastructure and to VRP to make this
possible.  The diagnostic pretty printer already has directives
to print the inlining context for both tree and gcall* arguments,
so most of the changes just adjust things to be able to pass in
gimple* argument instead.

The only slightly interesting change is to print the declaration
to which the out-of-bounds array refers if one is known.

Tested on x86_64-linux with one regression.

The regression is in the gcc.dg/Warray-bounds.c test: the column
numbers of the warnings are off.  Adding the %G specifier to
the array bounds warnings in VRP has the unexpected effect of
expanding the extent of the underling. For instance, for a test
case like this:

  int a[10];

  void f (void)
  {
a[-1] = 0;
  }

from the expected:

   a[-1] = 0;
   ~^~~~

to this:

  a[-1] = 0;
   ~~^~~

David, do you have any idea how to avoid this?

Martin
PR tree-optimization/86650 -  -Warray-bounds missing inlining context

gcc/c/ChangeLog:

	PR tree-optimization/86650
	* c-objc-common.c (c_tree_printer): Adjust.

gcc/c-family/ChangeLog:

	PR tree-optimization/86650
	* c-format.c (local_gcall_ptr_node): Rename...
	 (local_gimple_ptr_node): ...to this.
	* c-format.h (T89_G): Adjust.

gcc/cp/ChangeLog:

	PR tree-optimization/86650
	* error.c (cp_printer): Adjust.

gcc/ChangeLog:

	PR tree-optimization/86650
	* gimple-pretty-print.c (percent_G_format): Simplify.
	* tree-diagnostic.c (default_tree_printer): Adjust.
	* tree-pretty-print.c (percent_K_format): Add argument.
	* tree-pretty-print.h: Add argument.
	* tree-vrp.c (vrp_prop::check_array_ref): Add argument.
	Print an inform message.
	(vrp_prop::check_mem_ref): Same.
	(check_array_bounds): Pass gimple statement to callees.
	* gimple-fold.c (gimple_fold_builtin_strncpy): Adjust.
	* gimple-ssa-warn-restrict.h (check_bounds_or_overlap): Replace
	gcall* argument with gimple*.
	* gimple-ssa-warn-restrict.c (check_call): Same.
	(wrestrict_dom_walker::before_dom_children): Same.
	(builtin_access::builtin_access): Same.
	(check_bounds_or_overlap): Same.
	* tree-ssa-ccp.c (pass_post_ipa_warn::execute): Adjust.
	* tree-ssa-strlen.c (maybe_diag_stxncpy_trunc): Adjust.

gcc/testsuite/ChangeLog:

	PR tree-optimization/86650
	* gcc.dg/Warray-bounds-33.c: New test.
	* gcc.dg/format/gcc_diag-10.c: Adjust.

diff --git a/gcc/c-family/c-format.c b/gcc/c-family/c-format.c
index a0192dd..705bffb 100644
--- a/gcc/c-family/c-format.c
+++ b/gcc/c-family/c-format.c
@@ -56,7 +56,7 @@ struct function_format_info
 
 /* Initialized in init_dynamic_diag_info.  */
 static GTY(()) tree local_tree_type_node;
-static GTY(()) tree local_gcall_ptr_node;
+static GTY(()) tree local_gimple_ptr_node;
 static GTY(()) tree locus;
 
 static bool decode_format_attr (tree, function_format_info *, int);
@@ -691,7 +691,7 @@ static const format_char_info gcc_diag_char_table[] =
 
   /* Custom conversion specifiers.  */
 
-  /* G requires a "gcall*" argument at runtime.  */
+  /* G requires a "gimple*" argument at runtime.  */
   { "G",   1, STD_C89, { T89_G,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "","\"",   NULL },
   /* K requires a "tree" argument at runtime.  */
   { "K",   1, STD_C89, { T89_T,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "","\"",   NULL },
@@ -722,7 +722,7 @@ static const format_char_info gcc_tdiag_char_table[] =
   { "E", 1, STD_C89, { T89_T,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q+", "",   NULL },
   { "K", 1, STD_C89, { T89_T,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "", "\"",   NULL },
 
-  /* G requires a "gcall*" argument at runtime.  */
+  /* G requires a "gimple*" argument at runtime.  */
   { "G", 1, STD_C89, { T89_G,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "", "\"",   NULL },
 
   { "v",   0, STD_C89, { T89_I,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q#",  "",   NULL },
@@ -754,7 +754,7 @@ static const format_char_info gcc_cdiag_char_table[] =
   { "E",   1, STD_C89, { T89_T,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q+", "",   NULL },
   { "K",   1, STD_C89, { T89_T,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "", "\"",   NULL },
 
-  /* G requires a "gcall*" argument at runtime.  */
+  /* G requires a "gimple*" argument at runtime.  */
   { "G",   1, STD_C89, { T89_G,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN, 

Re: [PATCH] include more detail in -Warray-bounds (PR 86650)

2018-07-23 Thread Martin Sebor

On 07/23/2018 07:20 PM, David Malcolm wrote:

On Mon, 2018-07-23 at 17:49 -0600, Martin Sebor wrote:

(David, I'm hoping your your help here.  Please see the end.)

While looking into a recent -Warray-bounds instance in Glibc
involving inlining of large functions it became apparent that
GCC could do a better job of pinpointing the source of
the problem.

The attached patch makes a few adjustments to both
the pretty printer infrastructure and to VRP to make this
possible.  The diagnostic pretty printer already has directives
to print the inlining context for both tree and gcall* arguments,
so most of the changes just adjust things to be able to pass in
gimple* argument instead.

The only slightly interesting change is to print the declaration
to which the out-of-bounds array refers if one is known.

Tested on x86_64-linux with one regression.

The regression is in the gcc.dg/Warray-bounds.c test: the column
numbers of the warnings are off.  Adding the %G specifier to
the array bounds warnings in VRP has the unexpected effect of
expanding the extent of the underling. For instance, for a test
case like this:

   int a[10];

   void f (void)
   {
 a[-1] = 0;
   }

from the expected:

a[-1] = 0;
~^~~~

to this:

   a[-1] = 0;
~~^~~

David, do you have any idea how to avoid this?


Are you referring to the the various places in your patch (in e.g.
  vrp_prop::check_array_ref
  vrp_prop::check_mem_ref
  vrp_prop::search_for_addr_array
) where the patch changed things from this form:

  warning_at (location, OPT_Warray_bounds,
  "[...format string...]", ARGS...);

to this form:

  warning_at (location, OPT_Warray_bounds,
  "%G[...format string...]", stmt, ARGS...);


Yes.



If so, there are two location_t values of interest here:
(a) the "location" value, and
(b) gimple_location (stmt)

My recollection is that %G and %K override the "location" value passed
in as the first param to the diagnostic call, overwriting it within the
diagnostic_info's text_info with the location value from the %K/%G
(which also set up the pp_ti_abstract_origin of the text_info from the
block information stashed in the ad-hoc data part of the location, so
that the pretty-printer prints the inlining chain).


Would having the pretty printer restore the location and
the block after it's done printing the context and before
processing the rest of the format string fix it?  (I have
only a vague idea how this all works so I'm not sure if
this even makes sense.)



[aside, why don't we always just print the inlining chain?  IIRC, %K
and %G feel too much like having to jump through hoops to me, given
that gimple_block is looking at gimple_location anyway, why not just
use the location in the location_t's ad-hoc data; I have a feeling
there's a PR open about this, but I don't have it to hand right now].


That would make sense to me.  I think that's also what we
agreed would be the way forward the last time we discussed
this.



It looks like the old location was (a), and now you're seeing (b),
since (b) is the location of the statement.


Yes, I think that's it.



Whether or not this is a problem is difficult to tell; what does the
full diagnostic look like? (you only quoted the diagnostic_show_locus
part of it).


It looks like this:

e.c: In function ‘f’:
e.c:5:9: warning: array subscript -1 is below array bounds of ‘int[10]’ 
[-Warray-bounds]

   a[-1] = 0;
   ~~^~~
e.c:1:5: note: while referencing ‘a’
 int a[10];
 ^

Martin


Re: committed: remove redundant -Wall from -Warray-bounds (PR 82063)

2018-07-24 Thread Martin Sebor

On 07/24/2018 03:24 AM, Franz Sirl wrote:

Am 2018-07-20 um 23:22 schrieb Martin Sebor:

As the last observation in PR 82063 Jim points out that

   Both -Warray-bounds and -Warray-bounds= are listed in the c.opt
   file as being enabled by -Wall, but they are the same option,
   and it causes this one option to be processed twice in the
   C_handle_option_auto function in the generated options.c file.
   It gets set to the same value twice, so it does work as intended,
   but this is wasteful.

I have removed the redundant -Wall from the first option and
committed the change as obvious in r262912.


Hi Martin,

this looks related to PR 68845 and my patch in there. I never posted it
to gcc-patches because I couldn't find a definitive answer on how
options duplicated between common.opt and c-family/c.opt are supposed to
be handled.
For example, Warray-bounds in common.opt is a separate option (not an
alias to Warray-bounds=), leading to separate enums for them. Is this
intended? Warray-bounds seemed to be the only option with an equal sign
doing it like that at that time. Now Wcast-align is doing the same...

Can you shed some light on this?


-Warray-bounds= (the form that takes an argument) was added in
r219577.  Before then, only the plain form existed.  If I had
to guess, the interplay between the two options (as opposed to
making the latter an alias for the new option) wasn't considered.
I didn't think of it until now either.  Your patch seems like
the right solution to me.  Let me know if you will submit it.
If not, I posted the patch below that touches this area and
that will likely need updating so I can roll your change into
it:

https://gcc.gnu.org/ml/gcc-patches/2018-07/msg01286.html

Martin


Re: [PATCH] Make strlen range computations more conservative

2018-07-24 Thread Martin Sebor

On 07/24/2018 01:59 AM, Bernd Edlinger wrote:

Hi!

This patch makes strlen range computations more conservative.

Firstly if there is a visible type cast from type A to B before passing
then value to strlen, don't expect the type layout of B to restrict the
possible return value range of strlen.

Furthermore use the outermost enclosing array instead of the
innermost one, because too aggressive optimization will likely
convert harmless errors into security-relevant errors, because
as the existing test cases demonstrate, this optimization is actively
attacking string length checks in user code, while and not giving
any warnings.


I strongly object to this change.

As you know, I am actively working in this area -- I asked you
to hold off on submitting patches for it until the review of
bug 86532 has completed.  It's not just unhelpful but
disrespectful of you to ignore my request and to try to make
changes you know I will likely have a strong opinion on in
spite of it, and without as much as involving me in the proposal.

As the author of this code and of many security improvements
in GCC I also find your characterization above of "actively
attacking" user code insulting.  If security is your main
concern then helping detect the invalid code you are trying
to accommodate with this change would be the right thing to
do.  One of the reasons for the tight bound is to build
a better foundation for the detection of buffer overflow
in string functions.  Relaxing the bound could make
the detection more difficult.

So again, I strongly object to both this change and to your
conduct.

Martin


Re: [PATCH] include more detail in -Warray-bounds (PR 86650)

2018-07-24 Thread Martin Sebor

On 07/24/2018 11:05 AM, David Malcolm wrote:

On Mon, 2018-07-23 at 20:56 -0600, Martin Sebor wrote:

On 07/23/2018 07:20 PM, David Malcolm wrote:

On Mon, 2018-07-23 at 17:49 -0600, Martin Sebor wrote:

(David, I'm hoping your your help here.  Please see the end.)

While looking into a recent -Warray-bounds instance in Glibc
involving inlining of large functions it became apparent that
GCC could do a better job of pinpointing the source of
the problem.

The attached patch makes a few adjustments to both
the pretty printer infrastructure and to VRP to make this
possible.  The diagnostic pretty printer already has directives
to print the inlining context for both tree and gcall* arguments,
so most of the changes just adjust things to be able to pass in
gimple* argument instead.

The only slightly interesting change is to print the declaration
to which the out-of-bounds array refers if one is known.

Tested on x86_64-linux with one regression.

The regression is in the gcc.dg/Warray-bounds.c test: the column
numbers of the warnings are off.  Adding the %G specifier to
the array bounds warnings in VRP has the unexpected effect of
expanding the extent of the underling. For instance, for a test
case like this:

   int a[10];

   void f (void)
   {
 a[-1] = 0;
   }

from the expected:

a[-1] = 0;
~^~~~

to this:

   a[-1] = 0;
~~^~~

David, do you have any idea how to avoid this?


Are you referring to the the various places in your patch (in e.g.
  vrp_prop::check_array_ref
  vrp_prop::check_mem_ref
  vrp_prop::search_for_addr_array
) where the patch changed things from this form:

  warning_at (location, OPT_Warray_bounds,
  "[...format string...]", ARGS...);

to this form:

  warning_at (location, OPT_Warray_bounds,
  "%G[...format string...]", stmt, ARGS...);


Yes.



If so, there are two location_t values of interest here:
(a) the "location" value, and
(b) gimple_location (stmt)

My recollection is that %G and %K override the "location" value
passed
in as the first param to the diagnostic call, overwriting it within
the
diagnostic_info's text_info with the location value from the %K/%G
(which also set up the pp_ti_abstract_origin of the text_info from
the
block information stashed in the ad-hoc data part of the location,
so
that the pretty-printer prints the inlining chain).


Would having the pretty printer restore the location and
the block after it's done printing the context and before
processing the rest of the format string fix it?  (I have
only a vague idea how this all works so I'm not sure if
this even makes sense.)


Structurally, it looks like this:

Temporaries during the emission of   |  Long-lived stuff:
the diagnostic:  |
 |+-+
++   ||global_dc|
|diagnostic_info |   |+-+
|++  |   |
||text_info:  |  |   |
||  m_richloc-+--+---> rich_location |
||  x_data+--+---+--> block (via pp_ti_abstract_origin)
|++  |   |
++   |
 |

The location_t of the diagnostic is stored in the rich_location.

Calling:
  warning_at (location)
creates a rich_location wrapping "location" and uses it as above.

During formatting, the %K/%G codes set text_info.x_data via
pp_ti_abstract_origin and overwrite the location_t in the
rich_location.

So in theory we could have a format code that sets the block and
doesn't touch the rich_location.  But that seems like overkill to me.


I wasn't thinking of a new format.  Rather, I thought the %K
would save the current block and location (set by the location
argument to warning_at), then after printing the inlining stack
but before printing the rest of the diagnostic the printer would
restore the saved block and location.  I still don't know enough
to tell if it would work.

In any event, if it's easier to always print the inlining stack
and get rid of %K and %G then that would be preferable.  I don't
think they are used for any other purpose (i.e., they are always
used as the first directive in a format string).


[aside, why don't we always just print the inlining chain?  IIRC,
%K
and %G feel too much like having to jump through hoops to me, given
that gimple_block is looking at gimple_location anyway, why not
just
use the location in the location_t's ad-hoc data; I have a feeling
there's a PR open about this, but I don't have it to hand right
now].


That would make sense to me.  I think that's also what we
agreed would be the way forward the last time we discussed
this.


(nods)


So how do we go about making this happen?  Somewhat selfishly
I was sort of waiting for you to take the lead on it since
you're much more 

Re: [PATCH] fix a couple of bugs in const string folding (PR 86532)

2018-07-24 Thread Martin Sebor

On 07/20/2018 04:20 AM, Richard Biener wrote:

On Thu, 19 Jul 2018, Martin Sebor wrote:


Here's one more update with tweaks addressing a couple more
of Bernd's comments:

1) correct the use of TREE_STRING_LENGTH() where a number of
array elements is expected and not bytes
2) set CHARTYPE as soon as it's first determined rather than
trying to extract it again later


Please look at Bernds followup comments.  One additional note:

I see you are ultimatively using CHARTYPE to get at the size
of the access.  That is wrong.

   if (TREE_CODE (arg) == ADDR_EXPR)
 {
+  tree argtype = TREE_TYPE (arg);
+  chartype = argtype;
+
   arg = TREE_OPERAND (arg, 0);
   tree ref = arg;
   if (TREE_CODE (arg) == ARRAY_REF)
{

so the "access" is of size array_ref_element_size (arg) here.  You
may not simply use TYPE_SIZE_UNIT of sth.

That is, lookign at current trunk,

  if (TREE_CODE (arg) == ADDR_EXPR)
{
  arg = TREE_OPERAND (arg, 0);
  tree ref = arg;
  if (TREE_CODE (arg) == ARRAY_REF)
{
  tree idx = TREE_OPERAND (arg, 1);
  if (TREE_CODE (idx) != INTEGER_CST)
{
  /* Extract the variable index to prevent
 get_addr_base_and_unit_offset() from failing due to
 it.  Use it later to compute the non-constant offset
 into the string and return it to the caller.  */
  varidx = idx;
  ref = TREE_OPERAND (arg, 0);

you should scale the index here by array_ref_element_size (arg).
Or simply rewrite this to instead of using get_addr_base_and_unit_offset,
use get_inner_reference which does all that magic for you.

That is, you shouldn't need chartype.


I've made use of size array_ref_element_size() here as you suggest
and eliminated the type.  For the purposes of testing though,
I haven't been able to come up with a test case that would have
the function return something other than TYPE_SIZE_UNIT().  IIUC,
the function is used to compute the size of elements of overaligned
types and there is no way that I know of to create an array of
overaligned characters.  (If there is a way to exercise this I'd
appreciate a test case so I can add it to the test suite).

I've also fixed the other bug Bernd pointed with pointers to arrays.
The fix seems small enough that it makes sense to handle at the same
time as this bug.

Attached is an update with these changes.

Martin



Richard.



On 07/19/2018 01:49 PM, Martin Sebor wrote:

On 07/19/2018 01:17 AM, Richard Biener wrote:

On Wed, 18 Jul 2018, Martin Sebor wrote:


+  while (TREE_CODE (chartype) != INTEGER_TYPE)
+chartype = TREE_TYPE (chartype);

This is a bit concerning.  First under what conditions is chartype
not
going to be an INTEGER_TYPE?  And under what conditions will
extracting
its type ultimately lead to something that is an INTEGER_TYPE?


chartype is usually (maybe even always) pointer type here:

  const char a[] = "123";
  extern int i;
  n = strlen (&a[i]);


But your hunch was correct that the loop isn't safe because
the element type need not be an integer (I didn't know/forgot
that the function is called for non-strings too).  The loop
should be replaced by:

  while (TREE_CODE (chartype) == ARRAY_TYPE
 || TREE_CODE (chartype) == POINTER_TYPE)
chartype = TREE_TYPE (chartype);


As this function may be called "late" you need to cope with
the middle-end ignoring type changes and thus happily
passing int *** directly rather than (char *) of that.

Also doesn't the above yield int for int *[]?


I don't think it ever gets this far for either a pointer to
an array of int, or for an array of pointers to int.  So for
something like the following the function fails earlier:

  const int* const a[2] = { ... };
  const char* (const *p)[2] = &a;

  int f (void)
  {
return __builtin_memcmp (*p, "12345678", 8);
  }

(Assuming this is what you were asking about.)


I guess you really want

   if (POINTER_TYPE_P (chartype))
 chartype = TREE_TYPE (chartype);
   while (TREE_CODE (chartype) == ARRAY_TYPE)
 chartype = TREE_TYPE (chartype);

?


That seems to work too.  Attached is an update with this tweak.
The update also addresses some of Bernd's comments: it removes
the pointless second test in:

if (TREE_CODE (type) == ARRAY_TYPE
&& TREE_CODE (type) != INTEGER_TYPE)

the unused assignment to chartype in:

   else if (DECL_P (arg))
 {
   array = arg;
   chartype = TREE_TYPE (arg);
 }

and calls string_constant() instead of strnlen() to compute
the length of a generic string.

Other improvements  are possible in this area but they are
orthogonal to the bug I'm trying to fix so I'll post separate
patches for some of those.

Martin







PR tree-optimization/86622 - incorrect strlen of array of array plus variable offset
PR tree-o

Re: committed: remove redundant -Wall from -Warray-bounds (PR 82063)

2018-07-24 Thread Martin Sebor

On 07/24/2018 01:48 PM, Franz Sirl wrote:

Am 2018-07-24 um 17:35 schrieb Martin Sebor:

On 07/24/2018 03:24 AM, Franz Sirl wrote:

Am 2018-07-20 um 23:22 schrieb Martin Sebor:

As the last observation in PR 82063 Jim points out that

   Both -Warray-bounds and -Warray-bounds= are listed in the c.opt
   file as being enabled by -Wall, but they are the same option,
   and it causes this one option to be processed twice in the
   C_handle_option_auto function in the generated options.c file.
   It gets set to the same value twice, so it does work as intended,
   but this is wasteful.

I have removed the redundant -Wall from the first option and
committed the change as obvious in r262912.


Hi Martin,

this looks related to PR 68845 and my patch in there. I never posted it
to gcc-patches because I couldn't find a definitive answer on how
options duplicated between common.opt and c-family/c.opt are supposed to
be handled.
For example, Warray-bounds in common.opt is a separate option (not an
alias to Warray-bounds=), leading to separate enums for them. Is this
intended? Warray-bounds seemed to be the only option with an equal sign
doing it like that at that time. Now Wcast-align is doing the same...

Can you shed some light on this?


-Warray-bounds= (the form that takes an argument) was added in
r219577.  Before then, only the plain form existed.  If I had
to guess, the interplay between the two options (as opposed to
making the latter an alias for the new option) wasn't considered.
I didn't think of it until now either.  Your patch seems like
the right solution to me.  Let me know if you will submit it.
If not, I posted the patch below that touches this area and
that will likely need updating so I can roll your change into
it:

https://gcc.gnu.org/ml/gcc-patches/2018-07/msg01286.html


I'll post a patch tomorrow, since I already have all the changes
available and tested here.

Note that one minor change with this patch is that with
-fdiagnostics-show-option the message will show -Warray-bounds= (equal
sign added) instead of -Warray-bounds.


That should be fine.

Martin



[PATCH] treat -Wxxx-larger-than=HWI_MAX special (PR 86631)

2018-07-24 Thread Martin Sebor

The very large option argument enhancement committed last week
inadvertently introduced an assumption about the LP64 data model
that makes the -Wxxx-larger-than options have a different effect
at their default documented setting of PTRDIFF_MAX between ILP32
and LP64.  As a result, the options are treated as suppressed in
ILP32 while triggering the expected warnings for allocations or
sizes in excess of the limit in ILP64.

To remove this I considered making it possible to use symbolic
constants like PTRDIFF_MAX as the option arguments so that
then defaults in the .opt files could be set to that.  Sadly,
options in GCC are processed before the values of constants
like PTRDIFF_MAX for the target are known, and deferring
the handling of just the -Wxxx-larger-than= options until
the constants have been computed would be too involved to
make it worth the effort.

To keep things simple I decided to have the code that handles
each of the affected options treat the HOST_WIDE_INT_MAX default
as a special request to substitute the value of PTRDIFF_MAX at
the time.

The attached patch implements this.

Tested on x86_64-linux with -m32/-m64.

Martin
PR middle-end/86631 - missing -Walloc-size-larger-than on ILP32 hosts

gcc/ChangeLog:

	PR middle-end/86631
	* calls.c (alloc_max_size): Treat HOST_WIDE_INT special.
	* gimple-ssa-warn-alloca.c (adjusted_warn_limit): New function.
	(pass_walloca::gate): Use it.
	(alloca_call_type): Same.
	(pass_walloca::execute): Same.
	* stor-layout.c (layout_decl): Treat HOST_WIDE_INT special.

gcc/testsuite/ChangeLog:

	PR middle-end/86631
	* g++.dg/Walloca1.C: Adjust.

Index: gcc/calls.c
===
--- gcc/calls.c	(revision 262951)
+++ gcc/calls.c	(working copy)
@@ -1222,9 +1222,12 @@ alloc_max_size (void)
   if (alloc_object_size_limit)
 return alloc_object_size_limit;
 
-  alloc_object_size_limit
-= build_int_cst (size_type_node, warn_alloc_size_limit);
+  HOST_WIDE_INT limit = warn_alloc_size_limit;
+  if (limit == HOST_WIDE_INT_MAX)
+limit = tree_to_shwi (TYPE_MAX_VALUE (ptrdiff_type_node));
 
+  alloc_object_size_limit = build_int_cst (size_type_node, limit);
+
   return alloc_object_size_limit;
 }
 
Index: gcc/gimple-ssa-warn-alloca.c
===
--- gcc/gimple-ssa-warn-alloca.c	(revision 262951)
+++ gcc/gimple-ssa-warn-alloca.c	(working copy)
@@ -38,6 +38,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "cfgloop.h"
 #include "intl.h"
 
+static unsigned HOST_WIDE_INT adjusted_warn_limit (bool);
+
 const pass_data pass_data_walloca = {
   GIMPLE_PASS,
   "walloca",
@@ -82,7 +84,9 @@ pass_walloca::gate (function *fun ATTRIBUTE_UNUSED
   // Warning is disabled when its size limit is greater than PTRDIFF_MAX
   // for the target maximum, which makes the limit negative since when
   // represented in signed HOST_WIDE_INT.
-  return warn_alloca_limit >= 0 || warn_vla_limit >= 0;
+  unsigned HOST_WIDE_INT max = tree_to_uhwi (TYPE_MAX_VALUE (ptrdiff_type_node));
+  return (adjusted_warn_limit (false) <= max
+	  || adjusted_warn_limit (true) <= max);
 }
 
 // Possible problematic uses of alloca.
@@ -127,6 +131,30 @@ struct alloca_type_and_limit {
   alloca_type_and_limit (enum alloca_type type) : type(type) { }
 };
 
+/* Return the value of the argument N to -Walloca-larger-than= or
+   -Wvla-larger-than= adjusted for the target data model so that
+   when N == HOST_WIDE_INT_MAX, the adjusted value is set to
+   PTRDIFF_MAX on the target.  This is done to prevent warnings
+   for unknown/unbounded allocations in the "permissive mode"
+   while still diagnosing excessive and necessarily invalid
+   allocations.  */
+
+static unsigned HOST_WIDE_INT
+adjusted_warn_limit (bool idx)
+{
+  static HOST_WIDE_INT limits[2];
+  if (limits[idx])
+return limits[idx];
+
+  limits[idx] = idx ? warn_vla_limit : warn_alloca_limit;
+  if (limits[idx] != HOST_WIDE_INT_MAX)
+return limits[idx];
+
+  limits[idx] = tree_to_shwi (TYPE_MAX_VALUE (ptrdiff_type_node));
+  return limits[idx];
+}
+
+
 // NOTE: When we get better range info, this entire function becomes
 // irrelevant, as it should be possible to get range info for an SSA
 // name at any point in the program.
@@ -309,11 +337,7 @@ alloca_call_type (gimple *stmt, bool is_vla, tree
 
   // Adjust warn_alloca_max_size for VLAs, by taking the underlying
   // type into account.
-  unsigned HOST_WIDE_INT max_size;
-  if (is_vla)
-max_size = warn_vla_limit;
-  else
-max_size = warn_alloca_limit;
+  unsigned HOST_WIDE_INT max_size = adjusted_warn_limit (is_vla);
 
   // Check for the obviously bounded case.
   if (TREE_CODE (len) == INTEGER_CST)
@@ -510,6 +534,8 @@ pass_walloca::execute (function *fun)
 	  struct alloca_type_and_limit t
 	= alloca_call_type (stmt, is_vla, &invalid_casted_type);
 
+	  unsigned HOST_WIDE_INT adjusted_alloca_limit
+	= adjusted_warn_limit (false);
 	  // Even if w

Re: [PATCH] treat -Wxxx-larger-than=HWI_MAX special (PR 86631)

2018-07-25 Thread Martin Sebor

On 07/25/2018 02:34 AM, Richard Biener wrote:

On Wed, Jul 25, 2018 at 4:07 AM Martin Sebor  wrote:


The very large option argument enhancement committed last week
inadvertently introduced an assumption about the LP64 data model
that makes the -Wxxx-larger-than options have a different effect
at their default documented setting of PTRDIFF_MAX between ILP32
and LP64.  As a result, the options are treated as suppressed in
ILP32 while triggering the expected warnings for allocations or
sizes in excess of the limit in ILP64.

To remove this I considered making it possible to use symbolic
constants like PTRDIFF_MAX as the option arguments so that
then defaults in the .opt files could be set to that.  Sadly,
options in GCC are processed before the values of constants
like PTRDIFF_MAX for the target are known, and deferring
the handling of just the -Wxxx-larger-than= options until
the constants have been computed would be too involved to
make it worth the effort.

To keep things simple I decided to have the code that handles
each of the affected options treat the HOST_WIDE_INT_MAX default
as a special request to substitute the value of PTRDIFF_MAX at
the time.

The attached patch implements this.


I wonder if documenting a special value of -1 would be easier for
users to use.  Your patch doesn't come with adjustments to
invoke.texi so I wonder how people could know of this special
handling?


I don't mean for the special value to be used except internally
for the defaults.  Otherwise, users wanting to override the default
will choose a value other than it.  I'm happy to document it in
the .opt file for internal users though.

-1 has the documented effect of disabling the warnings altogether
(-1 is SIZE_MAX) so while I agree that -1 looks better it doesn't
work.  (It would need more significant changes.)

I don't consider this the most elegant design but it's the best
I could think of short of complicating things even more.

Martin


Re: [PATCH] treat -Wxxx-larger-than=HWI_MAX special (PR 86631)

2018-07-25 Thread Martin Sebor

On 07/25/2018 08:57 AM, Jakub Jelinek wrote:

On Wed, Jul 25, 2018 at 08:54:13AM -0600, Martin Sebor wrote:

I don't mean for the special value to be used except internally
for the defaults.  Otherwise, users wanting to override the default
will choose a value other than it.  I'm happy to document it in
the .opt file for internal users though.

-1 has the documented effect of disabling the warnings altogether
(-1 is SIZE_MAX) so while I agree that -1 looks better it doesn't
work.  (It would need more significant changes.)


The variable is signed, so -1 is not SIZE_MAX.  Even if -1 disables it, you
could use e.g. -2 or other negative value for the other special case.


The -Wxxx-larger-than=N distinguish three ranges of argument
values (treated as unsigned):

  1.  [0, HOST_WIDE_INT_MAX)
  2.  HOST_WIDE_INT_MAX
  3.  [HOST_WIDE_INT_MAX + 1, Infinity)

(1) implies warnings for allocations in excess of the size.  For
the alloca/VLA warnings it also means warnings for allocations
that may be unbounded.  (This feels like a bit of a wart.)

(2) implies warnings for allocations in excess of PTRDIFF_MAX
only.  For the alloca/VLA warnings it also disables warnings
for allocations that may be unbounded (also a bit of a wart)

(3) isn't treated consistently by all options (yet) but for
the alloca/VLA warnings it means no warnings.  Since
the argument value is stored in signed HOST_WIDE_INT this
range is strictly negative.

Any value from (3) could in theory be made special and used
instead of -1 or HOST_WIDE_INT_MAX as a placeholder for
PTRDIFF_MAX.  But no matter what the choice is, it removes
the value from the usable set in (3) (i.e., it doesn't have
the expected effect of disabling the warning).

I don't see the advantage of picking -2 over any other negative
number.  As inelegant as the current choice of HOST_WIDE_INT_MAX
may be, it seems less arbitrary and less intrusive than picking
a random value from the negative range.

Martin

PS The handling of these ranges isn't consistent across all
the options because they were each developed independently
and without necessarily aiming for it.  I think making them
more consistent would be nice as a followup patch.  I would
expect consistency to be achievable more easily if baking
special cases into the design is kept to a minimum.  It
would also help to remove some existing special cases.
E.g., by introducing a distinct option for the special case
of diagnosing unbounded alloca/VLA allocations and removing
it from -W{alloca,vla}-larger-than=.


Re: [PATCH] Make strlen range computations more conservative

2018-07-25 Thread Martin Sebor

One other example I have found in one of the test cases:

char c;

if (strlen(&c) != 0) abort();

this is now completely elided, but why?


Because the only string that can be stored in an array of one
element is the empty string.  Expanding that call to strlen()
is in all likelihood going to result in zero.  The only two
cases when it doesn't are invalid: either the character is
uninitialized (GCC may not see it so it may not warn about
it), or it is initialized to a non-zero value (which makes
it not a string -- I have submitted an enhancement to detect
a subset of these cases).  The cases where the user expects
to be able to read past the end of the character and what
follows are both exceedingly unlikely and also undefined.
So in my view, it is safer to fold the call into zero than
not.

  Is there a code base where

that is used?  I doubt, but why do we care to eliminate something
stupid like that?  If we would emit a warning for that I'm fine with it,
But if we silently remove code like that I don't think that it
will improve anything.  So I ask, where is the code base which
gets an improvement from that optimization?


Jonathan suggested issuing a warning in this case.  That
sounds reasonable to me, but not everyone is in favor of
issuing warnings out of the folder.  (I'm guilty of having
done that in a few important cases despite it.)  I am fully
supportive of enhancing warnings to detect more problems,
but I am opposed to gratuitously removing solutions that
have been put in after a great deal of thought, without as
much as bring them up for discussion.


This work concentrates mostly on avoiding to interfere with code that
actually deserves warnings, but which is not being warned about.


Then help by adding the missing warnings.  It will help drive
improvements to user code and will ultimately lead to greater
efficiency.  Dumbing down the analyses and accommodating
undefined code is not a good way forward.  It will only lead
to a kludgy compiler with hacks for this or that bad practice
and compromise our ability to implement new optimizations (and
detect more bugs).

Martin


Re: [PATCH] Make strlen range computations more conservative

2018-07-25 Thread Martin Sebor

BUT - for the string_constant and c_strlen functions we are,
in all cases we return something interesting, able to look
at an initializer which then determines that type.  Hopefully.
I think the strlen() folding code when it sets SSA ranges
now looks at types ...?

Consider

struct X { int i; char c[4]; int j;};
struct Y { char c[16]; };

void foo (struct X *p, struct Y *q)
{
  memcpy (p, q, sizeof (struct Y));
  if (strlen ((char *)(struct Y *)p + 4) < 7)
abort ();
}

here the GIMPLE IL looks like

  const char * _1;

   [local count: 1073741825]:
  _5 = MEM[(char * {ref-all})q_4(D)];
  MEM[(char * {ref-all})p_6(D)] = _5;
  _1 = p_6(D) + 4;
  _2 = __builtin_strlen (_1);

and I guess Martin would argue that since p is of type struct X
+ 4 gets you to c[4] and thus strlen of that cannot be larger
than 3.  But of course the middle-end doesn't work like that
and luckily we do not try to draw such conclusions or we
are somehow lucky that for the testcase as written above we do not
(I'm not sure whether Martins changes in this area would derive
such conclusions in principle).


Only if the strlen argument were p->c.


NOTE - we do not know the dynamic type here since we do not know
the dynamic type of the memory pointed-to by q!  We can only
derive that at q+4 there must be some object that we can
validly call strlen on (where Martin again thinks strlen
imposes constrains that memchr does not - sth I do not agree
with from a QOI perspective)


The dynamic type is a murky area.  As you said, above we don't
know whether *p is an allocated object or not.  Strictly speaking,
we would need to treat it as such.  It would basically mean
throwing out all type information and treating objects simply
as blobs of bytes.  But that's not what GCC or other compilers do
either.  For instance, in the modified foo below, GCC eliminates
the test because it assumes that *p and *q don't overlap.  It
does that because they are members of structs of unrelated types
access to which cannot alias.  I.e., not just the type of
the access matters (here int and char) but so does the type of
the enclosing object.  If it were otherwise and only the type
of the access mattered then eliminating the test below wouldn't
be valid (objects can have their stored value accessed by either
an lvalue of a compatible type or char).

  void foo (struct X *p, struct Y *q)
  {
int j = p->j;
q->c[__builtin_offsetof (struct X, j)] = 0;
if (j != p->j)
  __builtin_abort ();
}

Clarifying (and adjusting if necessary) this area is among
the goals of the C object model proposal and the ongoing study
group.  We have been talking about some of these cases there
and trying to come up with ways to let code do what it needs
to do without compromising existing language rules, which was
the consensus position within WG14 when the study group was
formed: i.e., to clarify or reaffirm existing rules and, in
cases of ambiguity or where the standard is unintentionally
overly permissive), favor tighter rules over looser ones.

Martin



committed: removed directives ignored by DejaGnu

2018-07-25 Thread Martin Sebor

Aldy pointed out that the runtime test I added in r261705 to
exercise the new strnlen() built-in makes use of directives
that are ignored by the test harness.

I have removed the directives via r262981.

Martin


PING [PATCH] warn for strlen of arrays with missing nul (PR 86552)

2018-07-25 Thread Martin Sebor

Ping: https://gcc.gnu.org/ml/gcc-patches/2018-07/msg01124.html

The fix for bug 86532 has been checked in so this enhancement
can now be applied on top of it (with only minor adjustments).

On 07/19/2018 02:08 PM, Martin Sebor wrote:

In the discussion of my patch for pr86532 Bernd noted that
GCC silently accepts constant character arrays with no
terminating nul as arguments to strlen (and other string
functions).

The attached patch is a first step in detecting these kinds
of bugs in strlen calls by issuing -Wstringop-overflow.
The next step is to modify all other handlers of built-in
functions to detect the same problem (not part of this patch).
Yet another step is to detect these problems in arguments
initialized using the non-string form:

  const char a[] = { 'a', 'b', 'c' };

This patch is meant to apply on top of the one for bug 86532
(I tested it with an earlier version of that patch so there
is code in the context that does not appear in the latest
version of the other diff).

Martin





Re: [PATCH] treat -Wxxx-larger-than=HWI_MAX special (PR 86631)

2018-07-26 Thread Martin Sebor

On 07/26/2018 02:38 AM, Richard Biener wrote:

On Wed, Jul 25, 2018 at 5:54 PM Martin Sebor  wrote:


On 07/25/2018 08:57 AM, Jakub Jelinek wrote:

On Wed, Jul 25, 2018 at 08:54:13AM -0600, Martin Sebor wrote:

I don't mean for the special value to be used except internally
for the defaults.  Otherwise, users wanting to override the default
will choose a value other than it.  I'm happy to document it in
the .opt file for internal users though.

-1 has the documented effect of disabling the warnings altogether
(-1 is SIZE_MAX) so while I agree that -1 looks better it doesn't
work.  (It would need more significant changes.)


The variable is signed, so -1 is not SIZE_MAX.  Even if -1 disables it, you
could use e.g. -2 or other negative value for the other special case.


The -Wxxx-larger-than=N distinguish three ranges of argument
values (treated as unsigned):

   1.  [0, HOST_WIDE_INT_MAX)
   2.  HOST_WIDE_INT_MAX
   3.  [HOST_WIDE_INT_MAX + 1, Infinity)


But it doesn't make sense for those to be host dependent.


It isn't when the values are handled by each warning.  That's
also the point of this patch: to remove this (unintended)
dependency.


I think numerical user input should be limited to [0, ptrdiff_max]
and cases (1) and (2) should be simply merged, I see no value
in distinguishing them.  -Wxxx-larger-than should be aliased
to [0, ptrdiff_max], case (3) is achieved by -Wno-xxx-larger-than.

I think you are over-engineering this and the user-interface is
awful.


Thank you.

I agree that what you describe would be the ideal solution.
As I explained in the description of the patch, I did consider
handling PTRDIFF_MAX but the target-dependent value is not
available at the time the option argument is processed.  We
don't even know yet what the target data model is.

This is the best I came up with.  What do you suggest instead?

Martin


Re: [PATCH] enhance strlen to understand MEM_REF and partial overlaps (PR 86042, 86043)

2018-07-26 Thread Martin Sebor

On 06/29/2018 11:05 AM, Jeff Law wrote:

On 06/07/2018 09:57 AM, Martin Sebor wrote:

The attached patch enhances the strlen pass to more consistently
deal with MEM_REF assignments (PR 86042) and to track string
lengths across calls to memcpy that overwrite parts of a string
with sequences of non-nul characters (PR 86043).

Fixes for both bugs rely on changes to the same code so I chose
to include them in the same patch.

To fix PR 86042 the patch extends handle_char_store() to deal with
more forms of multi-character assignments from MEM_REF (originally
introduced in r256180).  To handle assignments from strings of
multiple nuls the patch also extends the initializer_zerop()
function to understand MEM_REFs of the form:

   MEM[(char * {ref-all})&a] = MEM[(char * {ref-all})"..."];

The solution for PR 86043 consists of two parts: the extension
above which lets handle_char_store() recognize assignments of
sequences of non-null characters that overwrite some portion of
the leading non-zero characters in the destination and avoid
discarding the destination information, and a similar extension
to handle_builtin_memcpy().

Martin

gcc-86042.diff


PR tree-optimization/86042 - missing strlen optimization after second strcpy

gcc/ChangeLog:

PR tree-optimization/86042
* tree-ssa-strlen.c (handle_builtin_memcpy): Handle strict overlaps.
(get_string_cst_length): Rename...
(get_min_string_length): ...to this.  Add argument.
(handle_char_store): Extend to handle multi-character stores by
MEM_REF.
* tree.c (initializer_zerop): Use new argument.  Handle MEM_REF.
* tree.h (initializer_zerop): Add argument.

gcc/testsuite/ChangeLog:

PR tree-optimization/86042
* gcc.dg/strlenopt-44.c: New test.

OK.


I missed your approval and didn't get to committing the patch
until today.  While retesting it on top of fresh trunk I noticed
a few test failures due to other recent strlen changes.  I made
adjustments to the patch to avoid most of them and opened bug
86688 for one that I think needs a separate code change and
xfailed the test cases until the bug gets resolved.

Martin



Re: [PATCH] treat -Wxxx-larger-than=HWI_MAX special (PR 86631)

2018-07-26 Thread Martin Sebor

On 07/26/2018 08:58 AM, Martin Sebor wrote:

On 07/26/2018 02:38 AM, Richard Biener wrote:

On Wed, Jul 25, 2018 at 5:54 PM Martin Sebor  wrote:


On 07/25/2018 08:57 AM, Jakub Jelinek wrote:

On Wed, Jul 25, 2018 at 08:54:13AM -0600, Martin Sebor wrote:

I don't mean for the special value to be used except internally
for the defaults.  Otherwise, users wanting to override the default
will choose a value other than it.  I'm happy to document it in
the .opt file for internal users though.

-1 has the documented effect of disabling the warnings altogether
(-1 is SIZE_MAX) so while I agree that -1 looks better it doesn't
work.  (It would need more significant changes.)


The variable is signed, so -1 is not SIZE_MAX.  Even if -1 disables
it, you
could use e.g. -2 or other negative value for the other special case.


The -Wxxx-larger-than=N distinguish three ranges of argument
values (treated as unsigned):

   1.  [0, HOST_WIDE_INT_MAX)
   2.  HOST_WIDE_INT_MAX
   3.  [HOST_WIDE_INT_MAX + 1, Infinity)


But it doesn't make sense for those to be host dependent.


It isn't when the values are handled by each warning.  That's
also the point of this patch: to remove this (unintended)
dependency.


I think numerical user input should be limited to [0, ptrdiff_max]
and cases (1) and (2) should be simply merged, I see no value
in distinguishing them.  -Wxxx-larger-than should be aliased
to [0, ptrdiff_max], case (3) is achieved by -Wno-xxx-larger-than.


To be clear: this is also close to what this patch does.

The only wrinkle is that we don't know the value of PTRDIFF_MAX
either at the time the option initial value is set in the .opt
file or when the option is processed when it's specified either
on the command line or as an alias in the .opt file (as all
-Wno-xxx-larger-than options are).  Case (2) above is only
used by the implementation as a placeholder for PTRDIFF_MAX.
It's not part of the interface -- it's an internal workaround
for lack of a better word.

(There is an additional wrinkle in the -Walloca-larger-than=
has two modes both of which are controlled by a single option:
(a) diagnose only allocations >= PTRDIFF_MAX (default), and
(b) diagnose allocations > limit plus also unbounded/unknown
allocations.  I think these modes should be split up and (b)
controlled by a separate option (say something like
-Walloca-may-be-unbounded).


I think you are over-engineering this and the user-interface is
awful.


Thank you.

I agree that what you describe would be the ideal solution.
As I explained in the description of the patch, I did consider
handling PTRDIFF_MAX but the target-dependent value is not
available at the time the option argument is processed.  We
don't even know yet what the target data model is.

This is the best I came up with.  What do you suggest instead?

Martin




Re: [PATCH] enhance strlen to understand MEM_REF and partial overlaps (PR 86042, 86043)

2018-07-27 Thread Martin Sebor

On 07/27/2018 03:05 AM, Eric Botcazou wrote:

I missed your approval and didn't get to committing the patch
until today.  While retesting it on top of fresh trunk I noticed
a few test failures due to other recent strlen changes.  I made
adjustments to the patch to avoid most of them and opened bug
86688 for one that I think needs a separate code change and
xfailed the test cases until the bug gets resolved.


But it has introduced a couple of regressions in the ACATS testsuite:

FAIL:   c52103c
FAIL:   cde0001

=== acats Summary ===
# of expected passes2318
# of unexpected failures2
Native configuration is x86_64-suse-linux-gnu


Yes, I didn't see these failures, sorry.

FWIW, there are 128 failures in the GCC test suite on x86_64.
Many of these have been there for weeks (e.g., the lto failures
due to PR86004), even years (guality).  My script to compare
the results against a baseline uses the following regular
expression to extract the names of failing (and other
"interesting") tests.  As it happens, the Ada logs have
extra spaces before the test names that makes the script
exclude them from the list:

tlist=$(sed -n -e "s/^FAIL: \([^ ]*\) .*/\1/p" \
   -e "s/^XFAIL: \([^ ]*\) .*/\1/p" \
   -e "s/^UNRESOLVED: \([^ ]*\) .*/\1/p" \
   -e "s/^XPASS: \([^ ]*\) .*/\1/p" \
   test_summary.log \
| sort -u)

Martin


[PATCH] fix typo in tree-ssa-strlen.c (PR 86696)

2018-07-27 Thread Martin Sebor

My yesterday's change to tree-ssa-strlen.c introduced a test
for INTEGER_TYPE where INTEGRAL_TYPE_P should have been used,
causing a subsequent ICE when the latter was supplied.

The attached patch corrects the test and also makes the subsequent
code more robust and be prepared for the test to fail.

Since the change also caused a couple pf regressions in the Ada
test suite, if there are no objections, I will commit the patch
later today to clear up those failures.

Retested on x86_64-linux with no regressions (including in the Ada
test suite).

Martin
PR tree-optimization/86696 - ICE in handle_char_store at gcc/tree-ssa-strlen.c:3332

gcc/ChangeLog:

	PR tree-optimization/86696
	* tree-ssa-strlen.c (get_min_string_length): Handle all integer
	types, including enums.
	(handle_char_store): Be prepared for the above function to fail.

gcc/testsuite/ChangeLog:

	PR tree-optimization/86696
	* gcc.dg/pr86696.C: New test.

Index: gcc/testsuite/g++.dg/pr86696.C
===
--- gcc/testsuite/g++.dg/pr86696.C	(nonexistent)
+++ gcc/testsuite/g++.dg/pr86696.C	(working copy)
@@ -0,0 +1,30 @@
+/* PR tree-optimization/86696 - ICE in handle_char_store at
+   gcc/tree-ssa-strlen.c
+   { dg-do compile }
+   { dg-options "-O2 -Wall -std=c++11" } */
+
+typedef char a;
+template  struct c {
+  int d;
+  b e;
+};
+struct f;
+class g {
+public:
+  void h(c);
+};
+enum i {};
+enum j : a { k, l };
+struct f {
+  i m;
+  a n;
+  a o;
+  a p;
+  j family;
+};
+void fn1() {
+  i format{};
+  f info{format, a(), 0, 4, l};
+  g dest;
+  dest.h({format, info});
+}
Index: gcc/tree-ssa-strlen.c
===
--- gcc/tree-ssa-strlen.c	(revision 263030)
+++ gcc/tree-ssa-strlen.c	(working copy)
@@ -3150,7 +3150,7 @@ handle_pointer_plus (gimple_stmt_iterator *gsi)
 static HOST_WIDE_INT
 get_min_string_length (tree rhs, bool *full_string_p)
 {
-  if (TREE_CODE (TREE_TYPE (rhs)) == INTEGER_TYPE)
+  if (INTEGRAL_TYPE_P (TREE_TYPE (rhs)))
 {
   if (tree_expr_nonzero_p (rhs))
 	{
@@ -3315,9 +3315,16 @@ handle_char_store (gimple_stmt_iterator *gsi)
 	 Otherwise, we're storing an unknown value at offset OFFSET,
 	 so need to clip the nonzero_chars to OFFSET.  */
 	  bool full_string_p = storing_all_zeros_p;
-	  HOST_WIDE_INT len = (storing_nonzero_p
-			   ? get_min_string_length (rhs, &full_string_p)
-			   : 1);
+	  HOST_WIDE_INT len = 1;
+	  if (storing_nonzero_p)
+	{
+	  /* Try to get the minimum length of the string (or
+		 individual character) being stored.  If it fails,
+		 STORING_NONZERO_P guarantees it's at least 1.  */
+	  len = get_min_string_length (rhs, &full_string_p);
+	  if (len < 0)
+		len = 1;
+	}
 
 	  location_t loc = gimple_location (stmt);
 	  tree oldlen = si->nonzero_chars;


Re: [PATCH] enhance strlen to understand MEM_REF and partial overlaps (PR 86042, 86043)

2018-07-27 Thread Martin Sebor

On 07/27/2018 10:17 AM, Eric Botcazou wrote:

FWIW, there are 128 failures in the GCC test suite on x86_64.
Many of these have been there for weeks (e.g., the lto failures
due to PR86004), even years (guality).  My script to compare
the results against a baseline uses the following regular
expression to extract the names of failing (and other
"interesting") tests.


Why not just use "make mail-report.log" ?


My script downloads the latest result posted to gcc-testresults
for the target I'm building for, iterates over the interesting
test results in my build logs, compares them to the baseline,
and prints a condensed report like the one in the attachment.
This helps me decide what failures to look into and which ones
are probably safe to ignore.  The tests marked with '!' are
regressions in my run vs the baseline.  I only look at those,
and only those I don't recognize (which admittedly is error-prone).

The others are either the same failures or improvements.
(The numbers in parentheses show the difference between the number
of times the test was seen to fail in the baseline vs in my run.)

What I was trying to highlight is that rolling my own solution like
this makes missing regressions more likely than having a shared
solution would.  It's a separate issue that some of these failures
continue to persist despite having the appearance of regressions
the LTO failures), and another problem are the failures in
the guality tests.  These also make it more likely that
a regression will be missed.

Martin
!  FAIL: gcc.dg/attr-nonstring-2.c (4: +4)
   FAIL: gcc.dg/guality/pr54200.c (1)
   FAIL: gcc.dg/guality/pr54519-1.c (4: -12)
   FAIL: gcc.dg/guality/pr54519-2.c (2: -2)
   FAIL: gcc.dg/guality/pr54519-3.c (8: -8)
   FAIL: gcc.dg/guality/pr54519-4.c (2: -2)
   FAIL: gcc.dg/guality/pr54519-5.c (2: -2)
   FAIL: gcc.dg/guality/pr54970.c (29: -29)
   FAIL: gcc.dg/guality/pr56154-1.c (1: -1)
   FAIL: gcc.dg/guality/pr59776.c (8: -8)
   FAIL: gcc.dg/guality/pr68860-1.c (24: -24)
   FAIL: gcc.dg/guality/sra-1.c (6: -6)
   FAIL: gcc.dg/guality/vla-1.c (8: -8)
   FAIL: g++.dg/guality/pr55665.C (1: -1)
!  FAIL: g++.dg/lto/20091002-1 (1: +1)
!  FAIL: g++.dg/lto/pr64043 (1: +1)
!  FAIL: g++.dg/lto/pr65193 (1: +1)
!  FAIL: g++.dg/lto/pr65302 (1: +1)
!  FAIL: g++.dg/lto/pr65316 (1: +1)
!  FAIL: g++.dg/lto/pr65549 (2: +2)
!  FAIL: g++.dg/lto/pr66180 (1: +1)
!  FAIL: g++.dg/lto/pr66705 (1: +1)
!  FAIL: g++.dg/lto/pr68057 (4: +4)
!  FAIL: g++.dg/lto/pr69077 (1: +1)
!  FAIL: g++.dg/lto/pr69133 (1: +1)
!  FAIL: g++.dg/lto/pr69137 (1: +1)
!  FAIL: g++.dg/lto/pr79000 (1: +1)
!  FAIL: g++.dg/lto/pr81940 (1: +1)
!  FAIL: g++.dg/lto/pr85176 (1: +1)
   FAIL: g++.dg/pr80481.C (3)
   FAIL: g++.dg/pr83239.C (2)
!  FAIL: gfortran.dg/derived_constructor_comps_5.f90 (6: +6)
!  FAIL: gfortran.dg/lto/pr79108 (1: +1)
!  FAIL: ./index0-out.go (1: +1)
! UNRESOLVED: gcc.dg/tree-prof/crossmodule-indircall-1a.c (1: +1)
! UNRESOLVED: gcc.dg/tree-prof/pr59003.c (1: +1)
! UNRESOLVED: gcc.dg/tree-prof/wcoverage-mismatch.c (2: +2)
! UNRESOLVED: g++.dg/bprob/g++-bprob-2.C (2: +2)
! UNRESOLVED: gfortran.dg/derived_constructor_comps_5.f90 (3: +3)
  UNRESOLVED: libgomp.oacc-c++/non-scalar-data.C (1: -1)
  XPASS: gcc.dg/guality/example.c (3: -7)
  XPASS: gcc.dg/guality/guality.c (8: -8)
  XPASS: gcc.dg/guality/inline-params.c (5: -5)
  XPASS: gcc.dg/guality/pr41353-1.c (2: -3)
  XPASS: gcc.dg/guality/pr54970.c (5: -5)
warnings found:
 360   Wabi
   7   Wcast-function-type
   4   Wcpp
   6   Wdeprecated-copy
   5   Wformat-truncation=
   8   Wimplicit-fallthrough=
  14   Wincompatible-pointer-types
  49   Wmaybe-uninitialized
  43   Wmisleading-indentation
  12   Wnonnull
   8   Wsign-compare
   4   Wstringop-truncation
  19   Wuninitialized
   2   Wunused-function
  20   Wunused-parameter


committed [PATCH] fix typo in tree-ssa-strlen.c (PR 86696)

2018-07-27 Thread Martin Sebor

I committed this in r263032.

On 07/27/2018 09:53 AM, Martin Sebor wrote:

My yesterday's change to tree-ssa-strlen.c introduced a test
for INTEGER_TYPE where INTEGRAL_TYPE_P should have been used,
causing a subsequent ICE when the latter was supplied.

The attached patch corrects the test and also makes the subsequent
code more robust and be prepared for the test to fail.

Since the change also caused a couple pf regressions in the Ada
test suite, if there are no objections, I will commit the patch
later today to clear up those failures.

Retested on x86_64-linux with no regressions (including in the Ada
test suite).

Martin




Re: [PATCH] Fix wrong code with truncated string literals (PR 86711/86714)

2018-07-29 Thread Martin Sebor

On 07/29/2018 04:56 AM, Bernd Edlinger wrote:

Hi!

This fixes two wrong code bugs where string_constant
returns over length string constants.  Initializers
like that are rejected in C++, but valid in C.


If by valid you are referring to declarations like the one in
the added test:

const char a[2][3] = { "1234", "xyz" };

then (as I explained), the excess elements in "1234" make
the char[3] initialization and thus the test case undefined.
I have resolved bug 86711 as invalid on those grounds.

Bug 86711 has a valid test case that needs to be fixed, along
with bug 86688 that I raised for the same underlying problem:
considering the excess nul as part of the string.  As has been
discussed in a separate bug, rather than working around
the excessively long strings in the middle-end, it would be
preferable to avoid creating them to begin with.

I'm already working on a fix for bug 86688, in part because
I introduced the code change and also because I'm making other
changes in this area -- bug 86552.  Both of these in response
to your comments.

I would normally welcome someone else helping with my work
but (as I already made clear last week) it's counteproductive
to have two people working in the very same area at the same
time, especially when they are working at cross purposes as
you seem to be hell-bent on doing.


I have xfailed strlenopt-49.c, which tests this feature.


That's not appropriate.  The purpose of the test is to verify
the fix for bug 86428: namely, that a call to strlen() on
an array initialized with a string of the same length is
folded, such as in:

const char b[4] = "123\0";

That's a valid initializer that can and should continue to be
folded.  The test needs to continue to exercise that feature.

The test also happens to exercise invalid/overlong initializers.
This is because that, in my view, it's safer to fold the result
of such calls to a constant than than to call the library
function and have it either return an unpredictable value or
perhaps even crash.


Personally I don't think that it is worth the effort to
optimize something that is per se invalid in C++.


This is a C test, not C++.  (I don't suppose you are actually
saying that only the common subset between C and C++ is worth
optimizing.)

Just in case it isn't clear from the above: the point of
the test exercising the behavior for overlong strings isn't
optimizing undefined behavior but rather avoiding the worst
consequences of it.  I have already explained this once
before so I'm starting to wonder if I'm being insufficiently
clear or if you are not receiving or reading (or understanding)
my responses.  We can have a broader discussion about whether
this is the best approach for GCC to take either in this instance
or in general, but in the meantime I would appreciate it if you
refrained from undoing my changes just because you don't agree
with or don't understand the motivation behind them.

Martin

PS I continue to wonder about your motivation and ethics.  It's
rare to have someone so openly, blatantly and persistently try
to undermine someone else's work.


Re: [PATCH] Fix wrong code with truncated string literals (PR 86711/86714)

2018-07-30 Thread Martin Sebor

On 07/30/2018 12:57 AM, Richard Biener wrote:

On Sun, 29 Jul 2018, Martin Sebor wrote:


On 07/29/2018 04:56 AM, Bernd Edlinger wrote:

Hi!

This fixes two wrong code bugs where string_constant
returns over length string constants.  Initializers
like that are rejected in C++, but valid in C.


If by valid you are referring to declarations like the one in
the added test:

const char a[2][3] = { "1234", "xyz" };

then (as I explained), the excess elements in "1234" make
the char[3] initialization and thus the test case undefined.
I have resolved bug 86711 as invalid on those grounds.

Bug 86711 has a valid test case that needs to be fixed, along
with bug 86688 that I raised for the same underlying problem:
considering the excess nul as part of the string.  As has been
discussed in a separate bug, rather than working around
the excessively long strings in the middle-end, it would be
preferable to avoid creating them to begin with.

I'm already working on a fix for bug 86688, in part because
I introduced the code change and also because I'm making other
changes in this area -- bug 86552.  Both of these in response
to your comments.

I would normally welcome someone else helping with my work
but (as I already made clear last week) it's counteproductive
to have two people working in the very same area at the same
time, especially when they are working at cross purposes as
you seem to be hell-bent on doing.


I have xfailed strlenopt-49.c, which tests this feature.


That's not appropriate.  The purpose of the test is to verify
the fix for bug 86428: namely, that a call to strlen() on
an array initialized with a string of the same length is
folded, such as in:

const char b[4] = "123\0";

That's a valid initializer that can and should continue to be
folded.  The test needs to continue to exercise that feature.

The test also happens to exercise invalid/overlong initializers.
This is because that, in my view, it's safer to fold the result
of such calls to a constant than than to call the library
function and have it either return an unpredictable value or
perhaps even crash.


Personally I don't think that it is worth the effort to
optimize something that is per se invalid in C++.


This is a C test, not C++.  (I don't suppose you are actually
saying that only the common subset between C and C++ is worth
optimizing.)

Just in case it isn't clear from the above: the point of
the test exercising the behavior for overlong strings isn't
optimizing undefined behavior but rather avoiding the worst
consequences of it.  I have already explained this once
before so I'm starting to wonder if I'm being insufficiently
clear or if you are not receiving or reading (or understanding)
my responses.  We can have a broader discussion about whether
this is the best approach for GCC to take either in this instance
or in general, but in the meantime I would appreciate it if you
refrained from undoing my changes just because you don't agree
with or don't understand the motivation behind them.

Martin

PS I continue to wonder about your motivation and ethics.  It's
rare to have someone so openly, blatantly and persistently try
to undermine someone else's work.


Martin, you are clearly the one being hostile here - Bernd is trying
to help.  In fact his patches are more focused, easier to undestand
and thus easier to review.


As I explained, it's unhelpful for two people to making changes
to the same code at the same time, especially when one is undoing
the other one's changes.  I have made it clear that I am working
in this area -- I welcome and address valid feedback but I can't
very well do that while someone is compromising my work.

I appreciate test cases and suggestions for improvements but
please avoid making changes to this code while I'm working on
it.  It is not helpful.

Martin


Re: [PATCH] Fix the damage done by my other patch from yesterday to strlenopt-49.c

2018-07-30 Thread Martin Sebor

On 07/30/2018 05:51 AM, Bernd Edlinger wrote:

Hi,

this is how I would like to handle the over length strings issue in the C FE.
If the string constant is exactly the right length and ends in one explicit
NUL character, shorten it by one character.

I thought Martin would be working on it,  but as this is a really simple fix,
I would dare to send it to gcc-patches anyway, hope you don't mind...


And as I said repeatedly, I am working on it along with a number
of other things in this very area.  There are a number of issues
to solve:

1) issue a warning for the non-nul terminated strings (bug 86552:
   raised in response to your comments, initial patch posted,
   more work in progress)
2) avoid creating overlong string literals (bug 86688: raised
   partly also in response to your earlier comments)
3) handle braced-initializers (as in char a[] = { 1, 2, 0 }; )
   I'm testing a patch for that.

I am actively working on all three of these so please, for the fourth
time, let me finish my work.  Submitting patches that try to handle
any of these issues in a slightly or substantially different way at
the same time isn't helpful.  On the contrary, it's very disruptive.
This has nothing to do with ownership and everything with
coordination and an apparent divergence of opinions.

There are over 10,000 open bugs in Bugzilla.  Working on any of
them that's not assigned to anyone would be helpful.  This is
not.

Martin


Re: [PATCH] Move -Walloca and related warnings from c.opt to common.opt

2018-07-30 Thread Martin Sebor

On 07/30/2018 09:28 AM, Jakub Jelinek wrote:

On Sun, Jul 29, 2018 at 08:35:39PM +0200, Iain Buclaw wrote:

Since r262910, it was noticed that new -Walloca-larger-than= warnings
started appearing when building the D frontend's standard library.
These have been checked and verified as valid, and appropriate fixes
will be sent on upstream.

As for the warning itself, as it is now default on, it would be
preferable to be able control it.  Given the choice between adding
these options to the D frontend or moving them to common, I'd rather
move them to common.


It is strange to add a warning option for languages that don't even have
alloca.  Instead, the warning shouldn't be enabled by default (but Martin
doesn't want to listen to that), or at least should be enabled only for the
languages where it makes sense.


I agree that the warning shouldn't be enabled for languages that
don't have support for alloca.

Martin


Re: PING [PATCH] warn for strlen of arrays with missing nul (PR 86552)

2018-07-30 Thread Martin Sebor

Attached is an updated version of the patch that handles more
instances of calling strlen() on a constant array that is not
a nul-terminated string.

No other functions except strlen are explicitly handled yet,
and neither are constant arrays with braced-initializer lists
like const char a[] = { 'a', 'b', 'c' };  I am testing
an independent solution for those (bug 86552).  Once those
are handled the warning will be able to detect those as well.

Tested on x86_64-linux.

On 07/25/2018 05:38 PM, Martin Sebor wrote:

Ping: https://gcc.gnu.org/ml/gcc-patches/2018-07/msg01124.html

The fix for bug 86532 has been checked in so this enhancement
can now be applied on top of it (with only minor adjustments).

On 07/19/2018 02:08 PM, Martin Sebor wrote:

In the discussion of my patch for pr86532 Bernd noted that
GCC silently accepts constant character arrays with no
terminating nul as arguments to strlen (and other string
functions).

The attached patch is a first step in detecting these kinds
of bugs in strlen calls by issuing -Wstringop-overflow.
The next step is to modify all other handlers of built-in
functions to detect the same problem (not part of this patch).
Yet another step is to detect these problems in arguments
initialized using the non-string form:

  const char a[] = { 'a', 'b', 'c' };

This patch is meant to apply on top of the one for bug 86532
(I tested it with an earlier version of that patch so there
is code in the context that does not appear in the latest
version of the other diff).

Martin





PR tree-optimization/86552 - missing warning for reading past the end of non-string arrays

gcc/ChangeLog:

	PR tree-optimization/86552
	* builtins.h (warn_string_no_nul): Declare..
	(c_strlen): Add argument.
	* builtins.c (warn_string_no_nul): New function.
	(fold_builtin_strlen): Add argument.  Detect missing nul.
	(fold_builtin_1): Adjust.
	(string_length): Add argument and use it.
	(c_strlen): Same.
	(expand_builtin_strlen): Detect missing nul.
	* expr.c (string_constant): Add arguments.  Detect missing nul
	terminator and outermost declaration it's missing in.
	* expr.h (string_constant): Add argument.
	* fold-const.c (c_getstr): Change argument to bool*, rename
	other arguments.
	* fold-const-call.c (fold_const_call): Detect missing nul.
	* gimple-fold.c (get_range_strlen): Add argument.
	(get_maxval_strlen): Adjust.
	* gimple-fold.h (get_range_strlen): Add argument.

gcc/testsuite/ChangeLog:

	PR tree-optimization/86552
	* gcc.dg/warn-string-no-nul.c: New test.

diff --git a/gcc/builtins.c b/gcc/builtins.c
index aa3e0d8..f4924d5 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -150,7 +150,7 @@ static tree stabilize_va_list_loc (location_t, tree, int);
 static rtx expand_builtin_expect (tree, rtx);
 static tree fold_builtin_constant_p (tree);
 static tree fold_builtin_classify_type (tree);
-static tree fold_builtin_strlen (location_t, tree, tree);
+static tree fold_builtin_strlen (location_t, tree, tree, tree);
 static tree fold_builtin_inf (location_t, tree, int);
 static tree rewrite_call_expr (location_t, tree, int, tree, int, ...);
 static bool validate_arg (const_tree, enum tree_code code);
@@ -550,6 +550,36 @@ string_length (const void *ptr, unsigned eltsize, unsigned maxelts)
   return n;
 }
 
+/* For a call expression EXP to a function that expects a string argument,
+   issue a diagnostic due to it being a called with an argument NONSTR
+   that is a character array with no terminating NUL.  */
+
+void
+warn_string_no_nul (location_t loc, tree exp, tree fndecl, tree nonstr)
+{
+  loc = expansion_point_location_if_in_system_header (loc);
+
+  bool warned;
+  if (exp)
+{
+  if (!fndecl)
+	fndecl = get_callee_fndecl (exp);
+  warned = warning_at (loc, OPT_Wstringop_overflow_,
+			   "%K%qD argument missing terminating nul",
+			   exp, fndecl);
+}
+  else
+{
+  gcc_assert (fndecl);
+  warned = warning_at (loc, OPT_Wstringop_overflow_,
+			   "%qD argument missing terminating nul",
+			   fndecl);
+}
+
+  if (warned && DECL_P (nonstr))
+inform (DECL_SOURCE_LOCATION (nonstr), "referenced argument declared here");
+}
+
 /* Compute the length of a null-terminated character string or wide
character string handling character sizes of 1, 2, and 4 bytes.
TREE_STRING_LENGTH is not the right way because it evaluates to
@@ -567,37 +597,60 @@ string_length (const void *ptr, unsigned eltsize, unsigned maxelts)
accesses.  Note that this implies the result is not going to be emitted
into the instruction stream.
 
+   When ARR is non-null and the string is not properly nul-terminated,
+   set *ARR to the declaration of the outermost constant object whose
+   initializer (or one of its elements) is not nul-terminated.
+
The value returned is of type `ssizetype'.
 
Unfortunately, string_constant can't access the values of const

Re: [PATCH] warn for strlen of arrays with missing nul (PR 86552)

2018-07-30 Thread Martin Sebor

On 07/26/2018 02:53 AM, Bernd Edlinger wrote:

@@ -567,13 +597,17 @@ string_length (const void *ptr, unsigned eltsize, 
unsigned maxelts)
accesses.  Note that this implies the result is not going to be emitted
   into the instruction stream.

+   When ARR is non-null and the string is not properly nul-terminated,
+   set *ARR to the declaration of the outermost constant object whose
+   initializer (or one of its elements) is not nul-terminated.
+
The value returned is of type `ssizetype'.

Unfortunately, string_constant can't access the values of const char
arrays with initializers, so neither can we do so here.  */


Maybe drop that sentence when it is no longer true?


I believe the sentence means is that folding the length of arrays
like this isn't handled:

  const char a[] = { 'a', 'b', '\0' };

because they are represented not as STRING_CST but rather as
aggregate CONSTRUCTORs.

Adding this handling has been something I've been meaning to
do for some time and this seems like a good opportunity to do
it.  I'm testing a patch with this enhancement.


 tree
-c_strlen (tree src, int only_value)
+c_strlen (tree src, int only_value, tree *arr /* = NULL */)
 {
   STRIP_NOPS (src);
   if (TREE_CODE (src) == COND_EXPR
@@ -581,24 +615,31 @@ c_strlen (tree src, int only_value)
 {
   tree len1, len2;

-  len1 = c_strlen (TREE_OPERAND (src, 1), only_value);
-  len2 = c_strlen (TREE_OPERAND (src, 2), only_value);
+  len1 = c_strlen (TREE_OPERAND (src, 1), only_value, arr);
+  len2 = c_strlen (TREE_OPERAND (src, 2), only_value, arr);


Wow, what happens here if the first operand is non-zero terminated and the 
second is zero-terminated?


It depends on the size of the first array and on the length
of the second.  This test case triggers a warning:

  const char a[2][3] = { "123", "12" };

  int f (int i)
  {
return __builtin_strlen (i < 0 ? a[0] : a[1]);
  }

  warning: ‘__builtin_strlen’ argument missing terminating nul 
[-Wstringop-overflow=]

  note: referenced argument declared here
  const char a[2][3] = { "123", "12" };
 ^

but this one didn't:

  const char a[3] = "123";
  const char b[4] = "123";

  int f (int i)
  {
return __builtin_strlen (i < 0 ? a : b);
  }

The usual arguments both for and against issuing a diagnostic
here apply but I think it makes more sense to err on the side
of caution and diagnose them than not so I enhanced the patch
to do that, as well as handle a few more strlen contexts.

Martin


Re: [PATCH] Fix wrong code with truncated string literals (PR 86711/86714)

2018-07-30 Thread Martin Sebor

On 07/30/2018 09:24 AM, Bernd Edlinger wrote:

On 07/30/18 01:05, Martin Sebor wrote:

On 07/29/2018 04:56 AM, Bernd Edlinger wrote:

Hi!

This fixes two wrong code bugs where string_constant
returns over length string constants.  Initializers
like that are rejected in C++, but valid in C.


If by valid you are referring to declarations like the one in
the added test:

 const char a[2][3] = { "1234", "xyz" };

then (as I explained), the excess elements in "1234" make
the char[3] initialization and thus the test case undefined.
I have resolved bug 86711 as invalid on those grounds.

Bug 86711 has a valid test case that needs to be fixed, along
with bug 86688 that I raised for the same underlying problem:
considering the excess nul as part of the string.  As has been
discussed in a separate bug, rather than working around
the excessively long strings in the middle-end, it would be
preferable to avoid creating them to begin with.

I'm already working on a fix for bug 86688, in part because
I introduced the code change and also because I'm making other
changes in this area -- bug 86552.  Both of these in response
to your comments.



Sorry, I must admit, I have completely lost track on how many things
you are trying to work in parallel.

Nevertheless I started to review you pr86552 patch here:
https://gcc.gnu.org/ml/gcc-patches/2018-07/msg01593.html

But so far you did not respond to me.

Well actually I doubt your patch does apply to trunk,
maybe you start to re-base that one, and post it again
instead?


I read your comments and have been busy working on enhancing
the patch (among other things).  There are a large number of
additional contexts where constant strings are expected and
where a missing nul needs to be detected.  Some include
additional instances of strlen calls that my initial patch
didn't handle, many more others that involve other string
functions.  I have posted an updated patch that applies
cleanly and that handles the first set.

There is also a class of problems involving constant character
arrays initialized by a braced list, as in char [] = { x, y, z };
Those are currently not recognized as strings even if they are
nul-terminated, but they are far more likely to be a source of
these problems than string literals, certainly in C++ where
string initializers must fit in the array.  I am testing a patch
to convert those into STRING_CST so they can be handled as well.

Since initializing arrays with more elements than fit is
undefined in C and since the behavior is undefined at compile
time it seems to me that rejecting such initializers with
a hard error (as opposed to a warning) would be appropriate
and obviate having to deal with them in the middle-end.

Martin


[PATCH] avoid incomplete types in -Warray-bounds (PR 86741)

2018-07-30 Thread Martin Sebor

The enhanced handling of MEM_REFs in -Warray-bounds assumes
the object from whose address an offset is being computed has
a complete type.  Since the size of such objects isn't known,
whether the offset (or index) from its beginning is valid
cannot be reliably determined.  The attached patch avoids
dealing with such objects.

Martin
PR tree-optimization/86741 - ICE in -Warray-bounds indexing into an object of incomplete type

gcc/ChangeLog:

	PR tree-optimization/86741
	* tree-vrp.c (vrp_prop::check_mem_ref): Avoid incomplete types.

gcc/testsuite/ChangeLog:

	PR tree-optimization/86741
	* gcc.dg/Warray-bounds-33.c: New test.

Index: gcc/testsuite/gcc.dg/Warray-bounds-33.c
===
--- gcc/testsuite/gcc.dg/Warray-bounds-33.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/Warray-bounds-33.c	(working copy)
@@ -0,0 +1,36 @@
+/* PR tree-optimization/86741 - ICE in -Warray-bounds indexing into
+   an object of incomplete type
+   { dg-do compile }
+   { dg-options "-O2 -Wall" }  */
+
+struct S
+{
+  int s;
+};
+
+void f (void);
+
+void test_void (void)
+{
+  extern void v;
+  struct S *b = (struct S*)&v;
+  if (b->s)
+f ();
+}
+
+void test_incomplete_enum (void)
+{
+  extern enum E e;
+  struct S *b = (struct S*)&e;
+  if (b->s)
+f ();
+}
+
+void test_func (void)
+{
+  struct S *b = (struct S*)&f;
+  if (b->s)
+f ();
+}
+
+/* { dg-prune-output "taking address of expression of type .void." } */
Index: gcc/tree-vrp.c
===
--- gcc/tree-vrp.c	(revision 263072)
+++ gcc/tree-vrp.c	(working copy)
@@ -5048,9 +5048,12 @@ vrp_prop::check_mem_ref (location_t location, tree
  a reference/subscript via a pointer to an object that is not
  an element of an array.  References to members of structs and
  unions are excluded because MEM_REF doesn't make it possible
- to identify the member where the reference originated.  */
+ to identify the member where the reference originated.
+ Incomplete types are excluded as well because their size is
+ not known.  */
   tree reftype = TREE_TYPE (arg);
   if (POINTER_TYPE_P (reftype)
+  || !COMPLETE_TYPE_P (reftype)
   || RECORD_OR_UNION_TYPE_P (reftype))
 return;
 


[PATCH] convert braced initializers to strings (PR 71625)

2018-07-30 Thread Martin Sebor

The middle-end contains code to determine the lengths of constant
character arrays initialized by string literals.  The code is used
in a number of optimizations and warnings.

However, the code is unable to deal with constant arrays initialized
using the braced initializer syntax, as in

  const char a[] = { '1', '2', '\0' };

The attached patch extends the C and C++ front-ends to convert such
initializers into a STRING_CST form.

The goal of this work is to both enable existing optimizations for
such arrays, and to help detect bugs due to using non-nul terminated
arrays where nul-terminated strings are expected.  The latter is
an extension of the GCC 8 _Wstringop-overflow and
-Wstringop-truncation warnings that help detect or prevent reading
past the end of dynamically created character arrays.  Future work
includes detecting potential past-the-end reads from uninitialized
local character arrays.

Tested on x86_64-linux.

Martin
PR tree-optimization/71625 - missing strlen optimization on different array initialization style

gcc/c/ChangeLog:

	PR tree-optimization/71625
	* c-parser.c (c_parser_declaration_or_fndef): Call
	convert_braced_list_to_string.

gcc/c-family/ChangeLog:

	PR tree-optimization/71625
	* c-common.c (convert_braced_list_to_string): New function.
	* c-common.h (convert_braced_list_to_string): Declare it.

gcc/cp/ChangeLog:

	PR tree-optimization/71625
	* parser.c (cp_parser_init_declarator):  Call
	convert_braced_list_to_string.

gcc/testsuite/ChangeLog:

	PR tree-optimization/71625
	* g++.dg/init/string2.C: New test.
	* g++.dg/init/string3.C: New test.
	* gcc.dg/strlenopt-55.c: New test.

diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
index 422d668..9a93175 100644
--- a/gcc/c-family/c-common.c
+++ b/gcc/c-family/c-common.c
@@ -8345,4 +8345,72 @@ maybe_add_include_fixit (rich_location *richloc, const char *header)
   free (text);
 }
 
+/* Attempt to convert a braced array initializer list CTOR into
+   a STRING_CST for convenience and efficiency.  When non-null,
+   use EVAL to attempt to evalue constants (used by C++).
+   MAXELTS gives the maximum number of elements to accept.
+   Return the converted string on success or null on failure.  */
+
+tree
+convert_braced_list_to_string (tree ctor, tree (*eval)(tree),
+			   unsigned HOST_WIDE_INT maxelts)
+{
+  unsigned HOST_WIDE_INT nelts = CONSTRUCTOR_NELTS (ctor);
+
+  auto_vec str;
+  str.reserve (nelts + 1);
+
+  unsigned HOST_WIDE_INT i;
+  tree index, value;
+
+  FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (ctor), i, index, value)
+{
+  unsigned HOST_WIDE_INT idx = index ? tree_to_uhwi (index) : i;
+
+  /* auto_vec is limited to UINT_MAX elements.  */
+  if (idx > UINT_MAX)
+	return NULL_TREE;
+
+  /* Attempt to evaluate constants.  */
+  if (eval)
+	value = eval (value);
+
+  /* Avoid non-constant initializers.  */
+ if (!tree_fits_uhwi_p (value))
+	return NULL_TREE;
+
+  /* Skip over embedded nuls.  */
+  unsigned val = tree_to_uhwi (value);
+  if (!val)
+	continue;
+
+  /* Bail if the CTOR has a block of more than 256 embedded nuls
+	 due to implicitly initialized elements.  */
+  unsigned nelts = (idx - str.length ()) + 1;
+  if (nelts > 256)
+	return NULL_TREE;
+
+  if (nelts > 1)
+	{
+	  str.reserve (idx);
+	  str.quick_grow_cleared (idx);
+	}
+
+  if (idx > maxelts)
+	return NULL_TREE;
+
+  str.safe_insert (idx, val);
+}
+
+  /* Append a nul for the empty initializer { } and for the last
+ explicit initializer in the loop above that is a nul.  */
+  if (!nelts || str.length () < i)
+str.safe_push (0);
+
+  /* Build a string literal but return the embedded STRING_CST.  */
+  tree res = build_string_literal (str.length (), str.begin ());
+  res = TREE_OPERAND (TREE_OPERAND (res, 0), 0);
+  return res;
+}
+
 #include "gt-c-family-c-common.h"
diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index fcec95b..343a1ae 100644
--- a/gcc/c-family/c-common.h
+++ b/gcc/c-family/c-common.h
@@ -1331,6 +1331,8 @@ extern void maybe_add_include_fixit (rich_location *, const char *);
 extern void maybe_suggest_missing_token_insertion (rich_location *richloc,
 		   enum cpp_ttype token_type,
 		   location_t prev_token_loc);
+extern tree convert_braced_list_to_string (tree, tree (*)(tree) = NULL,
+	   unsigned HOST_WIDE_INT = -1);
 
 #if CHECKING_P
 namespace selftest {
diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
index 7a92628..e12d270 100644
--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -2126,6 +2126,23 @@ c_parser_declaration_or_fndef (c_parser *parser, bool fndef_ok,
 	  if (d != error_mark_node)
 		{
 		  maybe_warn_string_init (init_loc, TREE_TYPE (d), init);
+
+		  /* Convert a string CONSTRUCTOR into a STRING_CST.  */
+		  tree valtype = TREE_TYPE (init.value);
+		  if (TREE_CODE (init.value) == CONSTRUCTOR
+		  && TREE_CODE (valtype) == ARRAY_TYPE)
+		{
+		  valtype = TREE_TYPE (valtype);
+

Re: [PATCH] Make strlen range computations more conservative

2018-07-30 Thread Martin Sebor

On 07/27/2018 12:48 AM, Bernd Edlinger wrote:

I have one more example similar to PR86259, that resembles IMHO real world code:

Consider the following:


int fun (char *p)
{
  char buf[16];

  assert(strlen(p) < 4); //here: security relevant check

  sprintf(buf, "echo %s - %s", p, p); //here: security relevant code
  return system(buf);
}


What is wrong with the assertion?

Nothing, except it is removed, when this function is called from untrusted code:

untrused_fun ()
{
   char b[2] = "ab";
   fun(b);
}

 don't try to execute that: after "ab" there can be "; rm -rF / ;" on your 
stack

Even the slightly more safe check "assert(strnlen(p, 4) < 4);" would have
been removed.

Now that is a simple error and it would be easy to fix -- normally.
But when the assertion is removed, the security relevant code
is allowed to continue where it creates more damage and is
suddenly much harder to debug.


sprintf() is a known source of buffer overflows.  The recommended
practice is to use snprintf.  An alternate mechanism to constrain
the number of bytes formatted by an individual %s directive is to
use the precision, such as %.4s.


So, I start to believe that strlen range assumptions are unsafe, unless
we can prove that the string is in fact zero terminated.

I would like to guard the strlen range checks with a new option, maybe
-fassume-zero-terminated-char-arrays, and enable that under -Ofast only.

What do you think?


I'm not opposed to providing options to control various
features but I'm not in favor of disabling them by default as
a solution to accommodate buggy code.  For every instance of
a bug in a program with undefined behavior, whether it's reading
or writing past the end of an object or subobject, or integer
overflow, it's possible to show security-related consequences.
One could just as easily create a test case where allowing strlen
to read past the end of a member array could be exploited to cause
a subsequent buffer overflow.   Some of these consequences might
be in some cases mitigated by one strategy and others in other
cases by another.  There's no silver bullet -- the best approach
is to drive improvements to code to help weed out these bugs.

Even without _FORTIFY_SOURCE GCC diagnoses (some) writes past
the end of subobjects by string functions.  With _FORTIFY_SOURCE=2
it calls abort.  This is the default on popular distributions,
including Fedora, RHEL, and Ubuntu.   -Wstringop-truncation tries
to help detect the creation of unterminated strings by strncpy
and strncat.  There is little reason in my mind to treat strlen
or any other function as special, except perhaps for the few
existing exceptions of the raw memory functions (memcpy, et al.)

As you know, I have already posted a patch to detect a subset
of the problem of calling strlen on non-terminated arrays.
More such issues, including uses of dynamically created and
uninitialized arrays, can be detected by relatively modest
enhancements to the tree-ssa-strlen pass (also on my list
of things to do).  It may also be worth considering moving
the "initializer-string for array chars is too long" warning
from -Wc++-compat to -Wall or -Wextra.  But I would much rather
focus on these solutions and work toward overall improvements
than on weakening optimization to accommodate undefined code.
With sufficient awareness as a result of warnings such code
should all but disappear.  Following stricter rules opens up
opportunities for deeper analyses to enable more optimization
and detect even more bugs.

Martin


Re: PING [PATCH] warn for strlen of arrays with missing nul (PR 86552)

2018-07-30 Thread Martin Sebor

On 07/30/2018 03:11 PM, Bernd Edlinger wrote:

Hi,


@@ -621,6 +674,12 @@ c_strlen (tree src, int only_value)
maxelts = maxelts / eltsize - 1;
  }

+  /* Unless the caller is prepared to handle it by passing in a non-null
+ ARR, fail if the terminating nul doesn't fit in the array the string
+ is stored in (as in const char a[3] = "123";  */
+  if (!arr && maxelts < strelts)
+return NULL_TREE;
+


this is c_strlen, how is the caller ever supposed to handle non-zero terminated 
strings???
especially if you do this above?


Callers that pass in a non-null ARR handle them by issuing
a warning.  The rest get back a null result.  It should be
evident from the rest of the patch.  It can be debated what
each caller should do when it detects such a missing nul
where one is expected.  Different approaches may be more
or less appropriate for different callers/functions (e.g.,
strcpy vs strlen).


+c_strlen (tree src, int only_value, tree *arr /* = NULL */)
{
  STRIP_NOPS (src);
+
+  /* Used to detect non-nul-terminated strings in subexpressions
+ of a conditional expression.  When ARR is null, point it at
+ one of the elements for simplicity.  */
+  tree arrs[] = { NULL_TREE, NULL_TREE };
+  if (!arr)
+arr = arrs;



@@ -11427,7 +11478,9 @@ string_constant (tree arg, tree *ptr_offset)
  unsigned HOST_WIDE_INT length = TREE_STRING_LENGTH (init);
  length = string_length (TREE_STRING_POINTER (init), charsize,
  length / charsize);
-  if (compare_tree_int (array_size, length + 1) < 0)
+  if (nulterm)
+*nulterm = array_elts > length;
+  else if (array_elts <= length)
return NULL_TREE;


I don't understand why you can't use
compare_tree_int (TYPE_SIZE_UNIT (TREE_TYPE (init)), TREE_STRING_LENGTH (init))
instead of this convoluted code above ???

Sorry, this patch does not look like it is ready any time soon.


I'm open to technical comments on the substance of my changes
but I'm not interested in your opinion of the readiness of
the patch (whatever that might mean), certainly not if you
have formed it after skimming a random handful of lines out
of a 600 line patch.


But actually I am totally puzzled by your priorities.
This is what I see right now:

1) We have missing warnings.
2) We have wrong code bugs.
3) We have apparently a specification error on the C Language standard (*)


Why are you prioritizing 1) over 2) thus blocking my attempts to fix a wrong 
code
issue,and why do you not tackle 3) in your WG14?


My priorities are none of your concern.

Your "attempts to fix" issues interfere with my work on a number
of projects.  You are not being helpful -- instead, by submitting
changes that you know fully well conflict with mine, you are
impeding and undermining my work.  That is why I object to them.


(*) which means that GCC is currently removing code from assertions
as I pointed out here: https://gcc.gnu.org/ml/gcc-patches/2018-07/msg01695.html

This happens because GCC follows the language standards literally right now.

I would say too literally, and it proves that the language standard's logic is
flawed IMHO.


I have no idea what your point is about standards, but bugs
like the one in the example, including those arising from
uninitialized arrays, could be detected with only minor
enhancements to the tree-ssa-strlen pass.  Implementing some
of this is among the projects I'm expected and expecting to
work on for GCC 9.  This patch is a small step in that
direction.

If you care about detecting bugs I would expect you to be
supportive rather than dismissive of this work, and helpful
in bringing it to fruition rather that putting it down or
questioning my priorities.  Especially since the work was
prompted by your own (valid) complaint that GCC doesn't
diagnose them.

Martin



Re: [PATCH] Fix wrong code with truncated string literals (PR 86711/86714)

2018-07-30 Thread Martin Sebor

On 07/30/2018 02:21 PM, Bernd Edlinger wrote:

On 07/30/18 21:52, Martin Sebor wrote:

On 07/30/2018 09:24 AM, Bernd Edlinger wrote:

On 07/30/18 01:05, Martin Sebor wrote:

On 07/29/2018 04:56 AM, Bernd Edlinger wrote:

Hi!

This fixes two wrong code bugs where string_constant
returns over length string constants.  Initializers
like that are rejected in C++, but valid in C.


If by valid you are referring to declarations like the one in
the added test:

 const char a[2][3] = { "1234", "xyz" };

then (as I explained), the excess elements in "1234" make
the char[3] initialization and thus the test case undefined.
I have resolved bug 86711 as invalid on those grounds.

Bug 86711 has a valid test case that needs to be fixed, along
with bug 86688 that I raised for the same underlying problem:
considering the excess nul as part of the string.  As has been
discussed in a separate bug, rather than working around
the excessively long strings in the middle-end, it would be
preferable to avoid creating them to begin with.

I'm already working on a fix for bug 86688, in part because
I introduced the code change and also because I'm making other
changes in this area -- bug 86552.  Both of these in response
to your comments.



Sorry, I must admit, I have completely lost track on how many things
you are trying to work in parallel.

Nevertheless I started to review you pr86552 patch here:
https://gcc.gnu.org/ml/gcc-patches/2018-07/msg01593.html

But so far you did not respond to me.

Well actually I doubt your patch does apply to trunk,
maybe you start to re-base that one, and post it again
instead?


I read your comments and have been busy working on enhancing
the patch (among other things).  There are a large number of
additional contexts where constant strings are expected and
where a missing nul needs to be detected.  Some include
additional instances of strlen calls that my initial patch
didn't handle, many more others that involve other string
functions.  I have posted an updated patch that applies
cleanly and that handles the first set.

There is also a class of problems involving constant character
arrays initialized by a braced list, as in char [] = { x, y, z };
Those are currently not recognized as strings even if they are
nul-terminated, but they are far more likely to be a source of
these problems than string literals, certainly in C++ where
string initializers must fit in the array.  I am testing a patch
to convert those into STRING_CST so they can be handled as well.

Since initializing arrays with more elements than fit is
undefined in C and since the behavior is undefined at compile
time it seems to me that rejecting such initializers with
a hard error (as opposed to a warning) would be appropriate
and obviate having to deal with them in the middle-end.



We do not want to change what is currently accepted by the
front end. period.


On whose behalf are you making such categorical statements?
It was Jakub and Richard's suggestion in bug 86714 to reject
the undefined excessive initializers and I happen to like
the idea.  I don't recall anyone making a decision about what
"we" do or don't want to change.

That said, if rejecting such initializers is not acceptable
an alternate solution that I believe Richard preferred is to
trim excess elements early on, e.g., during gimplification
(or at some other point after the front-end is done).  That's
okay with me too.



But there is no reason why ambiguous string constants
have to be passed to the middle end.

For instance char c[2] = "a\0"; should look like c[1] = "a";
while c[2] = "aaa"; should look like c[2] = "aa"; varasm.c
will cut the excess precision off anyway.

That is TREE_STRING_LENGTH (str) == 3 and TREE_STRING_POINTER(str) = "aa\0";

I propose to have all STRING_CST always be created by the
FE with explicit nul termination, but the
TYPE_SIZE_UNIT (TREE_TYPE (str)) >= TREE_STRING_LENGTH(str) in normal case 
(null-terminated)
TREE_SIZE_UNIT (TREE_TYPE (str)) < TREE_STRING_LENGTH(str) if non zero 
terminated,
truncated in the initializer.

Do you understand what I mean?


I don't insist on any particular internal representation as long
as it makes it possible to detect and diagnose common bugs, and
(ideally) also help mitigate their worst consequences.

Martin


Re: [PATCH] convert braced initializers to strings (PR 71625)

2018-07-31 Thread Martin Sebor

On 07/31/2018 07:38 AM, Jason Merrill wrote:

On Tue, Jul 31, 2018 at 9:51 AM, Martin Sebor  wrote:

The middle-end contains code to determine the lengths of constant
character arrays initialized by string literals.  The code is used
in a number of optimizations and warnings.

However, the code is unable to deal with constant arrays initialized
using the braced initializer syntax, as in

  const char a[] = { '1', '2', '\0' };

The attached patch extends the C and C++ front-ends to convert such
initializers into a STRING_CST form.

The goal of this work is to both enable existing optimizations for
such arrays, and to help detect bugs due to using non-nul terminated
arrays where nul-terminated strings are expected.  The latter is
an extension of the GCC 8 _Wstringop-overflow and
-Wstringop-truncation warnings that help detect or prevent reading
past the end of dynamically created character arrays.  Future work
includes detecting potential past-the-end reads from uninitialized
local character arrays.



  && TYPE_MAIN_VARIANT (TREE_TYPE (valtype)) == char_type_node)


Why? Don't we want this for other character types as well?


It suppresses narrowing warnings for things like

  signed char a[] = { 0xff };

(there are a couple of tests that exercise this).

At the same time, STRING_CST is supposed to be able to represent
strings of any integer type so there should be a way to make it
work.  On the flip side, recent discussions of changes in this
area suggest there may be bugs in the wide character handling of
STRING_CST so those would need to be fixed before relying on it
for robust support.

In any case, if you have a suggestion for how to make it work for
at least the narrow character types I'll adjust the patch.

Martin


Re: [PATCH] Make strlen range computations more conservative

2018-07-31 Thread Martin Sebor

On 07/31/2018 12:38 AM, Jakub Jelinek wrote:

On Mon, Jul 30, 2018 at 09:45:49PM -0600, Martin Sebor wrote:

Even without _FORTIFY_SOURCE GCC diagnoses (some) writes past
the end of subobjects by string functions.  With _FORTIFY_SOURCE=2
it calls abort.  This is the default on popular distributions,


Note that _FORTIFY_SOURCE=2 is the mode that goes beyond what the standard
requires, imposes extra requirements.  So from what this mode accepts or
rejects we shouldn't determine what is or isn't considered valid.


I'm not sure what the additional requirements are but the ones
I am referring to are the enforcing of struct member boundaries.
This is in line with the standard requirements of not accessing
[sub]objects via pointers derived from other [sub]objects.

The one area where Builtin Object Size doesn't faithfully reflect
subobject boundaries is arrays of of arrays.  This was a serious
concern for the security group at my last company (see bug 44384)
We developed (proprietary) patches to mitigate the shortcoming.

Martin



Re: [PATCH] include more detail in -Warray-bounds (PR 86650)

2018-07-31 Thread Martin Sebor

Attached is a much scaled-down patch that only adds a note
to the -Warray-bounds warnings mentioning the declaration
to which the out-of-bounds index or offset applies.

Printing the inlining context needs a bit more work but
this small improvement can be made independently of it.

On 07/23/2018 05:49 PM, Martin Sebor wrote:

(David, I'm hoping your your help here.  Please see the end.)

While looking into a recent -Warray-bounds instance in Glibc
involving inlining of large functions it became apparent that
GCC could do a better job of pinpointing the source of
the problem.

The attached patch makes a few adjustments to both
the pretty printer infrastructure and to VRP to make this
possible.  The diagnostic pretty printer already has directives
to print the inlining context for both tree and gcall* arguments,
so most of the changes just adjust things to be able to pass in
gimple* argument instead.

The only slightly interesting change is to print the declaration
to which the out-of-bounds array refers if one is known.

Tested on x86_64-linux with one regression.

The regression is in the gcc.dg/Warray-bounds.c test: the column
numbers of the warnings are off.  Adding the %G specifier to
the array bounds warnings in VRP has the unexpected effect of
expanding the extent of the underling. For instance, for a test
case like this:

  int a[10];

  void f (void)
  {
a[-1] = 0;
  }

from the expected:

   a[-1] = 0;
   ~^~~~

to this:

  a[-1] = 0;
   ~~^~~

David, do you have any idea how to avoid this?

Martin


PR tree-optimization/86650 - -Warray-bounds missing inlining context

gcc/ChangeLog:

	PR tree-optimization/86650
	* tree-vrp.c (vrp_prop::check_array_ref): Print an inform message.
	(vrp_prop::check_mem_ref): Same.

gcc/testsuite/ChangeLog:

	PR tree-optimization/86650
	* gcc.dg/Warray-bounds-33.c: New test.

diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
index 7ab8898..e553f3a 100644
--- a/gcc/tree-vrp.c
+++ b/gcc/tree-vrp.c
@@ -4838,14 +4838,13 @@ vrp_prop::check_array_ref (location_t location, tree ref,
 
   tree artype = TREE_TYPE (TREE_OPERAND (ref, 0));
 
+  bool warned = false;
+
   /* Empty array.  */
   if (up_bound && tree_int_cst_equal (low_bound, up_bound_p1))
-{
-  warning_at (location, OPT_Warray_bounds,
-		  "array subscript %E is above array bounds of %qT",
-		  low_bound, artype);
-  TREE_NO_WARNING (ref) = 1;
-}
+warned = warning_at (location, OPT_Warray_bounds,
+			 "array subscript %E is above array bounds of %qT",
+			 low_bound, artype);
 
   if (TREE_CODE (low_sub) == SSA_NAME)
 {
@@ -4866,12 +4865,10 @@ vrp_prop::check_array_ref (location_t location, tree ref,
 	  : tree_int_cst_le (up_bound, up_sub))
   && TREE_CODE (low_sub) == INTEGER_CST
   && tree_int_cst_le (low_sub, low_bound))
-{
-  warning_at (location, OPT_Warray_bounds,
-		  "array subscript [%E, %E] is outside array bounds of %qT",
-		  low_sub, up_sub, artype);
-  TREE_NO_WARNING (ref) = 1;
-}
+	warned = warning_at (location, OPT_Warray_bounds,
+			 "array subscript [%E, %E] is outside "
+			 "array bounds of %qT",
+			 low_sub, up_sub, artype);
 }
   else if (up_bound
 	   && TREE_CODE (up_sub) == INTEGER_CST
@@ -4885,10 +4882,9 @@ vrp_prop::check_array_ref (location_t location, tree ref,
 	  dump_generic_expr (MSG_NOTE, TDF_SLIM, ref);
 	  fprintf (dump_file, "\n");
 	}
-  warning_at (location, OPT_Warray_bounds,
-		  "array subscript %E is above array bounds of %qT",
-		  up_sub, artype);
-  TREE_NO_WARNING (ref) = 1;
+  warned = warning_at (location, OPT_Warray_bounds,
+			   "array subscript %E is above array bounds of %qT",
+			   up_sub, artype);
 }
   else if (TREE_CODE (low_sub) == INTEGER_CST
&& tree_int_cst_lt (low_sub, low_bound))
@@ -4899,9 +4895,18 @@ vrp_prop::check_array_ref (location_t location, tree ref,
 	  dump_generic_expr (MSG_NOTE, TDF_SLIM, ref);
 	  fprintf (dump_file, "\n");
 	}
-  warning_at (location, OPT_Warray_bounds,
-		  "array subscript %E is below array bounds of %qT",
-		  low_sub, artype);
+  warned = warning_at (location, OPT_Warray_bounds,
+			   "array subscript %E is below array bounds of %qT",
+			   low_sub, artype);
+}
+
+  if (warned)
+{
+  ref = TREE_OPERAND (ref, 0);
+
+  if (DECL_P (ref))
+	inform (DECL_SOURCE_LOCATION (ref), "while referencing %qD", ref);
+
   TREE_NO_WARNING (ref) = 1;
 }
 }
@@ -4916,7 +4921,8 @@ vrp_prop::check_array_ref (location_t location, tree ref,
the address of the just-past-the-end element of an array).  */
 
 void
-vrp_prop::check_mem_ref (location_t location, tree ref, bool ignore_off_by_one)
+vrp_prop::check_mem_ref (location_t location, tree ref,
+			 bool ignore_off_by_one)
 {
   if (TREE_NO_WARNIN

[PATCH] change %G argument from gcall* to gimple*

2018-07-31 Thread Martin Sebor

The GCC internal %G directive takes a gcall* argument and prints
the call's inlining stack in diagnostics.  The argument type makes
it unsuitable for gimple expressions such as those diagnosed by
-Warray-bounds.

As the first step in adding inlining context to -Warray-bounds
warnings the attached patch changes the %G argument to accept
gimple* instead of gcall*.  (More work is needed for %G to
preserve the location range within diagnostics so this patch
just implements the first step.)

Martin
PR tree-optimization/86650 - -Warray-bounds missing inlining context

gcc/c/ChangeLog:

	PR tree-optimization/86650
	* c-objc-common.c (c_tree_printer): Adjust.

gcc/c-family/ChangeLog:

	PR tree-optimization/86650
	* c-format.c (local_gcall_ptr_node): Rename...
	 (local_gimple_ptr_node): ...to this.
	* c-format.h (T89_G): Adjust.

gcc/cp/ChangeLog:

	PR tree-optimization/86650
	* error.c (cp_printer): Adjust.

gcc/ChangeLog:

	PR tree-optimization/86650
	* gimple-pretty-print.c (percent_G_format): Simplify.
	* tree-diagnostic.c (default_tree_printer): Adjust.
	* tree-pretty-print.c (percent_K_format): Add argument.
	* tree-pretty-print.h: Add argument.
	* gimple-fold.c (gimple_fold_builtin_strncpy): Adjust.
	* gimple-ssa-warn-restrict.h (check_bounds_or_overlap): Replace
	gcall* argument with gimple*.
	* gimple-ssa-warn-restrict.c (check_call): Same.
	(wrestrict_dom_walker::before_dom_children): Same.
	(builtin_access::builtin_access): Same.
	(check_bounds_or_overlap): Same.
	* tree-ssa-ccp.c (pass_post_ipa_warn::execute): Adjust.
	* tree-ssa-strlen.c (maybe_diag_stxncpy_trunc): Adjust.

gcc/testsuite/ChangeLog:

	PR tree-optimization/86650
	* gcc.dg/format/gcc_diag-10.c: Adjust.

diff --git a/gcc/c-family/c-format.c b/gcc/c-family/c-format.c
index a0192dd..705bffb 100644
--- a/gcc/c-family/c-format.c
+++ b/gcc/c-family/c-format.c
@@ -56,7 +56,7 @@ struct function_format_info
 
 /* Initialized in init_dynamic_diag_info.  */
 static GTY(()) tree local_tree_type_node;
-static GTY(()) tree local_gcall_ptr_node;
+static GTY(()) tree local_gimple_ptr_node;
 static GTY(()) tree locus;
 
 static bool decode_format_attr (tree, function_format_info *, int);
@@ -691,7 +691,7 @@ static const format_char_info gcc_diag_char_table[] =
 
   /* Custom conversion specifiers.  */
 
-  /* G requires a "gcall*" argument at runtime.  */
+  /* G requires a "gimple*" argument at runtime.  */
   { "G",   1, STD_C89, { T89_G,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "","\"",   NULL },
   /* K requires a "tree" argument at runtime.  */
   { "K",   1, STD_C89, { T89_T,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "","\"",   NULL },
@@ -722,7 +722,7 @@ static const format_char_info gcc_tdiag_char_table[] =
   { "E", 1, STD_C89, { T89_T,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q+", "",   NULL },
   { "K", 1, STD_C89, { T89_T,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "", "\"",   NULL },
 
-  /* G requires a "gcall*" argument at runtime.  */
+  /* G requires a "gimple*" argument at runtime.  */
   { "G", 1, STD_C89, { T89_G,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "", "\"",   NULL },
 
   { "v",   0, STD_C89, { T89_I,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q#",  "",   NULL },
@@ -754,7 +754,7 @@ static const format_char_info gcc_cdiag_char_table[] =
   { "E",   1, STD_C89, { T89_T,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q+", "",   NULL },
   { "K",   1, STD_C89, { T89_T,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "", "\"",   NULL },
 
-  /* G requires a "gcall*" argument at runtime.  */
+  /* G requires a "gimple*" argument at runtime.  */
   { "G",   1, STD_C89, { T89_G,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "", "\"",   NULL },
 
   { "v",   0, STD_C89, { T89_I,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q#",  "",   NULL },
@@ -787,7 +787,7 @@ static const format_char_info gcc_cxxdiag_char_table[] =
   { "K", 1, STD_C89,{ T89_T,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "",   "\"",   NULL },
   { "v", 0,STD_C89, { T89_I,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q#",  "",   NULL },
 
-  /* G requires a "gcall*" argument at runtime.  */
+  /* G requires a "gimple*" argument at runtime.  */
   { "G", 1, STD_C89,{ T89_G,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "",   "\"",   NULL },
 
   /* These accept either an 'int' or an

Re: [PATCH] Alias -Warray-bounds to -Warray-bounds=1

2018-07-31 Thread Martin Sebor

I can't approve patches but this one seems to be in
the obvious category so I think it could be checked in
without formal approval.

It is however missing a couple of things: 1) a test case,
and 2) a reference to the bug it fixes in the ChangeLog
and in the test.

With that, if no one objects, I will commit the path for
you.

Martin

On 07/25/2018 08:04 AM, Franz Sirl wrote:

Hi,

as discussed with Martin, this patch consolidates -Warray-bounds into an
alias of -Warray-bounds=1.

Bootstrapped on x86_64-linux, no regressions.

Please apply if it's OK.

Franz.







Re: [PATCH] Make strlen range computations more conservative

2018-07-31 Thread Martin Sebor

On 07/31/2018 09:48 AM, Jakub Jelinek wrote:

On Tue, Jul 31, 2018 at 09:17:52AM -0600, Martin Sebor wrote:

On 07/31/2018 12:38 AM, Jakub Jelinek wrote:

On Mon, Jul 30, 2018 at 09:45:49PM -0600, Martin Sebor wrote:

Even without _FORTIFY_SOURCE GCC diagnoses (some) writes past
the end of subobjects by string functions.  With _FORTIFY_SOURCE=2
it calls abort.  This is the default on popular distributions,


Note that _FORTIFY_SOURCE=2 is the mode that goes beyond what the standard
requires, imposes extra requirements.  So from what this mode accepts or
rejects we shouldn't determine what is or isn't considered valid.


I'm not sure what the additional requirements are but the ones
I am referring to are the enforcing of struct member boundaries.
This is in line with the standard requirements of not accessing
[sub]objects via pointers derived from other [sub]objects.


In the middle-end the distinction between what was originally a reference
to subobjects and what was a reference to objects is quickly lost
(whether through SCCVN or other optimizations).
We've run into this many times with the __builtin_object_size already.
So, if e.g.
struct S { char a[3]; char b[5]; } s = { "abc", "defg" };
...
strlen ((char *) &s) is well defined but
strlen (s.a) is not in C, for the middle-end you might not figure out which
one is which.


Yes, I'm aware of the middle-end transformation to MEM_REF
-- it's one of the reasons why detecting invalid accesses
by the middle end warnings, including -Warray-bounds,
-Wformat-overflow, -Wsprintf-overflow, and even -Wrestrict,
is less than perfect.

But is strlen(s.a) also meant to be well-defined in the middle
end (with the semantics of computing the length or "abcdefg"?)
And if so, what makes it well defined?

Certainly not every "strlen" has these semantics.  For example,
this open-coded one doesn't:

  int len = 0;
  for (int i = 0; s.a[i]; ++i)
++len;

It computes 2 (with no warning for the out-of-bounds access).

So if the standard doesn't guarantee it and different kinds
of accesses behave differently, how do we explain what "works"
and what doesn't without relying on GCC implementation details?

If we can't then the only language we have in common with users
is the standard.  (This, by the way, is what the C memory model
group is trying to address -- the language or feature that's
missing from the standard that says when, if ever, these things
might be valid.)

Martin


[PING 2] [WIP PATCH] add object access attributes (PR 83859)

2019-10-24 Thread Martin Sebor

Ping: https://gcc.gnu.org/ml/gcc-patches/2019-09/msg01690.html

On 10/17/2019 10:28 AM, Martin Sebor wrote:

Ping: https://gcc.gnu.org/ml/gcc-patches/2019-09/msg01690.html

Other than the suggestions I got for optimization (for GCC 11)
and additional buffer overflow detection for [static] arrays),
is there any feedback on the patch itself?  Jeff?

Martin

On 9/29/19 1:51 PM, Martin Sebor wrote:

-Wstringop-overflow detects a subset of past-the-end read and write
accesses by built-in functions such as memcpy and strcpy.  It relies
on the functions' effects the knowledge of which is hardwired into
GCC.  Although it's possible for users to create wrappers for their
own functions to detect similar problems, it's quite cumbersome and
so only lightly used outside system libraries like Glibc.  Even Glibc
only checks for buffer overflow and not for reading past the end.

PR 83859 asks to expose the same checking that GCC does natively for
built-in calls via a function attribute that associates a pointer
argument with the size argument, such as:

   __attribute__((buffer_size (1, 2))) void
   f (char* dst, size_t dstsize);

The attached patch is my initial stab at providing this feature by
introducing three new attributes:

   * read_only (ptr-argno, size-argno)
   * read_only (ptr-argno, size-argno)
   * read_write (ptr-argno, size-argno)

As requested, the attributes associate a pointer parameter to
a function with a size parameter.  In addition, they also specify
how the function accesses the object the pointer points to: either
it only reads from it, or it only writes to it, or it does both.

Besides enabling the same buffer overflow detection as for built-in
string functions they also let GCC issue -Wuninitialized warnings
for uninitialized objects passed to read-only functions by reference,
and -Wunused-but-set warnings for objects passed to write-only
functions that are otherwise unused (PR 80806).  The -Wununitialized
part is done. The -Wunused-but-set detection is implemented only in
the C FE and not yet in C++.

Besides the diagnostic improvements above the attributes also open
up optimization opportunities such as DCE.  I'm still working on this
and so it's not yet part of the initial patch.

I plan to finish the patch for GCC 10 but I don't expect to have
the time to start taking advantage of the attributes for optimization
until GCC 11.

Besides regression testing on x86_64-linux, I also tested the patch
by compiling Binutils/GDB, Glibc, and the Linux kernel with it.  It
found no new problems but caused a handful of 
-Wunused-but-set-variable false positives due to an outstanding bug in 
the C front-end introduced

by the patch that I still need to fix.

Martin




[PING][POC v2 PATCH] __builtin_warning

2019-10-24 Thread Martin Sebor

Other than the comments from Joseph any feedback on the patch
itself and my questions?

Ping: https://gcc.gnu.org/ml/gcc-patches/2019-10/msg01015.html

On 10/14/2019 02:41 PM, Martin Sebor wrote:

Attached is a more fleshed out version of the built-in implemented
to run in a pass of its own.  I did this in anticipation of looking
at the CFG to help eliminate false positives due to ASAN
instrumentation (e.g., PR 91707).

The built-in now handles a decent number of C and GCC formatting
directives.

The patch introduces a convenience API to create calls to the built-in
(gimple_build_warning).  It also avoids duplicate warnings emitted as
a result of redundant calls to the built-in for the same code (e.g.,
by different passes detecting the same out-of-bounds access).

To show how to use the built-in and the APIs within GCC the patch
modifies path isolation and CCP to inject calls to it into the CFG.
A couple of new tests exercise using the built-in from user code.

The patch triggers a number of -Wnonnull instances during bootstrap
and failures in tests that exercise the warnings modified by using
the built-in.  The GCC warnings are mostly potential bugs that
I will submit patches for, but they're in general unrelated to
the built-in itself.

At this point I want to know if there is support for a) including
the built-in in GCC 10, b) the path isolation changes to make use
of it, and c) the CCP -Wnonnull changes.  If (a), I will submit
a final patch in the next few weeks.  If also (b) and/or (c)
I will also work on cleaning up the GCC warnings.

Martin

PS The patch introduces a general mechanism for processing vararg
formatting functions.  It's only used to handle the built-in but
once it's accepted I expect to replace the gimple-ssa-printf.c
parser with it.

On 8/9/19 3:26 PM, Martin Sebor wrote:

Attached is a very rough and only superficially barely tested
prototype of the __builtin_warning intrinsic we talked about
the other day.  The built-in is declared like so:

   int __builtin_warning (int loc,
  const char *option,
  const char *txt, ...);

If it's still present during RTL expansion the built-in calls

   bool ret = warning_at (LOC, find_opt (option), txt, ...);

and expands to the constant value of RET (which could be used
to issue __builtin_note but there may be better ways to deal
with those than that).

LOC is the location of the warning or zero for the location of
the built-in itself (when called by user code), OPTION is either
the name of the warning option (e.g., "-Wnonnull", when called
by user code) or the index of the option (e.g., OPT_Wnonnull,
when emitted by GCC into the IL), and TXT is the format string
to format the warning text.  The rest of the arguments are not
used but I expect to extract and pass them to the diagnostic
pretty printer to format the text of the warning.

Using the built-in in user code should be obvious.  To show
how it might be put to use within GCC, I added code to
gimple-ssa-isolate-paths.c to emit -Wnonnull in response to
invalid null pointer accesses.  For this demo, when compiled
with the patch applied and with -Wnull-dereference (which is
not in -Wall or -Wextra), GCC issues three warnings: two
instances of -Wnull-dereference one of which is a false positive,
and one -Wnonnull (the one I added, which is included in -Wall),
which is a true positive:

   int f (void)
   {
 char a[4] = "12";
 char *p = __builtin_strlen (a) < 3 ? a : 0;
 return *p;
   }

   int g (void)
   {
 char a[4] = "12";
 char *p = __builtin_strlen (a) > 3 ? a : 0;
 return *p;
   }

   In function ‘f’:
   warning: potential null pointer dereference [-Wnull-dereference]
 7 |   return *p;
   |  ^~
   In function ‘g’:
   warning: null pointer dereference [-Wnull-dereference]
    14 |   return *p;
   |  ^~
   warning: invalid use of a null pointer [-Wnonnull]

The -Wnull-dereference instance in f is a false positive because
the strlen result is clearly less than two.  The strlen pass folds
the strlen result to a constant but it runs after path isolation
which will have already issued the bogus warning.

Martin

PS I tried compiling GCC with the patch.  It fails in libgomp
with:

   libgomp/oacc-mem.c: In function ‘gomp_acc_remove_pointer’:
   cc1: warning: invalid use of a null pointer [-Wnonnull]

so clearly it's missing location information.  With
-Wnull-dereference enabled we get more detail:

   libgomp/oacc-mem.c: In function ‘gomp_acc_remove_pointer’:
   libgomp/oacc-mem.c:1013:31: warning: potential null pointer 
dereference [-Wnull-dereference]

    1013 |   for (size_t i = 0; i < t->list_count; i++)
 |  ~^~~~
   libgomp/oacc-mem.c:1012:19: warning: potential null pointer 
dereference [-Wnull-dereference]

    1012 |   t->refcount = minrefs;
 |   ~~

[PING][PATCH] bring -Warray-bounds closer to -Wstringop-overflow (PR91647, 91463, 91679)

2019-10-24 Thread Martin Sebor

Ping: https://gcc.gnu.org/ml/gcc-patches/2019-10/msg00860.html

Should I add something like the -Wzero-length-array-bounds option
to allow some of the questionable idioms seen in the kernel?

On 10/11/2019 10:34 AM, Martin Sebor wrote:

On 9/10/19 4:35 PM, Jeff Law wrote:

On 9/6/19 1:27 PM, Martin Sebor wrote:

Recent enhancements to -Wstringop-overflow improved the warning
to the point that it detects a superset of the problems -Warray-
bounds is intended detect in character accesses.  Because both
warnings detect overlapping sets of problems, and because the IL
they work with tends to change in subtle ways from target to
targer, tests designed to verify one or the other sometimes fail
with a target where the warning isn't robust enough to detect
the problem given the IL representation.

To reduce these test suite failures the attached patch extends
-Warray-bounds to handle some of the same problems -Wstringop-
overflow does, pecifically, out-of-bounds accesses to array
members of structs, including zero-length arrays and flexible
array members of defined objects.

In the process of testing the enhancement I realized that
the recently added component_size() function doesn't work as
intended for non-character array members (see below).  The patch
corrects this by reverting back to the original implementation
of the function until the better/simpler solution can be put in
place as mentioned below.

Tested on x86_64-linux.

Martin


[*] component_size() happens to work for char arrays because those
are transformed to STRING_CSTs, but not for arrays that are not.
E.g., in

   struct S { int64_t i; int16_t j; int16_t a[]; }
 s = { 0, 0, { 1, 0 } };

unless called with type set to int16_t[2], fold_ctor_reference
will return s.a[0] rather than all of s.a.  But set type to
int16_t[2] we would need to know that s.a's initializer has two
elements, and that's just what we're using fold_ctor_reference
to find out.

I think this could probably be made to work somehow by extending
useless_type_conversion_p to handle this case as special somehow,
but it doesn't seem worth the effort given that there should be
an easier way to do it as you noted below.

Given the above, the long term solution should be to rely on
DECL_SIZE_UNIT(decl) - TYPE_SIZE_UNIT(decl_type) as Richard
suggested in the review of its initial implementation.
Unfortunately, because of bugs in both the C and C++ front ends
(I just opened PR 65403 with the details) the simple formula
doesn't give the right answers either.  So until the bugs are
fixed, the patch reverts back to the original loopy solution.
It's no more costly than the current fold_ctor_reference
approach.

...


So no concerns with the patch itself, just the fallout you mentioned in
a follow-up message.  Ideally we'd have glibc and the kernel fixed
before this goes in, but I'd settle for just getting glibc fixed since
we have more influence there.


Half of the issues there were due to a bug in the warning.  The rest
are caused by Glibc's use of interior zero-length arrays to access
subsequent members.  It works in simple cases but it's very brittle
because GCC assumes that even such members don't alias. If it's meant
to be a supported feature then aliasing would have to be changed to
take it into account.  But I'd rather encourage projects to move away
from these dangerous hacks and towards cleaner, safer code.

I've fixed the bug in the attached patch.  The rest can be suppressed
by replacing the zero-length arrays with flexible array members but
that's just trading one misuse for another.  If the code can't be
changed to avoid this (likely not an option since the arrays are in
visible in the public API) I think the best way to deal with them is
to suppress them by #pragma GCC diagnostic ignored.  I opened BZ 25097
in Glibc Bugzilla to track this.


Out of curiosity are the kernel issues you raised due to flexible arrays
or just cases where we're doing a better job on normal objects?  I'd be
a bit surprised to find flexible arrays in the kernel.


I don't think I've come across any flexible arrays in the kernel.

The patch triggers 94 instances of -Warray-bounds (60 of which
are for distinct code) in 21 .c files.  I haven't looked at all
of them but some of the patterns I noticed are:

1) Intentionally using an interior zero-length array to access
    (e.g., memset) one or more subsequent members. E.g.,
    _dbgp_external_startup in drivers/usb/early/ehci-dbgp.c and
    quite a few others.  This is pretty pervasive but seems easily
    avoidable.

2) Overwriting a member array with more data (e.g., function
    cxio_rdev_open in
    drivers/infiniband/hw/cxgb3/cxio_hal.c or in function
    pk_probe in drivers/hid/hid-prodikeys.c).  At first glance
    some of these look like bugs but with stuff obscured by macros
    and no comments it's hard to tell.

3) Uses

[PING 2][PATCH] implement -Wrestrict for sprintf (PR 83688)

2019-10-24 Thread Martin Sebor

Ping: https://gcc.gnu.org/ml/gcc-patches/2019-10/msg00570.html

On 10/14/2019 08:34 PM, Martin Sebor wrote:

Ping: https://gcc.gnu.org/ml/gcc-patches/2019-10/msg00570.html

On 10/8/19 5:51 PM, Martin Sebor wrote:

Attached is a resubmission of the -Wrestrict implementation for
the sprintf family of functions.  The original patch was posted
in 2017 but never approved.  This revision makes only a few minor
changes to the original code, mostly necessitated by rebasing on
the top of trunk.

The description from the original posting still applies today:

   The enhancement works by first determining the base object (or
   pointer) from the destination of the sprintf call, the constant
   offset into the object (and subobject for struct members), and
   its size.  For each %s argument, it then computes the same data.
   If it determines that overlap between the two is possible it
   stores the data for the directive argument (including the size
   of the argument) for later processing.  After the whole call and
   format string have been processed, the code then iterates over
   the stored directives and their arguments and compares the size
   and length of the argument against the function's overall output.
   If they overlap it issues a warning.

The solution is pretty simple.  The only details that might be
worth calling out are the addition of a few utility functions some
of which at first glance look like they could be replaced by calls
to existing utilities:

  *  array_elt_at_offset
  *  field_at_offset
  *  get_origin_and_offset
  *  alias_offset

Specifically, get_origin_and_offset looks like a dead ringer for
get_addr_base_and_unit_offset, except since the former is only
used for warnings it is less conservative.  It also works with
SSA_NAMEs.  This is also the function I expect to need to make
changes to (and fix bugs in).  The rest of the functions are
general utilities that could perhaps be moved to tree.c at some
point when there is a use for them elsewhere (I have some work
in progress that might need at least one of them).

Another likely question worth addressing is why the sprintf
overlap detection isn't handled in gimple-ssa-warn-restrict.c.
There is an opportunity for code sharing between the two "passes"
but it will require some fairly intrusive changes to the latter.
Those feel out of scope for the initial solution.

Finally, because of new dependencies between existing classes in
the file, some code had to be moved around within it a bit.  That
contributed to the size of the patch making the changes seem more
extensive than they really are.

Tested on x86_64-linux with Binutils/GDB and Glibc.

Martin

The original submission:
https://gcc.gnu.org/ml/gcc-patches/2017-07/msg00036.html




[PING] [PATCH] add __has_builtin (PR 66970)

2019-10-25 Thread Martin Sebor

Ping: https://gcc.gnu.org/ml/gcc-patches/2019-10/msg00062.html

I was privately pointed at the Clang tests.  The implementation
passes most of them.  The one difference I noticed is that GCC
expands macros in the __has_builtin argument list while Clang
doesn't.  Since this is in line with other similar built-ins
(e.g., __has_attribute) and since Clang has at least one bug
report about this, I left it as is and just raised PR 91961
for the record.

On 10/11/2019 09:23 AM, Martin Sebor wrote:

Ping: https://gcc.gnu.org/ml/gcc-patches/2019-10/msg00062.html

On 10/1/19 11:16 AM, Martin Sebor wrote:

Attached is an implementation of the __has_builtin special
preprocessor operator/macro analogous to __has_attribute and
(hopefully) compatible with the synonymous Clang feature (I
couldn't actually find tests for it in the Clang test suite
but if someone points me at them I'll verify it).

Tested on x86_64-linux.

Martin

PS I couldn't find an existing API to test whether a reserved
symbol like __builtin_offsetof is a function-like built-in so
I hardwired the tests for C and C++ into the new names_builtin_p
functions.  I don't like this very much because the next time
such an operator is added there is nothing to remind us to update
the functions.  Adding a flag to the c_common_reswords array would
solve the problem but at the expense of a linear search through
it.  Does anyone have a suggestion for how to do this better?




[PATCH] avoid eliminating live nul stores into strings of bounded length (PR 92226)

2019-10-28 Thread Martin Sebor

A recent enhancement to take advantage of non-constant strlen
results constrained to a known range interacts badly with
the nul-over-nul optimization.  The optimization eliminates
nul stores that overwrite the exiting terminating nul of
the destination string.  This interaction causes the nul
store to be eliminated in subset of cases when it shouldn't
be.

The attached patch fixes the bug by avoiding the optimization
for such destinations.  It also adjusts the comment that
describes the function with the bug to make its return value
clearer.

Tested on x86_64-linux.

Martin
PR tree-optimization/92226 - live nul char store to array eliminated

gcc/testsuite/ChangeLog:

	PR tree-optimization/92226
	* gcc.dg/strlenopt-88.c: New test.

gcc/ChangeLog:

	PR tree-optimization/92226
	* tree-ssa-strlen.c (compare_nonzero_chars): Return -1 also when
	the offset is in the open range outlined by SI's length.

Index: gcc/tree-ssa-strlen.c
===
--- gcc/tree-ssa-strlen.c	(revision 277521)
+++ gcc/tree-ssa-strlen.c	(working copy)
@@ -193,10 +193,11 @@ static void handle_builtin_stxncpy (built_in_funct
 
*  +1  if SI is known to start with more than OFF nonzero characters.
 
-   *   0  if SI is known to start with OFF nonzero characters,
-	  but is not known to start with more.
+   *   0  if SI is known to start with exactly OFF nonzero characters.
 
-   *  -1  if SI might not start with OFF nonzero characters.  */
+   *  -1  if SI either does not start with OFF nonzero characters
+  or the relationship between the number of leading nonzero
+  characters in SI and OFF is unknown.  */
 
 static inline int
 compare_nonzero_chars (strinfo *si, unsigned HOST_WIDE_INT off)
@@ -221,7 +221,7 @@ compare_nonzero_chars (strinfo *si, unsigned HOST_
   if (TREE_CODE (si->nonzero_chars) == INTEGER_CST)
 return compare_tree_int (si->nonzero_chars, off);
 
-  if (TREE_CODE (si->nonzero_chars) != SSA_NAME)
+  if (!rvals || TREE_CODE (si->nonzero_chars) != SSA_NAME)
 return -1;
 
   const value_range *vr
@@ -232,7 +232,15 @@ compare_nonzero_chars (strinfo *si, unsigned HOST_
   if (rng != VR_RANGE || !range_int_cst_p (vr))
 return -1;
 
-  return compare_tree_int (vr->min (), off);
+  /* If the offset is less than the minimum length or if the bounds
+ of the length range are equal return the result of the comparison
+ same as in the constant case.  Otherwise return a conservative
+ result.  */
+  int cmpmin = compare_tree_int (vr->min (), off);
+  if (cmpmin > 0 || tree_int_cst_equal (vr->min (), vr->max ()))
+return cmpmin;
+
+  return -1;
 }
 
 /* Return true if SI is known to be a zero-length string.  */
Index: gcc/testsuite/gcc.dg/strlenopt-88.c
===
--- gcc/testsuite/gcc.dg/strlenopt-88.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/strlenopt-88.c	(working copy)
@@ -0,0 +1,196 @@
+/* PR tree-optimization/92226 - live nul char store to array eliminated
+   { dg-do run }
+   { dg-options "-O2 -Wall" } */
+
+#include "strlenopt.h"
+
+#define NOIPA __attribute__ ((noipa))
+
+unsigned nfails;
+
+char a[8];
+
+void test (int line, const char *func, size_t expect)
+{
+  size_t len = strlen (a);
+  if (len == expect)
+return;
+
+  ++nfails;
+
+  __builtin_printf ("assertion failed in %s on line %i: "
+		"strlen (\"%s\") == %zu, got %zu\n",
+		func, line, a, expect, len);
+}
+
+NOIPA const char* str (size_t n)
+{
+  return "9876543210" + 10 - n;
+}
+
+#define T(name, CMPEXP, LEN, IDX, EXPECT)	\
+  NOIPA static void name (void)			\
+  {		\
+const char *s = str (LEN);			\
+if (strlen (s) CMPEXP)			\
+  {		\
+	strcpy (a, s);\
+	a[IDX] = 0;\
+	test (__LINE__, #name, EXPECT);		\
+  }		\
+  } typedef void dummy_type
+
+
+T (len_eq_1_store_nul_0, == 1, 1, 0, 0);
+T (len_eq_1_store_nul_1, == 1, 1, 1, 1);
+T (len_eq_1_store_nul_2, == 1, 1, 2, 1);
+T (len_eq_1_store_nul_3, == 1, 1, 3, 1);
+T (len_eq_1_store_nul_4, == 1, 1, 4, 1);
+
+T (len_eq_2_store_nul_0, == 2, 2, 0, 0);
+T (len_eq_2_store_nul_1, == 2, 2, 1, 1);
+T (len_eq_2_store_nul_2, == 2, 2, 2, 2);
+T (len_eq_2_store_nul_3, == 2, 2, 3, 2);
+T (len_eq_2_store_nul_4, == 2, 2, 4, 2);
+
+T (len_eq_3_store_nul_0, == 3, 3, 0, 0);
+T (len_eq_3_store_nul_1, == 3, 3, 1, 1);
+T (len_eq_3_store_nul_2, == 3, 3, 2, 2);
+T (len_eq_3_store_nul_3, == 3, 3, 3, 3);
+T (len_eq_3_store_nul_4, == 3, 3, 4, 3);
+
+
+T (len_gt_1_store_nul_0, > 2, 2, 0, 0);
+T (len_gt_1_store_nul_1, > 2, 2, 1, 1);
+T (len_gt_1_store_nul_2, > 2, 2, 2, 2);
+T (len_gt_1_store_nul_3, > 2, 2, 3, 2);
+T (len_gt_1_store_nul_4, > 2, 2, 4, 2);
+
+T (len_gt_2_store_nul_0, > 2, 3, 0, 0);
+T (len_gt_2_store_nul_1, > 2, 3, 1, 1);
+T (len_gt_2_store_nul_2, > 2, 3, 2, 2);
+T (len_gt_2_store_nul_3, > 2, 3, 3, 3);
+T (len_gt_2_store_nul_4, > 2, 3, 4, 3);
+
+T (len_gt_3_store_nul_0, > 2, 4, 0, 0);
+T (len_gt_3_store_nul_1, > 2, 4, 1, 1);
+T (

  1   2   3   4   5   6   7   8   9   10   >