date:20220629

Re: [PATCH][AArch64] Implement ACLE Data Intrinsics

2022-06-29 Thread Richard Sandiford via Gcc-patches

"Andre Vieira (lists)"  writes:
> On 17/06/2022 11:54, Richard Sandiford wrote:
>> "Andre Vieira (lists)"  writes:
>>> Hi,
>>>
>>> This patch adds support for the ACLE Data Intrinsics to the AArch64 port.
>>>
>>> Bootstrapped and regression tested on aarch64-none-linux.
>>>
>>> OK for trunk?
>> Sorry for the slow review.
> No worries :)
>>
>>> +{\
>>> +  size_t size = sizeof (TYPE) * __CHAR_BIT__;\
>>> +  rotate = rotate % size;\
>>> +  return value >> rotate | value << (size - rotate); \
>> This runs into UB for rotate == 0.
> I assume it's because of the value << size no?

Yeah.

> I added a modulo, I assume it's legal to shift by 0?

Thanks, and yeah, shifting by zero is fine.

> This OK?
>
> diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
> b/gcc/config/aarch64/aarch64-builtins.cc
> index 
> e0a741ac663188713e21f457affa57217d074783..69f1fb3604a481fa378d105cf3ee98edec1ba619
>  100644
> --- a/gcc/config/aarch64/aarch64-builtins.cc
> +++ b/gcc/config/aarch64/aarch64-builtins.cc
> @@ -613,6 +613,12 @@ enum aarch64_builtins
>AARCH64_LS64_BUILTIN_ST64B,
>AARCH64_LS64_BUILTIN_ST64BV,
>AARCH64_LS64_BUILTIN_ST64BV0,
> +  AARCH64_REV16,
> +  AARCH64_REV16L,
> +  AARCH64_REV16LL,
> +  AARCH64_RBIT,
> +  AARCH64_RBITL,
> +  AARCH64_RBITLL,
>AARCH64_BUILTIN_MAX
>  };
>  
> @@ -1664,10 +1670,41 @@ aarch64_init_ls64_builtins (void)
>= aarch64_general_add_builtin (data[i].name, data[i].type, 
> data[i].code);
>  }
>  
> +static void
> +aarch64_init_data_intrinsics (void)
> +{
> +  tree uint32_fntype = build_function_type_list (uint32_type_node,
> +  uint32_type_node, NULL_TREE);
> +  tree ulong_fntype = build_function_type_list (long_unsigned_type_node,
> + long_unsigned_type_node,
> + NULL_TREE);
> +  tree uint64_fntype = build_function_type_list (uint64_type_node,
> +  uint64_type_node, NULL_TREE);
> +  aarch64_builtin_decls[AARCH64_REV16]
> += aarch64_general_add_builtin ("__builtin_aarch64_rev16", uint32_fntype,
> +AARCH64_REV16);
> +  aarch64_builtin_decls[AARCH64_REV16L]
> += aarch64_general_add_builtin ("__builtin_aarch64_rev16l", ulong_fntype,
> +AARCH64_REV16L);
> +  aarch64_builtin_decls[AARCH64_REV16LL]
> += aarch64_general_add_builtin ("__builtin_aarch64_rev16ll", 
> uint64_fntype,
> +AARCH64_REV16LL);
> +  aarch64_builtin_decls[AARCH64_RBIT]
> += aarch64_general_add_builtin ("__builtin_aarch64_rbit", uint32_fntype,
> +AARCH64_RBIT);
> +  aarch64_builtin_decls[AARCH64_RBITL]
> += aarch64_general_add_builtin ("__builtin_aarch64_rbitl", ulong_fntype,
> +AARCH64_RBITL);
> +  aarch64_builtin_decls[AARCH64_RBITLL]
> += aarch64_general_add_builtin ("__builtin_aarch64_rbitll", uint64_fntype,
> +AARCH64_RBITLL);
> +}
> +
>  /* Implement #pragma GCC aarch64 "arm_acle.h".  */
>  void
>  handle_arm_acle_h (void)
>  {
> +  aarch64_init_data_intrinsics ();
>if (TARGET_LS64)
>  aarch64_init_ls64_builtins ();
>  }
> @@ -2393,6 +2430,40 @@ aarch64_expand_builtin_memtag (int fcode, tree exp, 
> rtx target)
>emit_insn (pat);
>return target;
>  }

Nit: missing blank line here.

> +/* Function to expand an expression EXP which calls one of the ACLE Data
> +   Intrinsic builtins FCODE with the result going to TARGET.  */
> +static rtx
> +aarch64_expand_builtin_data_intrinsic (unsigned int fcode, tree exp, rtx 
> target)
> +{
> +  expand_operand ops[2];
> +  machine_mode mode = GET_MODE (target);
> +  create_output_operand (&ops[0], target, mode);
> +  create_input_operand (&ops[1], expand_normal (CALL_EXPR_ARG (exp, 0)), 
> mode);
> +  enum insn_code icode;
> +  switch (fcode)
> +{
> +case AARCH64_REV16:
> +case AARCH64_REV16L:
> +case AARCH64_REV16LL:
> +  if (mode == SImode)
> + icode = CODE_FOR_aarch64_rev16si;
> +  else
> + icode = CODE_FOR_aarch64_rev16di;

You should be able to do:

  icode = code_for_aarch64_rev (mode);

instead.  Same for the next cases.

> +  break;
> +case AARCH64_RBIT:
> +case AARCH64_RBITL:
> +case AARCH64_RBITLL:
> +  if (mode == SImode)
> + icode = CODE_FOR_aarch64_rbitsi;
> +  else
> + icode = CODE_FOR_aarch64_rbitdi;
> +  break;
> +default:
> +  gcc_unreachable ();
> +}
> +  expand_insn (icode, 2, ops);
> +  return target;

This needs to return ops[0].value instead, since "target" just suggests
a possible location.

Could you add tests for a memory source and memory destination, e.g.:

void test_clz_mem (uint32_t *a)
{
  *a = __clz (*a);

[PATCH][pushed] docs: remove removed param from documentation

2022-06-29 Thread Martin Liška


Pushed as obvious.

Martin

gcc/ChangeLog:

* doc/invoke.texi: Remove removed evrp-mode.
---
 gcc/doc/invoke.texi | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index bde59ff0472..757775ea576 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -15124,9 +15124,6 @@ statement during VRP.
 @item evrp-sparse-threshold
 Maximum number of basic blocks before EVRP uses a sparse cache.
 
-@item evrp-mode

-Specifies the mode Early VRP should operate in.
-
 @item vrp1-mode
 Specifies the mode VRP pass 1 should operate in.
 
--

2.36.1

Re: [PATCH][pushed] docs: remove removed param from documentation

2022-06-29 Thread Aldy Hernandez via Gcc-patches

Thanks so much.

Aldy

On Wed, Jun 29, 2022 at 10:09 AM Martin Liška  wrote:
>
> Pushed as obvious.
>
> Martin
>
> gcc/ChangeLog:
>
> * doc/invoke.texi: Remove removed evrp-mode.
> ---
>   gcc/doc/invoke.texi | 3 ---
>   1 file changed, 3 deletions(-)
>
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index bde59ff0472..757775ea576 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -15124,9 +15124,6 @@ statement during VRP.
>   @item evrp-sparse-threshold
>   Maximum number of basic blocks before EVRP uses a sparse cache.
>
> -@item evrp-mode
> -Specifies the mode Early VRP should operate in.
> -
>   @item vrp1-mode
>   Specifies the mode VRP pass 1 should operate in.
>
> --
> 2.36.1
>

Re: [PATCH] testsuite/102690: Only check warning for lp64 in Warray-bounds-16.C

2022-06-29 Thread Richard Biener via Gcc-patches

On Tue, 28 Jun 2022, Kito Cheng wrote:

> That warning won't happen on ilp32 targets, seems like Andrew Pinski
> already mention that[1] before.
> 
> Verified on riscv32-unknown-elf and riscv64-unknown-elf.
> 
> [1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92879#c1

OK

> gcc/testsuite/ChangeLog:
> 
>   PR testsuite/102690
>   * g++.dg/warn/Warray-bounds-16.C: XFAIL only on lp64 for the
>   warning.
> ---
>  gcc/testsuite/g++.dg/warn/Warray-bounds-16.C | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/gcc/testsuite/g++.dg/warn/Warray-bounds-16.C 
> b/gcc/testsuite/g++.dg/warn/Warray-bounds-16.C
> index 89cbadb91c7..45a14b19ea3 100644
> --- a/gcc/testsuite/g++.dg/warn/Warray-bounds-16.C
> +++ b/gcc/testsuite/g++.dg/warn/Warray-bounds-16.C
> @@ -19,7 +19,7 @@ struct S
>  p = (int*) new unsigned char [sizeof (int) * m];
>  
>  for (int i = 0; i < m; i++)
> -  new (p + i) int (); /* { dg-bogus "bounds" "pr102690" { xfail *-*-* } 
> } */
> +  new (p + i) int (); /* { dg-bogus "bounds" "pr102690" { xfail lp64 } } 
> */
>}
>  };
>  
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Frankenstra

[PATCH] tree-optimization/106112 - fix CSE from wider operation

2022-06-29 Thread Richard Biener via Gcc-patches

The following fixes a mistake in looking up an extended operand
in the CSE of a truncated operation.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

2022-06-28  Richard Biener  

PR tree-optimization/106112
* tree-ssa-sccvn.cc (valueized_wider_op): Properly extend
a constant operand according to its type.

* gcc.dg/torture/pr106112.c: New testcase.
---
 gcc/testsuite/gcc.dg/torture/pr106112.c | 16 
 gcc/tree-ssa-sccvn.cc   |  2 +-
 2 files changed, 17 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr106112.c

diff --git a/gcc/testsuite/gcc.dg/torture/pr106112.c 
b/gcc/testsuite/gcc.dg/torture/pr106112.c
new file mode 100644
index 000..bd7f63c0935
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr106112.c
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+
+__INT32_TYPE__ a = 5, b, c, d;
+__UINT64_TYPE__ e = 20862985922;
+int main()
+{
+  __UINT32_TYPE__ f = 4294967292;
+  e = e | f;
+  c = -1 % ((~f ^ 4294967292) - (e - d));
+  b = ~-~e % ~-d;
+  if (b)
+a = 0;
+  if (a < 1)
+__builtin_abort();
+  return 0;
+}
diff --git a/gcc/tree-ssa-sccvn.cc b/gcc/tree-ssa-sccvn.cc
index 5214f142f52..9deedeac378 100644
--- a/gcc/tree-ssa-sccvn.cc
+++ b/gcc/tree-ssa-sccvn.cc
@@ -4983,7 +4983,7 @@ valueized_wider_op (tree wide_type, tree op, bool 
allow_truncate)
 
   /* For constants simply extend it.  */
   if (TREE_CODE (op) == INTEGER_CST)
-return wide_int_to_tree (wide_type, wi::to_wide (op));
+return wide_int_to_tree (wide_type, wi::to_widest (op));
 
   return NULL_TREE;
 }
-- 
2.35.3

[RFC] trailing_wide_ints with runtime variable lengths

2022-06-29 Thread Aldy Hernandez via Gcc-patches

Currently global ranges are stored in SSA_NAME_RANGE_INFO as a pair of
wide_int-like objects along with the nonzero bits.  We frequently lose
precision when streaming out our higher resolution iranges.  The plan
was always to store the full irange between passes.  However, as was
originally discussed eons ago:

https://gcc.gnu.org/pipermail/gcc-patches/2017-May/475139.html

...we need a memory efficient way of saving iranges, preferably using
the trailing_wide_ints idiom.

The problem with doing so is that trailing_wide_ints assume a
compile-time specified number of elements.  For irange, we need to
determine the size at run-time.

One solution is to adapt trailing_wide_ints such that N is the maximum
number of elements allowed, and allow setting the actual number at
run-time (defaulting to N).  The attached patch does this, while
requiring no changes to existing users.

It uses a byte to store the number of elements in the
trailing_wide_ints control word.  The control word is currently a
16-bit precision, an 8-bit max-length, and the rest is used for
m_len[N].  On a 64-bit architecture, this allows for 5 elements in
m_len without having to use an extra word.  With this patch, m_len[]
would be smaller by one byte (4) before consuming the padding.  This
shouldn't be a problem as the only users of trailing_wide_ints use N=2
for NUM_POLY_INT_COEFFS in aarch64, and N=3 for range_info_def.

For irange, my plan is to use one more word to fit a maximum of 12
elements (the above 4 plus 8 more).  This would allow for 6 pairs of
sub-ranges which would be more than adequate for our needs.  In
previous tests we found that 99% of ranges fit within 3-4 pairs.  More
precisely, this would allow for 5 pairs, plus the nonzero bits, plus a
spare wide-int for future development.

Ultimately this means that streaming an irange would consume one more
word than what we currently do for range_info_def.  IMO this is a nice
trade-off considering we started storing a slew of wide-ints directly
;-).

I'm not above rolling an altogether different approach, but would
prefer to use the existing trailing infrastructure since it's mostly
what we need.

Thoughts?

p.s. Tested and benchmarked on x86-64 Linux.  There was no discernible
performance change in our benchmark suite.

gcc/ChangeLog:

* wide-int.h (struct trailing_wide_ints): Add m_num_elements.
(trailing_wide_ints::set_precision): Add num_elements argument.
(trailing_wide_ints::extra_size): Same.
---
 gcc/wide-int.h | 42 +-
 1 file changed, 29 insertions(+), 13 deletions(-)

diff --git a/gcc/wide-int.h b/gcc/wide-int.h
index 8041b6104f9..f68ccf0a0c5 100644
--- a/gcc/wide-int.h
+++ b/gcc/wide-int.h
@@ -1373,10 +1373,13 @@ namespace wi
 : public int_traits  {};
 }
 
-/* An array of N wide_int-like objects that can be put at the end of
-   a variable-sized structure.  Use extra_size to calculate how many
-   bytes beyond the sizeof need to be allocated.  Use set_precision
-   to initialize the structure.  */
+/* A variable-lengthed array of wide_int-like objects that can be put
+   at the end of a variable-sized structure.  The number of objects is
+   at most N and can be set at runtime by using set_precision().
+
+   Use extra_size to calculate how many bytes beyond the
+   sizeof need to be allocated.  Use set_precision to initialize the
+   structure.  */
 template 
 struct GTY((user)) trailing_wide_ints
 {
@@ -1387,6 +1390,9 @@ private:
   /* The shared maximum length of each number.  */
   unsigned char m_max_len;
 
+  /* The number of elements.  */
+  unsigned char m_num_elements;
+
   /* The current length of each number.
  Avoid char array so the whole structure is not a typeless storage
  that will, in turn, turn off TBAA on gimple, trees and RTL.  */
@@ -1399,12 +1405,15 @@ private:
 public:
   typedef WIDE_INT_REF_FOR (trailing_wide_int_storage) const_reference;
 
-  void set_precision (unsigned int);
+  void set_precision (unsigned int precision, unsigned int num_elements = N);
   unsigned int get_precision () const { return m_precision; }
+  unsigned int num_elements () const { return m_num_elements; }
   trailing_wide_int operator [] (unsigned int);
   const_reference operator [] (unsigned int) const;
-  static size_t extra_size (unsigned int);
-  size_t extra_size () const { return extra_size (m_precision); }
+  static size_t extra_size (unsigned int precision,
+   unsigned int num_elements = N);
+  size_t extra_size () const { return extra_size (m_precision,
+ m_num_elements); }
 };
 
 inline trailing_wide_int_storage::
@@ -1457,11 +1466,14 @@ trailing_wide_int_storage::operator = (const T &x)
 }
 
 /* Initialize the structure and record that all elements have precision
-   PRECISION.  */
+   PRECISION.  NUM_ELEMENTS can be no more than N.  */
 template 
 inline void
-trailing_wide_ints ::set_precision (unsigned i

Re: [PATCH 1/2]AArch64 Add fallback case using sdot for usdot

2022-06-29 Thread Richard Biener via Gcc-patches

On Tue, Jun 28, 2022 at 5:54 PM Tamar Christina  wrote:
>
> > -Original Message-
> > From: Richard Biener 
> > Sent: Monday, June 27, 2022 7:10 AM
> > To: Tamar Christina 
> > Cc: Richard Sandiford ; Richard Earnshaw
> > ; nd ; gcc-
> > patc...@gcc.gnu.org; Marcus Shawcroft 
> > Subject: Re: [PATCH 1/2]AArch64 Add fallback case using sdot for usdot
> >
> > On Mon, Jun 27, 2022 at 7:25 AM Tamar Christina via Gcc-patches  > patc...@gcc.gnu.org> wrote:
> > >
> > > > -Original Message-
> > > > From: Richard Sandiford 
> > > > Sent: Thursday, June 16, 2022 7:54 PM
> > > > To: Tamar Christina 
> > > > Cc: gcc-patches@gcc.gnu.org; nd ; Richard Earnshaw
> > > > ; Marcus Shawcroft
> > > > ; Kyrylo Tkachov
> > 
> > > > Subject: Re: [PATCH 1/2]AArch64 Add fallback case using sdot for
> > > > usdot
> > > >
> > > > Richard Sandiford via Gcc-patches  writes:
> > > > > Tamar Christina  writes:
> > > > >> Hi All,
> > > > >>
> > > > >> The usdot operation is common in video encoder and decoders
> > > > >> including some of the most widely used ones.
> > > > >>
> > > > >> This patch adds a +dotprod version of the optab as a fallback for
> > > > >> when you do have sdot but not usdot available.
> > > > >>
> > > > >> The fallback works by adding a bias to the unsigned argument to
> > > > >> convert it to a signed value and then correcting for the bias later 
> > > > >> on.
> > > > >>
> > > > >> Essentially it relies on (x - 128)y + 128y == xy where x is
> > > > >> unsigned and y is signed (assuming both are 8-bit values).
> > > > >> Because the range of a signed byte is only to 127 we split the bias
> > correction into:
> > > > >>
> > > > >>(x - 128)y + 127y + y
> > > > >
> > > > > I bet you knew this question was coming, but: this technique isn't
> > > > > target-specific, so wouldn't it be better to handle it in
> > > > > tree-vect-patterns.cc instead?
> > >
> > > Ok, so after many hours of trying I don't know how to make this work.
> > > DOT_PROD_EXPR is a reduction, but emitting them as additional pattern
> > > statement doesn't work because they'll be marked as internal_def
> > > rather than reduction_def.  I tried marking the new vec_stmt_info that
> > > I create explicitly as reduction_def but this gets overwritten during 
> > > analysis.
> > >
> > > I then looked into getting it as a vectorizable_operation but has this
> > > obvious problems In that it no longer treats it as a reduction and so 
> > > tries to
> > decompose into hi/lo.
> > >
> > > I then looked into treating additional patterns from  a reduction as
> > > reductions themselves but this is obviously wrong as non-reduction
> > statements also get marked as reductions.
> > >
> > > The conclusion is that I don't think the vectorizer allows additional
> > > reductions to be emitted from patterns.
> >
> > Indeed.  DOT_PROD is a weird beast and it doesn't define which lanes are
> > reduced to which so it's only usable when the result is reduced to a single
> > lane.
> >
> > An SLP pattern might work if you use reduc-plus for the reduced lanes and
> > keep the multiply separate?
>
> Unfortunately I can't seem to get it to handle the reduction in SLP.  It 
> seems to always
> use the non-SLP aware loop vectorizer here.  The suggested unroll factor is 
> always 1 and
> even trying to force it gets it to bail out later, presumable because it's 
> reducing into a
> scalar that's used outside the loop?

Yes, it possibly needs 1-lane SLP support.

> Thanks,
> Tamar
>
> >
> > Richard.
> >
> > > > Also, how about doing (x - 128)y + 64y + 64y instead, to reduce the
> > > > number of hoisted constants?
> > > >
> > > > Thanks,
> > > > Richard
> > > >
> > > > > Thanks,
> > > > > Richard
> > > > >
> > > > >> Concretely for:
> > > > >>
> > > > >> #define N 480
> > > > >> #define SIGNEDNESS_1 unsigned
> > > > >> #define SIGNEDNESS_2 signed
> > > > >> #define SIGNEDNESS_3 signed
> > > > >> #define SIGNEDNESS_4 unsigned
> > > > >>
> > > > >> SIGNEDNESS_1 int __attribute__ ((noipa)) f (SIGNEDNESS_1 int res,
> > > > >> SIGNEDNESS_3 char *restrict a,
> > > > >>SIGNEDNESS_4 char *restrict b) {
> > > > >>   for (__INTPTR_TYPE__ i = 0; i < N; ++i)
> > > > >> {
> > > > >>   int av = a[i];
> > > > >>   int bv = b[i];
> > > > >>   SIGNEDNESS_2 short mult = av * bv;
> > > > >>   res += mult;
> > > > >> }
> > > > >>   return res;
> > > > >> }
> > > > >>
> > > > >> we generate:
> > > > >>
> > > > >> moviv5.16b, 0x7f
> > > > >> mov x3, 0
> > > > >> moviv4.16b, 0x1
> > > > >> moviv3.16b, 0xff80
> > > > >> moviv0.4s, 0
> > > > >> .L2:
> > > > >> ldr q2, [x2, x3]
> > > > >> ldr q1, [x1, x3]
> > > > >> add x3, x3, 16
> > > > >> sub v2.16b, v2.16b, v3.16b
> > > > >> sdotv0.4s, v2.16b, v1.16b
> > > > >> sdotv0.4s, v5.16b, v1.16b
> > > > >> sdotv0.4s, v4.16b, v1.16b
> > > > >> cmp x3,

Re: [PATCH] lto: pass -pthread to AM_LDFLAGS [PR 106118]

2022-06-29 Thread Richard Biener via Gcc-patches

On Tue, Jun 28, 2022 at 5:19 PM Pekka Seppänen  wrote:
>
> Move -pthread from configure.ac to Makefile.in so that it is passed to
> AM_LDFLAGS.

OK

> lto-plugin/ChangeLog:
>
> * configure.ac: Move -pthread from here...
> * Makefile.am: ...to here.
> * configure: Regenerate.
> * Makefile.in: Likewise.
> ---
>   lto-plugin/Makefile.am  | 3 ++-
>   lto-plugin/configure.ac | 3 ---
>   2 files changed, 2 insertions(+), 4 deletions(-)
>
> diff --git a/lto-plugin/Makefile.am b/lto-plugin/Makefile.am
> index a96acc87ee2..81362eafc36 100644
> --- a/lto-plugin/Makefile.am
> +++ b/lto-plugin/Makefile.am
> @@ -9,7 +9,8 @@ libexecsubdir :=
> $(libexecdir)/gcc/$(real_target_noncanonical)/$(gcc_version)$(a
>
>   AM_CPPFLAGS = -I$(top_srcdir)/../include $(DEFS)
>   AM_CFLAGS = @ac_lto_plugin_warn_cflags@ $(CET_HOST_FLAGS)
> -AM_LDFLAGS = @ac_lto_plugin_ldflags@
> +# The plug-in depends on pthreads.
> +AM_LDFLAGS = -pthread @ac_lto_plugin_ldflags@
>   AM_LIBTOOLFLAGS = --tag=disable-static
>   override CFLAGS := $(filter-out -fsanitize=address
> -fsanitize=hwaddress,$(CFLAGS))
>   override LDFLAGS := $(filter-out -fsanitize=address
> -fsanitize=hwaddress,$(LDFLAGS))
> diff --git a/lto-plugin/configure.ac b/lto-plugin/configure.ac
> index 75cf46ac5c7..c2ec512880f 100644
> --- a/lto-plugin/configure.ac
> +++ b/lto-plugin/configure.ac
> @@ -13,9 +13,6 @@ AC_PROG_CC
>   AC_SYS_LARGEFILE
>   ACX_PROG_CC_WARNING_OPTS([-Wall], [ac_lto_plugin_warn_cflags])
>
> -# The plug-in depends on pthreads
> -LDFLAGS="-pthread"
> -
>   # Check whether -static-libgcc is supported.
>   saved_LDFLAGS="$LDFLAGS"
>   LDFLAGS="$LDFLAGS -static-libgcc"
> --
> 2.34.1

Re: [committed] d: Add SIMD intrinsics module and compiler built-ins.

2022-06-29 Thread Andreas Schwab via Gcc-patches

make[3]: Entering directory '/opt/gcc/gcc-20220629/Build/gcc'
/opt/gcc/gcc-20220629/Build/./prev-gcc/xg++ 
-B/opt/gcc/gcc-20220629/Build/./prev-gcc/ -B/usr/aarch64-suse-linux/bin/ 
-nostdinc++ 
-B/opt/gcc/gcc-20220629/Build/prev-aarch64-suse-linux/libstdc++-v3/src/.libs 
-B/opt/gcc/gcc-20220629/Build/prev-aarch64-suse-linux/libstdc++-v3/libsupc++/.libs
  
-I/opt/gcc/gcc-20220629/Build/prev-aarch64-suse-linux/libstdc++-v3/include/aarch64-suse-linux
  -I/opt/gcc/gcc-20220629/Build/prev-aarch64-suse-linux/libstdc++-v3/include  
-I/opt/gcc/gcc-20220629/libstdc++-v3/libsupc++ 
-L/opt/gcc/gcc-20220629/Build/prev-aarch64-suse-linux/libstdc++-v3/src/.libs 
-L/opt/gcc/gcc-20220629/Build/prev-aarch64-suse-linux/libstdc++-v3/libsupc++/.libs
  -fno-PIE -c  -DIN_GCC_FRONTEND -g -O2 -fno-checking -gtoggle -DIN_GCC 
-fno-exceptions -fno-rtti -fasynchronous-unwind-tables -W -Wall -Wno-narrowing 
-Wwrite-strings -Wcast-qual -Wmissing-format-attribute -Woverloaded-virtual 
-pedantic -Wno-long-long -Wno-variadic-macros -Wno-overlength-strings -Werror 
-fno-common  -DHAVE_CONFIG_H -I. -Id -I../../gcc -I../../gcc/d 
-I../../gcc/../include -I../../gcc/../libcpp/include -I../../gcc/../libcody  
-I../../gcc/../libdecnumber -I../../gcc/../libdecnumber/bid -I../libdecnumber 
-I../../gcc/../libbacktrace   -o d/intrinsics.o -MT d/intrinsics.o -MMD -MP -MF 
d/.deps/intrinsics.TPo ../../gcc/d/intrinsics.cc
../../gcc/d/intrinsics.cc: In function 'tree_node* 
build_shuffle_mask_type(tree)':
../../gcc/d/intrinsics.cc:279:42: error: 'nunits' may be used uninitialized 
[-Werror=maybe-uninitialized]
  279 |   return build_ctype (TypeVector::create (t->sarrayOf (nunits)));
  |   ~~~^~
../../gcc/d/intrinsics.cc:276:26: note: 'nunits' was declared here
  276 |   unsigned HOST_WIDE_INT nunits;
  |  ^~
In file included from ../../gcc/d/intrinsics.cc:19:
../../gcc/system.h: In function 'tree_node* 
expand_intrinsic_vec_shufflevector(tree)':
../../gcc/system.h:396:29: error: 'v1elems' may be used uninitialized 
[-Werror=maybe-uninitialized]
  396 | #define MAX(X,Y) ((X) > (Y) ? (X) : (Y))
  | ^
../../gcc/d/intrinsics.cc:1193:35: note: 'v1elems' was declared here
 1193 |   unsigned HOST_WIDE_INT v0elems, v1elems;
  |   ^~~
../../gcc/d/intrinsics.cc:1193:26: error: 'v0elems' may be used uninitialized 
[-Werror=maybe-uninitialized]
 1193 |   unsigned HOST_WIDE_INT v0elems, v1elems;
  |  ^~~
cc1plus: all warnings being treated as errors
make[3]: *** [Makefile:1146: d/intrinsics.o] Error 1
make[3]: Leaving directory '/opt/gcc/gcc-20220629/Build/gcc'

-- 
Andreas Schwab, SUSE Labs, sch...@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."

Re: [committed] openmp: Add support for HBW or large capacity or interleaved memory through the libmemkind.so library

2022-06-29 Thread Jakub Jelinek via Gcc-patches

On Tue, Jun 28, 2022 at 10:29:53PM +0100, Andrew Stubbs wrote:
> On 09/06/2022 09:19, Jakub Jelinek via Gcc-patches wrote:
> > +  switch (memspace)
> > +{
> > +case omp_high_bw_mem_space:
> > +#ifdef LIBGOMP_USE_MEMKIND
> > +  struct gomp_memkind_data *memkind_data;
> > +  memkind_data = gomp_get_memkind ();
> > +  if (data.partition == omp_atv_interleaved
> > + && memkind_data->kinds[GOMP_MEMKIND_HBW_INTERLEAVE])
> > +   {
> > + data.memkind = GOMP_MEMKIND_HBW_INTERLEAVE;
> > + break;
> > +   }
> > +  else if (memkind_data->kinds[GOMP_MEMKIND_HBW_PREFERRED])
> > +   {
> > + data.memkind = GOMP_MEMKIND_HBW_PREFERRED;
> > + break;
> > +   }
> > +#endif
> > +  return omp_null_allocator;
> > +case omp_large_cap_mem_space:
> > +#ifdef LIBGOMP_USE_MEMKIND
> > +  memkind_data = gomp_get_memkind ();
> > +  if (memkind_data->kinds[GOMP_MEMKIND_DAX_KMEM_ALL])
> > +   data.memkind = GOMP_MEMKIND_DAX_KMEM_ALL;
> > +  else if (memkind_data->kinds[GOMP_MEMKIND_DAX_KMEM])
> > +   data.memkind = GOMP_MEMKIND_DAX_KMEM;
> > +#endif
> > +  break;
> > +default:
> > +#ifdef LIBGOMP_USE_MEMKIND
> > +  if (data.partition == omp_atv_interleaved)
> > +   {
> > + memkind_data = gomp_get_memkind ();
> > + if (memkind_data->kinds[GOMP_MEMKIND_INTERLEAVE])
> > +   data.memkind = GOMP_MEMKIND_INTERLEAVE;
> > +   }
> > +#endif
> > +  break;
> > +}
> > +
> 
> This conflicts with mine and Abid's patches to implement the device
> allocators and host unified shared memory via the overridable
> "MEMSPACE_ALLOC" hooks. I can still use those for the "else" case, but
> they'll be inactive on any configuration where libmemkind exists. That's
> fine for the device code, and may be OK for USM (given that libmemkind won't
> have an option for that). There's a problem for the NVidia-specific
> host-memory pinning I have planned though.
> 
> How do you propose we resolve this conflict, please?
> 
> Ideally it will be in such a way that the conditional is resolved at compile
> time and the routine can be inlined (so no fake-OO function pointers in
> structs, I think).

memkind isn't used just because it exists, but because it supports some
requested trait that isn't otherwise supported.
Using callbacks instead of selecting the allocator to call using ifs is
possible if all those callbacks have the same arguments or if we add enough
dummy arguments to the callbacks such that some wrappers can handle it all.

Right now, memkind is used if ->memkind in the allocator data isn't
GOMP_MEMKIND_NONE, so when it is, you can certainly call some callback
(performance question might be whether to call it in the else case always
using callback or whether to call malloc etc. directly if callback is NULL).

And omp_init_allocator needs to decide what to do if one asks for features
that need memkind as well as for features that need whatever you/Abid have
been working on.  A possible resolution is punt (return omp_null_allocator),
or prefer one feature over the other one or vice versa.
>From the features currently handled by memkind, even before my changes for
consistency with libomp from llvm omp_high_bw_mem_space was considered a
hard request, ditto omp_atk_pinned omp_atv_true, but the rest was just taken
as optimization hint.

Jakub

Re: [PATCH] if-to-switch: Don't skip the first condition bb when find_conditions in if-to-switch [PR105740]

2022-06-29 Thread Martin Liška


On 6/21/22 14:08, Richard Biener via Gcc-patches wrote:

On Tue, Jun 21, 2022 at 12:05 PM Xionghu Luo  wrote:




On 2022/6/21 15:33, Richard Biener via Gcc-patches wrote:

On Tue, Jun 21, 2022 at 5:06 AM xionghuluo(罗雄虎) via Gcc-patches
 wrote:



Bootstrap and regression tested pass on x86_64-linux-gnu, OK for master?


OK if you add a comment that an empty conditions_in_bbs indicates we are
processing the first basic-block (that's not obvious to me).




Thanks.  Committed in  r13-1184, I assume this doesn't need backport?


No, it's not a regression.

Richard.


Thanks,
Xionghu


Caused https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106126

Martin

[PATCH] sanitizer: Fix hwasan related option conflicts [PR106132]

2022-06-29 Thread Martin Liška


Split report_conflicting_sanitizer_options(..., SANITIZE_ADDRESS | 
SANITIZE_HWADDRESS)
call into 2 calls as we don't have any option that would be
address+hwaddress (that conflicts as well).

PR sanitizer/106132

gcc/ChangeLog:

* opts.cc (finish_options): Use 2 calls to
report_conflicting_sanitizer_options.

gcc/testsuite/ChangeLog:

* c-c++-common/hwasan/arguments-3.c: Cover new ICE.
---
 gcc/opts.cc | 4 +++-
 gcc/testsuite/c-c++-common/hwasan/arguments-3.c | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/gcc/opts.cc b/gcc/opts.cc
index fe0293e4283..54e57f36755 100644
--- a/gcc/opts.cc
+++ b/gcc/opts.cc
@@ -1214,7 +1214,9 @@ finish_options (struct gcc_options *opts, struct 
gcc_options *opts_set,
 
   /* Address sanitizers conflict with the thread sanitizer.  */

   report_conflicting_sanitizer_options (opts, loc, SANITIZE_THREAD,
-   SANITIZE_ADDRESS | SANITIZE_HWADDRESS);
+   SANITIZE_ADDRESS);
+  report_conflicting_sanitizer_options (opts, loc, SANITIZE_THREAD,
+   SANITIZE_HWADDRESS);
   /* The leak sanitizer conflicts with the thread sanitizer.  */
   report_conflicting_sanitizer_options (opts, loc, SANITIZE_LEAK,
SANITIZE_THREAD);
diff --git a/gcc/testsuite/c-c++-common/hwasan/arguments-3.c 
b/gcc/testsuite/c-c++-common/hwasan/arguments-3.c
index 6e907b46b3b..2bf8917355b 100644
--- a/gcc/testsuite/c-c++-common/hwasan/arguments-3.c
+++ b/gcc/testsuite/c-c++-common/hwasan/arguments-3.c
@@ -1,3 +1,5 @@
 /* { dg-do compile } */
-/* { dg-additional-options "-fsanitize=thread" } */
+/* { dg-additional-options "-fsanitize=thread,address" } */
+/* { dg-error ".*'-fsanitize=thread' is incompatible with '-fsanitize=address'.*" 
"" { target *-*-* } 0 } */
 /* { dg-error ".*'-fsanitize=thread' is incompatible with '-fsanitize=hwaddress'.*" 
"" { target *-*-* } 0 } */
+/* { dg-error ".*'-fsanitize=hwaddress' is incompatible with '-fsanitize=address'.*" 
"" { target *-*-* } 0 } */
--
2.36.1

[PATCH] libgfortran: Fix up LIBGFOR_CHECK_FLOAT128 [PR106137]

2022-06-29 Thread Jakub Jelinek via Gcc-patches

Hi!

My recent gfortran + libgfortran patch apparently broke (some?) aarch64
builds.  While it is desirable to use just _Float128 rather than __float128,
we only want to use it (and e.g. define HAVE_FLOAT128) on targets where
_Float128 is supported and long double isn't IEEE quad precision.
Which is targets that support __float128 type which we have been testing
for before - _Float128 is supported on those targets and on targets where
long double is IEEE quad precision.

So, the following patch restores check for whether __float128 is supported
into the LIBGFOR_CHECK_FLOAT128 check which determines whether
HAVE_FLOAT128 is defined or whether to use libquadmath, so that e.g. on
aarch64 where long double is IEEE quad we don't do that.

Tested by Tamar on aarch64 and by me on x86_64-linux, ok for trunk?

2022-06-29  Jakub Jelinek  

PR bootstrap/106137
* acinclude.m4 (LIBGFOR_CHECK_FLOAT128): Also test for __float128.
* configure: Regenerated.

--- libgfortran/acinclude.m4.jj 2022-06-28 13:14:45.327799267 +0200
+++ libgfortran/acinclude.m42022-06-29 11:45:19.286551469 +0200
@@ -276,7 +276,6 @@ AC_DEFUN([LIBGFOR_CHECK_FLOAT128], [
GCC_TRY_COMPILE_OR_LINK([
 _Float128 foo (_Float128 x)
 {
-
  _Complex _Float128 z1, z2;
 
  z1 = x;
@@ -290,11 +289,18 @@ AC_DEFUN([LIBGFOR_CHECK_FLOAT128], [
 {
   return x * __builtin_huge_valf128 ();
 }
+
+__float128 baz (__float128 x)
+{
+  return x * __builtin_huge_valf128 ();
+}
   ],[
 foo (1.2F128);
 bar (1.2F128);
+baz (1.2F128);
 foo (1.2Q);
 bar (1.2Q);
+baz (1.2Q);
   ],[
 libgfor_cv_have_float128=yes
   ],[
--- libgfortran/configure.jj2022-06-28 13:14:45.331799215 +0200
+++ libgfortran/configure   2022-06-29 11:45:49.951148846 +0200
@@ -30130,7 +30130,6 @@ else
 
 _Float128 foo (_Float128 x)
 {
-
  _Complex _Float128 z1, z2;
 
  z1 = x;
@@ -30145,14 +30144,21 @@ else
   return x * __builtin_huge_valf128 ();
 }
 
+__float128 baz (__float128 x)
+{
+  return x * __builtin_huge_valf128 ();
+}
+
 int
 main ()
 {
 
 foo (1.2F128);
 bar (1.2F128);
+baz (1.2F128);
 foo (1.2Q);
 bar (1.2Q);
+baz (1.2Q);
 
   ;
   return 0;
@@ -30177,7 +30183,6 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_
 
 _Float128 foo (_Float128 x)
 {
-
  _Complex _Float128 z1, z2;
 
  z1 = x;
@@ -30192,14 +30197,21 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_
   return x * __builtin_huge_valf128 ();
 }
 
+__float128 baz (__float128 x)
+{
+  return x * __builtin_huge_valf128 ();
+}
+
 int
 main ()
 {
 
 foo (1.2F128);
 bar (1.2F128);
+baz (1.2F128);
 foo (1.2Q);
 bar (1.2Q);
+baz (1.2Q);
 
   ;
   return 0;

Jakub

[PATCH] libgfortran: Switch some more __float128 uses to _Float128

2022-06-29 Thread Jakub Jelinek via Gcc-patches

Hi!

My patch apparently left some __float128 uses in libgfortran
that could use _Float128 instead, the following patch changes that.

Ok for trunk?

2022-06-29  Jakub Jelinek  

* mk-kinds-h.sh: Change __float128 to _Float128 in a comment.
* acinclude.m4 (LIBGFOR_CHECK_FLOAT128): Adjust comment.
(LIBGFOR_CHECK_MATH_IEEE128): Use _Float128 instead of __float128.
* libgfortran.h (isnan): Change __float128 to _Float128 in a comment.
(__acoshieee128, __acosieee128, __asinhieee128, __asinieee128,
__atan2ieee128, __atanhieee128, __atanieee128, __copysignieee128,
__coshieee128, __cosieee128, __erfcieee128, __erfieee128,
__expieee128, __fabsieee128, __fmaieee128, __fmodieee128, __jnieee128,
__log10ieee128, __logieee128, __powieee128, __sinhieee128,
__sinieee128, __sqrtieee128, __tanhieee128, __tanieee128,
__ynieee128, __strtoieee128): Use _Float128 instead of __float128.
* configure: Regenerated.

--- libgfortran/mk-kinds-h.sh.jj2022-06-28 13:14:45.334799175 +0200
+++ libgfortran/mk-kinds-h.sh   2022-06-29 14:01:32.935361103 +0200
@@ -67,7 +67,7 @@ for k in $possible_real_kinds; do
   8) ctype="double" ; cplxtype="complex double" ; suffix="" ;;
   # If we have a REAL(KIND=10), it is always long double
   10) ctype="long double" ; cplxtype="complex long double" ; suffix="l" ;;
-  # If we have a REAL(KIND=16), it is either long double or __float128
+  # If we have a REAL(KIND=16), it is either long double or _Float128
   16) if [ $long_double_kind -ne 16 ]; then
ctype="_Float128"
cplxtype="_Complex _Float128"
--- libgfortran/acinclude.m4.jj 2022-06-29 11:45:19.286551469 +0200
+++ libgfortran/acinclude.m42022-06-29 14:00:22.964279364 +0200
@@ -261,7 +261,7 @@ __mingw_snprintf (NULL, 0, "%d\n", 1);
   fi
 ])
 
-dnl Check whether we have a __float128 type
+dnl Check whether we have a __float128 and _Float128 type
 AC_DEFUN([LIBGFOR_CHECK_FLOAT128], [
   LIBQUADSPEC=
   LIBQUADLIB=
@@ -537,8 +537,8 @@ AC_DEFUN([LIBGFOR_CHECK_MATH_IEEE128],
   AC_REQUIRE([GCC_CHECK_MATH_HEADERS])
   AC_CACHE_CHECK([for $1], [gcc_cv_math_func_$1],
 [AC_LINK_IFELSE([AC_LANG_SOURCE([
-__float128 $1 (__float128);
-__float128 (*ptr)(__float128) = $1;
+_Float128 $1 (_Float128);
+_Float128 (*ptr)(_Float128) = $1;
 
 int
 main ()
--- libgfortran/libgfortran.h.jj2022-06-28 13:14:45.333799188 +0200
+++ libgfortran/libgfortran.h   2022-06-29 14:01:14.364604813 +0200
@@ -247,7 +247,7 @@ extern int __mingw_snprintf (char *, siz
 
Another advantage for GCC's builtins for these type-generic macros
is that it handles floating-point types that the system headers
-   may not support (like __float128).  */
+   may not support (like _Float128).  */
 
 #undef isnan
 #define isnan(x) __builtin_isnan(x)
@@ -1960,59 +1960,59 @@ internal_proto(cshift1_16_c17);
 
 /* Prototypes for the POWER __ieee128 functions.  */
 #ifdef POWER_IEEE128
-extern __float128 __acoshieee128 (__float128)
+extern _Float128 __acoshieee128 (_Float128)
   __attribute__ ((__nothrow__, __leaf__));
-extern __float128 __acosieee128 (__float128)
+extern _Float128 __acosieee128 (_Float128)
   __attribute__ ((__nothrow__, __leaf__));
-extern __float128 __asinhieee128 (__float128)
+extern _Float128 __asinhieee128 (_Float128)
   __attribute__ ((__nothrow__, __leaf__));
-extern __float128 __asinieee128 (__float128)
+extern _Float128 __asinieee128 (_Float128)
   __attribute__ ((__nothrow__, __leaf__));
-extern __float128 __atan2ieee128 (__float128)
+extern _Float128 __atan2ieee128 (_Float128)
   __attribute__ ((__nothrow__, __leaf__));
-extern __float128 __atanhieee128 (__float128)
+extern _Float128 __atanhieee128 (_Float128)
   __attribute__ ((__nothrow__, __leaf__));
-extern __float128 __atanieee128 (__float128)
+extern _Float128 __atanieee128 (_Float128)
   __attribute__ ((__nothrow__, __leaf__));
-extern __float128 __copysignieee128 (__float128, __float128)
+extern _Float128 __copysignieee128 (_Float128, _Float128)
   __attribute__ ((__nothrow__, __leaf__));
-extern __float128 __coshieee128 (__float128)
+extern _Float128 __coshieee128 (_Float128)
   __attribute__ ((__nothrow__, __leaf__));
-extern __float128 __cosieee128 (__float128)
+extern _Float128 __cosieee128 (_Float128)
   __attribute__ ((__nothrow__, __leaf__));
-extern __float128 __erfcieee128 (__float128)
+extern _Float128 __erfcieee128 (_Float128)
   __attribute__ ((__nothrow__, __leaf__));
-extern __float128 __erfieee128 (__float128)
+extern _Float128 __erfieee128 (_Float128)
   __attribute__ ((__nothrow__, __leaf__));
-extern __float128 __expieee128 (__float128)
+extern _Float128 __expieee128 (_Float128)
   __attribute__ ((__nothrow__, __leaf__));
-extern __float128 __fabsieee128 (__float128)
+extern _Float128 __fabsieee128 (_Float128)
   __attribute__ ((__nothrow__, __leaf__));
-extern __float128 __fmaieee128 (__float128, __float128, __float128)

Re: [committed] openmp: Add support for HBW or large capacity or interleaved memory through the libmemkind.so library

2022-06-29 Thread Andrew Stubbs


On 29/06/2022 11:45, Jakub Jelinek wrote:

And omp_init_allocator needs to decide what to do if one asks for features
that need memkind as well as for features that need whatever you/Abid have
been working on.  A possible resolution is punt (return omp_null_allocator),
or prefer one feature over the other one or vice versa.
 From the features currently handled by memkind, even before my changes for
consistency with libomp from llvm omp_high_bw_mem_space was considered a
hard request, ditto omp_atk_pinned omp_atv_true, but the rest was just taken
as optimization hint.


Right now I'm rebasing our patches so that they build and pass our tests 
again.


I don't know what to do if someone requests pinned high-bandwidth 
memory. I don't even know if that's a thing that can exist. Right now, 
as I have my dev branch, the high-bandwidth will take precedence simply 
because memkind is checked first. I presume that if memkind ever grew 
the ability to allocate pinned memory you would still have the choose 
one "kind" or the other.


Andrew

[PATCH] aarch64: Move vreinterpret definitions into the compiler

2022-06-29 Thread Andrew Carlotti via Gcc-patches

Hi,

This removes a significant number of intrinsic definitions from the arm_neon.h
header file, and reduces the amount of code duplication. The new macros and
data structures are intended to also facilitate moving other intrinsic
definitions out of the header file in future.

There is a a slight change in the behaviour of the bf16 vreinterpret intrinsics
when compiling without bf16 support. Expressions like:

b = vreinterpretq_s32_bf16(vreinterpretq_bf16_s64(a));

are now compiled successfully, instead of causing a 'target specific option
mismatch' during inlining.

Bootstrapped and tested on aarch64-none-linux-gnu

gcc/ChangeLog:

* config/aarch64/aarch64-builtins.cc
(v1di_UP): Add V1DI mode.
(MODE_d_bf16, MODE_d_f16, MODE_d_f32, MODE_d_f64, MODE_d_s8)
(MODE_d_s16, MODE_d_s32, MODE_d_s64, MODE_d_u8, MODE_d_u16)
(MODE_d_u32, MODE_d_u64, MODE_d_p8, MODE_d_p16, MODE_d_p64)
(MODE_q_bf16, MODE_q_f16, MODE_q_f32, MODE_q_f64, MODE_q_s8)
(MODE_q_s16, MODE_q_s32, MODE_q_s64, MODE_q_u8, MODE_q_u16)
(MODE_q_u32, MODE_q_u64, MODE_q_p8, MODE_q_p16, MODE_q_p64)
(MODE_q_p128): Define macro to map to corresponding mode name.
(QUAL_bf16, QUAL_f16, QUAL_f32, QUAL_f64, QUAL_s8, QUAL_s16)
(QUAL_s32, QUAL_s64, QUAL_u8, QUAL_u16, QUAL_u32, QUAL_u64)
(QUAL_p8, QUAL_p16, QUAL_p64, QUAL_p128): Define macro to map to
corresponding qualifier name.
(LENGTH_d, LENGTH_q): Define macro to map to "" or "q" suffix.
(SIMD_INTR_MODE, SIMD_INTR_QUAL, SIMD_INTR_LENGTH_CHAR): Macro
functions for the above mappings
(VREINTERPRET_BUILTIN2, VREINTERPRET_BUILTINS1, VREINTERPRET_BUILTINS)
(VREINTERPRETQ_BUILTIN2, VREINTERPRETQ_BUILTINS1)
(VREINTERPRETQ_BUILTINS, VREINTERPRET_BUILTIN)
(AARCH64_SIMD_VREINTERPRET_BUILTINS): New macros to create definitions
for all vreinterpret intrinsics
(enum aarch64_builtins): Add vreinterpret function codes
(aarch64_init_simd_intrinsics): New
(handle_arm_neon_h): Improved comment.
(aarch64_general_fold_builtin): Fold vreinterpret calls
* config/aarch64/aarch64-modes.def (VECTOR_MODE): Add V1DI mode
* config/aarch64/aarch64-simd-builtin-types.def: Use V1DI mode
* config/aarch64/aarch64-simd.md (vec_extractv2div1di): New
* config/aarch64/aarch64.cc (aarch64_classify_vector_mode): Add V1DI 
mode
* config/aarch64/arm_neon.h
(vreinterpret_p8_f16, vreinterpret_p8_f64, vreinterpret_p8_s8)
(vreinterpret_p8_s16, vreinterpret_p8_s32, vreinterpret_p8_s64)
(vreinterpret_p8_f32, vreinterpret_p8_u8, vreinterpret_p8_u16)
(vreinterpret_p8_u32, vreinterpret_p8_u64, vreinterpret_p8_p16)
(vreinterpret_p8_p64, vreinterpretq_p8_f64, vreinterpretq_p8_s8)
(vreinterpretq_p8_s16, vreinterpretq_p8_s32, vreinterpretq_p8_s64)
(vreinterpretq_p8_f16, vreinterpretq_p8_f32, vreinterpretq_p8_u8)
(vreinterpretq_p8_u16, vreinterpretq_p8_u32, vreinterpretq_p8_u64)
(vreinterpretq_p8_p16, vreinterpretq_p8_p64, vreinterpretq_p8_p128)
(vreinterpret_p16_f16, vreinterpret_p16_f64, vreinterpret_p16_s8)
(vreinterpret_p16_s16, vreinterpret_p16_s32, vreinterpret_p16_s64)
(vreinterpret_p16_f32, vreinterpret_p16_u8, vreinterpret_p16_u16)
(vreinterpret_p16_u32, vreinterpret_p16_u64, vreinterpret_p16_p8)
(vreinterpret_p16_p64, vreinterpretq_p16_f64, vreinterpretq_p16_s8)
(vreinterpretq_p16_s16, vreinterpretq_p16_s32, vreinterpretq_p16_s64)
(vreinterpretq_p16_f16, vreinterpretq_p16_f32, vreinterpretq_p16_u8)
(vreinterpretq_p16_u16, vreinterpretq_p16_u32, vreinterpretq_p16_u64)
(vreinterpretq_p16_p8, vreinterpretq_p16_p64, vreinterpretq_p16_p128)
(vreinterpret_p64_f16, vreinterpret_p64_f64, vreinterpret_p64_s8)
(vreinterpret_p64_s16, vreinterpret_p64_s32, vreinterpret_p64_s64)
(vreinterpret_p64_f32, vreinterpret_p64_u8, vreinterpret_p64_u16)
(vreinterpret_p64_u32, vreinterpret_p64_u64, vreinterpret_p64_p8)
(vreinterpret_p64_p16, vreinterpretq_p64_f64, vreinterpretq_p64_s8)
(vreinterpretq_p64_s16, vreinterpretq_p64_s32, vreinterpretq_p64_s64)
(vreinterpretq_p64_f16, vreinterpretq_p64_f32, vreinterpretq_p64_p128)
(vreinterpretq_p64_u8, vreinterpretq_p64_u16, vreinterpretq_p64_p16)
(vreinterpretq_p64_u32, vreinterpretq_p64_u64, vreinterpretq_p64_p8)
(vreinterpretq_p128_p8, vreinterpretq_p128_p16, vreinterpretq_p128_f16)
(vreinterpretq_p128_f32, vreinterpretq_p128_p64, vreinterpretq_p128_s64)
(vreinterpretq_p128_u64, vreinterpretq_p128_s8, vreinterpretq_p128_s16)
(vreinterpretq_p128_s32, vreinterpretq_p128_u8, vreinterpretq_p128_u16)
(vreinterpretq_p128_u32, vreinterpret_f16_f64, vreinterpret_f16_s8)
(vreinterpret_f16_s16): (vreinterpret_f16_s32): (vreinterpret_f16_s64):

[PATCH] libsanitizer: cherry-pick 791e0d1bc85d

2022-06-29 Thread Martin Liška


Pushed as it's only cherry-pick that fixes the following
rpmlint issue:

libtsan2.s390x: E: executable-stack (Badness: 1) /usr/lib64/libtsan.so.2.0.0

I'm going to take it also to gcc-12 branch.

Cheers,
Martin

791e0d1bc85d: [compiler-rt] Add NO_EXEC_STACK_DIRECTIVE on s390x
---
 libsanitizer/tsan/tsan_rtl_s390x.S | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libsanitizer/tsan/tsan_rtl_s390x.S 
b/libsanitizer/tsan/tsan_rtl_s390x.S
index fcff35fbc7e..2f445e8f1b2 100644
--- a/libsanitizer/tsan/tsan_rtl_s390x.S
+++ b/libsanitizer/tsan/tsan_rtl_s390x.S
@@ -45,3 +45,5 @@ intercept setjmp, _ZN14__interception11real_setjmpE
 intercept _setjmp, _ZN14__interception12real__setjmpE
 intercept sigsetjmp, _ZN14__interception14real_sigsetjmpE
 intercept __sigsetjmp, _ZN14__interception16real___sigsetjmpE
+
+NO_EXEC_STACK_DIRECTIVE
--
2.36.1

Re: [PATCH] libgccjit: Fix bug where unary_op will return an integer type instead of the correct type

2022-06-29 Thread Antoni Boucher via Gcc-patches

It fails with the following error:

gcc/gcc/testsuite/jit.dg/test-asm.cc: In function 'void
verify_code_2(gcc_jit_context*, gcc_jit_result*)':
gcc/gcc/testsuite/jit.dg/test-asm.cc:160:11: error: ISO C++ forbids
declaration of 'uint32_t' with no type [-fpermissive]
gcc/gcc/testsuite/jit.dg/test-asm.cc:160:11: error: typedef 'uint32_t'
is initialized (use 'decltype' instead)

Are you OK with me adding the stdint.h header?

On Tue, 2022-06-28 at 18:40 -0400, David Malcolm wrote:
> On Wed, 2022-06-01 at 22:45 -0400, Antoni Boucher via Gcc-patches
> wrote:
> > Also, the test gcc/testsuite/jit.dg/test-asm.cc fails and would
> > need
> > this line:
> > 
> > #include 
> 
> I'm curious; how is it failing?
> 
> > 
> > Is this okay if I add it in this patch?
> > 
> > On Wed, 2022-06-01 at 22:13 -0400, Antoni Boucher wrote:
> > > Hi.
> > > The attached patch fix bug 105812:
> > > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105812
> > > 
> > > I'm having an issue where contrib/check_GNU_style.sh doesn't seem
> > > to
> > > work, i.e. it doesn't seem to do any checking.
> > > Is there a new way to do that or am I missing something?
> > > 
> > > Thanks for the review.
> > 
> 
>

Re: [PATCH] libgccjit: Fix bug where unary_op will return an integer type instead of the correct type

2022-06-29 Thread David Malcolm via Gcc-patches

On Wed, 2022-06-29 at 09:44 -0400, Antoni Boucher wrote:
> It fails with the following error:
> 
> gcc/gcc/testsuite/jit.dg/test-asm.cc: In function 'void
> verify_code_2(gcc_jit_context*, gcc_jit_result*)':
> gcc/gcc/testsuite/jit.dg/test-asm.cc:160:11: error: ISO C++ forbids
> declaration of 'uint32_t' with no type [-fpermissive]
> gcc/gcc/testsuite/jit.dg/test-asm.cc:160:11: error: typedef
> 'uint32_t'
> is initialized (use 'decltype' instead)

Aha - I didn't notice the use of uint32_t there.
> 
> Are you OK with me adding the stdint.h header?

Yes, thanks.

Dave

> 
> On Tue, 2022-06-28 at 18:40 -0400, David Malcolm wrote:
> > On Wed, 2022-06-01 at 22:45 -0400, Antoni Boucher via Gcc-patches
> > wrote:
> > > Also, the test gcc/testsuite/jit.dg/test-asm.cc fails and would
> > > need
> > > this line:
> > > 
> > > #include 
> > 
> > I'm curious; how is it failing?
> > 
> > > 
> > > Is this okay if I add it in this patch?
> > > 
> > > On Wed, 2022-06-01 at 22:13 -0400, Antoni Boucher wrote:
> > > > Hi.
> > > > The attached patch fix bug 105812:
> > > > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105812
> > > > 
> > > > I'm having an issue where contrib/check_GNU_style.sh doesn't
> > > > seem
> > > > to
> > > > work, i.e. it doesn't seem to do any checking.
> > > > Is there a new way to do that or am I missing something?
> > > > 
> > > > Thanks for the review.
> > > 
> > 
> > 
>

Re: [Patch][v4] OpenMP: Move omp requires checks to libgomp

2022-06-29 Thread Tobias Burnus


Hi Jakub, hi all,

new version attached. It now checks during lto1 whether the values are
consistent – and fails with a hard error.

The actually used value (by libgomp) is stored as a scalar weak symbol –
while for checking, each translation unit stores the integer value for
lto (alongside the offload table). This is both used for checking and in
lto1 (device + host lto1), to restore the value of 'omp_requires_mask'
for further use. – Currently, it is only used on the host to make the
value available to libgomp. – However, a device lto1 could also use it.
(Usage: cf. Andrew's USM gcn patch.)

Unchanged from previous version, libgomp outputs an warning/note if a
device could be found but the requires prevented libgomp from using it.
This message is also shown with -foffload=disable but it is not shown
for OMP_TARGET_OFFLOAD=disable.

Other change is that API calls no longer count as relevant for 'omp
requires' – such that compilation units which only contain those will
not output anything (independent whether there is an 'omp requires' or not.)

On 09.06.22 16:19, Jakub Jelinek wrote:

On Thu, Jun 09, 2022 at 02:46:34PM +0200, Tobias Burnus wrote:

On 09.06.22 13:40, Jakub Jelinek via Gcc-patches wrote:

If it is from me, bet it was because of that (mis)understanding that
device routines are device related runtime API calls.
I'd suggest to only mark in the patch what is clear (which is device
constructs) and defer the rest until it is clarified.

Done so.

Shouldn't the vars in that section be const, so that it is a read-only
section?

Is unsigned_type_node what we want (say wouldn't be just unsigned_char_node
be enough, currently we just need 3 bits).

Probably -that would be 8 bits, leaving 5 spare. I have not checked what
Andrew et al. do with the pinned-memory support by -f, but
that will likely use only 1 to 3 bits, if any.

If it is SHF_MERGE, even 16-bit or 32-bit wouldn't be the end of the world,
or if it is in LTO streamed out stuff, we can use a bitpack for it...


As the final binary will only contain a single variable, the size should
not matter much. I currently use 'unsigned' but it could surely be
shorter.  For the .o files, it also outputs the unsigned value for each
TU, but that's also small. I was thinking about adding more data (like
location data, be it location_t or __FILENAME__). However, it uses a
stripped-down stream writer - and to do so, location/string writing
requires a different object (and reading it, data_in). I did not regard
this as worthwhile and, thus, I only output the used requires clauses
and not where they were used.


I think best would be a fatal error if people try to configure
offloading targets for a compiler that doesn't support named sections,
or perhaps that and presence of anything that should be offloaded.


I do not use any named section – but I could if it makes sense. In any
case, the question is whether the current weak symbol makes sense or
not. And whether there are problems in using weak symbols (in libgomp's
target.c + for non-ACCEL_COMPILER, but only when the symbol needs to be
written). I am also not sure about the best naming. – Thoughts?

Otherwise, tested with no offloading configured + with offloading to
nvptx (fully tested) and gcn (partially) [all x86_64-gnu-linux)

Tobias

PS: At some point, we need to think about handling calling from a
program's target region a declare-target device function which is inside
a shared library. I am sure, we currently do not handle it. – For that,
we then also have to think about how to handle the requires clauses.
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
OpenMP: Move omp requires checks to libgomp

Handle reverse_offload, unified_address, and unified_shared_memory
requirements in libgomp by putting them into the '__offload_requires_mask'
weak variable. Additionally, store the value alongside the offload table
in lto - to permit checking the value for consistency in lto1. The value
is only stored when actually required due to 'omp (declare) target ...'.

In lto1 (either the host one, with -flto [+ ENABLE_OFFLOADING], or in the
offload-device lto1), the consistency check is done, erroring out when
an inconistency is found.

For all in-principle supported devices, if a requirement cannot be fulfilled,
the device is excluded from the (supported) devices list. Currently, none of
those requirements are marked as supported for any of the non-host devices.

gcc/c/ChangeLog:

* c-parser.cc (c_parser_declaration_or_fndef): Set
OMP_REQUIRES_TARGET_USED in omp_requires_mask if function has
"omp declare target" attribute.
(c_parser_omp_target_data): Set OMP_REQUIRES_TARGET_USED in
omp_requires_mask.
(c_parser_omp_target_enter_data): Likewise.
(c_p

Re: [PATCH 1/2]AArch64 Add fallback case using sdot for usdot

2022-06-29 Thread Richard Sandiford via Gcc-patches

Richard Biener  writes:
> On Tue, Jun 28, 2022 at 5:54 PM Tamar Christina  
> wrote:
>>
>> > -Original Message-
>> > From: Richard Biener 
>> > Sent: Monday, June 27, 2022 7:10 AM
>> > To: Tamar Christina 
>> > Cc: Richard Sandiford ; Richard Earnshaw
>> > ; nd ; gcc-
>> > patc...@gcc.gnu.org; Marcus Shawcroft 
>> > Subject: Re: [PATCH 1/2]AArch64 Add fallback case using sdot for usdot
>> >
>> > On Mon, Jun 27, 2022 at 7:25 AM Tamar Christina via Gcc-patches > > patc...@gcc.gnu.org> wrote:
>> > >
>> > > > -Original Message-
>> > > > From: Richard Sandiford 
>> > > > Sent: Thursday, June 16, 2022 7:54 PM
>> > > > To: Tamar Christina 
>> > > > Cc: gcc-patches@gcc.gnu.org; nd ; Richard Earnshaw
>> > > > ; Marcus Shawcroft
>> > > > ; Kyrylo Tkachov
>> > 
>> > > > Subject: Re: [PATCH 1/2]AArch64 Add fallback case using sdot for
>> > > > usdot
>> > > >
>> > > > Richard Sandiford via Gcc-patches  writes:
>> > > > > Tamar Christina  writes:
>> > > > >> Hi All,
>> > > > >>
>> > > > >> The usdot operation is common in video encoder and decoders
>> > > > >> including some of the most widely used ones.
>> > > > >>
>> > > > >> This patch adds a +dotprod version of the optab as a fallback for
>> > > > >> when you do have sdot but not usdot available.
>> > > > >>
>> > > > >> The fallback works by adding a bias to the unsigned argument to
>> > > > >> convert it to a signed value and then correcting for the bias later 
>> > > > >> on.
>> > > > >>
>> > > > >> Essentially it relies on (x - 128)y + 128y == xy where x is
>> > > > >> unsigned and y is signed (assuming both are 8-bit values).
>> > > > >> Because the range of a signed byte is only to 127 we split the bias
>> > correction into:
>> > > > >>
>> > > > >>(x - 128)y + 127y + y
>> > > > >
>> > > > > I bet you knew this question was coming, but: this technique isn't
>> > > > > target-specific, so wouldn't it be better to handle it in
>> > > > > tree-vect-patterns.cc instead?
>> > >
>> > > Ok, so after many hours of trying I don't know how to make this work.
>> > > DOT_PROD_EXPR is a reduction, but emitting them as additional pattern
>> > > statement doesn't work because they'll be marked as internal_def
>> > > rather than reduction_def.  I tried marking the new vec_stmt_info that
>> > > I create explicitly as reduction_def but this gets overwritten during 
>> > > analysis.
>> > >
>> > > I then looked into getting it as a vectorizable_operation but has this
>> > > obvious problems In that it no longer treats it as a reduction and so 
>> > > tries to
>> > decompose into hi/lo.
>> > >
>> > > I then looked into treating additional patterns from  a reduction as
>> > > reductions themselves but this is obviously wrong as non-reduction
>> > statements also get marked as reductions.
>> > >
>> > > The conclusion is that I don't think the vectorizer allows additional
>> > > reductions to be emitted from patterns.
>> >
>> > Indeed.  DOT_PROD is a weird beast and it doesn't define which lanes are
>> > reduced to which so it's only usable when the result is reduced to a single
>> > lane.
>> >
>> > An SLP pattern might work if you use reduc-plus for the reduced lanes and
>> > keep the multiply separate?
>>
>> Unfortunately I can't seem to get it to handle the reduction in SLP.  It 
>> seems to always
>> use the non-SLP aware loop vectorizer here.  The suggested unroll factor is 
>> always 1 and
>> even trying to force it gets it to bail out later, presumable because it's 
>> reducing into a
>> scalar that's used outside the loop?
>
> Yes, it possibly needs 1-lane SLP support.

As I mentioned to Tamar off-list, I feel like I've been wasting
people's time recently by spewing out ideas that might or might not work
(usually "not work"), so I wanted to get some confidence that the next
suggestion made sense.  In the end I needed most of an implementation
to do that, so it seemed easiest just to finish it off rather than post
it in a half-complete state.  Sorry for the duplication. :-(

The patch certainly isn't pretty, but I think it's the best we can
do under the current infrastructure, and it should at least make
the costs reasonably accurate.  (Actually, that said, we probably
need to patch the reduction latency calculation in the aarch64
vector code -- didn't think of that until now.)

Tested on aarch64-linux-gnu and x64_64-linux-gnu.  WDYT?

Thanks,
Richard



Following a suggestion from Tamar, this patch adds a fallback
implementation of usdot using sdot.  Specifically, for 8-bit
input types:

   acc_2 = DOT_PROD_EXPR ;

becomes:

   tmp_1 = DOT_PROD_EXPR <64, b_signed, acc_1>;
   tmp_2 = DOT_PROD_EXPR <64, b_signed, tmp_1>;
   acc_2 = DOT_PROD_EXPR ;

on the basis that (x-128)*y + 64*y + 64*y.  Doing the two 64*y
operations first should give more time for x to be calculated,
on the off chance that that's useful.

gcc/
* tree-vect-patterns.cc (vect_recog_dot_prod_pattern): If usdot
isn't available, try sdot instead.
* tree-v

Re: [PATCH] libgfortran: Fix up LIBGFOR_CHECK_FLOAT128 [PR106137]

2022-06-29 Thread Tobias Burnus


On 29.06.22 14:13, Jakub Jelinek via Fortran wrote:

My recent gfortran + libgfortran patch apparently broke (some?) aarch64
builds.  While it is desirable to use just _Float128 rather than __float128,
we only want to use it (and e.g. define HAVE_FLOAT128) on targets where
_Float128 is supported and long double isn't IEEE quad precision.
Which is targets that support __float128 type which we have been testing
for before - _Float128 is supported on those targets and on targets where
long double is IEEE quad precision.

So, the following patch restores check for whether __float128 is supported
into the LIBGFOR_CHECK_FLOAT128 check which determines whether
HAVE_FLOAT128 is defined or whether to use libquadmath, so that e.g. on
aarch64 where long double is IEEE quad we don't do that.

Tested by Tamar on aarch64 and by me on x86_64-linux, ok for trunk?
2022-06-29  Jakub Jelinek  

  PR bootstrap/106137
  * acinclude.m4 (LIBGFOR_CHECK_FLOAT128): Also test for __float128.
  * configure: Regenerated.

--- libgfortran/acinclude.m4.jj   2022-06-28 13:14:45.327799267 +0200
+++ libgfortran/acinclude.m4  2022-06-29 11:45:19.286551469 +0200
@@ -276,7 +276,6 @@ AC_DEFUN([LIBGFOR_CHECK_FLOAT128], [
 GCC_TRY_COMPILE_OR_LINK([
  _Float128 foo (_Float128 x)


...


+__float128 baz (__float128 x)


As now both __float128 and _Float128 is tested, can you also update:

 dnl Check whether we have a __float128 type
 AC_DEFUN([LIBGFOR_CHECK_FLOAT128], [

...
   AC_CACHE_CHECK([whether we have a usable _Float128 type],
  libgfor_cv_have_float128, [

I note that your follow-up patch adds _Float128 to the dnl comment,
but I think __float128 should also be added to the cache output to
make clear that both _Float128/__float128 are checked.

Otherwise: LGTM.

Thanks,

Tobias

-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955

Re: [PATCH] libgfortran: Switch some more __float128 uses to _Float128

2022-06-29 Thread Tobias Burnus


On 29.06.22 14:15, Jakub Jelinek via Fortran wrote:

My patch apparently left some __float128 uses in libgfortran
that could use _Float128 instead, the following patch changes that.

Ok for trunk?


LGTM. Thanks!

Tobias


2022-06-29  Jakub Jelinek  

  * mk-kinds-h.sh: Change __float128 to _Float128 in a comment.
  * acinclude.m4 (LIBGFOR_CHECK_FLOAT128): Adjust comment.
  (LIBGFOR_CHECK_MATH_IEEE128): Use _Float128 instead of __float128.
  * libgfortran.h (isnan): Change __float128 to _Float128 in a comment.
  (__acoshieee128, __acosieee128, __asinhieee128, __asinieee128,
  __atan2ieee128, __atanhieee128, __atanieee128, __copysignieee128,
  __coshieee128, __cosieee128, __erfcieee128, __erfieee128,
  __expieee128, __fabsieee128, __fmaieee128, __fmodieee128, __jnieee128,
  __log10ieee128, __logieee128, __powieee128, __sinhieee128,
  __sinieee128, __sqrtieee128, __tanhieee128, __tanieee128,
  __ynieee128, __strtoieee128): Use _Float128 instead of __float128.
  * configure: Regenerated.

--- libgfortran/mk-kinds-h.sh.jj  2022-06-28 13:14:45.334799175 +0200
+++ libgfortran/mk-kinds-h.sh 2022-06-29 14:01:32.935361103 +0200
@@ -67,7 +67,7 @@ for k in $possible_real_kinds; do
8) ctype="double" ; cplxtype="complex double" ; suffix="" ;;
# If we have a REAL(KIND=10), it is always long double
10) ctype="long double" ; cplxtype="complex long double" ; suffix="l" ;;
-  # If we have a REAL(KIND=16), it is either long double or __float128
+  # If we have a REAL(KIND=16), it is either long double or _Float128
16) if [ $long_double_kind -ne 16 ]; then
  ctype="_Float128"
  cplxtype="_Complex _Float128"
--- libgfortran/acinclude.m4.jj   2022-06-29 11:45:19.286551469 +0200
+++ libgfortran/acinclude.m4  2022-06-29 14:00:22.964279364 +0200
@@ -261,7 +261,7 @@ __mingw_snprintf (NULL, 0, "%d\n", 1);
fi
  ])

-dnl Check whether we have a __float128 type
+dnl Check whether we have a __float128 and _Float128 type
  AC_DEFUN([LIBGFOR_CHECK_FLOAT128], [
LIBQUADSPEC=
LIBQUADLIB=
@@ -537,8 +537,8 @@ AC_DEFUN([LIBGFOR_CHECK_MATH_IEEE128],
AC_REQUIRE([GCC_CHECK_MATH_HEADERS])
AC_CACHE_CHECK([for $1], [gcc_cv_math_func_$1],
   [AC_LINK_IFELSE([AC_LANG_SOURCE([
-__float128 $1 (__float128);
-__float128 (*ptr)(__float128) = $1;
+_Float128 $1 (_Float128);
+_Float128 (*ptr)(_Float128) = $1;

  int
  main ()
--- libgfortran/libgfortran.h.jj  2022-06-28 13:14:45.333799188 +0200
+++ libgfortran/libgfortran.h 2022-06-29 14:01:14.364604813 +0200
@@ -247,7 +247,7 @@ extern int __mingw_snprintf (char *, siz

 Another advantage for GCC's builtins for these type-generic macros
 is that it handles floating-point types that the system headers
-   may not support (like __float128).  */
+   may not support (like _Float128).  */

  #undef isnan
  #define isnan(x) __builtin_isnan(x)
@@ -1960,59 +1960,59 @@ internal_proto(cshift1_16_c17);

  /* Prototypes for the POWER __ieee128 functions.  */
  #ifdef POWER_IEEE128
-extern __float128 __acoshieee128 (__float128)
+extern _Float128 __acoshieee128 (_Float128)
__attribute__ ((__nothrow__, __leaf__));
-extern __float128 __acosieee128 (__float128)
+extern _Float128 __acosieee128 (_Float128)
__attribute__ ((__nothrow__, __leaf__));
-extern __float128 __asinhieee128 (__float128)
+extern _Float128 __asinhieee128 (_Float128)
__attribute__ ((__nothrow__, __leaf__));
-extern __float128 __asinieee128 (__float128)
+extern _Float128 __asinieee128 (_Float128)
__attribute__ ((__nothrow__, __leaf__));
-extern __float128 __atan2ieee128 (__float128)
+extern _Float128 __atan2ieee128 (_Float128)
__attribute__ ((__nothrow__, __leaf__));
-extern __float128 __atanhieee128 (__float128)
+extern _Float128 __atanhieee128 (_Float128)
__attribute__ ((__nothrow__, __leaf__));
-extern __float128 __atanieee128 (__float128)
+extern _Float128 __atanieee128 (_Float128)
__attribute__ ((__nothrow__, __leaf__));
-extern __float128 __copysignieee128 (__float128, __float128)
+extern _Float128 __copysignieee128 (_Float128, _Float128)
__attribute__ ((__nothrow__, __leaf__));
-extern __float128 __coshieee128 (__float128)
+extern _Float128 __coshieee128 (_Float128)
__attribute__ ((__nothrow__, __leaf__));
-extern __float128 __cosieee128 (__float128)
+extern _Float128 __cosieee128 (_Float128)
__attribute__ ((__nothrow__, __leaf__));
-extern __float128 __erfcieee128 (__float128)
+extern _Float128 __erfcieee128 (_Float128)
__attribute__ ((__nothrow__, __leaf__));
-extern __float128 __erfieee128 (__float128)
+extern _Float128 __erfieee128 (_Float128)
__attribute__ ((__nothrow__, __leaf__));
-extern __float128 __expieee128 (__float128)
+extern _Float128 __expieee128 (_Float128)
__attribute__ ((__nothrow__, __leaf__));
-extern __float128 __fabsieee128 (__float128)
+extern _Float128 __fabsieee128 (_Float128)
__attribute__ ((__nothrow__, __leaf__));
-

Re: [PATCH] Fortran: improve error recovery for EXTENDS_TYPE_OF() [PR106121]

2022-06-29 Thread Tobias Burnus


On 28.06.22 22:36, Harald Anlauf via Gcc-patches wrote:


the simplification of EXTENDS_TYPE_OF() did not handle the
situation that one of its arguments were a CLASS variable
that was improperly declared.  NULL pointer dereference.

The fix is obvious.  Steve found a similar solution, which
is why I added him as co-author.

Regtested on x86_64-pc-linux-gnu.


LGTM – and I do note that Steven also okayed it in the PR.

Thanks,

Tobias

-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955

[committed] nios2: Fix PIC function call slowness

2022-06-29 Thread Joseph Myers

On Nios II, PIC function calls use R_NIOS2_CALL* relocations, which
may refer to a GOT entry that initially points to a PLT entry to
resolve the function on first call and that is then changed by the
dynamic linker to point directly to the function to be called so
subsequent calls do not go through the dynamic linker.  To quote the
ABI, "A global offset table (GOT) entry referenced using
R_NIOS2_GOT16, R_NIOS2_GOT_LO as well as R_NIOS2_GOT_HA must be
resolved at load time.  A GOT entry referenced only using
R_NIOS2_CALL16, R_NIOS2_CALL_LO as well as R_NIOS2_CALL_HA can
initially refer to a procedure linkage table (PLT) entry and then be
resolved lazily.".

However, GCC wrongly treats function addresses loaded from the GOT
with such relocations as constant.  If the address load is pulled out
of a loop, then every call in the loop looks up the function by name.
This shows up as very slow execution of many glibc testcases in glibc
2.35 and later (tests that call functions from shared libc many times
in a loop), where tests are now built as PIE by default.  Fix this
problem by using gen_rtx_MEM instead of gen_const_mem when loading
addresses for PIC function calls.

Tested with no regressions for cross to nios2-linux-gnu, where many
glibc tests pass that previously timed out.

* config/nios2/nios2.cc (nios2_load_pic_address): Use gen_rtx_MEM
not gen_const_mem for UNSPEC_PIC_CALL_SYM.

---

Approved off-list by Sandra and committed.

diff --git a/gcc/config/nios2/nios2.cc b/gcc/config/nios2/nios2.cc
index f193cde5a34..1a33c88f19f 100644
--- a/gcc/config/nios2/nios2.cc
+++ b/gcc/config/nios2/nios2.cc
@@ -2552,7 +2552,10 @@ nios2_load_pic_address (rtx sym, int unspec, rtx tmp)
   return nios2_large_got_address (offset, tmp);
 }
 
-  return gen_const_mem (Pmode, nios2_got_address (sym, unspec));
+  if (unspec == UNSPEC_PIC_CALL_SYM)
+return gen_rtx_MEM (Pmode, nios2_got_address (sym, unspec));
+  else
+return gen_const_mem (Pmode, nios2_got_address (sym, unspec));
 }
 
 /* Nonzero if the constant value X is a legitimate general operand

-- 
Joseph S. Myers
jos...@codesourcery.com

Re: [committed] d: Add SIMD intrinsics module and compiler built-ins.

2022-06-29 Thread Iain Buclaw via Gcc-patches

Excerpts from Andreas Schwab's message of Juni 29, 2022 12:09 pm:
> make[3]: Entering directory '/opt/gcc/gcc-20220629/Build/gcc'
> /opt/gcc/gcc-20220629/Build/./prev-gcc/xg++ 
> -B/opt/gcc/gcc-20220629/Build/./prev-gcc/ -B/usr/aarch64-suse-linux/bin/ 
> -nostdinc++ 
> -B/opt/gcc/gcc-20220629/Build/prev-aarch64-suse-linux/libstdc++-v3/src/.libs 
> -B/opt/gcc/gcc-20220629/Build/prev-aarch64-suse-linux/libstdc++-v3/libsupc++/.libs
>   
> -I/opt/gcc/gcc-20220629/Build/prev-aarch64-suse-linux/libstdc++-v3/include/aarch64-suse-linux
>   -I/opt/gcc/gcc-20220629/Build/prev-aarch64-suse-linux/libstdc++-v3/include  
> -I/opt/gcc/gcc-20220629/libstdc++-v3/libsupc++ 
> -L/opt/gcc/gcc-20220629/Build/prev-aarch64-suse-linux/libstdc++-v3/src/.libs 
> -L/opt/gcc/gcc-20220629/Build/prev-aarch64-suse-linux/libstdc++-v3/libsupc++/.libs
>   -fno-PIE -c  -DIN_GCC_FRONTEND -g -O2 -fno-checking -gtoggle -DIN_GCC 
> -fno-exceptions -fno-rtti -fasynchronous-unwind-tables -W -Wall 
> -Wno-narrowing -Wwrite-strings -Wcast-qual -Wmissing-format-attribute 
> -Woverloaded-virtual -pedantic -Wno-long-long -Wno-variadic-macros 
> -Wno-overlength-strings -Werror -fno-common  -DHAVE_CONFIG_H -I. -Id 
> -I../../gcc -I../../gcc/d -I../../gcc/../include 
> -I../../gcc/../libcpp/include -I../../gcc/../libcody  
> -I../../gcc/../libdecnumber -I../../gcc/../libdecnumber/bid -I../libdecnumber 
> -I../../gcc/../libbacktrace   -o d/intrinsics.o -MT d/intrinsics.o -MMD -MP 
> -MF d/.deps/intrinsics.TPo ../../gcc/d/intrinsics.cc
> ../../gcc/d/intrinsics.cc: In function 'tree_node* 
> build_shuffle_mask_type(tree)':
> ../../gcc/d/intrinsics.cc:279:42: error: 'nunits' may be used uninitialized 
> [-Werror=maybe-uninitialized]
>   279 |   return build_ctype (TypeVector::create (t->sarrayOf (nunits)));
>   |   ~~~^~
> ../../gcc/d/intrinsics.cc:276:26: note: 'nunits' was declared here
>   276 |   unsigned HOST_WIDE_INT nunits;
>   |  ^~
> In file included from ../../gcc/d/intrinsics.cc:19:
> ../../gcc/system.h: In function 'tree_node* 
> expand_intrinsic_vec_shufflevector(tree)':
> ../../gcc/system.h:396:29: error: 'v1elems' may be used uninitialized 
> [-Werror=maybe-uninitialized]
>   396 | #define MAX(X,Y) ((X) > (Y) ? (X) : (Y))
>   | ^
> ../../gcc/d/intrinsics.cc:1193:35: note: 'v1elems' was declared here
>  1193 |   unsigned HOST_WIDE_INT v0elems, v1elems;
>   |   ^~~
> ../../gcc/d/intrinsics.cc:1193:26: error: 'v0elems' may be used uninitialized 
> [-Werror=maybe-uninitialized]
>  1193 |   unsigned HOST_WIDE_INT v0elems, v1elems;
>   |  ^~~
> cc1plus: all warnings being treated as errors
> make[3]: *** [Makefile:1146: d/intrinsics.o] Error 1
> make[3]: Leaving directory '/opt/gcc/gcc-20220629/Build/gcc'
> 

Hi Andreas,

That's unfortunate, though curious why I am not seeing this on x86.
Does the error goes away if you wrap that around gcc_assert?

As I am already checking this a lot earlier before expansion, perhaps
it would be better to use to_constant().

Regards,
Iain.

[PATCH] c++: warn about using keywords as identifiers [PR106111]

2022-06-29 Thread Marek Polacek via Gcc-patches

In C++03, -Wc++11-compat should warn about

  int constexpr;

since 'constexpr' is a keyword in C++11.  Jonathan reports that
we don't emit a similar warning for 'alignas' or 'alignof', and,
as I found out, 'thread_local'.

Similarly, we don't warn for most C++20 keywords.  That happens
because RID_LAST_CXX20 hasn't been updated in a while.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

PR c++/106111

gcc/c-family/ChangeLog:

* c-common.h (enum rid): Update RID_LAST_CXX20.

gcc/cp/ChangeLog:

* parser.cc (cp_lexer_get_preprocessor_token): Also warn about
RID_ALIGNOF, RID_ALIGNAS, RID_THREAD.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/keywords1.C: New test.
* g++.dg/cpp2a/keywords1.C: New test.
---
 gcc/c-family/c-common.h|  2 +-
 gcc/cp/parser.cc   | 10 +++---
 gcc/testsuite/g++.dg/cpp0x/keywords1.C | 15 +++
 gcc/testsuite/g++.dg/cpp2a/keywords1.C | 12 
 4 files changed, 35 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/keywords1.C
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/keywords1.C

diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index 47442c95a53..a1e6a55158d 100644
--- a/gcc/c-family/c-common.h
+++ b/gcc/c-family/c-common.h
@@ -271,7 +271,7 @@ enum rid
   RID_FIRST_CXX11 = RID_CONSTEXPR,
   RID_LAST_CXX11 = RID_STATIC_ASSERT,
   RID_FIRST_CXX20 = RID_CONSTINIT,
-  RID_LAST_CXX20 = RID_CONSTINIT,
+  RID_LAST_CXX20 = RID_CO_RETURN,
   RID_FIRST_AT = RID_AT_ENCODE,
   RID_LAST_AT = RID_AT_IMPLEMENTATION,
   RID_FIRST_PQ = RID_IN,
diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index da2f370cdca..cc6525e0509 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -890,10 +890,14 @@ cp_lexer_get_preprocessor_token (unsigned flags, cp_token 
*token)
   else
{
   if (warn_cxx11_compat
-  && C_RID_CODE (token->u.value) >= RID_FIRST_CXX11
-  && C_RID_CODE (token->u.value) <= RID_LAST_CXX11)
+ && ((C_RID_CODE (token->u.value) >= RID_FIRST_CXX11
+  && C_RID_CODE (token->u.value) <= RID_LAST_CXX11)
+ /* These are outside the CXX11 range.  */
+ || C_RID_CODE (token->u.value) == RID_ALIGNOF
+ || C_RID_CODE (token->u.value) == RID_ALIGNAS
+ || C_RID_CODE (token->u.value)== RID_THREAD))
 {
-  /* Warn about the C++0x keyword (but still treat it as
+ /* Warn about the C++11 keyword (but still treat it as
  an identifier).  */
  warning_at (token->location, OPT_Wc__11_compat,
  "identifier %qE is a keyword in C++11",
diff --git a/gcc/testsuite/g++.dg/cpp0x/keywords1.C 
b/gcc/testsuite/g++.dg/cpp0x/keywords1.C
new file mode 100644
index 000..2b2ab6404ea
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/keywords1.C
@@ -0,0 +1,15 @@
+// PR c++/106111
+// { dg-do compile { target c++98_only } }
+// { dg-options "-Wc++11-compat" }
+
+int alignof; // { dg-warning "is a keyword in C\\\+\\\+11" }
+int alignas; // { dg-warning "is a keyword in C\\\+\\\+11" }
+int constexpr; // { dg-warning "is a keyword in C\\\+\\\+11" }
+int decltype; // { dg-warning "is a keyword in C\\\+\\\+11" }
+int noexcept; // { dg-warning "is a keyword in C\\\+\\\+11" }
+int nullptr; // { dg-warning "is a keyword in C\\\+\\\+11" }
+int static_assert; // { dg-warning "is a keyword in C\\\+\\\+11" }
+int thread_local; // { dg-warning "is a keyword in C\\\+\\\+11" }
+int _Alignas;
+int _Alignof;
+int _Thread_local;
diff --git a/gcc/testsuite/g++.dg/cpp2a/keywords1.C 
b/gcc/testsuite/g++.dg/cpp2a/keywords1.C
new file mode 100644
index 000..7f4dba2d3b7
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/keywords1.C
@@ -0,0 +1,12 @@
+// PR c++/106111
+// { dg-do compile { target c++17_down } }
+// { dg-options "-Wc++20-compat -Wc++11-compat -Wc++14-compat -Wc++17-compat" }
+
+int constinit; // { dg-warning "is a keyword in C\\\+\\\+20" }
+int consteval; // { dg-warning "is a keyword in C\\\+\\\+20" }
+int requires; // { dg-warning "is a keyword in C\\\+\\\+20" }
+int concept; // { dg-warning "is a keyword in C\\\+\\\+20" }
+int co_await; // { dg-warning "is a keyword in C\\\+\\\+20" }
+int co_yield; // { dg-warning "is a keyword in C\\\+\\\+20" }
+int co_return; // { dg-warning "is a keyword in C\\\+\\\+20" }
+int char8_t; // { dg-warning "is a keyword in C\\\+\\\+20" }

base-commit: b01c075e7e6d84da846c2ff9087433a30ebeb0d2
-- 
2.36.1

[committed] d: Fix build on aarch64-suse-linux

2022-06-29 Thread Iain Buclaw via Gcc-patches

Hi,

The variables being used to get the result out of TYPE_VECTOR_SUBPARTS
were being flagged by -Werror=maybe-uninitialized.  As they have already
been checked for being constant earlier, use `to_constant' instead.

This patch is based on feedback from Andreas.  Given the error they got,
this seems obvious.  Have only regstrapped on x86_64-linux-gnu though.

Regards,
Iain.

---
gcc/d/ChangeLog:

* intrinsics.cc (build_shuffle_mask_type): Use to_constant when
getting the number of subparts from a vector type.
(expand_intrinsic_vec_shufflevector): Likewise.
---
 gcc/d/intrinsics.cc | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/gcc/d/intrinsics.cc b/gcc/d/intrinsics.cc
index 454d940d1b5..75d43186909 100644
--- a/gcc/d/intrinsics.cc
+++ b/gcc/d/intrinsics.cc
@@ -273,8 +273,7 @@ build_shuffle_mask_type (tree type)
  printed (this should really be handled by a D tree printer).  */
   Type *t = build_frontend_type (inner);
   gcc_assert (t != NULL);
-  unsigned HOST_WIDE_INT nunits;
-  TYPE_VECTOR_SUBPARTS (type).is_constant (&nunits);
+  unsigned HOST_WIDE_INT nunits = TYPE_VECTOR_SUBPARTS (type).to_constant ();
 
   return build_ctype (TypeVector::create (t->sarrayOf (nunits)));
 }
@@ -1190,9 +1189,10 @@ expand_intrinsic_vec_shufflevector (tree callexp)
   tree vec0 = CALL_EXPR_ARG (callexp, 0);
   tree vec1 = CALL_EXPR_ARG (callexp, 1);
 
-  unsigned HOST_WIDE_INT v0elems, v1elems;
-  TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec0)).is_constant (&v0elems);
-  TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec1)).is_constant (&v1elems);
+  unsigned HOST_WIDE_INT v0elems =
+TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec0)).to_constant ();
+  unsigned HOST_WIDE_INT v1elems =
+TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec1)).to_constant ();
 
   unsigned HOST_WIDE_INT num_indices = call_expr_nargs (callexp) - 2;
   unsigned HOST_WIDE_INT masklen = MAX (num_indices, MAX (v0elems, v1elems));
-- 
2.34.1

c++: Rename macro location structs

2022-06-29 Thread Nathan Sidwell via Gcc-patches


The macro location tables should really mention they are about
locations.  So rename them.  Also, add a missing free of the remapping
table, and remove some now-unneeded macro checking.


nathan

--
Nathan SidwellFrom b0f25e1fdc6199725e69023a3dc49021f311ba66 Mon Sep 17 00:00:00 2001
From: Nathan Sidwell 
Date: Fri, 24 Jun 2022 05:17:24 -0700
Subject: [PATCH] c++: Rename macro location structs

The macro location tables should really mention they are about
locations.  So rename them.  Also, add a missing free of the remapping
table, and remove some now-unneeded macro checking.

	gcc/cp/
	* module.cc (macro_info, macro_traits, macro_table,
	macro_remap): Rename to ...
	(macro_loc_info, macro_loc_traits, macro_loc_table,
	macro_loc_remap): ... these.  Update all uses.
	(module_state::write_prepare_maps): Remove unneeded macro checking.
	(module_state::write_begin): Free macro_loc_remap.
---
 gcc/cp/module.cc | 73 +---
 1 file changed, 25 insertions(+), 48 deletions(-)

diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
index 8bb22c2b305..68a7ce53ee4 100644
--- a/gcc/cp/module.cc
+++ b/gcc/cp/module.cc
@@ -3241,15 +3241,15 @@ public:
 static loc_spans spans;
 
 /* Information about macro locations we stream out.  */
-struct macro_info
+struct macro_loc_info
 {
   const line_map_macro *src;// original expansion
   unsigned remap;	  // serialization
 
   static int compare (const void *a_, const void *b_)
   {
-auto *a = static_cast (a_);
-auto *b = static_cast (b_);
+auto *a = static_cast (a_);
+auto *b = static_cast (b_);
 
 gcc_checking_assert (MAP_START_LOCATION (a->src)
 			 != MAP_START_LOCATION (b->src));
@@ -3259,9 +3259,9 @@ struct macro_info
   return +1;
   }
 };
-struct macro_traits
+struct macro_loc_traits
 {
-  typedef macro_info value_type;
+  typedef macro_loc_info value_type;
   typedef const line_map_macro *compare_type;
 
   static const bool empty_zero_p = false;
@@ -3294,9 +3294,9 @@ struct macro_traits
   static void remove (value_type &) {}
 };
 /* Table keyed by line_map_macro, used for noting.  */
-static  hash_table *macro_table;
+static  hash_table *macro_loc_table;
 /* Sorted vector, used for writing.  */
-static vec *macro_remap;
+static vec *macro_loc_remap;
 
 /* Indirection to allow bsearching imports by ordinary location.  */
 static vec *ool;
@@ -15616,7 +15616,7 @@ module_state::imported_from () const
 void
 module_state::note_location (location_t loc)
 {
-  if (!macro_table)
+  if (!macro_loc_table)
 ;
   else if (loc < RESERVED_LOCATION_COUNT)
 ;
@@ -15635,9 +15635,9 @@ module_state::note_location (location_t loc)
 	{
 	  const line_map *map = linemap_lookup (line_table, loc);
 	  const line_map_macro *mac_map = linemap_check_macro (map);
-	  hashval_t hv = macro_traits::hash (mac_map);
-	  macro_info *slot
-	= macro_table->find_slot_with_hash (mac_map, hv, INSERT);
+	  hashval_t hv = macro_loc_traits::hash (mac_map);
+	  macro_loc_info *slot
+	= macro_loc_table->find_slot_with_hash (mac_map, hv, INSERT);
 	  if (!slot->src)
 	{
 	  slot->src = mac_map;
@@ -15698,11 +15698,11 @@ module_state::write_location (bytes_out &sec, location_t loc)
 }
   else if (loc >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
 {
-  const macro_info *info = nullptr;
+  const macro_loc_info *info = nullptr;
   unsigned offset = 0;
-  if (unsigned hwm = macro_remap->length ())
+  if (unsigned hwm = macro_loc_remap->length ())
 	{
-	  info = macro_remap->begin ();
+	  info = macro_loc_remap->begin ();
 	  while (hwm != 1)
 	{
 	  unsigned mid = hwm / 2;
@@ -15909,7 +15909,7 @@ module_state::read_location (bytes_in &sec) const
 void
 module_state::write_init_maps ()
 {
-  macro_table = new hash_table (EXPERIMENT (1, 400));
+  macro_loc_table = new hash_table (EXPERIMENT (1, 400));
 }
 
 location_map_info
@@ -15955,30 +15955,6 @@ module_state::write_prepare_maps (module_state_config *cfg)
 
 	  info.num_maps.first += omap - fmap;
 	}
-
-  if (span.macro.first != span.macro.second)
-	{
-	  /* Iterate over the span's macros, to elide the empty
-	 expansions.  */
-	  unsigned count = 0;
-	  for (unsigned macro
-		 = linemap_lookup_macro_index (line_table,
-	   span.macro.second - 1);
-	   macro < LINEMAPS_MACRO_USED (line_table);
-	   macro++)
-	{
-	  line_map_macro const *mmap
-		= LINEMAPS_MACRO_MAP_AT (line_table, macro);
-	  if (MAP_START_LOCATION (mmap) < span.macro.first)
-		/* Fallen out of the span.  */
-		break;
-
-	  if (mmap->n_tokens)
-		count++;
-	}
-	  dump (dumper::LOCATION) && dump ("Span:%u %u macro maps", ix, count);
-	  info.num_maps.second += count;
-	}
 }
 
   /* Adjust the maps.  Ordinary ones ascend, and we must maintain
@@ -16024,23 +16000,23 @@ module_state::write_prepare_maps (module_state_config *cfg)
   ord_off = span.ordinary.second + span.ordinary_delta;
 }
 
-  vec_alloc (macro_remap, macro_

Re: [PATCH RFA] ubsan: do return check with -fsanitize=unreachable

2022-06-29 Thread Jason Merrill via Gcc-patches


On 6/27/22 11:44, Jakub Jelinek wrote:

On Wed, Jun 22, 2022 at 12:04:59AM -0400, Jason Merrill wrote:

On 6/20/22 16:16, Jason Merrill wrote:

On 6/20/22 07:05, Jakub Jelinek wrote:

On Fri, Jun 17, 2022 at 05:20:02PM -0400, Jason Merrill wrote:

Related to PR104642, the current situation where we get less
return checking
with just -fsanitize=unreachable than no sanitize flags seems
undesirable; I
propose that we do return checking when -fsanitize=unreachable.


__builtin_unreachable itself (unless turned into trap or
__ubsan_handle_builtin_unreachable) is not any kind of return
checking, it
is just an optimization.


Yes, but I'm talking about "when -fsanitize=unreachable".


The usual case is that people just use -fsanitize=undefined
and get both return and unreachable sanitization, for fall through
into end of functions returning non-void done through return sanitization.

In the rare case people use something different like
-fsanitize=undefined -fno-sanitize=return
or
-fsanitize=unreachable
etc., they presumably don't want the fall through from end of function
diagnosed at runtime.


I disagree with this assumption for the second case; it seems much more 
likely to me that the user just wasn't thinking about needing to also 
mention return.  Missing return is a logical subset of unreachable; if 
we sanitize all the other __builtin_unreachable introduced by the 
compiler, why in the world would we want to leave out this one that is 
such a frequent error?


Full -fsanitize=undefined is much higher overhead than just 
-fsanitize=unreachable, which introduces no extra checks.  And adding 
return checking to unreachable is essentially zero overhead.  I can't 
imagine any reason to want to check unreachable points EXCEPT for 
missing return.



I think the behavior we want is:
1) -fsanitize=return is on -> use ubsan_instrument_return
(__ubsan_missing_return_data or __builtin_trap depending on
 -fsanitize-trap=return); otherwise
2) -funreachable-traps is on (from -O0/-Og by default or explicit),
emit __builtin_trap; otherwise
3) -fsanitize=unreachable is on, not emit anything (__builtin_unreachable
would be just an optimization, but user asked not to instrument returns,
only unreachable, so honor user's decision and avoid confusion); otherwise 
> 4) -O0 is on, not emit anything (__builtin_unreachable wouldn't be much
of an optimization, just surprising and hard to debug effect); otherwise
5) emit __builtin_unreachable

Current trunk with your PR104642 fix in implements 1), will do 2)
only if -fsanitize=unreachable is not on, will do 3), will do 4) and 5).

So, I'd change cp-gimplify.cc (cp_maybe_instrument_return), change:
   if (!sanitize_flags_p (SANITIZE_RETURN, fndecl)
   && ((!optimize && !flag_unreachable_traps)
  || sanitize_flags_p (SANITIZE_UNREACHABLE, fndecl)))
 return;
to
   if (!sanitize_flags_p (SANITIZE_RETURN, fndecl)
   && !flag_unreachable_traps
   && (!optimize || sanitize_flags_p (SANITIZE_UNREACHABLE, fndecl)))
 return;
and
   if (sanitize_flags_p (SANITIZE_RETURN, fndecl))
 t = ubsan_instrument_return (loc);
   else
 t = build_builtin_unreachable (BUILTINS_LOCATION);
to
   if (sanitize_flags_p (SANITIZE_RETURN, fndecl))
 t = ubsan_instrument_return (loc);
   else if (flag_unreachable_traps)
 t = build_call_expr_loc (BUILTINS_LOCATION,
 builtin_decl_explicit (BUILT_IN_TRAP), 0);
   else
 t = build_builtin_unreachable (BUILTINS_LOCATION);

Jakub

Re: Ping^2: [PATCH v2] diagnostics: Honor #pragma GCC diagnostic in the preprocessor [PR53431]

2022-06-29 Thread Jason Merrill via Gcc-patches


On 6/23/22 13:03, Lewis Hyatt via Gcc-patches wrote:

Hello-

https://gcc.gnu.org/pipermail/gcc-patches/2022-May/595556.html
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53431#c49

Would a C++ maintainer have some time to take a look at this patch
please? I feel like the PR is still worth resolving. If this doesn't
seem like a good way, I am happy to try another -- would really
appreciate any feedback. Thanks!


Thanks for your persistence, I'll take a look now.

Incidentally, when pinging it's often useful to ping someone from 
MAINTAINERS directly, as well as the list.  I think your last ping got 
eaten by some trouble Red Hat email was having at the time.


The cp_token_is_module_directive cleanup is OK.


+  bool skip_this_pragma;


This member seems to be equivalent to
 in_pragma && !should_output_pragmas ()
Maybe it could be a member function instead of a data member?

More soon.

Jason

Re: [Patch][v4] OpenMP: Move omp requires checks to libgomp

2022-06-29 Thread Jakub Jelinek via Gcc-patches

On Wed, Jun 29, 2022 at 04:33:02PM +0200, Tobias Burnus wrote:
> --- a/gcc/c/c-parser.cc
> +++ b/gcc/c/c-parser.cc
> @@ -2488,6 +2488,12 @@ c_parser_declaration_or_fndef (c_parser *parser, bool 
> fndef_ok,
> break;
>   }
>  
> +  if (flag_openmp
> +   && lookup_attribute ("omp declare target",
> +DECL_ATTRIBUTES (current_function_decl)))
> + omp_requires_mask
> +   = (enum omp_requires) (omp_requires_mask | OMP_REQUIRES_TARGET_USED);
> +
>if (DECL_DECLARED_INLINE_P (current_function_decl))
>  tv = TV_PARSE_INLINE;
>else

I thought the above would be left out, at least until clarified what exactly
we mean with device routines in the restrictions.

> --- a/gcc/cp/parser.cc
> +++ b/gcc/cp/parser.cc
> @@ -15389,6 +15389,11 @@ cp_parser_simple_declaration (cp_parser* parser,
> /* Otherwise, we're done with the list of declarators.  */
> else
>   {
> +   if (flag_openmp && lookup_attribute ("omp declare target",
> +DECL_ATTRIBUTES (decl)))
> + omp_requires_mask
> +   = (enum omp_requires) (omp_requires_mask
> +  | OMP_REQUIRES_TARGET_USED);
> pop_deferring_access_checks ();
> cp_finalize_omp_declare_simd (parser, &odsd);
> return;

And the above too.

On the Fortran side I don't see it being done.
> --- a/gcc/lto-cgraph.cc
> +++ b/gcc/lto-cgraph.cc
> @@ -1068,12 +1069,28 @@ read_string (class lto_input_block *ib)
>  void
>  output_offload_tables (void)
>  {
> -  if (vec_safe_is_empty (offload_funcs) && vec_safe_is_empty (offload_vars))
> +  bool output_requires = (flag_openmp
> +   && (omp_requires_mask & OMP_REQUIRES_TARGET_USED) != 
> 0);
> +  if (vec_safe_is_empty (offload_funcs) && vec_safe_is_empty (offload_vars)
> +  && !output_requires)
>  return;
>  
>struct lto_simple_output_block *ob
>  = lto_create_simple_output_block (LTO_section_offload_table);
>  
> +  if (output_requires)
> +{
> +  HOST_WIDE_INT val = ((HOST_WIDE_INT) omp_requires_mask
> +& (OMP_REQUIRES_UNIFIED_ADDRESS
> +   | OMP_REQUIRES_UNIFIED_SHARED_MEMORY
> +   | OMP_REQUIRES_REVERSE_OFFLOAD
> +   | OMP_REQUIRES_TARGET_USED));

Why is the OMP_REQUIRES_TARGET_USED bit saved there?  It is always set
if output_requires...
If we want to make sure it is set in omp_requires_mask after streaming
in, we can just or it in back there.

> @@ -1811,6 +1844,24 @@ input_offload_tables (bool do_force_output)
> if (do_force_output)
>   varpool_node::get (var_decl)->force_output = 1;
>   }
> +   else if (tag == LTO_symtab_edge)
> + {
> +   static bool error_emitted = false;
> +   HOST_WIDE_INT val = streamer_read_hwi (ib);
> +
> +   if (omp_requires_mask == 0)
> + omp_requires_mask = (omp_requires) val;

I mean here: (omp_requires) (val | OMP_REQUIRES_TARGET_USED);

> +   else if (omp_requires_mask != val && !error_emitted)
> + {
> +   char buf[64], buf2[64];
> +   omp_requires_to_name (buf, sizeof (buf), omp_requires_mask);
> +   omp_requires_to_name (buf2, sizeof (buf2), val);
> +   error ("OpenMP % directive with non-identical "
> +  "clauses in multiple compilation units: %qs vs. %qs",
> +  buf, buf2);

I think the user should be told also where, so file_data->file_name and
saved another filename from the if (omp_requires_mask == 0) body.
I admit I haven't investigated whether it would be enough to just record
the const char * file_data->file_name or whether what it points to might be
freed before we report it.

> +   error_emitted = true;

> --- a/gcc/omp-offload.cc
> +++ b/gcc/omp-offload.cc
> @@ -398,6 +399,26 @@ omp_finish_file (void)
>unsigned num_funcs = vec_safe_length (offload_funcs);
>unsigned num_vars = vec_safe_length (offload_vars);
>  
> +#ifndef ACCEL_COMPILER
> +  if (flag_openmp && (omp_requires_mask & OMP_REQUIRES_TARGET_USED) != 0)
> +{
> +  tree var = build_decl (UNKNOWN_LOCATION, VAR_DECL,
> +  get_identifier ("__offload_requires_mask"),
> +  unsigned_type_node);
> +  TREE_PUBLIC (var) = 1;
> +  TREE_STATIC (var) = 1;
> +  TREE_READONLY (var) = 1;
> +  DECL_INITIAL (var)
> + = build_int_cst (unsigned_type_node,
> +  ((unsigned int) omp_requires_mask
> +   & (OMP_REQUIRES_UNIFIED_ADDRESS
> +  | OMP_REQUIRES_UNIFIED_SHARED_MEMORY
> +  | OMP_REQUIRES_REVERSE_OFFLOAD)));
> +  declare_weak (var);
> +  varpool_node::finalize_decl (var);
> +}
> +#endif

I think this

Re: [PATCH RFA] ubsan: do return check with -fsanitize=unreachable

2022-06-29 Thread Jakub Jelinek via Gcc-patches

On Wed, Jun 29, 2022 at 12:42:04PM -0400, Jason Merrill wrote:
> > The usual case is that people just use -fsanitize=undefined
> > and get both return and unreachable sanitization, for fall through
> > into end of functions returning non-void done through return sanitization.
> > 
> > In the rare case people use something different like
> > -fsanitize=undefined -fno-sanitize=return
> > or
> > -fsanitize=unreachable
> > etc., they presumably don't want the fall through from end of function
> > diagnosed at runtime.
> 
> I disagree with this assumption for the second case; it seems much more
> likely to me that the user just wasn't thinking about needing to also
> mention return.  Missing return is a logical subset of unreachable; if we
> sanitize all the other __builtin_unreachable introduced by the compiler, why
> in the world would we want to leave out this one that is such a frequent
> error?

UBSan was initially implemented in LLVM and our -fsanitize= options try to
match where possible what they do.
And their behavior is too that return and unreachable are separate
sanitizers, fallthrough from function return is sanitized only for the
former, they apparently at -O0 implement something like -funreachable-traps
(but not at -Og) and emit a trap there (regardless of
-fsanitize=unreachable), for -O1 and higher they act as if non-sanitized
__builtin_unreachable () is in there regardless of -fsanitize=unreachable.

It would be strange to diverge from this without a strong reason.
The fact that we use __builtin_unreachable for the function ends is just our
implementation detail and when we'd report that to users, they'd just be
confused on what's going on.  With -fsanitize=return they are told what
happens.

> Full -fsanitize=undefined is much higher overhead than just
> -fsanitize=unreachable, which introduces no extra checks.  And adding return
> checking to unreachable is essentially zero overhead.  I can't imagine any
> reason to want to check unreachable points EXCEPT for missing return.

-fsanitize=unreachable isn't zero overhead, it will force evaluation of all
the conditionals guarding it and preparation of arguments for it etc.

Jakub

[PATCH] c++: generic targs and identity substitution [PR105956]

2022-06-29 Thread Patrick Palka via Gcc-patches

In r13-1045-gcb7fd1ea85feea I assumed that substitution into generic
DECL_TI_ARGS corresponds to an identity mapping of the given arguments,
and hence its safe to always elide such substitution.  But this PR
demonstrates that such a substitution isn't always the identity mapping,
in particular when there's an ARGUMENT_PACK_SELECT argument, which gets
handled specially during substitution:

  * when substituting an APS into a template parameter, we strip the
APS to its underlying argument;
  * and when substituting an APS into a pack expansion, we strip the
APS to its underlying argument pack.

In this testcase, when expanding the pack expansion pattern (idx + Ns)...
with Ns={0,1}, we specialize idx twice, first with Ns=APS<0,{0,1}> and
then Ns=APS<1,{0,1}>.  The DECL_TI_ARGS of idx are the generic template
arguments of the enclosing class template impl, so before r13-1045,
we'd substitute into its DECL_TI_ARGS which gave Ns={0,1} as desired.
But after r13-1045, we elide this substitution and end up attempting to
hash the original Ns argument, an APS, which ICEs.

So this patch partially reverts this part of r13-1045.  I considered
using preserve_args in this case instead, but that'd break the
static_assert in the testcase because preserve_args always strips APS to
its underlying argument, but here we want to strip it to its underlying
argument pack, so we'd incorrectly end up forming the specializations
impl<0>::idx and impl<1>::idx instead of impl<0,1>::idx.

Although we can't elide the substitution into DECL_TI_ARGS in light of
ARGUMENT_PACK_SELECT, it should still be safe to elide template argument
coercion in the case of a non-template decl, which this patch preserves.

It's unfortunate that we need to remove this optimization just because
it doesn't hold for one special tree code.  So this patch implements a
heuristic in tsubst_template_args to avoid allocating a new TREE_VEC if
the substituted elements are identical to those of a level from ARGS.
It turns out that about 30% of all calls to tsubst_template_args benefit
from this optimization, and it reduces memory usage by about 1.5% for
e.g. stdc++.h (relative to r13-1045).  (This is the maybe_reuse stuff,
the rest of the changes to tsubst_template_args are just drive-by
cleanups.)

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk?  Patch generated with -w to ignore noisy whitespace changes.

PR c++/105956

gcc/cp/ChangeLog:

* pt.cc (tsubst_template_args): Move variable declarations
closer to their first use.  Replace 'orig_t' with 'r'.  Rename
'need_new' to 'const_subst_p'.  Heuristically detect if the
substituted elements are identical to that of a level from
'args' and avoid allocating a new TREE_VEC if so.
(tsubst_decl) : Revert
r13-1045-gcb7fd1ea85feea change for avoiding substitution into
DECL_TI_ARGS, but still avoid coercion in this case.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/variadic183.C: New test.
---
 gcc/cp/pt.cc | 113 ++-
 gcc/testsuite/g++.dg/cpp0x/variadic183.C |  14 +++
 2 files changed, 85 insertions(+), 42 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/variadic183.C

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 8672da123f4..7898834faa6 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -27,6 +27,7 @@ along with GCC; see the file COPYING3.  If not see
  Fixed by: C++20 modules.  */
 
 #include "config.h"
+#define INCLUDE_ALGORITHM // for std::equal
 #include "system.h"
 #include "coretypes.h"
 #include "cp-tree.h"
@@ -13544,17 +13545,22 @@ tsubst_argument_pack (tree orig_arg, tree args, 
tsubst_flags_t complain,
 tree
 tsubst_template_args (tree t, tree args, tsubst_flags_t complain, tree in_decl)
 {
-  tree orig_t = t;
-  int len, need_new = 0, i, expanded_len_adjust = 0, out;
-  tree *elts;
-
   if (t == error_mark_node)
 return error_mark_node;
 
-  len = TREE_VEC_LENGTH (t);
-  elts = XALLOCAVEC (tree, len);
+  const int len = TREE_VEC_LENGTH (t);
+  tree *elts = XALLOCAVEC (tree, len);
+  int expanded_len_adjust = 0;
 
-  for (i = 0; i < len; i++)
+  /* True iff no element of T was changed by the substitution.  */
+  bool const_subst_p = true;
+
+  /* If MAYBE_REUSE is non-NULL, as an optimization we'll try to reuse and
+ return this TREE_VEC instead of allocating a new one if possible.  This
+ will either be ARGS or a level from ARGS.  */
+  tree maybe_reuse = NULL_TREE;
+
+  for (int i = 0; i < len; i++)
 {
   tree orig_arg = TREE_VEC_ELT (t, i);
   tree new_arg;
@@ -13580,56 +13586,90 @@ tsubst_template_args (tree t, tree args, 
tsubst_flags_t complain, tree in_decl)
   else if (ARGUMENT_PACK_P (orig_arg))
new_arg = tsubst_argument_pack (orig_arg, args, complain, in_decl);
   else
+   {
  new_arg = tsubst_template_arg (orig_arg, args, complain, in_decl);
 
+ /* If T heuristically ap

Re: [Patch][v4] OpenMP: Move omp requires checks to libgomp

2022-06-29 Thread Tobias Burnus


Hi Jakub,

On 29.06.22 19:02, Jakub Jelinek wrote:

On Wed, Jun 29, 2022 at 04:33:02PM +0200, Tobias Burnus wrote:

+  if (flag_openmp
+  && lookup_attribute ("omp declare target",
+   DECL_ATTRIBUTES (current_function_decl)))
+omp_requires_mask
+  = (enum omp_requires) (omp_requires_mask | OMP_REQUIRES_TARGET_USED);



I thought the above would be left out, at least until clarified what exactly
we mean with device routines in the restrictions.

Missed that – I thought mostly of the API calls. However, I concur that
for 'declare target', it is not really needed as no data transfers or
reverse offloads can happen. - Additionally, I took this part from
Chung-Lin's patch and did not really think about this part ...


--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc

And the above too.

On the Fortran side I don't see it being done.

(Likewise.)

--- a/gcc/lto-cgraph.cc
+++ b/gcc/lto-cgraph.cc
...
+  if (output_requires)
+{
+  HOST_WIDE_INT val = ((HOST_WIDE_INT) omp_requires_mask
+   & (OMP_REQUIRES_UNIFIED_ADDRESS
+  | OMP_REQUIRES_UNIFIED_SHARED_MEMORY
+  | OMP_REQUIRES_REVERSE_OFFLOAD
+  | OMP_REQUIRES_TARGET_USED));

Why is the OMP_REQUIRES_TARGET_USED bit saved there?  It is always set
if output_requires...
If we want to make sure it is set in omp_requires_mask after streaming
in, we can just or it in back there.


It is there because it is later needed: With -flto, we otherwise
wouldn't generate the variable in omp-offload.cc. And as this value is
only used internally, I thought it could just stay there. However, it
could also be excluded here and ...


@@ -1811,6 +1844,24 @@ input_offload_tables (bool do_force_output)
   if (do_force_output)
 varpool_node::get (var_decl)->force_output = 1;
 }
+  else if (tag == LTO_symtab_edge)
+{
+  static bool error_emitted = false;
+  HOST_WIDE_INT val = streamer_read_hwi (ib);
+
+  if (omp_requires_mask == 0)
+omp_requires_mask = (omp_requires) val;

... added here as:


I mean here: (omp_requires) (val | OMP_REQUIRES_TARGET_USED);

... but that also requires ...

+  else if (omp_requires_mask != val && !error_emitted)


... something like:

(omp_requires_mask & ~OMP_REQUIRES_TARGET_USED) != val

or something like that.


+{
+  char buf[64], buf2[64];
+  omp_requires_to_name (buf, sizeof (buf), omp_requires_mask);
+  omp_requires_to_name (buf2, sizeof (buf2), val);
+  error ("OpenMP % directive with non-identical "
+ "clauses in multiple compilation units: %qs vs. %qs",
+ buf, buf2);

I think the user should be told also where, so file_data->file_name


With -save-temps, the filename is indeed helpful, e.g.:
"a-requires-1.o". However, without, I get names like: "/tmp/ccIgRkOW.o".

As mentioned, I was thinking of storing after the value some location
data, e.g. DECL_SOURCE_FILE() or even DECL_SOURCE_LOCATION() – by
keeping track of the first 'omp requires' in a translation unit.

Those can be streamed with  streamer_write_string and
lto_output_location. However, the both require as argument an "struct
output_block" and in lto-cgraph.cc, I only have a "struct
lto_simple_output_block".

And for reading, I additionally need a "class data_in" argument.

Thus, I think it is doable – however, I was not quite sure whether it
made sense to do all the effort or not. (And it was not immediately
clear to me how to create a "data_in" class and ...)


Can't we arrange for GOMP_offload_register_ver to see the bitmasks somewhere
instead (and force GOMP_offload_register_ver even if there are no vars or
funcs and just the requires mask)?


This probably works – but means some restructuring.
GOMP_offload_register_ver assumes that num_devices is already available
– and we need to exclude those for which the 'omp requires' cannot be
fulfilled.

I think this could be either be done in GOMP_offload_register_ver by
decrementing num_offload_images + shifting the offload_images[i] entries
(or have some table to match user-visible numbers to the original number) .

Or it could just be done by setting a flag – and num_offload_images
updated later. We probably need something which is run later to exclude
those devices for which no image exists (existing but not supported
devices) – and for OpenMP 6's OMP_AVAILABLE_DEVICES env, which permits
to sort the devices and filter out some of them.


Could be the weak __offload_requires_mask (but we probably e.g. can't assume
declare_weak will work),

I assume that it does work in practice, given that it is only needed on
the host – and given which systems effectively support offloading. –
With -flto, we even would know that there is only one variable, but
unfortunately, we cannot count on a host lto1 run.

Or we could ask for the offloa

[PATCH] Fortran: error recovery on invalid CLASS(), PARAMETER declarations [PR105243]

2022-06-29 Thread Harald Anlauf via Gcc-patches

Dear all,

a CLASS entity cannot have the PARAMETER attribute.
This is detected in some situations, but in others
we ICE because we never reach the existing check.
Adding a similar check when handling the declaration
improves error recovery.

The initial patch is by Steve.  I adjusted and moved
it slightly so that it also handles CLASS(*)
(unlimited polymorphic) at the same time.

Regtested on x86_64-pc-linux-gnu.  OK for mainline?

This patch actually addresses multiple PRs, some of
which are marked as regressions.  As I consider the
patch safe, I would like to backport to open branches
as far as it seems appropriate.

Thanks,
Harald

From e0d5aeadd218f21e450db6601956691293210156 Mon Sep 17 00:00:00 2001
From: Harald Anlauf 
Date: Wed, 29 Jun 2022 21:36:17 +0200
Subject: [PATCH] Fortran: error recovery on invalid CLASS(), PARAMETER
 declarations [PR105243]

gcc/fortran/ChangeLog:

	PR fortran/103137
	PR fortran/103138
	PR fortran/103693
	PR fortran/105243
	* decl.cc (gfc_match_data_decl): Reject CLASS entity declaration
	when it is given the PARAMETER attribute.

gcc/testsuite/ChangeLog:

	PR fortran/103137
	PR fortran/103138
	PR fortran/103693
	PR fortran/105243
	* gfortran.dg/class_58.f90: Fix test.
	* gfortran.dg/class_73.f90: New test.
---
 gcc/fortran/decl.cc|  8 
 gcc/testsuite/gfortran.dg/class_58.f90 |  2 +-
 gcc/testsuite/gfortran.dg/class_73.f90 | 17 +
 3 files changed, 26 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gfortran.dg/class_73.f90

diff --git a/gcc/fortran/decl.cc b/gcc/fortran/decl.cc
index 26ff54d4684..339f8b15035 100644
--- a/gcc/fortran/decl.cc
+++ b/gcc/fortran/decl.cc
@@ -6262,6 +6262,14 @@ gfc_match_data_decl (void)
   goto cleanup;
 }

+  /* F2018:C708.  */
+  if (current_ts.type == BT_CLASS && current_attr.flavor == FL_PARAMETER)
+{
+  gfc_error ("CLASS entity at %C cannot have the PARAMETER attribute");
+  m = MATCH_ERROR;
+  goto cleanup;
+}
+
   if (current_ts.type == BT_CLASS
 	&& current_ts.u.derived->attr.unlimited_polymorphic)
 goto ok;
diff --git a/gcc/testsuite/gfortran.dg/class_58.f90 b/gcc/testsuite/gfortran.dg/class_58.f90
index 20b601a2f51..fceb575432d 100644
--- a/gcc/testsuite/gfortran.dg/class_58.f90
+++ b/gcc/testsuite/gfortran.dg/class_58.f90
@@ -9,5 +9,5 @@ subroutine s
   end type
   class(t), parameter :: x = t()  ! { dg-error "cannot have the PARAMETER attribute" }
   class(t), parameter :: y = x! { dg-error "cannot have the PARAMETER attribute" }
-  class(t) :: z = x   ! { dg-error "must be dummy, allocatable or pointer" }
+  class(t) :: z = t() ! { dg-error "must be dummy, allocatable or pointer" }
 end
diff --git a/gcc/testsuite/gfortran.dg/class_73.f90 b/gcc/testsuite/gfortran.dg/class_73.f90
new file mode 100644
index 000..c11ee38c086
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/class_73.f90
@@ -0,0 +1,17 @@
+! { dg-do compile }
+! Error recovery on invalid CLASS(), PARAMETER declarations
+! PR fortran/103137
+! PR fortran/103138
+! PR fortran/103693
+! PR fortran/105243
+! Contributed by G.Steinmetz
+
+program p
+  type t
+ character(3) :: c = '(a)'
+  end type
+  class(t), parameter :: x = 1.  ! { dg-error "PARAMETER attribute" }
+  class(*), parameter :: y = t() ! { dg-error "PARAMETER attribute" }
+  class(*), parameter :: z = 1   ! { dg-error "PARAMETER attribute" }
+  print x%c  ! { dg-error "Syntax error" }
+end
--
2.35.3

[Patch] OpenMP: Prepare omp-* for ancestor:1 handling

2022-06-29 Thread Tobias Burnus


Currently, this is a rather useless patch - even though it helps to reduce
the number of local patches I have. Due to the printed sorry, adding a
testcase with -fdump-tree-* is also not possible, yet.

For reverse offload, the plan is to call
  GOMP_target_ext
inside the on the device, passing
  'device(omp_initial_device)'
alias device(GOMP_DEVICE_HOST_FALLBACK) to the
target device's libgomp.

The pointer to the generated target-region function is then
passed as argument. However, that only works if that function
is not nullified ...

The reason that nullifying was added is:
  https://gcc.gnu.org/PR100573
  https://gcc.gnu.org/r12-1066-g95d67762171f83277a5700b270c0d1e2756f83f4
  https://gcc.gnu.org/pipermail/gcc-patches/2021-May/571285.html


Note: Instead of just checking for GOMP_DEVICE_HOST_FALLBACK,
more effort could be done, e.g. by setting some attribute on the
generated function and then check for check for it. Example:
'omp target device_ancestor' + using lookup_attribute).

That's what's done in the second variant.

OK for mainline (which variant)? Or do you prefer to wait for
a more complete patch?

Tobias

PS: Reverse offload - still to do:
- 'requires' patch
- Generate two variants of the target-region function:
  an empty version on the device (just to have a pointer address
  in the offload_func table) and the full version (on the host only)
Those together are sufficient for a omp_get_num_device() == 0 version
(implied by 'required reverse_offload' not being fulfilled by any device).
For a more useful implementation, more work inside libgomp is required.
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
OpenMP: Prepare omp-* for ancestor:1 handling

gcc/ChangeLog:

	* omp-expand.cc (expand_omp_target): Set device to
	GOMP_DEVICE_HOST_FALLBACK for ancestor.
	* omp-offload.cc (pass_omp_target_link::execute): Don't
	nullify function pointer for ancestor:1.

 gcc/omp-expand.cc  | 6 +-
 gcc/omp-offload.cc | 4 
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/gcc/omp-expand.cc b/gcc/omp-expand.cc
index 1023c56fc3d..dc0a963e9e3 100644
--- a/gcc/omp-expand.cc
+++ b/gcc/omp-expand.cc
@@ -10005,7 +10005,11 @@ expand_omp_target (struct omp_region *region)
 	need_device_adjustment = true;
 	  device_loc = OMP_CLAUSE_LOCATION (c);
 	  if (OMP_CLAUSE_DEVICE_ANCESTOR (c))
-	sorry_at (device_loc, "% not yet supported");
+	{
+	  device = build_int_cst (integer_type_node,
+  GOMP_DEVICE_HOST_FALLBACK);
+	  sorry_at (device_loc, "% not yet supported");
+	}
 	}
   else
 	{
diff --git a/gcc/omp-offload.cc b/gcc/omp-offload.cc
index 3a89119371c..d72c1ac23f3 100644
--- a/gcc/omp-offload.cc
+++ b/gcc/omp-offload.cc
@@ -2803,6 +2803,10 @@ pass_omp_target_link::execute (function *fun)
 	{
 	  if (gimple_call_builtin_p (gsi_stmt (gsi), BUILT_IN_GOMP_TARGET))
 	{
+	  tree dev = gimple_call_arg (gsi_stmt (gsi), 0);
+	  if (TREE_CODE (dev) == INTEGER_CST
+		  && wi::to_wide (dev) == GOMP_DEVICE_HOST_FALLBACK)
+		continue;  /* ancestor:1  */
 	  /* Nullify the second argument of __builtin_GOMP_target_ext.  */
 	  gimple_call_set_arg (gsi_stmt (gsi), 1, null_pointer_node);
 	  update_stmt (gsi_stmt (gsi));
OpenMP: Prepare omp-* for ancestor:1 handling

gcc/ChangeLog:

	* omp-expand.cc (expand_omp_target): Set device to
	GOMP_DEVICE_HOST_FALLBACK for ancestor.
	* omp-low.cc (scan_omp_target): Add 'omp target device_ancestor'
	attribute to generated target-region function for ancestor:1.
	* omp-offload.cc (pass_omp_target_link::execute): Don't nullify
	function pointer for ancestor:1.

 gcc/omp-expand.cc  | 6 +-
 gcc/omp-low.cc | 6 ++
 gcc/omp-offload.cc | 9 +
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/gcc/omp-expand.cc b/gcc/omp-expand.cc
index 1023c56fc3d..dc0a963e9e3 100644
--- a/gcc/omp-expand.cc
+++ b/gcc/omp-expand.cc
@@ -10005,7 +10005,11 @@ expand_omp_target (struct omp_region *region)
 	need_device_adjustment = true;
 	  device_loc = OMP_CLAUSE_LOCATION (c);
 	  if (OMP_CLAUSE_DEVICE_ANCESTOR (c))
-	sorry_at (device_loc, "% not yet supported");
+	{
+	  device = build_int_cst (integer_type_node,
+  GOMP_DEVICE_HOST_FALLBACK);
+	  sorry_at (device_loc, "% not yet supported");
+	}
 	}
   else
 	{
diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc
index b9d5529f212..140ef229cc0 100644
--- a/gcc/omp-low.cc
+++ b/gcc/omp-low.cc
@@ -3094,6 +3094,12 @@ scan_omp_target (gomp_target *stmt, omp_context *outer_ctx)
   if (offloaded)
 {
   create_omp_child_function (ctx, false);
+  tree c = omp_find_clause (gimple_omp_target_clauses (ctx->stmt),
+OMP_CLAUSE_DEVICE);
+  if (c && OMP_CLAUSE_DEVICE_ANCESTOR (c))
+	DECL_ATTRIBUTES (ctx->cb.dst_fn)
+

Re: [Patch][v4] OpenMP: Move omp requires checks to libgomp

2022-06-29 Thread Jakub Jelinek via Gcc-patches

On Wed, Jun 29, 2022 at 08:10:10PM +0200, Tobias Burnus wrote:
> > > +  if (output_requires)
> > > +{
> > > +  HOST_WIDE_INT val = ((HOST_WIDE_INT) omp_requires_mask
> > > +   & (OMP_REQUIRES_UNIFIED_ADDRESS
> > > +  | OMP_REQUIRES_UNIFIED_SHARED_MEMORY
> > > +  | OMP_REQUIRES_REVERSE_OFFLOAD
> > > +  | OMP_REQUIRES_TARGET_USED));
> > Why is the OMP_REQUIRES_TARGET_USED bit saved there?  It is always set
> > if output_requires...
> > If we want to make sure it is set in omp_requires_mask after streaming
> > in, we can just or it in back there.
> 
> It is there because it is later needed: With -flto, we otherwise
> wouldn't generate the variable in omp-offload.cc. And as this value is
> only used internally, I thought it could just stay there. However, it
> could also be excluded here and ...

Ok, let's keep it then.  Wanted to save that 1 bit somewhere, but as it
isn't a pack, it is insignificant anyway.
More could be saved by reordering the bitmasks such that the atomic stuff is
in upper bits.

> > > +  char buf[64], buf2[64];
> > > +  omp_requires_to_name (buf, sizeof (buf), 
> > > omp_requires_mask);
> > > +  omp_requires_to_name (buf2, sizeof (buf2), val);
> > > +  error ("OpenMP % directive with non-identical "
> > > + "clauses in multiple compilation units: %qs vs. 
> > > %qs",
> > > + buf, buf2);
> > I think the user should be told also where, so file_data->file_name
> 
> With -save-temps, the filename is indeed helpful, e.g.:
> "a-requires-1.o". However, without, I get names like: "/tmp/ccIgRkOW.o".

Even when using
gcc -fopenmp -c foo.c -o foo.o
gcc -fopenmp -c bar.c -o bar.o
gcc -fopenmp -o foo foo.o bar.o
?
Anyway, I think it would be good to ask Richi or Honza what is the best to
get at the TU's filename.
Perhaps location_t from TRANSLATION_UNIT_DECL or something similar?

> > Can't we arrange for GOMP_offload_register_ver to see the bitmasks somewhere
> > instead (and force GOMP_offload_register_ver even if there are no vars or
> > funcs and just the requires mask)?
> 
> This probably works – but means some restructuring.
> GOMP_offload_register_ver assumes that num_devices is already available
> – and we need to exclude those for which the 'omp requires' cannot be
> fulfilled.

GOMP_offload_register_ver is called from initializers of programs and shared
libraries.  For the program and non-dlopened shared libraries, the usual
case is that it is called before we initialize devices, so we just record it
in offload_images and continue.  When the devices are initialized, we push
it to those devices.

Another case is registration after number of devices is initialized.

The former should just enqueue also those bitmasks, and when we initialize
devices we should complain on mismatches and/or filter out unsuitable
devices.

For the latter case, in theory (at least when we also catch calls to the
device routines with the meaning of device related omp_* APIs) that means
some TU already had the mask and so the later loaded masks should be the
same, so we should just complain on mismatches.

Now, the target data I'm afraid is device specific, but for GOMP_VERSION 2
we could e.g. have the pointer passed to the function point to target data
with the mask at offset -4 bytes before it, or can just have always the
bitmask followed by real target data.

Jakub

lto: Fix option merging [PR106129]

2022-06-29 Thread Joseph Myers

The LTO merging of options from different input files was broken by:

commit 227a2ecf663d69972b851f51f1934d18927b62cd
Author: Martin Liska 
Date:   Fri Mar 12 11:53:47 2021 +0100

lto-wrapper: Use vec data type.

Previously, find_and_merge_options would merge options it read into
those in *opts. After this commit, options in *opts on entry to
find_and_merge_options are ignored; the only merging that takes place
is between multiple sets of options in the same input file that are
read in the same call to this function (not sure how that case can
occur at all). The effects include, for example, that if some objects
are built with PIC enabled and others with it disabled, and the last
LTO object processed has PIC enabled, the choice of PIC for the last
object will result in the whole program being built as PIC, when the
merging logic is intended to ensure that a mixture of PIC and non-PIC
objects results in the whole program being built as non-PIC.

Fix this with an extra argument to find_and_merge_options to determine
whether merging should take place.  This shows up a second issue with
that commit (which I think wasn't actually intended to change code
semantics at all): once merging is enabled again, the check for
-Xassembler options became an infinite loop in the case where both
inputs had -Xassembler options, with the same first option, so fix
that loop to restore the previous semantics.

Note that I'm not sure how LTO option merging might be tested in the
testsuite (clearly there wasn't sufficient, if any, coverage to detect
these bugs).

Bootstrapped with no regressions for x86_64-pc-linux-gnu.  OK to
commit?

PR lto/106129
* lto-wrapper.cc (find_option): Add argument start.
(merge_and_complain): Loop over existing_opt_index and
existing_opt2_index for Xassembler check.  Update calls to
find_option.
(find_and_merge_options): Add argument first to determine whether
to merge options with those passed in *opts.
(run_gcc): Update calls to find_and_merge_options.

diff --git a/gcc/lto-wrapper.cc b/gcc/lto-wrapper.cc
index 26e06e7..795ab74 100644
--- a/gcc/lto-wrapper.cc
+++ b/gcc/lto-wrapper.cc
@@ -170,13 +170,14 @@ get_options_from_collect_gcc_options (const char 
*collect_gcc,
   return decoded;
 }
 
-/* Find option in OPTIONS based on OPT_INDEX.  -1 value is returned
-   if the option is not present.  */
+/* Find option in OPTIONS based on OPT_INDEX, starting at START.  -1
+   value is returned if the option is not present.  */
 
 static int
-find_option (vec &options, size_t opt_index)
+find_option (vec &options, size_t opt_index,
+unsigned start = 0)
 {
-  for (unsigned i = 0; i < options.length (); ++i)
+  for (unsigned i = start; i < options.length (); ++i)
 if (options[i].opt_index == opt_index)
   return i;
 
@@ -575,13 +576,16 @@ merge_and_complain (vec 
&decoded_options,
else
  j++;
 
+  int existing_opt_index, existing_opt2_index;
   if (!xassembler_options_error)
-for (i = j = 0; ; i++, j++)
+for (existing_opt_index = existing_opt2_index = 0; ;
+existing_opt_index++, existing_opt2_index++)
   {
-   int existing_opt_index
- = find_option (decoded_options, OPT_Xassembler);
-   int existing_opt2_index
- = find_option (fdecoded_options, OPT_Xassembler);
+   existing_opt_index
+ = find_option (decoded_options, OPT_Xassembler, existing_opt_index);
+   existing_opt2_index
+ = find_option (fdecoded_options, OPT_Xassembler,
+existing_opt2_index);
 
cl_decoded_option *existing_opt = NULL;
cl_decoded_option *existing_opt2 = NULL;
@@ -1100,7 +1104,7 @@ find_crtoffloadtable (int save_temps, const char *dumppfx)
 
 static bool
 find_and_merge_options (int fd, off_t file_offset, const char *prefix,
-   vec decoded_cl_options,
+   vec decoded_cl_options, bool first,
vec *opts, const char *collect_gcc)
 {
   off_t offset, length;
@@ -1110,6 +1114,9 @@ find_and_merge_options (int fd, off_t file_offset, const 
char *prefix,
   int err;
   vec fdecoded_options;
 
+  if (!first)
+fdecoded_options = *opts;
+
   simple_object_read *sobj;
   sobj = simple_object_start_read (fd, file_offset, "__GNU_LTO",
   &errmsg, &err);
@@ -1130,7 +1137,6 @@ find_and_merge_options (int fd, off_t file_offset, const 
char *prefix,
   data = (char *)xmalloc (length);
   read (fd, data, length);
   fopts = data;
-  bool first = true;
   do
 {
   vec f2decoded_options
@@ -1417,8 +1423,10 @@ run_gcc (unsigned argc, char *argv[])
   int auto_parallel = 0;
   bool no_partition = false;
   const char *jobserver_error = NULL;
+  bool fdecoded_options_first = true;
   vec fdecoded_options;
   fdecoded_options.create (16);
+  bool offload_fdecoded_options_first = true;
   vec offload_fdecoded_options = vNULL;
   struct obstack argv_obstack;

Re: [GCC 13][PATCH] PR101836: Add a new option -fstrict-flex-array[=n] and use it in __builtin_object_size

2022-06-29 Thread Qing Zhao via Gcc-patches

Hi, Jakub and Joseph:


> On Jun 28, 2022, at 12:43 PM, Jakub Jelinek  wrote:
> 
> On Tue, Jun 28, 2022 at 03:59:22PM +, Qing Zhao via Gcc-patches wrote:
>>> On Jun 28, 2022, at 11:08 AM, Jakub Jelinek  wrote:
>>> 
>>> On Tue, Jun 28, 2022 at 03:03:12PM +, Qing Zhao wrote:
 2. Then replace all “array_at_struct_end_p” with using DECL_NOT_FLEXARRAY 
 in GCC, adding new testing cases
>>> 
>>> No, IMHO array_at_struct_end_p should stay as is, just test this extra flag
>>> too.
>> 
>> Could you please explain why we still need “array_at_struct_end_p” after we 
>> have the DECL_NOT_FLEXARRAY flag in FIELD_DECL?
> 
> Because the flag just tells whether some array shouldn't be treated as (poor 
> man's)
> flexible array member.  We still need to find out if some FIELD_DECL is to
> be treated like a flexible array member, which is a minority of
> COMPONENT_REFs.
> struct S { int a; char b[0]; int c; } s;
> struct T { int d; char e[]; };
> struct U { int f; struct T g; int h; } u;
> Neither s.b nor u.g.e is to be treated like flexible array member,
> no matter what -fstrict-flex-array= option is used.

I studied the above a little bit today with the following small testing cases:
[opc@qinzhao-ol8u3-x86 trailing_array]$ cat t1.c
struct AX
{
  int n;
  short ax[];
  int m;
};

void warn_ax_local (struct AX *p)
{
  p->ax[2] = 0;   
}
[opc@qinzhao-ol8u3-x86 trailing_array]$ /home/opc/Install/latest/bin/gcc -O2 
-Wall  t1.c -S
t4.c:4:9: error: flexible array member not at end of struct
4 |   short ax[];
  | ^~

Looks like that it’s an error when the flexible array member is not at end of a 
struct.

However, for

[opc@qinzhao-ol8u3-x86 trailing_array]$ cat t2.c
struct AX
{
  int n;
  short ax[];
};

struct UX
{
  struct AX b;
  int m;
};

void warn_ax_local (struct AX *p, struct UX *q)
{
  p->ax[2] = 0;   
  q->b.ax[2] = 0;
}

[opc@qinzhao-ol8u3-x86 trailing_array]$  /home/opc/Install/latest/bin/gcc -O2 
-Wall  t2.c -S
[opc@qinzhao-ol8u3-x86 trailing_array]$ 


My impression is:

C frontend is able to check whether the field is at the end of the structure 
and report error when it detect a flexible array member is not at the end of 
struct.

My question is:

1. Is the usage of flexible array member in struct UX of t2.c correct?
2. Why it’s correct?
3. If not correct, is this a bug in FE?

Thanks.

Qing

[committed] d: Fix error: aggregate value used where floating point was expected (PR106139)

2022-06-29 Thread Iain Buclaw via Gcc-patches

Hi,

Casting from vector to static array is permitted in the D, and the
front-end generates a reinterpret cast, but casting back the other way
resulted in an error.

This has been fixed to be properly handled in the code generation pass
of VectorExp, and the conversion for lvalue and rvalue handling done in
convert_expr and convert_for_rvalue respectively.

As this is a bug also affecting previous versions, this will be
backported as appropriate, with a couple more changes required to fix
other related issues in the dmd front-end, so it'll go out separately.

Bootstrapped and regression tested on x86_64-linux-gnu/-m32/-mx32, and
committed to mainline.

Regards,
Iain.

---
PR d/106139

gcc/d/ChangeLog:

* d-convert.cc (convert_expr): Handle casting from array to vector.
(convert_for_rvalue): Rewrite vector to array casts of the same
element type into a constructor.
(convert_for_assignment): Return calling convert_for_rvalue.
* expr.cc (ExprVisitor::visit (VectorExp *)): Handle generating a
vector expression from a static array.
* toir.cc (IRVisitor::visit (ReturnStatement *)): Call
convert_for_rvalue on return value.

gcc/testsuite/ChangeLog:

* gdc.dg/pr106139a.d: New test.
* gdc.dg/pr106139b.d: New test.
* gdc.dg/pr106139c.d: New test.
* gdc.dg/pr106139d.d: New test.
---
 gcc/d/d-convert.cc   | 44 +++-
 gcc/d/expr.cc| 10 ++--
 gcc/d/toir.cc|  1 +
 gcc/testsuite/gdc.dg/pr106139a.d | 36 ++
 gcc/testsuite/gdc.dg/pr106139b.d | 36 ++
 gcc/testsuite/gdc.dg/pr106139c.d | 27 
 gcc/testsuite/gdc.dg/pr106139d.d | 27 
 7 files changed, 178 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gdc.dg/pr106139a.d
 create mode 100644 gcc/testsuite/gdc.dg/pr106139b.d
 create mode 100644 gcc/testsuite/gdc.dg/pr106139c.d
 create mode 100644 gcc/testsuite/gdc.dg/pr106139d.d

diff --git a/gcc/d/d-convert.cc b/gcc/d/d-convert.cc
index 3a6a32ab024..ec5da6c10a6 100644
--- a/gcc/d/d-convert.cc
+++ b/gcc/d/d-convert.cc
@@ -502,6 +502,15 @@ convert_expr (tree exp, Type *etype, Type *totype)
  gcc_assert (totype->size () == etype->size ());
  result = build_vconvert (build_ctype (totype), exp);
}
+  else if (tbtype->ty == TY::Tvector && tbtype->size () == ebtype->size ())
+   {
+ /* Allow casting from array to vector as if its an unaligned load.  */
+ tree type = build_ctype (totype);
+ tree unaligned_type = build_variant_type_copy (type);
+ SET_TYPE_ALIGN (unaligned_type, 1 * BITS_PER_UNIT);
+ TYPE_USER_ALIGN (unaligned_type) = 1;
+ result = convert (type, build_vconvert (unaligned_type, exp));
+   }
   else
{
  error ("cannot cast expression of type %qs to type %qs",
@@ -643,6 +652,39 @@ convert_for_rvalue (tree expr, Type *etype, Type *totype)
   result = convert (build_ctype (tbtype), result);
 }
 
+  if (tbtype->ty == TY::Tsarray
+  && ebtype->ty == TY::Tsarray
+  && tbtype->nextOf ()->ty == ebtype->nextOf ()->ty
+  && INDIRECT_REF_P (expr)
+  && CONVERT_EXPR_CODE_P (TREE_CODE (TREE_OPERAND (expr, 0)))
+  && TREE_CODE (TREE_OPERAND (TREE_OPERAND (expr, 0), 0)) == ADDR_EXPR)
+{
+  /* If expression is a vector that was casted to an array either by
+explicit type cast or by taking the vector's `.array' value, strip the
+reinterpret cast and build a constructor instead.  */
+  tree ptr = TREE_OPERAND (TREE_OPERAND (expr, 0), 0);
+
+  if (VECTOR_TYPE_P (TREE_TYPE (TREE_TYPE (ptr
+   {
+ /* Rewrite: `*(Array *)&vector'
+   into: `{ vector[0], vector[1], ... }'  */
+ tree array = d_save_expr (TREE_OPERAND (ptr, 0));
+ array = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (expr), array);
+
+ uinteger_t dim = tbtype->isTypeSArray ()->dim->toUInteger ();
+ vec  *elms = NULL;
+ for (uinteger_t i = 0; i < dim; i++)
+   {
+ tree index = size_int (i);
+ tree value = build4 (ARRAY_REF, TREE_TYPE (TREE_TYPE (array)),
+  array, index, NULL_TREE, NULL_TREE);
+ CONSTRUCTOR_APPEND_ELT (elms, index, value);
+   }
+
+ return build_constructor (build_ctype (totype), elms);
+   }
+}
+
   return result ? result : convert_expr (expr, etype, totype);
 }
 
@@ -703,7 +745,7 @@ convert_for_assignment (tree expr, Type *etype, Type 
*totype)
   return expr;
 }
 
-  return convert_expr (expr, etype, totype);
+  return convert_for_rvalue (expr, etype, totype);
 }
 
 /* Return a TREE representation of EXPR converted to represent
diff --git a/gcc/d/expr.cc b/gcc/d/expr.cc
index 34b3ddd3f10..1bb10a835d2 100644
--- a/gcc/d/expr.cc
+++ b/gcc/d/expr.cc
@@ -291

Re: [GCC 13][PATCH] PR101836: Add a new option -fstrict-flex-array[=n] and use it in __builtin_object_size

2022-06-29 Thread Martin Sebor via Gcc-patches


On 6/28/22 13:01, Qing Zhao wrote:




On Jun 28, 2022, at 2:49 PM, Jakub Jelinek  wrote:

On Tue, Jun 28, 2022 at 06:29:01PM +, Qing Zhao wrote:




On Jun 28, 2022, at 2:22 PM, Jakub Jelinek  wrote:

On Tue, Jun 28, 2022 at 06:15:58PM +, Qing Zhao wrote:

Because the flag just tells whether some array shouldn't be treated as (poor 
man's)
flexible array member.  We still need to find out if some FIELD_DECL is to
be treated like a flexible array member, which is a minority of
COMPONENT_REFs.
struct S { int a; char b[0]; int c; } s;
struct T { int d; char e[]; };
struct U { int f; struct T g; int h; } u;
Neither s.b nor u.g.e is to be treated like flexible array member,
no matter what -fstrict-flex-array= option is used.


Then, to resolve this issue, we might need a opposite  flag DECL_IS_FLEXARRAY 
in FIELD_DECL?

The default is FALSE for all FIELD_DECL.


Doesn't matter whether it is positive or negative, you still need to analyze
it.  See the above example.  If you have struct T t; and test t.e, then it
is flexarray.  But u.g.e is not, even when the COMPONENT_REF refers to the
same FIELD_DECL.  In the t.e case e is the very last field, in the latter
case u.g.e is the last field in struct T, but struct U has the h field after


So, do you mean that the current FE analysis will not be able to decide whether 
a specific array field is at the end of the enclosing structure?
Only the middle end can decide this ?


Well, anything that analyzes it, can be in the FE or middle-end, but there
is no place to store it for later.


Then I am a little confused:

If the FE can decide wether an array field is at the end of the enclosing 
structure,  then combined with whether it’s a [0], [1] or [], and which level 
of -fstrict-flex-array,

The FE should be able to decide whether this array field is a flexible array 
member or not, then set the flag DECL_IS_FLEXARRAY (or DECL_NOT_FLEXARRAY).

The new flag is the place to store such info, right?
Do I miss anything here?


I think the problem is that there is just one FIELD_DECL for member
M of a given type T but there can be more than one instance of that
member, one in each struct that has a subobject of T as its own
member.  Whether M is or isn't a (valid) flexible array member
varies between the two instances.

Martin

[committed] wwwdocs: Document devel/omp/gcc-12

2022-06-29 Thread Kwok Cheung Yeung


Hello

The devel/omp/gcc-12 branch has been pushed to the git repo as the 
development branch for new OpenMP, OpenACC and offloading functionality, 
based on the GCC 12 branch.


The git doc page has been modified to point to the new branch as the 
active OMP development branch, while the previous devel/omp/gcc-11 
branch now joins the list of inactive OMP branches.


KwokFrom 0695e5e969eba730e517a6adbdf38b8774f89437 Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Wed, 29 Jun 2022 22:32:39 +0100
Subject: [PATCH] Document devel/omp/gcc-12 branch

Also moves the old devel/omp/gcc-11 branch to the inactive branches
section next to devel/omp/gcc-10.
---
 htdocs/git.html | 12 +++-
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/htdocs/git.html b/htdocs/git.html
index 5202363c..52ad05db 100644
--- a/htdocs/git.html
+++ b/htdocs/git.html
@@ -280,15 +280,15 @@ in Git.
   Makarov mailto:vmaka...@redhat.com";>vmaka...@redhat.com.
   
 
-  https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;a=shortlog;h=refs/heads/devel/omp/gcc-11";>devel/omp/gcc-11
+  https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;a=shortlog;h=refs/heads/devel/omp/gcc-12";>devel/omp/gcc-12
   This branch is for collaborative development of
   https://gcc.gnu.org/wiki/OpenACC";>OpenACC and
   https://gcc.gnu.org/wiki/openmp";>OpenMP support and related
   functionality, such
   as https://gcc.gnu.org/wiki/Offloading";>offloading support (OMP:
   offloading and multi processing).
-  The branch is based on releases/gcc-11.
-  Please send patch emails with a short-hand [og11] tag in the
+  The branch is based on releases/gcc-12.
+  Please send patch emails with a short-hand [og12] tag in the
   subject line, and use ChangeLog.omp files.
 
   unified-autovect
@@ -895,13 +895,15 @@ merged.
 
   https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;a=shortlog;h=refs/heads/devel/omp/gcc-9";>devel/omp/gcc-9
   https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;a=shortlog;h=refs/heads/devel/omp/gcc-10";>devel/omp/gcc-10
+  https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;a=shortlog;h=refs/heads/devel/omp/gcc-11";>devel/omp/gcc-11
   These branches were used for collaborative development of
   https://gcc.gnu.org/wiki/OpenACC";>OpenACC and
   https://gcc.gnu.org/wiki/openmp";>OpenMP support and related
   functionality as the successors to openacc-gcc-9-branch after the move to
   Git.
-  The branches were based on releases/gcc-9 and releases/gcc-10 respectively.
-  Development has now moved to the devel/omp/gcc-11 branch.
+  The branches were based on releases/gcc-9, releases/gcc-10 and
+  releases/gcc-11 respectively.
+  Development has now moved to the devel/omp/gcc-12 branch.
 
   hammer-3_3-branch
   The goal of this branch was to have a stable compiler based on GCC 3.3
-- 
2.25.1

Go patch committed: Check repeated const expressions in new scop

2022-06-29 Thread Ian Lance Taylor via Gcc-patches

This patch to the Go frontend checks repeated const expressions in new
scope, in case they refer to a newly defined name.  The test case is
const8.go in https://go.dev/cl/414795.  This fixes
https://go.dev/issue/53585.  Bootstrapped and ran Go testsuite on
x86_64-pc-linux-gnu.  Committed to mainline.

Ian
dabc73c20057d016027fd1af22575e2325cad9ef
diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE
index 13cb6ea4046..4fde25af76e 100644
--- a/gcc/go/gofrontend/MERGE
+++ b/gcc/go/gofrontend/MERGE
@@ -1,4 +1,4 @@
-c7238f58a26131b7611eff6f555cab02af8a623c
+63782f8a318e9eebfdc983f171a920c7a937c759
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
diff --git a/gcc/go/gofrontend/expressions.cc b/gcc/go/gofrontend/expressions.cc
index aadca9710e6..00d35a965a9 100644
--- a/gcc/go/gofrontend/expressions.cc
+++ b/gcc/go/gofrontend/expressions.cc
@@ -3352,97 +3352,7 @@ class Find_named_object : public Traverse
   bool found_;
 };
 
-// A reference to a const in an expression.
-
-class Const_expression : public Expression
-{
- public:
-  Const_expression(Named_object* constant, Location location)
-: Expression(EXPRESSION_CONST_REFERENCE, location),
-  constant_(constant), type_(NULL), seen_(false)
-  { }
-
-  Named_object*
-  named_object()
-  { return this->constant_; }
-
-  const Named_object*
-  named_object() const
-  { return this->constant_; }
-
-  // Check that the initializer does not refer to the constant itself.
-  void
-  check_for_init_loop();
-
- protected:
-  int
-  do_traverse(Traverse*);
-
-  Expression*
-  do_lower(Gogo*, Named_object*, Statement_inserter*, int);
-
-  bool
-  do_is_constant() const
-  { return true; }
-
-  bool
-  do_is_zero_value() const
-  { return this->constant_->const_value()->expr()->is_zero_value(); }
-
-  bool
-  do_is_static_initializer() const
-  { return true; }
-
-  bool
-  do_numeric_constant_value(Numeric_constant* nc) const;
-
-  bool
-  do_string_constant_value(std::string* val) const;
-
-  bool
-  do_boolean_constant_value(bool* val) const;
-
-  Type*
-  do_type();
-
-  // The type of a const is set by the declaration, not the use.
-  void
-  do_determine_type(const Type_context*);
-
-  void
-  do_check_types(Gogo*);
-
-  Expression*
-  do_copy()
-  { return this; }
-
-  Bexpression*
-  do_get_backend(Translate_context* context);
-
-  int
-  do_inlining_cost() const
-  { return 1; }
-
-  // When exporting a reference to a const as part of a const
-  // expression, we export the value.  We ignore the fact that it has
-  // a name.
-  void
-  do_export(Export_function_body* efb) const
-  { this->constant_->const_value()->expr()->export_expression(efb); }
-
-  void
-  do_dump_expression(Ast_dump_context*) const;
-
- private:
-  // The constant.
-  Named_object* constant_;
-  // The type of this reference.  This is used if the constant has an
-  // abstract type.
-  Type* type_;
-  // Used to prevent infinite recursion when a constant incorrectly
-  // refers to itself.
-  mutable bool seen_;
-};
+// Class Const_expression.
 
 // Traversal.
 
@@ -3454,6 +3364,14 @@ Const_expression::do_traverse(Traverse* traverse)
   return TRAVERSE_CONTINUE;
 }
 
+// Whether this is the zero value.
+
+bool
+Const_expression::do_is_zero_value() const
+{
+  return this->constant_->const_value()->expr()->is_zero_value();
+}
+
 // Lower a constant expression.  This is where we convert the
 // predeclared constant iota into an integer value.
 
@@ -3708,6 +3626,16 @@ Const_expression::do_get_backend(Translate_context* 
context)
   return expr->get_backend(context);
 }
 
+// When exporting a reference to a const as part of a const
+// expression, we export the value.  We ignore the fact that it has
+// a name.
+
+void
+Const_expression::do_export(Export_function_body* efb) const
+{
+  this->constant_->const_value()->expr()->export_expression(efb);
+}
+
 // Dump ast representation for constant expression.
 
 void
diff --git a/gcc/go/gofrontend/expressions.h b/gcc/go/gofrontend/expressions.h
index 707c19336d8..a1e3733aa1d 100644
--- a/gcc/go/gofrontend/expressions.h
+++ b/gcc/go/gofrontend/expressions.h
@@ -28,6 +28,7 @@ class Map_type;
 class Struct_type;
 class Struct_field;
 class Expression_list;
+class Const_expression;
 class Var_expression;
 class Enclosed_var_expression;
 class Temporary_reference_expression;
@@ -626,6 +627,20 @@ class Expression
   is_type_expression() const
   { return this->classification_ == EXPRESSION_TYPE; }
 
+  // If this is a const reference, return the Const_expression
+  // structure.  Otherwise, return NULL.  This is a controlled dynamic
+  // cast.
+  Const_expression*
+  const_expression()
+  { return this->convert(); }
+
+  const Const_expression*
+  const_expression() const
+  {
+return this->convert();
+  }
+
   // If this is a variable reference, return the Var_expression
   // structure.  Otherwise, return NULL.  This is a controlled dynamic
   // cast.
@@ -1453,6 +14

Re: [PATCH] mksysinfo: add support for musl libc

2022-06-29 Thread Ian Lance Taylor via Gcc-patches

On Tue, Jun 28, 2022 at 7:32 AM Sören Tempel  wrote:
>
> Ian Lance Taylor  wrote:
> > Given that pretty much every one of these musl patches has led to
> > problems on some glibc systems, it would be very nice if you could do
> > some testing with glibc.  Thanks.
>
> Sorry, my bad.
>
> I just tested this on Arch Linux and it compiles fine with the patch.
>
> > Can you expand on the st_atim issue?
>
> The st_atim issue is simply that struct stat contains additional struct
> fields on 32-bit musl architectures (__st_{a,m,c}tim32) in addition to
> st_{a,m,c}tim [1]. The sed expression currently only replaces the first
> occurrence (i.e. __st_{a,m,c}tim32) from gen-sysinfo.go:
>
> $ grep 'st_[acm]tim' sysinfo.go
> type _stat struct { st_dev uint64; __st_dev_padding int32; 
> __st_ino_truncated int32; st_mode uint32; st_nlink uint32; st_uid uint32; 
> st_gid uint32; st_rdev uint64; __st_rdev_padding int32; st_size int64; 
> st_blksize int32; st_blocks int64; __st_atim32 struct { tv_sec int32; tv_nsec 
> int32; }; __st_mtim32 struct { tv_sec int32; tv_nsec int32; }; __st_ctim32 
> struct { tv_sec int32; tv_nsec int32; }; st_ino uint64; st_atim Timespec; 
> st_mtim Timespec; st_ctim Timespec; }
> type Stat_t struct { Dev uint64; __st_dev_padding int32; 
> __Ino_truncated int32; Mode uint32; Nlink uint32; Uid uint32; Gid uint32; 
> Rdev uint64; __st_rdev_padding int32; Size int64; Blksize int32; Blocks 
> int64; __Atim32 struct { tv_sec int32; tv_nsec int32; }; __Mtim32 struct { 
> tv_sec int32; tv_nsec int32; }; __Ctim32 struct { tv_sec int32; tv_nsec 
> int32; }; Ino uint64; st_atim Timespec; st_mtim Timespec; st_ctim Timespec; }
>
> This causes the following build error on 32-bit musl architectures:
>
> stat_atim.go: error: reference to undefined field or method 'Mtim'
>17 | fs.modTime = timespecToTime(fs.sys.Mtim)
>   |   ^
> stat_atim.go: error: reference to undefined field or method 'Atim'
>52 | return timespecToTime(fi.Sys().(*syscall.Stat_t).Atim)
>   | ^
>
> This is fixed by the proposed patch by replacing both members in struct stat.
>
> > What does the musl type look like in gen-sysinfo.go?
>
> $ grep 'st_[acm]tim' gen-sysinfo.go
> // unknowndefine st_atime st_atim.tv_sec
> // unknowndefine st_mtime st_mtim.tv_sec
> // unknowndefine st_ctime st_ctim.tv_sec
> type _stat struct { st_dev uint64; __st_dev_padding int32; 
> __st_ino_truncated int32; st_mode uint32; st_nlink uint32; st_uid uint32; 
> st_gid uint32; st_rdev uint64; __st_rdev_padding int32; st_size int64; 
> st_blksize int32; st_blocks int64; __st_atim32 struct { tv_sec int32; tv_nsec 
> int32; }; __st_mtim32 struct { tv_sec int32; tv_nsec int32; }; __st_ctim32 
> struct { tv_sec int32; tv_nsec int32; }; st_ino uint64; st_atim _timespec; 
> st_mtim _timespec; st_ctim _timespec; }
>
> > What is the value of SYS_SECCOMP in musl?  Is it a system call number?
>
> SYS_SECCOMP is a macro defined in signal.h which can be used to check
> the si_code member of siginfo_t for SIGSYS, see the SECCOMP_RET_TRAP
> description in seccomp(2). As such, it is not a system call number.
> The value of SYS_SECCOMP is simply 1 [2].
>
> > What does it look like in gen-sysinfo.go?
>
> Defined in gen-sysinfo.go as follows:
>
> 4688:const _SYS_seccomp = 317
> 4775:const _SYS_SECCOMP = 1
>
> where the former is the syscall and the latter is the expanded
> SYS_SECCOMP macro constant. When generating sysinfo.go the name seems to
> be uppercased, thus resulting in a compilation failure. The generated
> sysinfo.go contains the following lines:
>
> 3067:const _SYS_seccomp = 317
> 3154:const _SYS_SECCOMP = 1
> 6600:const SYS_SECCOMP = _SYS_seccomp
> 6606:const SYS_SECCOMP = _SYS_SECCOMP
>
> Build error:
>
> sysinfo.go:6606:7: error: redefinition of 'SYS_SECCOMP'
>  6606 | const SYS_SECCOMP = _SYS_SECCOMP
>   |   ^
> sysinfo.go:6600:7: note: previous definition of 'SYS_SECCOMP' was here
>  6600 | const SYS_SECCOMP = _SYS_seccomp
>   |   ^
>
> This is fixed by the patch by only extracting _SYS_seccomp, not _SYS_SECCOMP.
>
> If you need more information, just let me know :)


Thanks for the info.  Does this patch work?  It tweaks the handling of
SYS_SECCOMP to be specific to that constant.

Ian
diff --git a/libgo/mksysinfo.sh b/libgo/mksysinfo.sh
index 5aa309155..ea1fa17d4 100755
--- a/libgo/mksysinfo.sh
+++ b/libgo/mksysinfo.sh
@@ -127,6 +127,7 @@ fi
 
 # The syscall numbers.  We force the names to upper case.
 grep '^const _SYS_' gen-sysinfo.go | \
+  grep -v '^const _SYS_SECCOMP = ' | \
   sed -e 's/const _\(SYS_[^= ]*\).*$/\1/' | \
   while read sys; do
 sup=`echo $sys | tr abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ`

[pushed][PATCH] LoongArch: Remove undefined behavior from code [PR 106097]

2022-06-29 Thread Lulu Cheng

C++2017 and previous standard description:

The value of E1 << E2 is E1 left-shifted E2 bit positions;
vacated bits are zero-filled. If E1 has an unsigned type,
the value of the result is E1×2E2, reduced modulo one more
than the maximum value representable inthe result type.
Otherwise, if E1 has a signed type and non-negative value,
and E1×2E2 is representablein the corresponding unsigned
type of the result type, then that value, converted to the
result type, is the resulting value; otherwise, the behavior
is undefined.

The value of E1 >> E2 is E1 right-shifted E2 bit positions.
If E1 has an unsigned type or if E1 has a signed type and
a non-negative value, the value of the result is the integral
part of the quotient of E1/2E2. If E1 has a signed type and
a negative value, the resulting value is implementation-defined.

gcc/ChangeLog:

PR target/106097
* config/loongarch/loongarch.cc (loongarch_build_integer):
Remove undefined behavior from code.
---
 gcc/config/loongarch/loongarch.cc | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index c8502b0b0f3..48d9ccdc331 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -160,7 +160,7 @@ enum loongarch_load_imm_method
 struct loongarch_integer_op
 {
   enum rtx_code code;
-  unsigned HOST_WIDE_INT value;
+  HOST_WIDE_INT value;
   enum loongarch_load_imm_method method;
 };
 
@@ -1468,7 +1468,7 @@ loongarch_build_integer (struct loongarch_integer_op 
*codes,
   unsigned int cost = 0;
 
   /* Get the lower 32 bits of the value.  */
-  HOST_WIDE_INT low_part = TARGET_64BIT ? value << 32 >> 32 : value;
+  HOST_WIDE_INT low_part = (int32_t)value;
 
   if (IMM12_OPERAND (low_part) || IMM12_OPERAND_UNSIGNED (low_part))
 {
@@ -1502,6 +1502,7 @@ loongarch_build_integer (struct loongarch_integer_op 
*codes,
   bool lu52i[2] = {(value & LU52I_B) == 0, (value & LU52I_B) == LU52I_B};
 
   int sign31 = (value & (1UL << 31)) >> 31;
+  int sign51 = (value & (1UL << 51)) >> 51;
   /* Determine whether the upper 32 bits are sign-extended from the lower
 32 bits. If it is, the instructions to load the high order can be
 ommitted.  */
@@ -1512,12 +1513,12 @@ loongarch_build_integer (struct loongarch_integer_op 
*codes,
   else if (lu32i[sign31])
{
  codes[cost].method = METHOD_LU52I;
- codes[cost].value = (value >> 52) << 52;
+ codes[cost].value = value & LU52I_B;
  return cost + 1;
}
 
   codes[cost].method = METHOD_LU32I;
-  codes[cost].value = ((value << 12) >> 44) << 32;
+  codes[cost].value = (value & LU32I_B) | (sign51 ? LU52I_B : 0);
   cost++;
 
   /* Determine whether the 52-61 bits are sign-extended from the low order,
@@ -1525,7 +1526,7 @@ loongarch_build_integer (struct loongarch_integer_op 
*codes,
   if (!lu52i[(value & (1ULL << 51)) >> 51])
{
  codes[cost].method = METHOD_LU52I;
- codes[cost].value = (value >> 52) << 52;
+ codes[cost].value = value & LU52I_B;
  cost++;
}
 }
-- 
2.31.1

[COMMITTED] PR tree-optimization/106114 - Don't use gori dependencies to optimize.

2022-06-29 Thread Andrew MacLeod via Gcc-patches

The routine which tried to fold and's and or's using relations was using 
the dependency cache as a shortcut to determine if there were 2 ssa 
names on the feeding expressions, and assuming that was correct.


ie

  _16 = a.0_1 < -117;
  _17 = a.0_1 >= -83;
  _18 = _16 | _17;

the dependency cache indicates that a.0_1 is "ssa1" dependency for _16 
and also for _17.  we dont have to scan the statement, so temporal out 
of date info is very quick.


Its also not meant to reflect that actual statement.. ie, it can get out 
of date.  Not is a way that makes anything incorrect, but in a way that 
may possibly result in a  either a missed opportunity or slightly more 
work when statements are being rewritten on the fly..  ie  DOM rewrites 
that to:


  _16 = a.1_15 < -117;
  _17 = a.1_15 >= -83;
  _18 = _16 | _17;

When fold_using_range is later invoked, a.1_15 is added a dependency to 
_16 and _17,  not attempting to understand that its a replacement, we 
simply now think that both a.0_1 and a.1_15 are dependencies.  so if 
either one becomes out of date, then ranger will recalculate _16 and/or _17


fold_using_range::relation_fold_and_or was using thet dependency cache 
as if it represent the operands of the statement accurately... so after 
the DOM rewrite, it thought that there were 2 operands on the _16 and 
_17 expression, the 2 dependencies in the cache, misconstruing it as


  _16 = a.0_1_ < a.1_15;
  _17 = a.0_1 >= a.1_15;
  _18 = _16 | _17;

Thus it thought is could fold it away.


The dependency cache shortcut should NOT be used for optimizations.   
THis patch correct the problem, and simply looks at the 2 operands of 
the feeding instructions.


bootstrapped on build-x86_64-pc-linux-gnu with no regressions. Pushed.

This is less likely to occur in GCC12 since there is less IL change on 
the fly, but it should be safe to make this change just in case.  OK for 
GCC12?


Andrew

PS. and yes, it fixes the other 2 testcases as well.
From c11461be4e5f3107d32187f2df9e5d4ec3f7b0d7 Mon Sep 17 00:00:00 2001
From: Andrew MacLeod 
Date: Wed, 29 Jun 2022 13:34:05 -0400
Subject: [PATCH] Don't use gori depedencies to optimize.

  The routine fold_using_range::relation_fold_and_or needs to veriyf that both
operands of 2 stmts are the same, and uses GORIs dependency cache for this.
This cache cannot be counted on to reflect the current contents of a
stmt, expecially in the presence of an IL changing pass.  Instead, look at the
statement operands.

	PR tree-optimization/106114
	gcc/
	* gimple-range-fold.cc (fold_using_range::relation_fold_and_or): Check
	statement operands instead of GORI cache.
	testsuite/
	* gcc.dg/pr106114.c: New.
---
 gcc/gimple-range-fold.cc| 30 +-
 gcc/testsuite/gcc.dg/pr106114.c | 14 ++
 2 files changed, 31 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr106114.c

diff --git a/gcc/gimple-range-fold.cc b/gcc/gimple-range-fold.cc
index 2a8c66e0c05..0f815b50b9a 100644
--- a/gcc/gimple-range-fold.cc
+++ b/gcc/gimple-range-fold.cc
@@ -1397,14 +1397,25 @@ fold_using_range::relation_fold_and_or (irange& lhs_range, gimple *s,
   // Ideally we search dependencies for common names, and see what pops out.
   // until then, simply try to resolve direct dependencies.
 
-  // Both names will need to have 2 direct dependencies.
-  tree ssa1_dep2 = src.gori ()->depend2 (ssa1);
-  tree ssa2_dep2 = src.gori ()->depend2 (ssa2);
-  if (!ssa1_dep2 || !ssa2_dep2)
+  gimple *ssa1_stmt = SSA_NAME_DEF_STMT (ssa1);
+  gimple *ssa2_stmt = SSA_NAME_DEF_STMT (ssa2);
+
+  range_op_handler handler1 (SSA_NAME_DEF_STMT (ssa1));
+  range_op_handler handler2 (SSA_NAME_DEF_STMT (ssa2));
+
+  // If either handler is not present, no relation can be found.
+  if (!handler1 || !handler2)
+return;
+
+  // Both stmts will need to have 2 ssa names in the stmt.
+  tree ssa1_dep1 = gimple_range_ssa_p (gimple_range_operand1 (ssa1_stmt));
+  tree ssa1_dep2 = gimple_range_ssa_p (gimple_range_operand2 (ssa1_stmt));
+  tree ssa2_dep1 = gimple_range_ssa_p (gimple_range_operand1 (ssa2_stmt));
+  tree ssa2_dep2 = gimple_range_ssa_p (gimple_range_operand2 (ssa2_stmt));
+
+  if (!ssa1_dep1 || !ssa1_dep2 || !ssa2_dep1 || !ssa2_dep2)
 return;
 
-  tree ssa1_dep1 = src.gori ()->depend1 (ssa1);
-  tree ssa2_dep1 = src.gori ()->depend1 (ssa2);
   // Make sure they are the same dependencies, and detect the order of the
   // relationship.
   bool reverse_op2 = true;
@@ -1413,13 +1424,6 @@ fold_using_range::relation_fold_and_or (irange& lhs_range, gimple *s,
   else if (ssa1_dep1 != ssa2_dep2 || ssa1_dep2 != ssa2_dep1)
 return;
 
-  range_op_handler handler1 (SSA_NAME_DEF_STMT (ssa1));
-  range_op_handler handler2 (SSA_NAME_DEF_STMT (ssa2));
-
-  // If either handler is not present, no relation is found.
-  if (!handler1 || !handler2)
-return;
-
   int_range<2> bool_one (boolean_true_node, boolean_true_node);
 
   relation_kind relation1 = handler1.op1_op2_relation

[PATCH] i386: Add AVX512BW to AVX512F in MASK_ISA2

2022-06-29 Thread Haochen Jiang via Gcc-patches

Hi all,

I just found in MASK_ISA2_UNSET part, since AVX512BW is based on AVX512F, we
should add OPTION_MASK_ISA2_AVX512BW_UNSET to AVX512F for maintainence
convenience and logic correctness, or we will need to add all future ISAs based
on AVX512BW in both AVX512F and AVX512BW. This will be easily forgot and might
cause confusion.

Also remove the redundant ones in this change.

Regtested on x86_64-pc-linux-gnu. Ok for trunk?

BRs,
Haochen

gcc/ChangeLog:

* common/config/i386/i386-common.cc (OPTION_MASK_ISA2_AVX512F_UNSET):
Add OPTION_MASK_ISA2_AVX512BW_UNSET, remove
OPTION_MASK_ISA2_AVX512BF16_UNSET and
OPTION_MASK_ISA2_AVX512FP16_UNSET.
---
 gcc/common/config/i386/i386-common.cc | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/gcc/common/config/i386/i386-common.cc 
b/gcc/common/config/i386/i386-common.cc
index cb878163492..c0c2ad74d87 100644
--- a/gcc/common/config/i386/i386-common.cc
+++ b/gcc/common/config/i386/i386-common.cc
@@ -315,11 +315,10 @@ along with GCC; see the file COPYING3.  If not see
| OPTION_MASK_ISA_SSE_UNSET)
 
 #define OPTION_MASK_ISA2_AVX512F_UNSET \
-  (OPTION_MASK_ISA2_AVX512BF16_UNSET \
+  (OPTION_MASK_ISA2_AVX512BW_UNSET \
| OPTION_MASK_ISA2_AVX5124FMAPS_UNSET \
| OPTION_MASK_ISA2_AVX5124VNNIW_UNSET \
-   | OPTION_MASK_ISA2_AVX512VP2INTERSECT_UNSET \
-   | OPTION_MASK_ISA2_AVX512FP16_UNSET)
+   | OPTION_MASK_ISA2_AVX512VP2INTERSECT_UNSET)
 #define OPTION_MASK_ISA2_GENERAL_REGS_ONLY_UNSET \
   OPTION_MASK_ISA2_SSE_UNSET
 #define OPTION_MASK_ISA2_AVX_UNSET OPTION_MASK_ISA2_AVX2_UNSET
-- 
2.18.1

RE: [PATCH] i386: Add AVX512BW to AVX512F in MASK_ISA2

2022-06-29 Thread Liu, Hongtao via Gcc-patches




> -Original Message-
> From: Jiang, Haochen 
> Sent: Thursday, June 30, 2022 9:51 AM
> To: gcc-patches@gcc.gnu.org
> Cc: ubiz...@gmail.com; Liu, Hongtao 
> Subject: [PATCH] i386: Add AVX512BW to AVX512F in MASK_ISA2
> 
> Hi all,
> 
> I just found in MASK_ISA2_UNSET part, since AVX512BW is based on AVX512F,
> we should add OPTION_MASK_ISA2_AVX512BW_UNSET to AVX512F for
> maintainence convenience and logic correctness, or we will need to add all
> future ISAs based on AVX512BW in both AVX512F and AVX512BW. This will be
> easily forgot and might cause confusion.
> 
> Also remove the redundant ones in this change.
> 
> Regtested on x86_64-pc-linux-gnu. Ok for trunk?
LGTM.
> 
> BRs,
> Haochen
> 
> gcc/ChangeLog:
> 
>   * common/config/i386/i386-common.cc
> (OPTION_MASK_ISA2_AVX512F_UNSET):
>   Add OPTION_MASK_ISA2_AVX512BW_UNSET, remove
>   OPTION_MASK_ISA2_AVX512BF16_UNSET and
>   OPTION_MASK_ISA2_AVX512FP16_UNSET.
> ---
>  gcc/common/config/i386/i386-common.cc | 5 ++---
>  1 file changed, 2 insertions(+), 3 deletions(-)
> 
> diff --git a/gcc/common/config/i386/i386-common.cc
> b/gcc/common/config/i386/i386-common.cc
> index cb878163492..c0c2ad74d87 100644
> --- a/gcc/common/config/i386/i386-common.cc
> +++ b/gcc/common/config/i386/i386-common.cc
> @@ -315,11 +315,10 @@ along with GCC; see the file COPYING3.  If not see
> | OPTION_MASK_ISA_SSE_UNSET)
> 
>  #define OPTION_MASK_ISA2_AVX512F_UNSET \
> -  (OPTION_MASK_ISA2_AVX512BF16_UNSET \
> +  (OPTION_MASK_ISA2_AVX512BW_UNSET \
> | OPTION_MASK_ISA2_AVX5124FMAPS_UNSET \
> | OPTION_MASK_ISA2_AVX5124VNNIW_UNSET \
> -   | OPTION_MASK_ISA2_AVX512VP2INTERSECT_UNSET \
> -   | OPTION_MASK_ISA2_AVX512FP16_UNSET)
> +   | OPTION_MASK_ISA2_AVX512VP2INTERSECT_UNSET)
>  #define OPTION_MASK_ISA2_GENERAL_REGS_ONLY_UNSET \
>OPTION_MASK_ISA2_SSE_UNSET
>  #define OPTION_MASK_ISA2_AVX_UNSET OPTION_MASK_ISA2_AVX2_UNSET
> --
> 2.18.1

Re: [PATCH] testsuite/102690: Only check warning for lp64 in Warray-bounds-16.C

2022-06-29 Thread Kito Cheng

Committed to trunk, thanks :)

Is it OK for gcc-11 and gcc-12 branches?

On Wed, Jun 29, 2022 at 5:00 PM Richard Biener  wrote:
>
> On Tue, 28 Jun 2022, Kito Cheng wrote:
>
> > That warning won't happen on ilp32 targets, seems like Andrew Pinski
> > already mention that[1] before.
> >
> > Verified on riscv32-unknown-elf and riscv64-unknown-elf.
> >
> > [1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92879#c1
>
> OK
>
> > gcc/testsuite/ChangeLog:
> >
> >   PR testsuite/102690
> >   * g++.dg/warn/Warray-bounds-16.C: XFAIL only on lp64 for the
> >   warning.
> > ---
> >  gcc/testsuite/g++.dg/warn/Warray-bounds-16.C | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/gcc/testsuite/g++.dg/warn/Warray-bounds-16.C 
> > b/gcc/testsuite/g++.dg/warn/Warray-bounds-16.C
> > index 89cbadb91c7..45a14b19ea3 100644
> > --- a/gcc/testsuite/g++.dg/warn/Warray-bounds-16.C
> > +++ b/gcc/testsuite/g++.dg/warn/Warray-bounds-16.C
> > @@ -19,7 +19,7 @@ struct S
> >  p = (int*) new unsigned char [sizeof (int) * m];
> >
> >  for (int i = 0; i < m; i++)
> > -  new (p + i) int (); /* { dg-bogus "bounds" "pr102690" { xfail *-*-* 
> > } } */
> > +  new (p + i) int (); /* { dg-bogus "bounds" "pr102690" { xfail lp64 } 
> > } */
> >}
> >  };
> >
> >
>
> --
> Richard Biener 
> SUSE Software Solutions Germany GmbH, Frankenstra

[PATCH] loongarch: use -mno-check-zero-division as the default for optimized code

2022-06-29 Thread Xi Ruoyao via Gcc-patches

Hi,

We've made a consensus [1] that not to enable trapping for division by
zero by default for LLVM, and we think GCC should behave similarly.

The main rationales:

1. Division by zero is undefined behavior, so in theory any portable
program shall not depend on it.
2. There are already many targets where both the hardware and GCC port
do nothing to trap on division by zero.  A list taken from
gcc.c-torture/execute/20101011-1.c:  PowerPC, RISC-V, ARM64, MSP430, and
many others.  So in practice any portable program cannot depend on this
trap.
3. As an ICPC assistant coach, I'm well aware that the main disadvantage
not to trap on division by zero is "it breaks expectations of newbies".
So, we keep -mcheck-zero-division defaulted for -O0 and -Og.  For other
optimization levels, it's well known that UBs are already breaking
newbies' expectations [2].
4. GCC is going to optimize more heavily exploiting integer division by
zero [3].  So let's stop encouraging people to rely on any integer
division by zero behavior from now.

[1]: https://reviews.llvm.org/D128572/new/#3612039
[2]: http://blog.llvm.org/2011/05/what-every-c-programmer-should-know_14.html
[3]: https://gcc.gnu.org/pipermail/gcc-patches/2022-May/595099.html

Patch content following.  Bootstrapped and regtested on loongarch64-
linux-gnu. Ok for trunk?

-- >8 --

Integer division by zero is undefined behavior anyway, and there are
already many platforms where neither the GCC port and the hardware do
anything to trap on division by zero.  So any portable program shall not
rely on SIGFPE on division by zero, in both theory and practice.  As the
result, there is no real reason to cost two additional instructions just
for the trap on division by zero with a new ISA.

One remaining reason to trap on division by zero may be debugging,
especially while -fsanitize=integer-divide-by-zero is not implemented
for LoongArch yet.  To make debugging easier, keep -mcheck-zero-division
as the default for -O0 and -Og, but use -mno-check-zero-division as the
default for all other optimization levels.

gcc/ChangeLog:

* config/loongarch/loongarch.cc (loongarch_check_zero_div_p):
New static function.
(loongarch_idiv_insns): Use loongarch_check_zero_div_p instead
of TARGET_CHECK_ZERO_DIV.
(loongarch_output_division): Likewise.
* doc/invoke.texi: Update to match the new behavior.

gcc/testsuite/ChangeLog:

* gcc.c-torture/execute/20101011-1.c (dg-additional-options):
add -mcheck-zero-division for LoongArch targets.
---
 gcc/config/loongarch/loongarch.cc  | 18 +++---
 gcc/doc/invoke.texi|  3 ++-
 .../gcc.c-torture/execute/20101011-1.c |  1 +
 3 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index c8502b0b0f3..f297083c2e9 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -2101,6 +2101,19 @@ loongarch_load_store_insns (rtx mem, rtx_insn *insn)
   return loongarch_address_insns (XEXP (mem, 0), mode, might_split_p);
 }
 
+/* Return true if we need to trap on division by zero.  */
+
+static bool
+loongarch_check_zero_div_p (void)
+{
+  /* if -m[no-]check-zero-division is given explicitly.  */
+  if (target_flags_explicit & MASK_CHECK_ZERO_DIV)
+return TARGET_CHECK_ZERO_DIV;
+
+  /* if not, don't trap for optimized code except -Og.  */
+  return !optimize || optimize_debug;
+}
+
 /* Return the number of instructions needed for an integer division.  */
 
 int
@@ -2109,7 +2122,7 @@ loongarch_idiv_insns (machine_mode mode ATTRIBUTE_UNUSED)
   int count;
 
   count = 1;
-  if (TARGET_CHECK_ZERO_DIV)
+  if (loongarch_check_zero_div_p ())
 count += 2;
 
   return count;
@@ -4050,7 +4063,6 @@ loongarch_do_optimize_block_move_p (void)
   return !optimize_size;
 }
 
-
 /* Expand a QI or HI mode atomic memory operation.
 
GENERATOR contains a pointer to the gen_* function that generates
@@ -5262,7 +5274,7 @@ loongarch_output_division (const char *division, rtx 
*operands)
   const char *s;
 
   s = division;
-  if (TARGET_CHECK_ZERO_DIV)
+  if (loongarch_check_zero_div_p ())
 {
   output_asm_insn (s, operands);
   s = "bne\t%2,%.,1f\n\tbreak\t7\n1:";
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index bde59ff0472..7e2a0b01233 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -24740,7 +24740,8 @@ Set the cost of branches to roughly @var{n} 
instructions.
 @itemx -mno-check-zero-divison
 @opindex -mcheck-zero-division
 Trap (do not trap) on integer division by zero.  The default is
-@option{-mcheck-zero-division}.
+@option{-mcheck-zero-division} for @option{-O0} or @option{-Og}, and
+@option{-mno-check-zero-division} for other optimization levels.
 
 @item -mcond-move-int
 @itemx -mno-cond-move-int
diff --git a/gcc/testsuite/gcc.c-torture/execute/20101011-1.c 
b/gcc/testsuite/gcc.c-torture/execute/201010

[PATCH] libsanitizer: don't enable for MIPS Linux without GNU libc [PR 106136]

2022-06-29 Thread Xi Ruoyao via Gcc-patches

In libsanitizer code, the size of some GNU libc data structure (notably,
struct stat) is hard coded.  These sizes may trigger a static assert
buidling against another libc.

Just make non-GNU libc targets UNSUPPORTED now.  If someone really cares
about those alternative libc implementations, please submit patch to
LLVM project adding the support.

libsanitizer/ChangeLog

PR sanitizer/106136
* configure.tgt: Change mips*-*-linux* to mips*-*-linux-gnu*
because it fails to build with non-GNU libc.
---
 libsanitizer/configure.tgt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libsanitizer/configure.tgt b/libsanitizer/configure.tgt
index fb89df4935c..54b74b60e2f 100644
--- a/libsanitizer/configure.tgt
+++ b/libsanitizer/configure.tgt
@@ -54,7 +54,7 @@ case "${target}" in
;;
   arm*-*-linux*)
;;
-  mips*-*-linux*)
+  mips*-*-linux-gnu*)
;;
   aarch64*-*-linux*)
if test x$ac_cv_sizeof_void_p = x8; then
-- 
2.37.0

Re: [COMMITTED] PR tree-optimization/106114 - Don't use gori dependencies to optimize.

2022-06-29 Thread Richard Biener via Gcc-patches




> Am 30.06.2022 um 03:43 schrieb Andrew MacLeod via Gcc-patches 
> :
> 
> The routine which tried to fold and's and or's using relations was using the 
> dependency cache as a shortcut to determine if there were 2 ssa names on the 
> feeding expressions, and assuming that was correct.
> 
> ie
> 
>   _16 = a.0_1 < -117;
>   _17 = a.0_1 >= -83;
>   _18 = _16 | _17;
> 
> the dependency cache indicates that a.0_1 is "ssa1" dependency for _16 and 
> also for _17.  we dont have to scan the statement, so temporal out of date 
> info is very quick.
> 
> Its also not meant to reflect that actual statement.. ie, it can get out of 
> date.  Not is a way that makes anything incorrect, but in a way that may 
> possibly result in a  either a missed opportunity or slightly more work when 
> statements are being rewritten on the fly..  ie  DOM rewrites that to:
> 
>   _16 = a.1_15 < -117;
>   _17 = a.1_15 >= -83;
>   _18 = _16 | _17;
> 
> When fold_using_range is later invoked, a.1_15 is added a dependency to _16 
> and _17,  not attempting to understand that its a replacement, we simply now 
> think that both a.0_1 and a.1_15 are dependencies.  so if either one becomes 
> out of date, then ranger will recalculate _16 and/or _17
> 
> fold_using_range::relation_fold_and_or was using thet dependency cache as if 
> it represent the operands of the statement accurately... so after the DOM 
> rewrite, it thought that there were 2 operands on the _16 and _17 expression, 
> the 2 dependencies in the cache, misconstruing it as
> 
>   _16 = a.0_1_ < a.1_15;
>   _17 = a.0_1 >= a.1_15;
>   _18 = _16 | _17;
> 
> Thus it thought is could fold it away.
> 
> 
> The dependency cache shortcut should NOT be used for optimizations.   THis 
> patch correct the problem, and simply looks at the 2 operands of the feeding 
> instructions.
> 
> bootstrapped on build-x86_64-pc-linux-gnu with no regressions. Pushed.
> 
> This is less likely to occur in GCC12 since there is less IL change on the 
> fly, but it should be safe to make this change just in case.  OK for GCC12?

Ok for gcc12

Richard 


> Andrew
> 
> PS. and yes, it fixes the other 2 testcases as well.
> <0001-Don-t-use-gori-depedencies-to-optimize.patch>

Re: lto: Fix option merging [PR106129]

2022-06-29 Thread Richard Biener via Gcc-patches




> Am 29.06.2022 um 22:44 schrieb Joseph Myers :
> 
> The LTO merging of options from different input files was broken by:
> 
> commit 227a2ecf663d69972b851f51f1934d18927b62cd
> Author: Martin Liska 
> Date:   Fri Mar 12 11:53:47 2021 +0100
> 
>lto-wrapper: Use vec data type.
> 
> Previously, find_and_merge_options would merge options it read into
> those in *opts. After this commit, options in *opts on entry to
> find_and_merge_options are ignored; the only merging that takes place
> is between multiple sets of options in the same input file that are
> read in the same call to this function (not sure how that case can
> occur at all). The effects include, for example, that if some objects
> are built with PIC enabled and others with it disabled, and the last
> LTO object processed has PIC enabled, the choice of PIC for the last
> object will result in the whole program being built as PIC, when the
> merging logic is intended to ensure that a mixture of PIC and non-PIC
> objects results in the whole program being built as non-PIC.
> 
> Fix this with an extra argument to find_and_merge_options to determine
> whether merging should take place.  This shows up a second issue with
> that commit (which I think wasn't actually intended to change code
> semantics at all): once merging is enabled again, the check for
> -Xassembler options became an infinite loop in the case where both
> inputs had -Xassembler options, with the same first option, so fix
> that loop to restore the previous semantics.
> 
> Note that I'm not sure how LTO option merging might be tested in the
> testsuite (clearly there wasn't sufficient, if any, coverage to detect
> these bugs).
> 
> Bootstrapped with no regressions for x86_64-pc-linux-gnu.  OK to
> commit?
> 

Ok.

Thanks,
Richard 


>PR lto/106129
>* lto-wrapper.cc (find_option): Add argument start.
>(merge_and_complain): Loop over existing_opt_index and
>existing_opt2_index for Xassembler check.  Update calls to
>find_option.
>(find_and_merge_options): Add argument first to determine whether
>to merge options with those passed in *opts.
>(run_gcc): Update calls to find_and_merge_options.
> 
> diff --git a/gcc/lto-wrapper.cc b/gcc/lto-wrapper.cc
> index 26e06e7..795ab74 100644
> --- a/gcc/lto-wrapper.cc
> +++ b/gcc/lto-wrapper.cc
> @@ -170,13 +170,14 @@ get_options_from_collect_gcc_options (const char 
> *collect_gcc,
>   return decoded;
> }
> 
> -/* Find option in OPTIONS based on OPT_INDEX.  -1 value is returned
> -   if the option is not present.  */
> +/* Find option in OPTIONS based on OPT_INDEX, starting at START.  -1
> +   value is returned if the option is not present.  */
> 
> static int
> -find_option (vec &options, size_t opt_index)
> +find_option (vec &options, size_t opt_index,
> + unsigned start = 0)
> {
> -  for (unsigned i = 0; i < options.length (); ++i)
> +  for (unsigned i = start; i < options.length (); ++i)
> if (options[i].opt_index == opt_index)
>   return i;
> 
> @@ -575,13 +576,16 @@ merge_and_complain (vec 
> &decoded_options,
>else
>  j++;
> 
> +  int existing_opt_index, existing_opt2_index;
>   if (!xassembler_options_error)
> -for (i = j = 0; ; i++, j++)
> +for (existing_opt_index = existing_opt2_index = 0; ;
> + existing_opt_index++, existing_opt2_index++)
>   {
> -int existing_opt_index
> -  = find_option (decoded_options, OPT_Xassembler);
> -int existing_opt2_index
> -  = find_option (fdecoded_options, OPT_Xassembler);
> +existing_opt_index
> +  = find_option (decoded_options, OPT_Xassembler, existing_opt_index);
> +existing_opt2_index
> +  = find_option (fdecoded_options, OPT_Xassembler,
> + existing_opt2_index);
> 
>cl_decoded_option *existing_opt = NULL;
>cl_decoded_option *existing_opt2 = NULL;
> @@ -1100,7 +1104,7 @@ find_crtoffloadtable (int save_temps, const char 
> *dumppfx)
> 
> static bool
> find_and_merge_options (int fd, off_t file_offset, const char *prefix,
> -vec decoded_cl_options,
> +vec decoded_cl_options, bool first,
>vec *opts, const char *collect_gcc)
> {
>   off_t offset, length;
> @@ -1110,6 +1114,9 @@ find_and_merge_options (int fd, off_t file_offset, 
> const char *prefix,
>   int err;
>   vec fdecoded_options;
> 
> +  if (!first)
> +fdecoded_options = *opts;
> +
>   simple_object_read *sobj;
>   sobj = simple_object_start_read (fd, file_offset, "__GNU_LTO",
>   &errmsg, &err);
> @@ -1130,7 +1137,6 @@ find_and_merge_options (int fd, off_t file_offset, 
> const char *prefix,
>   data = (char *)xmalloc (length);
>   read (fd, data, length);
>   fopts = data;
> -  bool first = true;
>   do
> {
>   vec f2decoded_options
> @@ -1417,8 +1423,10 @@ run_gcc (unsigned argc, char *argv[])
>   int auto_parallel = 0;
>   bool no_partition = false;
>   const char *jobserver_error = NULL;
> +  bool fdecoded_options_first = true;
>   vec fdecoded_option

[PATCH] i386: Extend cvtps2pd to memory

2022-06-29 Thread Haochen Jiang via Gcc-patches

Hi all,

This patch aims to fix the cvtps2pd insn, which should also work on
memory operand but currently does not. After this fix, when loop == 2,
it will eliminate movq instruction.

Regtested on x86_64-pc-linux-gnu. Ok for trunk?

BRs,
Haochen

gcc/ChangeLog:

PR target/43618
* config/i386/sse.md (extendv2sfv2df2): New define_expand.
(sse2_cvtps2pd_load): Rename extendvsdfv2df2.

gcc/testsuite/ChangeLog:

PR target/43618
* gcc.target/i386/pr43618-1.c: New test.
---
 gcc/config/i386/sse.md| 24 ++-
 gcc/testsuite/gcc.target/i386/pr43618-1.c | 13 
 2 files changed, 32 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr43618-1.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 8b2602bfa79..f96bb3dc6c3 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -9175,11 +9175,25 @@
(set_attr "prefix" "evex")
(set_attr "mode" "")])
 
+(define_expand "extendv2sfv2df2"
+  [(set (match_operand:V2DF 0 "register_operand")
+(float_extend:V2DF
+  (match_operand:V2SF 1 "nonimmediate_operand")))]
+  "TARGET_MMX_WITH_SSE"
+{
+  if (!MEM_P (operands[1]))
+{
+  operands[1] = lowpart_subreg (V4SFmode, operands[1], V2SFmode);
+  emit_insn (gen_sse2_cvtps2pd (operands[0], operands[1]));
+  DONE;
+}
+})
+
 (define_insn "sse2_cvtps2pd"
   [(set (match_operand:V2DF 0 "register_operand" "=v")
(float_extend:V2DF
  (vec_select:V2SF
-   (match_operand:V4SF 1 "vector_operand" "vm")
+   (match_operand:V4SF 1 "register_operand" "v")
(parallel [(const_int 0) (const_int 1)]]
   "TARGET_SSE2 && "
   "%vcvtps2pd\t{%1, %0|%0, %q1}"
@@ -9191,12 +9205,12 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "V2DF")])
 
-(define_insn "extendv2sfv2df2"
+(define_insn "sse2_cvtps2pd_load"
   [(set (match_operand:V2DF 0 "register_operand" "=v")
(float_extend:V2DF
- (match_operand:V2SF 1 "register_operand" "v")))]
-  "TARGET_MMX_WITH_SSE"
-  "%vcvtps2pd\t{%1, %0|%0, %1}"
+ (match_operand:V2SF 1 "memory_operand" "m")))]
+  "TARGET_MMX_WITH_SSE && "
+  "%vcvtps2pd\t{%1, %0|%0, %q1}"
   [(set_attr "type" "ssecvt")
(set_attr "amdfam10_decode" "direct")
(set_attr "athlon_decode" "double")
diff --git a/gcc/testsuite/gcc.target/i386/pr43618-1.c 
b/gcc/testsuite/gcc.target/i386/pr43618-1.c
new file mode 100644
index 000..3c84ea444aa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr43618-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "movq" } } */
+/* { dg-final { scan-assembler "cvtps2pd" } } */
+
+void
+foo (float a[2], double b[2])
+{
+int i;
+for (i = 0; i < 2; i++)
+  b[i] = a[i];
+}
+
-- 
2.18.1

Re: [PATCH] i386: Extend cvtps2pd to memory

2022-06-29 Thread Uros Bizjak via Gcc-patches

On Thu, Jun 30, 2022 at 7:59 AM Haochen Jiang  wrote:
>
> Hi all,
>
> This patch aims to fix the cvtps2pd insn, which should also work on
> memory operand but currently does not. After this fix, when loop == 2,
> it will eliminate movq instruction.
>
> Regtested on x86_64-pc-linux-gnu. Ok for trunk?
>
> BRs,
> Haochen
>
> gcc/ChangeLog:
>
> PR target/43618
> * config/i386/sse.md (extendv2sfv2df2): New define_expand.
> (sse2_cvtps2pd_load): Rename extendvsdfv2df2.
>
> gcc/testsuite/ChangeLog:
>
> PR target/43618
> * gcc.target/i386/pr43618-1.c: New test.

This patch could be as simple as:

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 8cd0f617bf3..c331445cb2d 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -9195,7 +9195,7 @@
(define_insn "extendv2sfv2df2"
  [(set (match_operand:V2DF 0 "register_operand" "=v")
   (float_extend:V2DF
- (match_operand:V2SF 1 "register_operand" "v")))]
+ (match_operand:V2SF 1 "nonimmediate_operand" "vm")))]
  "TARGET_MMX_WITH_SSE"
  "%vcvtps2pd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")

Uros.

Re: [PATCH 3/3] lto-plugin: implement LDPT_GET_API_VERSION

2022-06-29 Thread Rui Ueyama via Gcc-patches

Thanks Martin for creating this patch.

Here is a preliminary change for the mold side:
https://github.com/rui314/mold/commit/9ad49d1c556bc963d06cca8233535183490de605

Overall the API is looking fine, though it is not clear what kind of value
is expected as a linker version. A linker version is not a single unsigned
integer but something like "1.3.0". Something like "1.3.0-rc2" can also be
a linker version. So I don't think we can represent a linker version as a
single integer.

On Mon, Jun 20, 2022 at 9:01 PM Martin Liška  wrote:

> On 6/20/22 11:35, Richard Biener wrote:
> > I think this is OK.  Can we get buy-in from mold people?
>
> Sure, I've just pinged Rui:
> https://github.com/rui314/mold/issues/454#issuecomment-1160419030
>
> Martin
>

Re: [PATCH 1/2]AArch64 Add fallback case using sdot for usdot

2022-06-29 Thread Richard Biener via Gcc-patches

On Wed, Jun 29, 2022 at 4:35 PM Richard Sandiford
 wrote:
>
> Richard Biener  writes:
> > On Tue, Jun 28, 2022 at 5:54 PM Tamar Christina  
> > wrote:
> >>
> >> > -Original Message-
> >> > From: Richard Biener 
> >> > Sent: Monday, June 27, 2022 7:10 AM
> >> > To: Tamar Christina 
> >> > Cc: Richard Sandiford ; Richard Earnshaw
> >> > ; nd ; gcc-
> >> > patc...@gcc.gnu.org; Marcus Shawcroft 
> >> > Subject: Re: [PATCH 1/2]AArch64 Add fallback case using sdot for usdot
> >> >
> >> > On Mon, Jun 27, 2022 at 7:25 AM Tamar Christina via Gcc-patches  >> > patc...@gcc.gnu.org> wrote:
> >> > >
> >> > > > -Original Message-
> >> > > > From: Richard Sandiford 
> >> > > > Sent: Thursday, June 16, 2022 7:54 PM
> >> > > > To: Tamar Christina 
> >> > > > Cc: gcc-patches@gcc.gnu.org; nd ; Richard Earnshaw
> >> > > > ; Marcus Shawcroft
> >> > > > ; Kyrylo Tkachov
> >> > 
> >> > > > Subject: Re: [PATCH 1/2]AArch64 Add fallback case using sdot for
> >> > > > usdot
> >> > > >
> >> > > > Richard Sandiford via Gcc-patches  writes:
> >> > > > > Tamar Christina  writes:
> >> > > > >> Hi All,
> >> > > > >>
> >> > > > >> The usdot operation is common in video encoder and decoders
> >> > > > >> including some of the most widely used ones.
> >> > > > >>
> >> > > > >> This patch adds a +dotprod version of the optab as a fallback for
> >> > > > >> when you do have sdot but not usdot available.
> >> > > > >>
> >> > > > >> The fallback works by adding a bias to the unsigned argument to
> >> > > > >> convert it to a signed value and then correcting for the bias 
> >> > > > >> later on.
> >> > > > >>
> >> > > > >> Essentially it relies on (x - 128)y + 128y == xy where x is
> >> > > > >> unsigned and y is signed (assuming both are 8-bit values).
> >> > > > >> Because the range of a signed byte is only to 127 we split the 
> >> > > > >> bias
> >> > correction into:
> >> > > > >>
> >> > > > >>(x - 128)y + 127y + y
> >> > > > >
> >> > > > > I bet you knew this question was coming, but: this technique isn't
> >> > > > > target-specific, so wouldn't it be better to handle it in
> >> > > > > tree-vect-patterns.cc instead?
> >> > >
> >> > > Ok, so after many hours of trying I don't know how to make this work.
> >> > > DOT_PROD_EXPR is a reduction, but emitting them as additional pattern
> >> > > statement doesn't work because they'll be marked as internal_def
> >> > > rather than reduction_def.  I tried marking the new vec_stmt_info that
> >> > > I create explicitly as reduction_def but this gets overwritten during 
> >> > > analysis.
> >> > >
> >> > > I then looked into getting it as a vectorizable_operation but has this
> >> > > obvious problems In that it no longer treats it as a reduction and so 
> >> > > tries to
> >> > decompose into hi/lo.
> >> > >
> >> > > I then looked into treating additional patterns from  a reduction as
> >> > > reductions themselves but this is obviously wrong as non-reduction
> >> > statements also get marked as reductions.
> >> > >
> >> > > The conclusion is that I don't think the vectorizer allows additional
> >> > > reductions to be emitted from patterns.
> >> >
> >> > Indeed.  DOT_PROD is a weird beast and it doesn't define which lanes are
> >> > reduced to which so it's only usable when the result is reduced to a 
> >> > single
> >> > lane.
> >> >
> >> > An SLP pattern might work if you use reduc-plus for the reduced lanes and
> >> > keep the multiply separate?
> >>
> >> Unfortunately I can't seem to get it to handle the reduction in SLP.  It 
> >> seems to always
> >> use the non-SLP aware loop vectorizer here.  The suggested unroll factor 
> >> is always 1 and
> >> even trying to force it gets it to bail out later, presumable because it's 
> >> reducing into a
> >> scalar that's used outside the loop?
> >
> > Yes, it possibly needs 1-lane SLP support.
>
> As I mentioned to Tamar off-list, I feel like I've been wasting
> people's time recently by spewing out ideas that might or might not work
> (usually "not work"), so I wanted to get some confidence that the next
> suggestion made sense.  In the end I needed most of an implementation
> to do that, so it seemed easiest just to finish it off rather than post
> it in a half-complete state.  Sorry for the duplication. :-(
>
> The patch certainly isn't pretty, but I think it's the best we can
> do under the current infrastructure, and it should at least make
> the costs reasonably accurate.  (Actually, that said, we probably
> need to patch the reduction latency calculation in the aarch64
> vector code -- didn't think of that until now.)
>
> Tested on aarch64-linux-gnu and x64_64-linux-gnu.  WDYT?

Looks reasonable - does this end up in OKish code generation as well?

Thanks,
Richard.

> Thanks,
> Richard
>
> 
>
> Following a suggestion from Tamar, this patch adds a fallback
> implementation of usdot using sdot.  Specifically, for 8-bit
> input types:
>
>acc_2 = DOT_PROD_EXPR ;
>
> becomes:
>
>tmp_1 = DOT_PROD_EXPR <64,

Re: [PATCH] testsuite/102690: Only check warning for lp64 in Warray-bounds-16.C

2022-06-29 Thread Richard Biener via Gcc-patches

On Thu, Jun 30, 2022 at 4:17 AM Kito Cheng  wrote:
>
> Committed to trunk, thanks :)
>
> Is it OK for gcc-11 and gcc-12 branches?

Yes, if the same failure is corrected there

> On Wed, Jun 29, 2022 at 5:00 PM Richard Biener  wrote:
> >
> > On Tue, 28 Jun 2022, Kito Cheng wrote:
> >
> > > That warning won't happen on ilp32 targets, seems like Andrew Pinski
> > > already mention that[1] before.
> > >
> > > Verified on riscv32-unknown-elf and riscv64-unknown-elf.
> > >
> > > [1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92879#c1
> >
> > OK
> >
> > > gcc/testsuite/ChangeLog:
> > >
> > >   PR testsuite/102690
> > >   * g++.dg/warn/Warray-bounds-16.C: XFAIL only on lp64 for the
> > >   warning.
> > > ---
> > >  gcc/testsuite/g++.dg/warn/Warray-bounds-16.C | 2 +-
> > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > >
> > > diff --git a/gcc/testsuite/g++.dg/warn/Warray-bounds-16.C 
> > > b/gcc/testsuite/g++.dg/warn/Warray-bounds-16.C
> > > index 89cbadb91c7..45a14b19ea3 100644
> > > --- a/gcc/testsuite/g++.dg/warn/Warray-bounds-16.C
> > > +++ b/gcc/testsuite/g++.dg/warn/Warray-bounds-16.C
> > > @@ -19,7 +19,7 @@ struct S
> > >  p = (int*) new unsigned char [sizeof (int) * m];
> > >
> > >  for (int i = 0; i < m; i++)
> > > -  new (p + i) int (); /* { dg-bogus "bounds" "pr102690" { xfail 
> > > *-*-* } } */
> > > +  new (p + i) int (); /* { dg-bogus "bounds" "pr102690" { xfail lp64 
> > > } } */
> > >}
> > >  };
> > >
> > >
> >
> > --
> > Richard Biener 
> > SUSE Software Solutions Germany GmbH, Frankenstra

Re: [PATCH] loongarch: use -mno-check-zero-division as the default for optimized code

2022-06-29 Thread Richard Biener via Gcc-patches

On Thu, Jun 30, 2022 at 5:00 AM Xi Ruoyao via Gcc-patches
 wrote:
>
> Hi,
>
> We've made a consensus [1] that not to enable trapping for division by
> zero by default for LLVM, and we think GCC should behave similarly.
>
> The main rationales:
>
> 1. Division by zero is undefined behavior, so in theory any portable
> program shall not depend on it.
> 2. There are already many targets where both the hardware and GCC port
> do nothing to trap on division by zero.  A list taken from
> gcc.c-torture/execute/20101011-1.c:  PowerPC, RISC-V, ARM64, MSP430, and
> many others.  So in practice any portable program cannot depend on this
> trap.
> 3. As an ICPC assistant coach, I'm well aware that the main disadvantage
> not to trap on division by zero is "it breaks expectations of newbies".
> So, we keep -mcheck-zero-division defaulted for -O0 and -Og.  For other
> optimization levels, it's well known that UBs are already breaking
> newbies' expectations [2].
> 4. GCC is going to optimize more heavily exploiting integer division by
> zero [3].  So let's stop encouraging people to rely on any integer
> division by zero behavior from now.
>
> [1]: https://reviews.llvm.org/D128572/new/#3612039
> [2]: http://blog.llvm.org/2011/05/what-every-c-programmer-should-know_14.html
> [3]: https://gcc.gnu.org/pipermail/gcc-patches/2022-May/595099.html
>
> Patch content following.  Bootstrapped and regtested on loongarch64-
> linux-gnu. Ok for trunk?

It might be worth backporting this behavioral change to the GCC 12 branch
as well (so 12.1 is the only release with different default behavior) and
documenting the change in changes.html

> -- >8 --
>
> Integer division by zero is undefined behavior anyway, and there are
> already many platforms where neither the GCC port and the hardware do
> anything to trap on division by zero.  So any portable program shall not
> rely on SIGFPE on division by zero, in both theory and practice.  As the
> result, there is no real reason to cost two additional instructions just
> for the trap on division by zero with a new ISA.
>
> One remaining reason to trap on division by zero may be debugging,
> especially while -fsanitize=integer-divide-by-zero is not implemented
> for LoongArch yet.  To make debugging easier, keep -mcheck-zero-division
> as the default for -O0 and -Og, but use -mno-check-zero-division as the
> default for all other optimization levels.
>
> gcc/ChangeLog:
>
> * config/loongarch/loongarch.cc (loongarch_check_zero_div_p):
> New static function.
> (loongarch_idiv_insns): Use loongarch_check_zero_div_p instead
> of TARGET_CHECK_ZERO_DIV.
> (loongarch_output_division): Likewise.
> * doc/invoke.texi: Update to match the new behavior.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.c-torture/execute/20101011-1.c (dg-additional-options):
> add -mcheck-zero-division for LoongArch targets.
> ---
>  gcc/config/loongarch/loongarch.cc  | 18 +++---
>  gcc/doc/invoke.texi|  3 ++-
>  .../gcc.c-torture/execute/20101011-1.c |  1 +
>  3 files changed, 18 insertions(+), 4 deletions(-)
>
> diff --git a/gcc/config/loongarch/loongarch.cc 
> b/gcc/config/loongarch/loongarch.cc
> index c8502b0b0f3..f297083c2e9 100644
> --- a/gcc/config/loongarch/loongarch.cc
> +++ b/gcc/config/loongarch/loongarch.cc
> @@ -2101,6 +2101,19 @@ loongarch_load_store_insns (rtx mem, rtx_insn *insn)
>return loongarch_address_insns (XEXP (mem, 0), mode, might_split_p);
>  }
>
> +/* Return true if we need to trap on division by zero.  */
> +
> +static bool
> +loongarch_check_zero_div_p (void)
> +{
> +  /* if -m[no-]check-zero-division is given explicitly.  */
> +  if (target_flags_explicit & MASK_CHECK_ZERO_DIV)
> +return TARGET_CHECK_ZERO_DIV;
> +
> +  /* if not, don't trap for optimized code except -Og.  */
> +  return !optimize || optimize_debug;
> +}
> +
>  /* Return the number of instructions needed for an integer division.  */
>
>  int
> @@ -2109,7 +2122,7 @@ loongarch_idiv_insns (machine_mode mode 
> ATTRIBUTE_UNUSED)
>int count;
>
>count = 1;
> -  if (TARGET_CHECK_ZERO_DIV)
> +  if (loongarch_check_zero_div_p ())
>  count += 2;
>
>return count;
> @@ -4050,7 +4063,6 @@ loongarch_do_optimize_block_move_p (void)
>return !optimize_size;
>  }
>
> -
>  /* Expand a QI or HI mode atomic memory operation.
>
> GENERATOR contains a pointer to the gen_* function that generates
> @@ -5262,7 +5274,7 @@ loongarch_output_division (const char *division, rtx 
> *operands)
>const char *s;
>
>s = division;
> -  if (TARGET_CHECK_ZERO_DIV)
> +  if (loongarch_check_zero_div_p ())
>  {
>output_asm_insn (s, operands);
>s = "bne\t%2,%.,1f\n\tbreak\t7\n1:";
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index bde59ff0472..7e2a0b01233 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -24740,7 +24740,8 @@ Set the cost of branches to roughly @var{n}

60 matches

Mail list logo