[PATCH] Reconstruct i386 testsuite with __builtin_cpu_supports

2022-05-06 Thread Haochen Jiang via Gcc-patches
Hi all,

Some check files in the i386 testsuite were written before the function 
__builtin_cpu_supports was introduced. All of them use __get_cpuid_count. 
This patch reconstructs the i386 testsuite with __builtin_cpu_supports 
so that we have much clearer code.

Regtested on x86_64-pc-linux-gnu. Ok for trunk?

Also, while writing this patch, I found some files in the testsuite that might 
be useless currently. For example, the check in 
gcc/testsuite/gcc.target/i386/sse-os-support.h always returns 1. There 
are also some files that will no longer be included at all with this patch. Should 
we remove those files when we have time?

BRs,
Haochen

gcc/testsuite/ChangeLog:

* gcc.target/i386/adx-check.h: Change bit check to
__builtin_cpu_supports.
* gcc.target/i386/aes-avx-check.h: Ditto.
* gcc.target/i386/aes-check.h: Ditto.
* gcc.target/i386/avx-check.h: Ditto.
* gcc.target/i386/avx2-check.h: Ditto.
* gcc.target/i386/avx512-check.h: Ditto.
* gcc.target/i386/bmi-check.h: Ditto.
* gcc.target/i386/bmi2-check.h: Ditto.
* gcc.target/i386/f16c-check.h: Ditto.
* gcc.target/i386/fma-check.h: Ditto.
* gcc.target/i386/fma4-check.h: Ditto.
* gcc.target/i386/lzcnt-check.h: Ditto.
* gcc.target/i386/mmx-3dnow-check.h: Ditto.
* gcc.target/i386/mmx-check.h: Ditto.
* gcc.target/i386/pclmul-avx-check.h: Ditto.
* gcc.target/i386/pclmul-check.h: Ditto.
* gcc.target/i386/rtm-check.h: Ditto.
* gcc.target/i386/sha-check.h: Ditto.
* gcc.target/i386/sse-check.h: Ditto.
* gcc.target/i386/sse2-check.h: Ditto.
* gcc.target/i386/sse3-check.h: Ditto.
* gcc.target/i386/sse4_1-check.h: Ditto.
* gcc.target/i386/sse4_2-check.h: Ditto.
* gcc.target/i386/sse4a-check.h: Ditto.
* gcc.target/i386/ssse3-check.h: Ditto.
* gcc.target/i386/xop-check.h: Ditto.
---
 gcc/testsuite/gcc.target/i386/adx-check.h | 10 +---
 gcc/testsuite/gcc.target/i386/aes-avx-check.h | 14 +
 gcc/testsuite/gcc.target/i386/aes-check.h | 11 +---
 gcc/testsuite/gcc.target/i386/avx-check.h | 12 +---
 gcc/testsuite/gcc.target/i386/avx2-check.h| 20 +--
 gcc/testsuite/gcc.target/i386/avx512-check.h  | 59 +++
 gcc/testsuite/gcc.target/i386/bmi-check.h | 11 +---
 gcc/testsuite/gcc.target/i386/bmi2-check.h| 10 +---
 gcc/testsuite/gcc.target/i386/f16c-check.h| 10 +---
 gcc/testsuite/gcc.target/i386/fma-check.h | 11 +---
 gcc/testsuite/gcc.target/i386/fma4-check.h| 11 +---
 gcc/testsuite/gcc.target/i386/lzcnt-check.h   | 11 +---
 .../gcc.target/i386/mmx-3dnow-check.h | 11 +---
 gcc/testsuite/gcc.target/i386/mmx-check.h | 11 +---
 .../gcc.target/i386/pclmul-avx-check.h| 14 +
 gcc/testsuite/gcc.target/i386/pclmul-check.h  | 11 +---
 gcc/testsuite/gcc.target/i386/rtm-check.h | 10 +---
 gcc/testsuite/gcc.target/i386/sha-check.h | 10 +---
 gcc/testsuite/gcc.target/i386/sse-check.h | 11 +---
 gcc/testsuite/gcc.target/i386/sse2-check.h| 11 +---
 gcc/testsuite/gcc.target/i386/sse3-check.h| 11 +---
 gcc/testsuite/gcc.target/i386/sse4_1-check.h  | 11 +---
 gcc/testsuite/gcc.target/i386/sse4_2-check.h  | 11 +---
 gcc/testsuite/gcc.target/i386/sse4a-check.h   | 11 +---
 gcc/testsuite/gcc.target/i386/ssse3-check.h   | 11 +---
 gcc/testsuite/gcc.target/i386/xop-check.h | 11 +---
 26 files changed, 73 insertions(+), 272 deletions(-)

diff --git a/gcc/testsuite/gcc.target/i386/adx-check.h 
b/gcc/testsuite/gcc.target/i386/adx-check.h
index cfed1a38483..bed5dcca385 100644
--- a/gcc/testsuite/gcc.target/i386/adx-check.h
+++ b/gcc/testsuite/gcc.target/i386/adx-check.h
@@ -1,5 +1,4 @@
 #include 
-#include "cpuid.h"
 
 static void adx_test (void);
 
@@ -11,13 +10,8 @@ static void __attribute__ ((noinline)) do_test (void)
 int
 main ()
 {
-  unsigned int eax, ebx, ecx, edx;
-
-  if (!__get_cpuid_count (7, 0, &eax, &ebx, &ecx, &edx))
-return 0;
-
-  /* Run ADX test only if host has ADX support.  */
-  if (ebx & bit_ADX)
+  /* Check cpu support for ADX.  */
+  if (__builtin_cpu_supports ("adx"))
 {
   do_test ();
 #ifdef DEBUG
diff --git a/gcc/testsuite/gcc.target/i386/aes-avx-check.h 
b/gcc/testsuite/gcc.target/i386/aes-avx-check.h
index f2a4ead4014..74bf597ead4 100644
--- a/gcc/testsuite/gcc.target/i386/aes-avx-check.h
+++ b/gcc/testsuite/gcc.target/i386/aes-avx-check.h
@@ -2,8 +2,6 @@
 #include 
 #endif
 #include 
-#include "cpuid.h"
-#include "avx-os-support.h"
 
 static void aes_avx_test (void);
 
@@ -17,15 +15,9 @@ do_test (void)
 int
 main ()
 {
-  unsigned int eax, ebx, ecx, edx;
- 
-  if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
-return 0;
-
-  /* Run AES + AVX test only if host has AES + AVX support.  */
-  if (((ecx & (bit_AVX | bit_OSXSAVE | bit_AES))
-   == (bit_AVX | bit_OSXSAVE | bit_AES))
-  && avx_os_support ())
+  /* Check cp

[PATCH] [i386]Add combine splitter to transform pxor/pcmpeqb/pmovmskb/cmp 0xffff to ptest.

2022-05-06 Thread Haochen Jiang via Gcc-patches
Hi all,

This patch aims to add a combine splitter to transform 
pxor/pcmpeqb/pmovmskb/cmp 0xffff to ptest.

Regtested on x86_64-pc-linux-gnu. Ok for trunk?

BRs,
Haochen

gcc/ChangeLog:

PR target/104371
* config/i386/sse.md: Add new define_mode_attr and define_split.

gcc/testsuite/ChangeLog:

PR target/104371
* gcc.target/i386/pr104371-1.c: New test.
* gcc.target/i386/pr104371-2.c: Ditto.
---
 gcc/config/i386/sse.md | 19 +++
 gcc/testsuite/gcc.target/i386/pr104371-1.c | 14 ++
 gcc/testsuite/gcc.target/i386/pr104371-2.c | 14 ++
 3 files changed, 47 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr104371-1.c
 create mode 100755 gcc/testsuite/gcc.target/i386/pr104371-2.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 7b791def542..71afda73c8f 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -20083,6 +20083,25 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "SI")])
 
+;; Optimize pxor/pcmpeqb/pmovmskb/cmp 0xffff to ptest.
+(define_mode_attr vi1avx2const
+  [(V32QI "0xffffffff") (V16QI "0xffff")])
+
+(define_split
+  [(set (reg:CCZ FLAGS_REG)
+   (compare:CCZ (unspec:SI
+   [(eq:VI1_AVX2
+   (match_operand:VI1_AVX2 0 "vector_operand")
+   (match_operand:VI1_AVX2 1 "const0_operand"))]
+   UNSPEC_MOVMSK)
+(match_operand 2 "const_int_operand")))]
+  "TARGET_SSE4_1 && ix86_match_ccmode (insn, CCmode)
+  && (INTVAL (operands[2]) == (int) (<vi1avx2const>))"
+  [(set (reg:CC FLAGS_REG)
+   (unspec:CC [(match_dup 0)
+   (match_dup 0)]
+  UNSPEC_PTEST))])
+
 (define_expand "sse2_maskmovdqu"
   [(set (match_operand:V16QI 0 "memory_operand")
(unspec:V16QI [(match_operand:V16QI 1 "register_operand")
diff --git a/gcc/testsuite/gcc.target/i386/pr104371-1.c 
b/gcc/testsuite/gcc.target/i386/pr104371-1.c
new file mode 100644
index 000..df7c0b074e3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr104371-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse4" } */
+/* { dg-final { scan-assembler "ptest\[ \\t\]" } } */
+/* { dg-final { scan-assembler-not "pxor\[ \\t\]" } } */
+/* { dg-final { scan-assembler-not "pcmpeqb\[ \\t\]" } } */
+/* { dg-final { scan-assembler-not "pmovmskb\[ \\t\]" } } */
+
+#include 
+#include 
+
+bool is_zero(__m128i x)
+{
+  return _mm_movemask_epi8(_mm_cmpeq_epi8(x, _mm_setzero_si128())) == 0xffff;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr104371-2.c 
b/gcc/testsuite/gcc.target/i386/pr104371-2.c
new file mode 100755
index 000..f0d0afd5897
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr104371-2.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx2" } */
+/* { dg-final { scan-assembler "vptest\[ \\t\]" } } */
+/* { dg-final { scan-assembler-not "vpxor\[ \\t\]" } } */
+/* { dg-final { scan-assembler-not "vpcmpeqb\[ \\t\]" } } */
+/* { dg-final { scan-assembler-not "vpmovmskb\[ \\t\]" } } */
+
+#include 
+#include 
+
+bool is_zero256(__m256i x)
+{
+  return _mm256_movemask_epi8(_mm256_cmpeq_epi8(x, _mm256_setzero_si256())) == 
0xffffffff;
+}
-- 
2.18.1



Re: [PATCH 11/12] aarch64: Make bti pass generic so it can be used by the arm backend

2022-05-06 Thread Richard Sandiford via Gcc-patches
Andrea Corallo via Gcc-patches  writes:
> Hi all,
>
> this patch splits and restructures the aarch64 bti pass code in order
> to have it usable by the arm backend as well.  These changes have no
> functional impact.
>
> Best Regards
>
>   Andrea
>
> gcc/Changelog
>
>   * config.gcc (aarch64*-*-*): Rename 'aarch64-bti-insert.o' into
>   'aarch-bti-insert.o'.
>   * config/aarch64/aarch64-protos.h: Remove 'aarch64_bti_enabled'
>   proto.
>   * config/aarch64/aarch64.cc (aarch_bti_enabled): Rename.
>   (aarch_bti_j_insn_p, aarch_pac_insn_p): New functions.
>   (aarch64_output_mi_thunk)
>   (aarch64_print_patchable_function_entry)
>   (aarch64_file_end_indicate_exec_stack): Update renamed function
>   calls to renamed functions.
>   * config/aarch64/t-aarch64 (aarch-bti-insert.o): Update target.
>   * config/arm/aarch-bti-insert.cc: New file including and
>   generalizing code from aarch64-bti-insert.cc.
>   * config/arm/aarch-common-protos.h: Update.
>   * config/arm/arm-passes.def: New file.

Looks good to me, thanks.

Richard

> diff --git a/gcc/config.gcc b/gcc/config.gcc
> index 7b58e1314ff..2021bdf9d2f 100644
> --- a/gcc/config.gcc
> +++ b/gcc/config.gcc
> @@ -329,7 +329,7 @@ aarch64*-*-*)
>   c_target_objs="aarch64-c.o"
>   cxx_target_objs="aarch64-c.o"
>   d_target_objs="aarch64-d.o"
> - extra_objs="aarch64-builtins.o aarch-common.o aarch64-sve-builtins.o 
> aarch64-sve-builtins-shapes.o aarch64-sve-builtins-base.o 
> aarch64-sve-builtins-sve2.o cortex-a57-fma-steering.o aarch64-speculation.o 
> falkor-tag-collision-avoidance.o aarch64-bti-insert.o aarch64-cc-fusion.o"
> + extra_objs="aarch64-builtins.o aarch-common.o aarch64-sve-builtins.o 
> aarch64-sve-builtins-shapes.o aarch64-sve-builtins-base.o 
> aarch64-sve-builtins-sve2.o cortex-a57-fma-steering.o aarch64-speculation.o 
> falkor-tag-collision-avoidance.o aarch-bti-insert.o aarch64-cc-fusion.o"
>   target_gtfiles="\$(srcdir)/config/aarch64/aarch64-builtins.cc 
> \$(srcdir)/config/aarch64/aarch64-sve-builtins.h 
> \$(srcdir)/config/aarch64/aarch64-sve-builtins.cc"
>   target_has_targetm_common=yes
>   ;;
> diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
> index b0c5a4fd6b6..a9aad3abdc2 100644
> --- a/gcc/config/aarch64/aarch64-c.cc
> +++ b/gcc/config/aarch64/aarch64-c.cc
> @@ -179,7 +179,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
>aarch64_def_or_undef (TARGET_RNG, "__ARM_FEATURE_RNG", pfile);
>aarch64_def_or_undef (TARGET_MEMTAG, "__ARM_FEATURE_MEMORY_TAGGING", 
> pfile);
>  
> -  aarch64_def_or_undef (aarch64_bti_enabled (),
> +  aarch64_def_or_undef (aarch_bti_enabled (),
>   "__ARM_FEATURE_BTI_DEFAULT", pfile);
>  
>cpp_undef (pfile, "__ARM_FEATURE_PAC_DEFAULT");
> diff --git a/gcc/config/aarch64/aarch64-protos.h 
> b/gcc/config/aarch64/aarch64-protos.h
> index fe2180e95ea..9fdf7f9cc9c 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -891,7 +891,6 @@ void aarch64_register_pragmas (void);
>  void aarch64_relayout_simd_types (void);
>  void aarch64_reset_previous_fndecl (void);
>  bool aarch64_return_address_signing_enabled (void);
> -bool aarch64_bti_enabled (void);
>  void aarch64_save_restore_target_globals (tree);
>  void aarch64_addti_scratch_regs (rtx, rtx, rtx *,
>rtx *, rtx *,
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index eec743024c1..2f67f3872f6 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -8534,11 +8534,61 @@ aarch64_return_address_signing_enabled (void)
>  
>  /* Return TRUE if Branch Target Identification Mechanism is enabled.  */
>  bool
> -aarch64_bti_enabled (void)
> +aarch_bti_enabled (void)
>  {
>return (aarch_enable_bti == 1);
>  }
>  
> +/* Check if INSN is a BTI J insn.  */
> +bool
> +aarch_bti_j_insn_p (rtx_insn *insn)
> +{
> +  if (!insn || !INSN_P (insn))
> +return false;
> +
> +  rtx pat = PATTERN (insn);
> +  return GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_BTI_J;
> +}
> +
> +/* Check if X (or any sub-rtx of X) is a PACIASP/PACIBSP instruction.  */
> +bool
> +aarch_pac_insn_p (rtx x)
> +{
> +  if (!INSN_P (x))
> +return false;
> +
> +  subrtx_var_iterator::array_type array;
> +  FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (x), ALL)
> +{
> +  rtx sub = *iter;
> +  if (sub && GET_CODE (sub) == UNSPEC)
> + {
> +   int unspec_val = XINT (sub, 1);
> +   switch (unspec_val)
> + {
> + case UNSPEC_PACIASP:
> +case UNSPEC_PACIBSP:
> +   return true;
> +
> + default:
> +   return false;
> + }
> +   iter.skip_subrtxes ();
> + }
> +}
> +  return false;
> +}
> +
> +rtx aarch_gen_bti_c (void)
> +{
> +  return gen_bti_c ();
> +}
> +
> +rtx aarch_gen_bti_j (void)
> +{
> +  return gen_bti_j

Re: [PATCH] Reconstruct i386 testsuite with __builtin_cpu_supports

2022-05-06 Thread Uros Bizjak via Gcc-patches
On Fri, May 6, 2022 at 9:57 AM Haochen Jiang  wrote:
>
> Hi all,
>
> There are some check files in i386 testsuite are written before the function 
> __builtin_cpu_supports is introduced. All of them are using 
> __get_cpuid_count. This patch aims to reconstruct the i386 testsuite with 
> __builtin_cpu_supports so that we can have a much clearer code.
>
> Regtested on x86_64-pc-linux-gnu. Ok for trunk?

I don't think the *_os_support calls should be removed. IIRC, the
__builtin_cpu_supports function checks if the feature is supported by
the CPU, whereas the *_os_support calls check via xgetbv whether the OS
supports handling of the new registers.

Uros.

>
> Also when writting this patch, I also find some files in testsuite that might 
> be useless currently. For example, in the file 
> gcc/testsuite/gcc.target/i386/sse-os-support.h, it always return 1. And there 
> are also some files will no longer be included at all with this patch. Should 
> we remove those files when we have time?
>
> BRs,
> Haochen
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/adx-check.h: Change bit check to
> __builtin_cpu_supports.
> * gcc.target/i386/aes-avx-check.h: Ditto.
> * gcc.target/i386/aes-check.h: Ditto.
> * gcc.target/i386/avx-check.h: Ditto.
> * gcc.target/i386/avx2-check.h: Ditto.
> * gcc.target/i386/avx512-check.h: Ditto.
> * gcc.target/i386/bmi-check.h: Ditto.
> * gcc.target/i386/bmi2-check.h: Ditto.
> * gcc.target/i386/f16c-check.h: Ditto.
> * gcc.target/i386/fma-check.h: Ditto.
> * gcc.target/i386/fma4-check.h: Ditto.
> * gcc.target/i386/lzcnt-check.h: Ditto.
> * gcc.target/i386/mmx-3dnow-check.h: Ditto.
> * gcc.target/i386/mmx-check.h: Ditto.
> * gcc.target/i386/pclmul-avx-check.h: Ditto.
> * gcc.target/i386/pclmul-check.h: Ditto.
> * gcc.target/i386/rtm-check.h: Ditto.
> * gcc.target/i386/sha-check.h: Ditto.
> * gcc.target/i386/sse-check.h: Ditto.
> * gcc.target/i386/sse2-check.h: Ditto.
> * gcc.target/i386/sse3-check.h: Ditto.
> * gcc.target/i386/sse4_1-check.h: Ditto.
> * gcc.target/i386/sse4_2-check.h: Ditto.
> * gcc.target/i386/sse4a-check.h: Ditto.
> * gcc.target/i386/ssse3-check.h: Ditto.
> * gcc.target/i386/xop-check.h: Ditto.
> ---
>  gcc/testsuite/gcc.target/i386/adx-check.h | 10 +---
>  gcc/testsuite/gcc.target/i386/aes-avx-check.h | 14 +
>  gcc/testsuite/gcc.target/i386/aes-check.h | 11 +---
>  gcc/testsuite/gcc.target/i386/avx-check.h | 12 +---
>  gcc/testsuite/gcc.target/i386/avx2-check.h| 20 +--
>  gcc/testsuite/gcc.target/i386/avx512-check.h  | 59 +++
>  gcc/testsuite/gcc.target/i386/bmi-check.h | 11 +---
>  gcc/testsuite/gcc.target/i386/bmi2-check.h| 10 +---
>  gcc/testsuite/gcc.target/i386/f16c-check.h| 10 +---
>  gcc/testsuite/gcc.target/i386/fma-check.h | 11 +---
>  gcc/testsuite/gcc.target/i386/fma4-check.h| 11 +---
>  gcc/testsuite/gcc.target/i386/lzcnt-check.h   | 11 +---
>  .../gcc.target/i386/mmx-3dnow-check.h | 11 +---
>  gcc/testsuite/gcc.target/i386/mmx-check.h | 11 +---
>  .../gcc.target/i386/pclmul-avx-check.h| 14 +
>  gcc/testsuite/gcc.target/i386/pclmul-check.h  | 11 +---
>  gcc/testsuite/gcc.target/i386/rtm-check.h | 10 +---
>  gcc/testsuite/gcc.target/i386/sha-check.h | 10 +---
>  gcc/testsuite/gcc.target/i386/sse-check.h | 11 +---
>  gcc/testsuite/gcc.target/i386/sse2-check.h| 11 +---
>  gcc/testsuite/gcc.target/i386/sse3-check.h| 11 +---
>  gcc/testsuite/gcc.target/i386/sse4_1-check.h  | 11 +---
>  gcc/testsuite/gcc.target/i386/sse4_2-check.h  | 11 +---
>  gcc/testsuite/gcc.target/i386/sse4a-check.h   | 11 +---
>  gcc/testsuite/gcc.target/i386/ssse3-check.h   | 11 +---
>  gcc/testsuite/gcc.target/i386/xop-check.h | 11 +---
>  26 files changed, 73 insertions(+), 272 deletions(-)
>
> diff --git a/gcc/testsuite/gcc.target/i386/adx-check.h 
> b/gcc/testsuite/gcc.target/i386/adx-check.h
> index cfed1a38483..bed5dcca385 100644
> --- a/gcc/testsuite/gcc.target/i386/adx-check.h
> +++ b/gcc/testsuite/gcc.target/i386/adx-check.h
> @@ -1,5 +1,4 @@
>  #include 
> -#include "cpuid.h"
>
>  static void adx_test (void);
>
> @@ -11,13 +10,8 @@ static void __attribute__ ((noinline)) do_test (void)
>  int
>  main ()
>  {
> -  unsigned int eax, ebx, ecx, edx;
> -
> -  if (!__get_cpuid_count (7, 0, &eax, &ebx, &ecx, &edx))
> -return 0;
> -
> -  /* Run ADX test only if host has ADX support.  */
> -  if (ebx & bit_ADX)
> +  /* Check cpu support for ADX.  */
> +  if (__builtin_cpu_supports ("adx"))
>  {
>do_test ();
>  #ifdef DEBUG
> diff --git a/gcc/testsuite/gcc.target/i386/aes-avx-check.h 
> b/gcc/testsuite/gcc.target/i386/aes-avx-check.h
> index f2a4ead4014..74bf597ead4 100644
> --- a/gcc/testsuite/gcc.target/i386/aes-avx-check.h
> +++ b/gcc/testsuite/gcc.target/i386/aes-avx-check.h
> @@

Re: [PATCH] [i386]Add combine splitter to transform pxor/pcmpeqb/pmovmskb/cmp 0xffff to ptest.

2022-05-06 Thread Hongyu Wang via Gcc-patches
> +(define_split
> +  [(set (reg:CCZ FLAGS_REG)
> +   (compare:CCZ (unspec:SI
> +   [(eq:VI1_AVX2
> +   (match_operand:VI1_AVX2 0 "vector_operand")
> +   (match_operand:VI1_AVX2 1 "const0_operand"))]
> +   UNSPEC_MOVMSK)
> +(match_operand 2 "const_int_operand")))]
> +  "TARGET_SSE4_1 && ix86_match_ccmode (insn, CCmode)

It looks like set_src and set_dst are both CCZmode; do we really need
ix86_match_ccmode?

> +  && (INTVAL (operands[2]) == (int) (<vi1avx2const>))"

I think the (int) conversion is not needed for the constant; INTVAL
actually returns HOST_WIDE_INT.

> +#include 
> +
> +bool is_zero(__m128i x)

bool is not necessary here, we can use int and drop stdbool.

Haochen Jiang via Gcc-patches  于2022年5月6日周五 16:01写道:
>
> Hi all,
>
> This patch aims to add a combine splitter to transform 
> pxor/pcmpeqb/pmovmskb/cmp 0xffff to ptest.
>
> Regtested on x86_64-pc-linux-gnu. Ok for trunk?
>
> BRs,
> Haochen
>
> gcc/ChangeLog:
>
> PR target/104371
> * config/i386/sse.md: Add new define_mode_attr and define_split.
>
> gcc/testsuite/ChangeLog:
>
> PR target/104371
> * gcc.target/i386/pr104371-1.c: New test.
> * gcc.target/i386/pr104371-2.c: Ditto.
> ---
>  gcc/config/i386/sse.md | 19 +++
>  gcc/testsuite/gcc.target/i386/pr104371-1.c | 14 ++
>  gcc/testsuite/gcc.target/i386/pr104371-2.c | 14 ++
>  3 files changed, 47 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr104371-1.c
>  create mode 100755 gcc/testsuite/gcc.target/i386/pr104371-2.c
>
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 7b791def542..71afda73c8f 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -20083,6 +20083,25 @@
> (set_attr "prefix" "maybe_vex")
> (set_attr "mode" "SI")])
>
> +;; Optimize pxor/pcmpeqb/pmovmskb/cmp 0x to ptest.
> +(define_mode_attr vi1avx2const
> +  [(V32QI "0x") (V16QI "0x")])
> +
> +(define_split
> +  [(set (reg:CCZ FLAGS_REG)
> +   (compare:CCZ (unspec:SI
> +   [(eq:VI1_AVX2
> +   (match_operand:VI1_AVX2 0 "vector_operand")
> +   (match_operand:VI1_AVX2 1 "const0_operand"))]
> +   UNSPEC_MOVMSK)
> +(match_operand 2 "const_int_operand")))]
> +  "TARGET_SSE4_1 && ix86_match_ccmode (insn, CCmode)
> +  && (INTVAL (operands[2]) == (int) ())"
> +  [(set (reg:CC FLAGS_REG)
> +   (unspec:CC [(match_dup 0)
> +   (match_dup 0)]
> +  UNSPEC_PTEST))])
> +
>  (define_expand "sse2_maskmovdqu"
>[(set (match_operand:V16QI 0 "memory_operand")
> (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
> diff --git a/gcc/testsuite/gcc.target/i386/pr104371-1.c 
> b/gcc/testsuite/gcc.target/i386/pr104371-1.c
> new file mode 100644
> index 000..df7c0b074e3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr104371-1.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -msse4" } */
> +/* { dg-final { scan-assembler "ptest\[ \\t\]" } } */
> +/* { dg-final { scan-assembler-not "pxor\[ \\t\]" } } */
> +/* { dg-final { scan-assembler-not "pcmpeqb\[ \\t\]" } } */
> +/* { dg-final { scan-assembler-not "pmovmskb\[ \\t\]" } } */
> +
> +#include 
> +#include 
> +
> +bool is_zero(__m128i x)
> +{
> +  return _mm_movemask_epi8(_mm_cmpeq_epi8(x, _mm_setzero_si128())) == 0x;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr104371-2.c 
> b/gcc/testsuite/gcc.target/i386/pr104371-2.c
> new file mode 100755
> index 000..f0d0afd5897
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr104371-2.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx2" } */
> +/* { dg-final { scan-assembler "vptest\[ \\t\]" } } */
> +/* { dg-final { scan-assembler-not "vpxor\[ \\t\]" } } */
> +/* { dg-final { scan-assembler-not "vpcmpeqb\[ \\t\]" } } */
> +/* { dg-final { scan-assembler-not "vpmovmskb\[ \\t\]" } } */
> +
> +#include 
> +#include 
> +
> +bool is_zero256(__m256i x)
> +{
> +  return _mm256_movemask_epi8(_mm256_cmpeq_epi8(x, _mm256_setzero_si256())) 
> == 0x;
> +}
> --
> 2.18.1
>


Re: [PATCH] [i386]Add combine splitter to transform pxor/pcmpeqb/pmovmskb/cmp 0xffff to ptest.

2022-05-06 Thread Uros Bizjak via Gcc-patches
On Fri, May 6, 2022 at 10:01 AM Haochen Jiang  wrote:
>
> Hi all,
>
> This patch aims to add a combine splitter to transform 
> pxor/pcmpeqb/pmovmskb/cmp 0xffff to ptest.
>
> Regtested on x86_64-pc-linux-gnu. Ok for trunk?
>
> BRs,
> Haochen
>
> gcc/ChangeLog:
>
> PR target/104371
> * config/i386/sse.md: Add new define_mode_attr and define_split.
>
> gcc/testsuite/ChangeLog:
>
> PR target/104371
> * gcc.target/i386/pr104371-1.c: New test.
> * gcc.target/i386/pr104371-2.c: Ditto.
> ---
>  gcc/config/i386/sse.md | 19 +++
>  gcc/testsuite/gcc.target/i386/pr104371-1.c | 14 ++
>  gcc/testsuite/gcc.target/i386/pr104371-2.c | 14 ++
>  3 files changed, 47 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr104371-1.c
>  create mode 100755 gcc/testsuite/gcc.target/i386/pr104371-2.c
>
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 7b791def542..71afda73c8f 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -20083,6 +20083,25 @@
> (set_attr "prefix" "maybe_vex")
> (set_attr "mode" "SI")])
>
> +;; Optimize pxor/pcmpeqb/pmovmskb/cmp 0x to ptest.
> +(define_mode_attr vi1avx2const
> +  [(V32QI "0x") (V16QI "0x")])
> +
> +(define_split
> +  [(set (reg:CCZ FLAGS_REG)
> +   (compare:CCZ (unspec:SI
> +   [(eq:VI1_AVX2
> +   (match_operand:VI1_AVX2 0 "vector_operand")
> +   (match_operand:VI1_AVX2 1 "const0_operand"))]
> +   UNSPEC_MOVMSK)
> +(match_operand 2 "const_int_operand")))]
> +  "TARGET_SSE4_1 && ix86_match_ccmode (insn, CCmode)

There is no need to use ix86_match_ccmode here; the pattern is already
limited to CCZmode.

Uros.

> +  && (INTVAL (operands[2]) == (int) ())"
> +  [(set (reg:CC FLAGS_REG)
> +   (unspec:CC [(match_dup 0)
> +   (match_dup 0)]
> +  UNSPEC_PTEST))])
> +
>  (define_expand "sse2_maskmovdqu"
>[(set (match_operand:V16QI 0 "memory_operand")
> (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
> diff --git a/gcc/testsuite/gcc.target/i386/pr104371-1.c 
> b/gcc/testsuite/gcc.target/i386/pr104371-1.c
> new file mode 100644
> index 000..df7c0b074e3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr104371-1.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -msse4" } */
> +/* { dg-final { scan-assembler "ptest\[ \\t\]" } } */
> +/* { dg-final { scan-assembler-not "pxor\[ \\t\]" } } */
> +/* { dg-final { scan-assembler-not "pcmpeqb\[ \\t\]" } } */
> +/* { dg-final { scan-assembler-not "pmovmskb\[ \\t\]" } } */
> +
> +#include 
> +#include 
> +
> +bool is_zero(__m128i x)
> +{
> +  return _mm_movemask_epi8(_mm_cmpeq_epi8(x, _mm_setzero_si128())) == 0x;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr104371-2.c 
> b/gcc/testsuite/gcc.target/i386/pr104371-2.c
> new file mode 100755
> index 000..f0d0afd5897
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr104371-2.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx2" } */
> +/* { dg-final { scan-assembler "vptest\[ \\t\]" } } */
> +/* { dg-final { scan-assembler-not "vpxor\[ \\t\]" } } */
> +/* { dg-final { scan-assembler-not "vpcmpeqb\[ \\t\]" } } */
> +/* { dg-final { scan-assembler-not "vpmovmskb\[ \\t\]" } } */
> +
> +#include 
> +#include 
> +
> +bool is_zero256(__m256i x)
> +{
> +  return _mm256_movemask_epi8(_mm256_cmpeq_epi8(x, _mm256_setzero_si256())) 
> == 0x;
> +}
> --
> 2.18.1
>


Re: [PATCH] Reconstruct i386 testsuite with __builtin_cpu_supports

2022-05-06 Thread Hongyu Wang via Gcc-patches
> I don't think *_os_support calls should be removed. IIRC,
> __builtin_cpu_supports function checks if the feature is supported by
> CPU, whereas *_os_supports calls check via xgetbv if OS supports
> handling of new registers.

avx_os_support is like

avx_os_support (void)
{
  unsigned int eax, edx;
  unsigned int ecx = XCR_XFEATURE_ENABLED_MASK;

  __asm__ ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (ecx));

  return (eax & (XSTATE_SSE | XSTATE_YMM)) == (XSTATE_SSE | XSTATE_YMM);
}

While in get_available_features we have

#define XCR_AVX_ENABLED_MASK \
  (XSTATE_SSE | XSTATE_YMM)
  if ((ecx & bit_OSXSAVE))
{
  /* Check if XMM, YMM, OPMASK, upper 256 bits of ZMM0-ZMM15 and
ZMM16-ZMM31 states are supported by OSXSAVE.  */
  unsigned int xcrlow;
  unsigned int xcrhigh;
  __asm__ (".byte 0x0f, 0x01, 0xd0" /* xgetbv  */
   : "=a" (xcrlow), "=d" (xcrhigh)
   : "c" (XCR_XFEATURE_ENABLED_MASK));
  if ((xcrlow & XCR_AVX_ENABLED_MASK) == XCR_AVX_ENABLED_MASK)
{
  avx_usable = 1;

So __builtin_cpu_supports already performs the same check.

Uros Bizjak via Gcc-patches  于2022年5月6日周五 16:27写道:
>
> On Fri, May 6, 2022 at 9:57 AM Haochen Jiang  wrote:
> >
> > Hi all,
> >
> > There are some check files in i386 testsuite are written before the 
> > function __builtin_cpu_supports is introduced. All of them are using 
> > __get_cpuid_count. This patch aims to reconstruct the i386 testsuite with 
> > __builtin_cpu_supports so that we can have a much clearer code.
> >
> > Regtested on x86_64-pc-linux-gnu. Ok for trunk?
>
> I don't think *_os_support calls should be removed. IIRC,
> __builtin_cpu_supports function checks if the feature is supported by
> CPU, whereas *_os_supports calls check via xgetbv if OS supports
> handling of new registers.
>
> Uros.
>
> >
> > Also when writting this patch, I also find some files in testsuite that 
> > might be useless currently. For example, in the file 
> > gcc/testsuite/gcc.target/i386/sse-os-support.h, it always return 1. And 
> > there are also some files will no longer be included at all with this 
> > patch. Should we remove those files when we have time?
> >
> > BRs,
> > Haochen
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.target/i386/adx-check.h: Change bit check to
> > __builtin_cpu_supports.
> > * gcc.target/i386/aes-avx-check.h: Ditto.
> > * gcc.target/i386/aes-check.h: Ditto.
> > * gcc.target/i386/avx-check.h: Ditto.
> > * gcc.target/i386/avx2-check.h: Ditto.
> > * gcc.target/i386/avx512-check.h: Ditto.
> > * gcc.target/i386/bmi-check.h: Ditto.
> > * gcc.target/i386/bmi2-check.h: Ditto.
> > * gcc.target/i386/f16c-check.h: Ditto.
> > * gcc.target/i386/fma-check.h: Ditto.
> > * gcc.target/i386/fma4-check.h: Ditto.
> > * gcc.target/i386/lzcnt-check.h: Ditto.
> > * gcc.target/i386/mmx-3dnow-check.h: Ditto.
> > * gcc.target/i386/mmx-check.h: Ditto.
> > * gcc.target/i386/pclmul-avx-check.h: Ditto.
> > * gcc.target/i386/pclmul-check.h: Ditto.
> > * gcc.target/i386/rtm-check.h: Ditto.
> > * gcc.target/i386/sha-check.h: Ditto.
> > * gcc.target/i386/sse-check.h: Ditto.
> > * gcc.target/i386/sse2-check.h: Ditto.
> > * gcc.target/i386/sse3-check.h: Ditto.
> > * gcc.target/i386/sse4_1-check.h: Ditto.
> > * gcc.target/i386/sse4_2-check.h: Ditto.
> > * gcc.target/i386/sse4a-check.h: Ditto.
> > * gcc.target/i386/ssse3-check.h: Ditto.
> > * gcc.target/i386/xop-check.h: Ditto.
> > ---
> >  gcc/testsuite/gcc.target/i386/adx-check.h | 10 +---
> >  gcc/testsuite/gcc.target/i386/aes-avx-check.h | 14 +
> >  gcc/testsuite/gcc.target/i386/aes-check.h | 11 +---
> >  gcc/testsuite/gcc.target/i386/avx-check.h | 12 +---
> >  gcc/testsuite/gcc.target/i386/avx2-check.h| 20 +--
> >  gcc/testsuite/gcc.target/i386/avx512-check.h  | 59 +++
> >  gcc/testsuite/gcc.target/i386/bmi-check.h | 11 +---
> >  gcc/testsuite/gcc.target/i386/bmi2-check.h| 10 +---
> >  gcc/testsuite/gcc.target/i386/f16c-check.h| 10 +---
> >  gcc/testsuite/gcc.target/i386/fma-check.h | 11 +---
> >  gcc/testsuite/gcc.target/i386/fma4-check.h| 11 +---
> >  gcc/testsuite/gcc.target/i386/lzcnt-check.h   | 11 +---
> >  .../gcc.target/i386/mmx-3dnow-check.h | 11 +---
> >  gcc/testsuite/gcc.target/i386/mmx-check.h | 11 +---
> >  .../gcc.target/i386/pclmul-avx-check.h| 14 +
> >  gcc/testsuite/gcc.target/i386/pclmul-check.h  | 11 +---
> >  gcc/testsuite/gcc.target/i386/rtm-check.h | 10 +---
> >  gcc/testsuite/gcc.target/i386/sha-check.h | 10 +---
> >  gcc/testsuite/gcc.target/i386/sse-check.h | 11 +---
> >  gcc/testsuite/gcc.target/i386/sse2-check.h| 11 +---
> >  gcc/testsuite/gcc.target/i386/sse3-check.h| 11 +---
> >  gcc/testsuite/gcc.target/i386/sse4_1-check.h  | 11 +---
> >  gcc/testsuite/gcc.target/i386/

[wwwdocs] GCC 12: OpenACC

2022-05-06 Thread Thomas Schwinge
Hi!

Right in time for the GCC 12.1 release -- yay \o/ -- I've pushed
to wwwdocs commit c6a7f816f3531d5727674620d74818fe1d150467
"GCC 12: OpenACC", see attached.
Online: .


Grüße
 Thomas


-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
>From c6a7f816f3531d5727674620d74818fe1d150467 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge 
Date: Fri, 6 May 2022 11:03:13 +0200
Subject: [PATCH] GCC 12: OpenACC

---
 htdocs/gcc-12/changes.html | 35 ---
 1 file changed, 32 insertions(+), 3 deletions(-)

diff --git a/htdocs/gcc-12/changes.html b/htdocs/gcc-12/changes.html
index 1b6385cd..c64827bd 100644
--- a/htdocs/gcc-12/changes.html
+++ b/htdocs/gcc-12/changes.html
@@ -170,9 +170,38 @@ You may also want to check out our
   >OpenMP Implementation Status can be found in the libgomp manual.
   
   
-  The new warning flag -Wopenacc-parallelism was added for
-  OpenACC. It warns about potentially suboptimal choices related to
-  OpenACC parallelism.
+  
+Version 2.6 of the https://www.openacc.org/";>OpenACC
+specification continues to be maintained and improved in the C, C++ and
+Fortran compilers.
+See the https://gcc.gnu.org/wiki/OpenACC/Implementation%20Status#status-12";>implementation
+status section on the OpenACC wiki page and the
+https://gcc.gnu.org/onlinedocs/gcc-12.1.0/libgomp/Enabling-OpenACC.html";>
+run-time library documentation for further information.
+In addition to general performance tuning and bug fixing, new features
+include:
+
+  
+	OpenACC worker parallelism for AMD GPUs
+	(already for a long time supported for Nvidia
+	GPUs).
+  
+  
+	Data privatization/sharing at the OpenACC gang level.
+  
+  
+	Considerable improvements for the experimental OpenACC 'kernels'
+	decomposition
+	(https://gcc.gnu.org/onlinedocs/gcc-12.1.0/gcc/Optimize-Options.html#index-param";>--param
+	openacc-kernels=decompose).
+  
+  
+	A new warning
+	flag https://gcc.gnu.org/onlinedocs/gcc-12.1.0/gcc/Warning-Options.html#index-Wopenacc-parallelism";>-Wopenacc-parallelism
+	to warn about potentially suboptimal choices related to OpenACC
+	parallelism.
+  
+
   
   The offload target code generation for OpenMP and OpenACC can now
   be better adjusted using the new 

Re: [PATCH] Reconstruct i386 testsuite with __builtin_cpu_supports

2022-05-06 Thread Uros Bizjak via Gcc-patches
On Fri, May 6, 2022 at 11:00 AM Hongyu Wang  wrote:
>
> > I don't think *_os_support calls should be removed. IIRC,
> > __builtin_cpu_supports function checks if the feature is supported by
> > CPU, whereas *_os_supports calls check via xgetbv if OS supports
> > handling of new registers.
>
> avx_os_support is like
>
> avx_os_support (void)
> {
>   unsigned int eax, edx;
>   unsigned int ecx = XCR_XFEATURE_ENABLED_MASK;
>
>   __asm__ ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (ecx));
>
>   return (eax & (XSTATE_SSE | XSTATE_YMM)) == (XSTATE_SSE | XSTATE_YMM);
> }
>
> While in get_available_features we have
>
> #define XCR_AVX_ENABLED_MASK \
>   (XSTATE_SSE | XSTATE_YMM)
>   if ((ecx & bit_OSXSAVE))
> {
>   /* Check if XMM, YMM, OPMASK, upper 256 bits of ZMM0-ZMM15 and
> ZMM16-ZMM31 states are supported by OSXSAVE.  */
>   unsigned int xcrlow;
>   unsigned int xcrhigh;
>   __asm__ (".byte 0x0f, 0x01, 0xd0" /* xgetbv  */
>: "=a" (xcrlow), "=d" (xcrhigh)
>: "c" (XCR_XFEATURE_ENABLED_MASK));
>   if ((xcrlow & XCR_AVX_ENABLED_MASK) == XCR_AVX_ENABLED_MASK)
> {
>   avx_usable = 1;
>
> So __builtin_cpu_supports already inherits same check

Indeed, thanks for the explanation.

OTOH, we don't change the existing tests (perhaps only dg- directives
when infrastructure improves), so I would leave the existing testcases
as they are. In future, new helper functions should be implemented
with __builtin_cpu_supports, but let's leave existing ones as they
are.

Uros.

> Uros Bizjak via Gcc-patches  于2022年5月6日周五 16:27写道:
> >
> > On Fri, May 6, 2022 at 9:57 AM Haochen Jiang  
> > wrote:
> > >
> > > Hi all,
> > >
> > > > Some check files in the i386 testsuite were written before the 
> > > > function __builtin_cpu_supports was introduced. All of them use 
> > > > __get_cpuid_count. This patch aims to reconstruct the i386 testsuite with 
> > > > __builtin_cpu_supports so that we can have much clearer code.
> > >
> > > Regtested on x86_64-pc-linux-gnu. Ok for trunk?
> >
> > I don't think *_os_support calls should be removed. IIRC,
> > __builtin_cpu_supports function checks if the feature is supported by
> > CPU, whereas *_os_supports calls check via xgetbv if OS supports
> > handling of new registers.
> >
> > Uros.
> >
> > >
> > > > Also, when writing this patch, I found some files in the testsuite that 
> > > > might be useless currently. For example, the file 
> > > > gcc/testsuite/gcc.target/i386/sse-os-support.h always returns 1. And 
> > > > there are also some files that will no longer be included at all with this 
> > > > patch. Should we remove those files when we have time?
> > >
> > > BRs,
> > > Haochen
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > > * gcc.target/i386/adx-check.h: Change bit check to
> > > __builtin_cpu_supports.
> > > * gcc.target/i386/aes-avx-check.h: Ditto.
> > > * gcc.target/i386/aes-check.h: Ditto.
> > > * gcc.target/i386/avx-check.h: Ditto.
> > > * gcc.target/i386/avx2-check.h: Ditto.
> > > * gcc.target/i386/avx512-check.h: Ditto.
> > > * gcc.target/i386/bmi-check.h: Ditto.
> > > * gcc.target/i386/bmi2-check.h: Ditto.
> > > * gcc.target/i386/f16c-check.h: Ditto.
> > > * gcc.target/i386/fma-check.h: Ditto.
> > > * gcc.target/i386/fma4-check.h: Ditto.
> > > * gcc.target/i386/lzcnt-check.h: Ditto.
> > > * gcc.target/i386/mmx-3dnow-check.h: Ditto.
> > > * gcc.target/i386/mmx-check.h: Ditto.
> > > * gcc.target/i386/pclmul-avx-check.h: Ditto.
> > > * gcc.target/i386/pclmul-check.h: Ditto.
> > > * gcc.target/i386/rtm-check.h: Ditto.
> > > * gcc.target/i386/sha-check.h: Ditto.
> > > * gcc.target/i386/sse-check.h: Ditto.
> > > * gcc.target/i386/sse2-check.h: Ditto.
> > > * gcc.target/i386/sse3-check.h: Ditto.
> > > * gcc.target/i386/sse4_1-check.h: Ditto.
> > > * gcc.target/i386/sse4_2-check.h: Ditto.
> > > * gcc.target/i386/sse4a-check.h: Ditto.
> > > * gcc.target/i386/ssse3-check.h: Ditto.
> > > * gcc.target/i386/xop-check.h: Ditto.
> > > ---
> > >  gcc/testsuite/gcc.target/i386/adx-check.h | 10 +---
> > >  gcc/testsuite/gcc.target/i386/aes-avx-check.h | 14 +
> > >  gcc/testsuite/gcc.target/i386/aes-check.h | 11 +---
> > >  gcc/testsuite/gcc.target/i386/avx-check.h | 12 +---
> > >  gcc/testsuite/gcc.target/i386/avx2-check.h| 20 +--
> > >  gcc/testsuite/gcc.target/i386/avx512-check.h  | 59 +++
> > >  gcc/testsuite/gcc.target/i386/bmi-check.h | 11 +---
> > >  gcc/testsuite/gcc.target/i386/bmi2-check.h| 10 +---
> > >  gcc/testsuite/gcc.target/i386/f16c-check.h| 10 +---
> > >  gcc/testsuite/gcc.target/i386/fma-check.h | 11 +---
> > >  gcc/testsuite/gcc.target/i386/fma4-check.h| 11 +---
> > >  gcc/testsuite/gcc.target/i386/lzcnt-check.h   | 11 +---
> > >  .../gcc.target/i386/mmx-3dnow-check.h | 

GCC 9.4.1 Status Report (2022-05-06)

2022-05-06 Thread Richard Biener via Gcc-patches


Status
==

The GCC 9 branch is in regression and documentation fixing mode.

After the release of GCC 12.1 it's time to retire this branch and do
a last release from it - GCC 9.5.  I will do a GCC 9.5 release
candidate in two weeks from now, May 20th, followed by the release
a week after that if no serious problems arise.

Following the GCC 9.5 release and branch closing will be the release
of GCC 10.4 with a timeline still to be announced.  So this is now
the time to consider backporting fixes that landed in GCC 11.3 to
the GCC 10 branch and possibly also the GCC 9 branch.  Please consider
that there will be no further release from the GCC 9 branch and thus
be extra careful to not introduce new regressions on it.

It's also a very good point in time to ensure the branch still builds
and has reasonable testresults for the target you maintain.


Quality Data


Priority  #   Change from last report
---   ---
P1  
P2  427   + 123
P3  42+  16
P4  188   +  15
P5  25+   2
---   ---
Total P1-P3 469   + 138
Total   682   + 155


Previous Report
===

https://gcc.gnu.org/pipermail/gcc/2021-June/236176.html


Re: [PATCH][GCC 13] Enable match.pd dumping with -fdump-tree-original

2022-05-06 Thread Alex Coplan via Gcc-patches
Ping.

https://gcc.gnu.org/pipermail/gcc-patches/2022-March/592307.html

On 25/03/2022 15:07, Alex Coplan via Gcc-patches wrote:
> Hi,
> 
> I noticed that, while the C/C++ frontends invoke the GENERIC match.pd
> simplifications to do early folding, the debug output from
> generic-match.cc does not appear in the -fdump-tree-original output,
> even with -fdump-tree-original-folding or -fdump-tree-original-all. This
> patch fixes that.
> 
> For example, before the patch, for the following code:
> 
> int a[2];
> void bar ();
> void f()
> {
> if ((unsigned long)(a + 1) == 0)
> bar ();
> }
> 
> on AArch64 at -O0, -fdump-tree-original-all would give:
> 
> ;; Function f (null)
> ;; enabled by -tree-original
> 
> 
> {
>   if (0)
> {
>   bar ();
> }
> }
> 
> After the patch, we get:
> 
> Applying pattern match.pd:3774, generic-match.cc:24535
> Matching expression match.pd:146, generic-match.cc:23
> Applying pattern match.pd:5638, generic-match.cc:13388
> 
> ;; Function f (null)
> ;; enabled by -tree-original
> 
> 
> {
>   if (0)
> {
>   bar ();
> }
> }
> 
> The reason we don't get the match.pd output as it stands, is that the
> original dump is treated specially in c-opts.cc: it gets its own state
> which is independent from that used by other dump files in the compiler.
> Like most of the compiler, the generated generic-match.cc has code of
> the form:
> 
>   if (dump_file && (dump_flags & TDF_FOLDING))
> fprintf (dump_file, ...);
> 
> But, as it stands, -fdump-tree-original has its own FILE * and flags in
> c-opts.cc (original_dump_{file,flags}) and never touches the global
> dump_{file,flags} (managed by dumpfile.{h,cc}). This patch adjusts the
> code in c-opts.cc to use the main dump infrastructure used by the rest
> of the compiler, instead of treating the original dump specially.
> 
> We take the opportunity to make a small refactor: the code in
> c-gimplify.cc:c_genericize can, with this change, use the global dump
> infrastructure to get the original dump file and flags instead of using
> the bespoke get_dump_info function implemented in c-opts.cc. With this
> change, we remove the only use of get_dump_info, so this can be removed.
> 
> Note that we also fix a leak of the original dump file in
> c_common_parse_file. I originally thought it might be possible to
> achieve this with only one static call to dump_finish () (by simply
> moving it earlier in the loop), but unfortunately the dump file is
> required to be open while c_parse_final_cleanups runs, as we (e.g.)
> perform some template instantiations here for C++, which need to appear
> in the original dump file.
> 
> We adjust cgraph_node::get_create to avoid introducing noise in the
> original dump file: without this, these "Introduced new external node"
> lines start appearing in the original dump files, which breaks tests
> that do a scan-tree-dump-times on the original dump looking for a
> certain function name.
> 
> Bootstrapped/regtested on aarch64-linux-gnu, OK for GCC 13?
> 
> Thanks,
> Alex
> 
> gcc/c-family/ChangeLog:
> 
>   * c-common.h (get_dump_info): Delete.
>   * c-gimplify.cc (c_genericize): Get TDI_original dump file info
>   from the global dump_manager instead of the (now obsolete)
>   get_dump_info.
>   * c-opts.cc (original_dump_file): Delete.
>   (original_dump_flags): Delete.
>   (c_common_parse_file): Switch to using global dump_manager to
>   manage the original dump file; fix leak of dump file.
>   (get_dump_info): Delete.
> 
> gcc/ChangeLog:
> 
>   * cgraph.cc (cgraph_node::get_create): Don't dump if the current
>   dump file is that of -fdump-tree-original.

> diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
> index 52a85bfb783..b829cdbfe28 100644
> --- a/gcc/c-family/c-common.h
> +++ b/gcc/c-family/c-common.h
> @@ -950,7 +950,6 @@ extern bool c_common_post_options (const char **);
>  extern bool c_common_init (void);
>  extern void c_common_finish (void);
>  extern void c_common_parse_file (void);
> -extern FILE *get_dump_info (int, dump_flags_t *);
>  extern alias_set_type c_common_get_alias_set (tree);
>  extern void c_register_builtin_type (tree, const char*);
>  extern bool c_promoting_integer_type_p (const_tree);
> diff --git a/gcc/c-family/c-gimplify.cc b/gcc/c-family/c-gimplify.cc
> index a00b0a02dcc..a6f26c9b0d3 100644
> --- a/gcc/c-family/c-gimplify.cc
> +++ b/gcc/c-family/c-gimplify.cc
> @@ -40,6 +40,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "dumpfile.h"
>  #include "c-ubsan.h"
>  #include "tree-nested.h"
> +#include "context.h"
>  
>  /*  The gimplification pass converts the language-dependent trees
>  (ld-trees) emitted by the parser into language-independent trees
> @@ -552,6 +553,7 @@ c_genericize_control_r (tree *stmt_p, int *walk_subtrees, 
> void *data)
>  void
>  c_genericize (tree fndecl)
>  {
> +  dump_file_info *dfi;
>FILE *dump_orig;
>dump_flags_t local_dump_flags;
>

Re: [1/2] PR96463 - aarch64 specific changes

2022-05-06 Thread Richard Sandiford via Gcc-patches
Prathamesh Kulkarni  writes:
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc 
> b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> index c24c0548724..1ef4ea2087b 100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> @@ -44,6 +44,14 @@
>  #include "aarch64-sve-builtins-shapes.h"
>  #include "aarch64-sve-builtins-base.h"
>  #include "aarch64-sve-builtins-functions.h"
> +#include "aarch64-builtins.h"
> +#include "gimple-ssa.h"
> +#include "tree-phinodes.h"
> +#include "tree-ssa-operands.h"
> +#include "ssa-iterators.h"
> +#include "stringpool.h"
> +#include "value-range.h"
> +#include "tree-ssanames.h"

Minor, but: I think the preferred approach is to include "ssa.h"
rather than include some of these headers directly.

>  
>  using namespace aarch64_sve;
>  
> @@ -1207,6 +1215,56 @@ public:
>  insn_code icode = code_for_aarch64_sve_ld1rq (e.vector_mode (0));
>  return e.use_contiguous_load_insn (icode);
>}
> +
> +  gimple *
> +  fold (gimple_folder &f) const OVERRIDE
> +  {
> +tree arg0 = gimple_call_arg (f.call, 0);
> +tree arg1 = gimple_call_arg (f.call, 1);
> +
> +/* Transform:
> +   lhs = svld1rq ({-1, -1, ... }, arg1)
> +   into:
> +   tmp = mem_ref [(int * {ref-all}) arg1]
> +   lhs = vec_perm_expr.
> +   on little endian target.  */
> +
> +if (!BYTES_BIG_ENDIAN
> + && integer_all_onesp (arg0))
> +  {
> + tree lhs = gimple_call_lhs (f.call);
> + auto simd_type = aarch64_get_simd_info_for_type (Int32x4_t);

Does this work for other element sizes?  I would have expected it
to be the (128-bit) Advanced SIMD vector associated with the same
element type as the SVE vector.

The testcase should cover more than just int32x4_t -> svint32_t,
just to be sure.

> +
> + tree elt_ptr_type
> +   = build_pointer_type_for_mode (simd_type.eltype, VOIDmode, true);
> + tree zero = build_zero_cst (elt_ptr_type);
> +
> + /* Use element type alignment.  */
> + tree access_type
> +   = build_aligned_type (simd_type.itype, TYPE_ALIGN (simd_type.eltype));
> +
> + tree tmp = make_ssa_name_fn (cfun, access_type, 0);
> + gimple *mem_ref_stmt
> +   = gimple_build_assign (tmp, fold_build2 (MEM_REF, access_type, arg1, 
> zero));

Long line.  Might be easier to format by assigning the fold_build2 result
to a temporary variable.

> + gsi_insert_before (f.gsi, mem_ref_stmt, GSI_SAME_STMT);
> +
> + tree mem_ref_lhs = gimple_get_lhs (mem_ref_stmt);
> + tree vectype = TREE_TYPE (mem_ref_lhs);
> + tree lhs_type = TREE_TYPE (lhs);

Is this necessary?  The code above supplied the types and I wouldn't
have expected them to change during the build process.

> +
> + int source_nelts = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
> + vec_perm_builder sel (TYPE_VECTOR_SUBPARTS (lhs_type), source_nelts, 1);
> + for (int i = 0; i < source_nelts; i++)
> +   sel.quick_push (i);
> +
> + vec_perm_indices indices (sel, 1, source_nelts);
> + gcc_checking_assert (can_vec_perm_const_p (TYPE_MODE (lhs_type), 
> indices));
> + tree mask = vec_perm_indices_to_tree (lhs_type, indices);
> + return gimple_build_assign (lhs, VEC_PERM_EXPR, mem_ref_lhs, 
> mem_ref_lhs, mask);

Nit: long line.

> +  }
> +
> +return NULL;
> +  }
>  };
>  
>  class svld1ro_impl : public load_replicate
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index f650abbc4ce..47810fec804 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -23969,6 +23969,35 @@ aarch64_evpc_sve_tbl (struct expand_vec_perm_d *d)
>return true;
>  }
>  
> +/* Try to implement D using SVE dup instruction.  */
> +
> +static bool
> +aarch64_evpc_sve_dup (struct expand_vec_perm_d *d)
> +{
> +  if (BYTES_BIG_ENDIAN
> +  || d->perm.length ().is_constant ()
> +  || !d->one_vector_p
> +  || d->target == NULL
> +  || d->op0 == NULL

These last two lines mean that we always return false for d->testing_p.
The idea instead is that the return value should be the same for both
d->testing_p and !d->testing_p.  The difference is that for !d->testing_p
we also emit code to do the permute.

> +  || GET_MODE_NUNITS (GET_MODE (d->target)).is_constant ()

Sorry, I've forgotten the context now, but: these positive tests
for is_constant surprised me.  Do we really only want to do this
for variable-length SVE code generation, rather than fixed-length?

> +  || !GET_MODE_NUNITS (GET_MODE (d->op0)).is_constant ())
> +return false;
> +
> +  if (d->testing_p)
> +return true;

This should happen after the later tests, once we're sure that the
permute vector has the right form.  If the issue is that op0 isn't
provided for testing then I think the hook needs to be passed the
input mode alongside the result mode.

It might then be better to test:

  aarch64_classify_vector_mode (...input_mode...) == VEC_ADVSIMD

(des

[PATCH 2/2] xtensa: Reflect the 32-bit Integer Divide Option

2022-05-06 Thread Takayuki 'January June' Suwa via Gcc-patches

On Espressif's ESP8266 (based on Tensilica LX106, no hardware divider),
this patch reduces the size of each:

  __moddi3() @ libgcc.a :  969 -> 301 (saves 668)
  __divmoddi4() : 1111 -> 426 (saves 685)
  __udivmoddi4(): 1043 -> 319 (saves 724)

in bytes, respectively.

gcc/ChangeLog:

* config/xtensa/xtensa.h (TARGET_HAS_NO_HW_DIVIDE): New macro
definition.
---
 gcc/config/xtensa/xtensa.h | 5 +
 1 file changed, 5 insertions(+)

diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h
index 00e2930b30a..d25594f0c1f 100644
--- a/gcc/config/xtensa/xtensa.h
+++ b/gcc/config/xtensa/xtensa.h
@@ -75,6 +75,11 @@ along with GCC; see the file COPYING3.  If not see
 #define HAVE_AS_TLS 0
 #endif

+/* Define this if the target has no hardware divide instructions.  */
+#if !TARGET_DIV32
+#define TARGET_HAS_NO_HW_DIVIDE
+#endif
+
 
 /* Target CPU builtins.  */
 #define TARGET_CPU_CPP_BUILTINS()  \
--
2.20.1


[PATCH 1/2] xtensa: Rename deprecated extv/extzv insn patterns to extvsi/extzvsi

2022-05-06 Thread Takayuki 'January June' Suwa via Gcc-patches

These patterns were deprecated since GCC 4.8.

gcc/ChangeLog:

* config/xtensa/xtensa.md (extvsi, extvsi_internal, extzvsi,
extzvsi_internal): Rename from extv, extv_internal, extzv and
extzv_internal, respectively.
---
 gcc/config/xtensa/xtensa.md | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 3b61e5d4097..96e043b26b5 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -631,7 +631,7 @@
 
 ;; Field extract instructions.

-(define_expand "extv"
+(define_expand "extvsi"
   [(set (match_operand:SI 0 "register_operand" "")
(sign_extract:SI (match_operand:SI 1 "register_operand" "")
 (match_operand:SI 2 "const_int_operand" "")
@@ -646,12 +646,12 @@
   if (!lsbitnum_operand (operands[3], SImode))
 FAIL;

-  emit_insn (gen_extv_internal (operands[0], operands[1],
-   operands[2], operands[3]));
+  emit_insn (gen_extvsi_internal (operands[0], operands[1],
+ operands[2], operands[3]));
   DONE;
 })

-(define_insn "extv_internal"
+(define_insn "extvsi_internal"
   [(set (match_operand:SI 0 "register_operand" "=a")
(sign_extract:SI (match_operand:SI 1 "register_operand" "r")
 (match_operand:SI 2 "sext_fldsz_operand" "i")
@@ -666,7 +666,7 @@
(set_attr "mode"  "SI")
(set_attr "length""3")])

-(define_expand "extzv"
+(define_expand "extzvsi"
   [(set (match_operand:SI 0 "register_operand" "")
(zero_extract:SI (match_operand:SI 1 "register_operand" "")
 (match_operand:SI 2 "const_int_operand" "")
@@ -675,12 +675,12 @@
 {
   if (!extui_fldsz_operand (operands[2], SImode))
 FAIL;
-  emit_insn (gen_extzv_internal (operands[0], operands[1],
-operands[2], operands[3]));
+  emit_insn (gen_extzvsi_internal (operands[0], operands[1],
+  operands[2], operands[3]));
   DONE;
 })

-(define_insn "extzv_internal"
+(define_insn "extzvsi_internal"
   [(set (match_operand:SI 0 "register_operand" "=a")
(zero_extract:SI (match_operand:SI 1 "register_operand" "r")
 (match_operand:SI 2 "extui_fldsz_operand" "i")
--
2.20.1


Re: [PATCH][GCC 13] Enable match.pd dumping with -fdump-tree-original

2022-05-06 Thread Richard Biener via Gcc-patches
On Fri, Mar 25, 2022 at 4:08 PM Alex Coplan via Gcc-patches
 wrote:
>
> Hi,
>
> I noticed that, while the C/C++ frontends invoke the GENERIC match.pd
> simplifications to do early folding, the debug output from
> generic-match.cc does not appear in the -fdump-tree-original output,
> even with -fdump-tree-original-folding or -fdump-tree-original-all. This
> patch fixes that.
>
> For example, before the patch, for the following code:
>
> int a[2];
> void bar ();
> void f()
> {
> if ((unsigned long)(a + 1) == 0)
> bar ();
> }
>
> on AArch64 at -O0, -fdump-tree-original-all would give:
>
> ;; Function f (null)
> ;; enabled by -tree-original
>
>
> {
>   if (0)
> {
>   bar ();
> }
> }
>
> After the patch, we get:
>
> Applying pattern match.pd:3774, generic-match.cc:24535
> Matching expression match.pd:146, generic-match.cc:23
> Applying pattern match.pd:5638, generic-match.cc:13388
>
> ;; Function f (null)
> ;; enabled by -tree-original
>
>
> {
>   if (0)
> {
>   bar ();
> }
> }
>
> The reason we don't get the match.pd output as it stands, is that the
> original dump is treated specially in c-opts.cc: it gets its own state
> which is independent from that used by other dump files in the compiler.
> Like most of the compiler, the generated generic-match.cc has code of
> the form:
>
>   if (dump_file && (dump_flags & TDF_FOLDING))
> fprintf (dump_file, ...);
>
> But, as it stands, -fdump-tree-original has its own FILE * and flags in
> c-opts.cc (original_dump_{file,flags}) and never touches the global
> dump_{file,flags} (managed by dumpfile.{h,cc}). This patch adjusts the
> code in c-opts.cc to use the main dump infrastructure used by the rest
> of the compiler, instead of treating the original dump specially.
>
> We take the opportunity to make a small refactor: the code in
> c-gimplify.cc:c_genericize can, with this change, use the global dump
> infrastructure to get the original dump file and flags instead of using
> the bespoke get_dump_info function implemented in c-opts.cc. With this
> change, we remove the only use of get_dump_info, so this can be removed.
>
> Note that we also fix a leak of the original dump file in
> c_common_parse_file. I originally thought it might be possible to
> achieve this with only one static call to dump_finish () (by simply
> moving it earlier in the loop), but unfortunately the dump file is
> required to be open while c_parse_final_cleanups runs, as we (e.g.)
> perform some template instantiations here for C++, which need to appear
> in the original dump file.
>
> We adjust cgraph_node::get_create to avoid introducing noise in the
> original dump file: without this, these "Introduced new external node"
> lines start appearing in the original dump files, which breaks tests
> that do a scan-tree-dump-times on the original dump looking for a
> certain function name.
>
> Bootstrapped/regtested on aarch64-linux-gnu, OK for GCC 13?

Thanks for tackling this - the only part that I don't like is the cgraph.cc
one.   Can't we instead gate the dumping on symtab->state != PARSING
or symtab->state > CONSTRUCTION?

Richard.

> Thanks,
> Alex
>
> gcc/c-family/ChangeLog:
>
> * c-common.h (get_dump_info): Delete.
> * c-gimplify.cc (c_genericize): Get TDI_original dump file info
> from the global dump_manager instead of the (now obsolete)
> get_dump_info.
> * c-opts.cc (original_dump_file): Delete.
> (original_dump_flags): Delete.
> (c_common_parse_file): Switch to using global dump_manager to
> manage the original dump file; fix leak of dump file.
> (get_dump_info): Delete.
>
> gcc/ChangeLog:
>
> * cgraph.cc (cgraph_node::get_create): Don't dump if the current
> dump file is that of -fdump-tree-original.


Re: [PATCH v2 2/2] [PR100106] Reject unaligned subregs when strict alignment is required

2022-05-06 Thread Alexandre Oliva via Gcc-patches
On May  5, 2022, Segher Boessenkool  wrote:

> On Thu, May 05, 2022 at 08:59:21AM +0100, Richard Sandiford wrote:
>> Alexandre Oliva via Gcc-patches  writes:
>> I know this is the best being the enemy of the good, but given
>> that we're at the start of stage 1, would it be feasible to try
>> to get rid of (subreg (mem)) altogether for GCC 13?

> Yes please!

I'm not sure this is what you two had in mind, but the news I have is
not great.  With this patch, x86_64 has some regressions in vector
testcases (*), and ppc64le doesn't bootstrap (tsan_interface_atomic.o
ends up with a nil SET_DEST in split all insns).  aarch64 is still
building stage2.

I'm not sure this is enough.  IIRC register allocation modifies in place
pseudos that can't be assigned to hard registers, turning them into
MEMs.  If that's so, SUBREGs of such pseudos will silently become
SUBREGs of MEMs, and I don't know that they are validated again and, if
so, what happens to those that fail validation.

I kind of feel that this is more than I can tackle ATM, so I'd
appreciate if someone else would take this up and drive this transition.


Disallow SUBREG of MEM

Introduce TARGET_ALLOW_SUBREG_OF_MEM, defaulting to 0.

Reject SUBREG of MEM regardless of alignment, unless the macro is
defined to nonzero.


for  gcc/ChangeLog

PR target/100106
* emit-rtl.cc (validate_subreg) [!TARGET_ALLOW_SUBREG_OF_MEM]:
Reject SUBREG of MEM.
---
 gcc/emit-rtl.cc |8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/gcc/emit-rtl.cc b/gcc/emit-rtl.cc
index 9c03e27894fff..f055179b3b8a6 100644
--- a/gcc/emit-rtl.cc
+++ b/gcc/emit-rtl.cc
@@ -983,8 +983,12 @@ validate_subreg (machine_mode omode, machine_mode imode,
   return subreg_offset_representable_p (regno, imode, offset, omode);
 }
   /* Do not allow SUBREG with stricter alignment than the inner MEM.  */
-  else if (reg && MEM_P (reg) && STRICT_ALIGNMENT
-  && MEM_ALIGN (reg) < GET_MODE_ALIGNMENT (omode))
+  else if (reg && MEM_P (reg)
+#if TARGET_ALLOW_SUBREG_OF_MEM /* ??? Reject them all eventually.  */
+  && STRICT_ALIGNMENT
+  && MEM_ALIGN (reg) < GET_MODE_ALIGNMENT (omode)
+#endif
+  )
 return false;
 
   /* The outer size must be ordered wrt the register size, otherwise



(*) here are the x86_64 regressions introduced by the patch:

+ FAIL: gcc.target/i386/avx-2.c (internal compiler error: in gen_rtx_SUBREG, at 
emit-rtl.cc:1030)
+ FAIL: gcc.target/i386/avx-2.c (test for excess errors)
+ FAIL: gcc.target/i386/sse-14.c (internal compiler error: in gen_rtx_SUBREG, 
at emit-rtl.cc:1030)
+ FAIL: gcc.target/i386/sse-14.c (test for excess errors)
+ FAIL: gcc.target/i386/sse-22.c (internal compiler error: in gen_rtx_SUBREG, 
at emit-rtl.cc:1030)
+ FAIL: gcc.target/i386/sse-22.c (test for excess errors)
+ FAIL: gcc.target/i386/sse-22a.c (internal compiler error: in gen_rtx_SUBREG, 
at emit-rtl.cc:1030)
+ FAIL: gcc.target/i386/sse-22a.c (test for excess errors)

-- 
Alexandre Oliva, happy hacker                https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


Re: [Patch] OpenMP, libgomp: Add new runtime routine omp_target_is_accessible.

2022-05-06 Thread Marcel Vollweiler

Hi Jakub,

Am 05.05.2022 um 11:33 schrieb Jakub Jelinek:

On Mon, Mar 14, 2022 at 04:42:14PM +0100, Marcel Vollweiler wrote:

--- a/libgomp/libgomp.map
+++ b/libgomp/libgomp.map
@@ -226,6 +226,11 @@ OMP_5.1 {
 omp_get_teams_thread_limit_;
  } OMP_5.0.2;

+OMP_5.1.1 {
+  global:
+omp_target_is_accessible;
+} OMP_5.1;
+


You've already added another OMP_5.1.1 symbol, so this hunk will need to be
adjusted.  Keep the names in there alphabetically sorted.


Adjusted.


--- a/libgomp/omp_lib.f90.in
+++ b/libgomp/omp_lib.f90.in
@@ -835,6 +835,16 @@
end function omp_target_disassociate_ptr
  end interface

+interface
+  function omp_target_is_accessible (ptr, size, device_num) bind(c)
+use, intrinsic :: iso_c_binding, only : c_ptr, c_size_t, c_int
+integer(c_int) :: omp_target_is_accessible


The function returning integer(c_int) rather than logical seems like
a screw up in the standard, but too late to fix that :(.


--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -3666,6 +3666,24 @@ omp_target_disassociate_ptr (const void *ptr, int 
device_num)
  }

  int
+omp_target_is_accessible (const void *ptr, size_t size, int device_num)
+{
+  if (device_num < 0 || device_num > gomp_get_num_devices ())
+return false;
+
+  if (device_num == gomp_get_num_devices ())
+return true;
+
+  struct gomp_device_descr *devicep = resolve_device (device_num);
+  if (devicep == NULL)
+return false;
+
+  /* TODO: Unified shared memory must be handled when available.  */
+
+  return devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM;


I guess for now it is reasonable, but I wonder if even without
GOMP_OFFLOAD_CAP_SHARED_MEM one can't for CUDA or GCN allocate host
memory (not all, but just some subset) that will be accessible on the
device (I bet that means accessible through the same address on the host and
device, aka partial shared mem).


Currently, I am only aware of

(a) physically shared memory which is used for some architectures where CPU and
GPU are close together (handled via GOMP_OFFLOAD_CAP_SHARED_MEM) and
(b) unified shared memory as being more a logical memory sharing via managed
memory (using sth. like cudaMallocManaged).

For (b) I will submit a follow up patch very soon that depends on the submitted
but not yet approved/committed usm patches:
   https://gcc.gnu.org/pipermail/gcc-patches/2022-March/591349.html



So, ok for trunk.

OT, tried to look how libomptarget implements it and they don't at least
on llvm-project trunk, but while looking at that, noticed that for a NULL
pointer they return false from omp_target_is_present while we return true.
It is unclear if NULL has corresponding storage
on the device (NULL always corresponds to NULL on the device) or not.


That's indeed an interesting point. I am not sure whether returning "true" for a
given NULL pointer is the desired behaviour for omp_target_is_present. For the
host that might be ok (for whatever reason) but for offload devices this implies
that NULL is actually mapped to some address on the device (as far as I
understand the definition):

"The omp_target_is_present routine tests whether a host pointer refers to
storage that is mapped to a given device."

I don't know if such a "NULL mapping" is valid/useful.

Marcel
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955


[PATCH] OpenMP, libgomp: Handle unified shared memory in omp_target_is_accessible.

2022-05-06 Thread Marcel Vollweiler

Hi,

This is a follow-up to the patch that adds the OpenMP runtime routine
omp_target_is_accessible:

   https://gcc.gnu.org/pipermail/gcc-patches/2022-March/591601.html

It now also handles unified shared memory (USM), support for which was
submitted recently (but not yet approved/committed):
   https://gcc.gnu.org/pipermail/gcc-patches/2022-March/591349.html

Marcel
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
OpenMP, libgomp: Handle unified shared memory in omp_target_is_accessible.

libgomp/ChangeLog:

* target.c (omp_target_is_accessible): Handle unified shared memory.
* testsuite/libgomp.c-c++-common/target-is-accessible-1.c: Updated.
* testsuite/libgomp.fortran/target-is-accessible-1.f90: Updated.
* testsuite/libgomp.c-c++-common/target-is-accessible-2.c: New test.
* testsuite/libgomp.fortran/target-is-accessible-2.f90: New test.

diff --git a/libgomp/target.c b/libgomp/target.c
index 74a031f..e6d00c5 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -3909,9 +3909,13 @@ omp_target_is_accessible (const void *ptr, size_t size, 
int device_num)
   if (devicep == NULL)
 return false;
 
-  /* TODO: Unified shared memory must be handled when available.  */
+  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+return true;
 
-  return devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM;
+  if (devicep->is_usm_ptr_func && devicep->is_usm_ptr_func ((void *) ptr))
+return true;
+
+  return false;
 }
 
 int
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-is-accessible-1.c 
b/libgomp/testsuite/libgomp.c-c++-common/target-is-accessible-1.c
index 7c2cf62..e3f494b 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/target-is-accessible-1.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/target-is-accessible-1.c
@@ -23,23 +23,28 @@ main ()
   if (omp_target_is_accessible (p, sizeof (int), n + 1))
 __builtin_abort ();
 
-  /* Currently, a host pointer is accessible if the device supports shared
- memory or omp_target_is_accessible is executed on the host. This
- test case must be adapted when unified shared memory is avialable.  */
   int a[128];
   for (int d = 0; d <= omp_get_num_devices (); d++)
 {
+  /* SHARED_MEM is 1 if and only if host and device share the same memory.
+OMP_TARGET_IS_ACCESSIBLE should not return 0 for shared memory.  */
   int shared_mem = 0;
   #pragma omp target map (alloc: shared_mem) device (d)
shared_mem = 1;
-  if (omp_target_is_accessible (p, sizeof (int), d) != shared_mem)
+
+  if (shared_mem && !omp_target_is_accessible (p, sizeof (int), d))
+   __builtin_abort ();
+
+  /* USM is disabled by default.  Hence OMP_TARGET_IS_ACCESSIBLE should
+return 0 if shared_mem is false.  */
+  if (!shared_mem && omp_target_is_accessible (p, sizeof (int), d))
__builtin_abort ();
 
-  if (omp_target_is_accessible (a, 128 * sizeof (int), d) != shared_mem)
+  if (shared_mem && !omp_target_is_accessible (a, 128 * sizeof (int), d))
__builtin_abort ();
 
   for (int i = 0; i < 128; i++)
-   if (omp_target_is_accessible (&a[i], sizeof (int), d) != shared_mem)
+   if (shared_mem && !omp_target_is_accessible (&a[i], sizeof (int), d))
  __builtin_abort ();
 }
 
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-is-accessible-2.c 
b/libgomp/testsuite/libgomp.c-c++-common/target-is-accessible-2.c
new file mode 100644
index 000..24af51f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/target-is-accessible-2.c
@@ -0,0 +1,22 @@
+/* { dg-do run } */
+/* { dg-skip-if "USM is only implemented for nvptx." { ! offload_target_nvptx 
} } */
+
+#include 
+#include 
+
+#pragma omp requires unified_shared_memory
+
+int
+main ()
+{
+  int *a = (int *) omp_alloc (sizeof(int), ompx_unified_shared_mem_alloc);
+  if (!a)
+__builtin_abort ();
+
+  for (int d = 0; d <= omp_get_num_devices (); d++)
+if (!omp_target_is_accessible (a, sizeof (int), d))
+  __builtin_abort ();
+
+  omp_free(a, ompx_unified_shared_mem_alloc);
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.fortran/target-is-accessible-1.f90 
b/libgomp/testsuite/libgomp.fortran/target-is-accessible-1.f90
index 2611855..015f74a 100644
--- a/libgomp/testsuite/libgomp.fortran/target-is-accessible-1.f90
+++ b/libgomp/testsuite/libgomp.fortran/target-is-accessible-1.f90
@@ -1,3 +1,5 @@
+! { dg-do run }
+
 program main
   use omp_lib
   use iso_c_binding
@@ -25,24 +27,28 @@ program main
   if (omp_target_is_accessible (p, c_sizeof (d), n + 1) /= 0) &
 stop 4
 
-  ! Currently, a host pointer is accessible if the device supports shared
-  ! memory or omp_target_is_accessible is executed on the host. This
-  ! test case must be adapted w

[PATCH, OpenMP] Implement uses_allocators clause for target regions

2022-05-06 Thread Chung-Lin Tang

Hi Jakub,
this patch implements the uses_allocators clause for OpenMP target regions.

For user defined allocator handles, this allows target regions to assign
memory space and traits to allocators, and automatically calls
omp_init/destroy_allocator() in the beginning/end of the target region.

For pre-defined allocators (i.e. omp_..._mem_alloc names), this is a no-op,
such clauses are not created.

Aside from the front-end portions, the target region transforms are
done in gimplify_omp_workshare.

This patch also includes changes to enforce the rule that an "allocate
allocator must also be in a uses_allocators clause", as mentioned in [1].
This is done during gimplify_scan_omp_clauses.

[1] https://gcc.gnu.org/pipermail/gcc-patches/2022-May/594039.html

Tested on mainline, please see if this is okay.

Thanks,
Chung-Lin

2022-05-06  Chung-Lin Tang  

gcc/c-family/ChangeLog:

* c-omp.cc (c_omp_split_clauses): Add OMP_CLAUSE_USES_ALLOCATORS case.
* c-pragma.h (enum pragma_omp_clause): Add 
PRAGMA_OMP_CLAUSE_USES_ALLOCATORS.

gcc/c/ChangeLog:

* c-parser.cc (c_parser_omp_clause_name): Add case for uses_allocators
clause.
(c_parser_omp_clause_uses_allocators): New function.
(c_parser_omp_all_clauses): Add PRAGMA_OMP_CLAUSE_USES_ALLOCATORS case.
(OMP_TARGET_CLAUSE_MASK): Add PRAGMA_OMP_CLAUSE_USES_ALLOCATORS to mask.
* c-typeck.cc (c_finish_omp_clauses): Add case handling for
OMP_CLAUSE_USES_ALLOCATORS.

gcc/cp/ChangeLog:

* parser.cc (cp_parser_omp_clause_name): Add case for uses_allocators
clause.
(cp_parser_omp_clause_uses_allocators): New function.
(cp_parser_omp_all_clauses): Add PRAGMA_OMP_CLAUSE_USES_ALLOCATORS case.
(OMP_TARGET_CLAUSE_MASK): Add PRAGMA_OMP_CLAUSE_USES_ALLOCATORS to mask.
* semantics.cc (finish_omp_clauses): Add case handling for
OMP_CLAUSE_USES_ALLOCATORS.

fortran/ChangeLog:

* gfortran.h (struct gfc_omp_namelist): Add memspace_sym, traits_sym
fields.
(OMP_LIST_USES_ALLOCATORS): New list enum.
* openmp.cc (enum omp_mask2): Add OMP_CLAUSE_USES_ALLOCATORS.
(gfc_match_omp_clause_uses_allocators): New function.
(gfc_match_omp_clauses): Add case to handle OMP_CLAUSE_USES_ALLOCATORS.
(OMP_TARGET_CLAUSES): Add OMP_CLAUSE_USES_ALLOCATORS.
(resolve_omp_clauses): Add "USES_ALLOCATORS" to clause_names[].
* trans-array.cc (gfc_conv_array_initializer): Adjust array index
to always be a created tree expression instead of NULL_TREE when zero.
* trans-openmp.cc (gfc_trans_omp_clauses): For ALLOCATE clause, handle
using gfc_trans_omp_variable for EXPR_VARIABLE exprs.
Add handling of OMP_LIST_USES_ALLOCATORS case.
* types.def (BT_FN_VOID_PTRMODE): Define.
(BT_FN_PTRMODE_PTRMODE_INT_PTR): Define.

gcc/ChangeLog:

* builtin-types.def (BT_FN_VOID_PTRMODE): Define.
(BT_FN_PTRMODE_PTRMODE_INT_PTR): Define.
* omp-builtins.def (BUILT_IN_OMP_INIT_ALLOCATOR): Define.
(BUILT_IN_OMP_DESTROY_ALLOCATOR): Define.
* tree-core.h (enum omp_clause_code): Add OMP_CLAUSE_USES_ALLOCATORS.
* tree-pretty-print.cc (dump_omp_clause): Handle 
OMP_CLAUSE_USES_ALLOCATORS.
* tree.h (OMP_CLAUSE_USES_ALLOCATORS_ALLOCATOR): New macro.
(OMP_CLAUSE_USES_ALLOCATORS_MEMSPACE): New macro.
(OMP_CLAUSE_USES_ALLOCATORS_TRAITS): New macro.
* tree.cc (omp_clause_num_ops): Add OMP_CLAUSE_USES_ALLOCATORS.
(omp_clause_code_name): Add "uses_allocators".

* gimplify.cc (gimplify_scan_omp_clauses): Add checking of OpenMP target
region allocate clauses, to require a uses_allocators clause to exist
for allocators.
(gimplify_omp_workshare): Add handling of OMP_CLAUSE_USES_ALLOCATORS
for OpenMP target regions; create calls of omp_init/destroy_allocator
around target region body.

gcc/testsuite/ChangeLog:

* c-c++-common/gomp/uses_allocators-1.c: New test.
* c-c++-common/gomp/uses_allocators-2.c: New test.
* gfortran.dg/gomp/uses_allocators-1.f90: New test.
* gfortran.dg/gomp/uses_allocators-2.f90: New test.
* gfortran.dg/gomp/uses_allocators-3.f90: New test.
diff --git a/gcc/builtin-types.def b/gcc/builtin-types.def
index 3a7cecdf087..be3e6ff697e 100644
--- a/gcc/builtin-types.def
+++ b/gcc/builtin-types.def
@@ -283,6 +283,7 @@ DEF_FUNCTION_TYPE_1 (BT_FN_DFLOAT32_DFLOAT32, BT_DFLOAT32, 
BT_DFLOAT32)
 DEF_FUNCTION_TYPE_1 (BT_FN_DFLOAT64_DFLOAT64, BT_DFLOAT64, BT_DFLOAT64)
 DEF_FUNCTION_TYPE_1 (BT_FN_DFLOAT128_DFLOAT128, BT_DFLOAT128, BT_DFLOAT128)
 DEF_FUNCTION_TYPE_1 (BT_FN_VOID_VPTR, BT_VOID, BT_VOLATILE_PTR)
+DEF_FUNCTION_TYPE_1 (BT_FN_VOID_PTRMODE, BT_VOID, BT_PTRMODE)
 DEF_FUNCTION_TYPE_1 (BT_FN_VOID_PTRPTR, BT_VOID, BT_PTR_PTR)
 DEF_FUNCTION_TYPE_1 (BT_FN_VOID_CONST_PTR, BT_VOID, BT_CONST_PTR)
 DEF_FUNCTION_TYPE_1 (BT_FN_UINT_UINT, BT_UI

Re: [PATCH] libstdc++: Update documentation about copyright and GPL notices in tests

2022-05-06 Thread Jonathan Wakely via Gcc-patches
I've pushed this to trunk now.

On Thu, 28 Apr 2022 at 18:02, Jonathan Wakely  wrote:
>
> On Thu, 28 Apr 2022 at 17:45, Koning, Paul via Libstdc++
>  wrote:
> >
> >
> >
> > > On Apr 28, 2022, at 8:37 AM, Jonathan Wakely via Gcc-patches 
> > >  wrote:
> > >
> > > I intend to commit this patch soon. This isn't changing the policy, just
> > > adjusting the docs to match the current policy.
> > >
> > > I'm open to suggestions for better ways to phrase the second sentence,
> > > clarifying that our tests generally have nothing novel or "authored".
> > >
> > > -- >8 --
> > >
> > > There is no need to require FSF copyright for tests that are just
> > > "self-evident" ways to check the API and behaviour of the library.
> > > This is consistent with tests for the compiler, which do not have
> > > copyright and licence notices either.
> >
> > So is the theory that "self-evident" documents are in the public domain for 
> > that reason?
>
> Yes.
>
> Let's look at a test I added this week:
> libstdc++-v3/testsuite/30_threads/packaged_task/cons/deduction.cc
> It has a copyright notice because (as I said in the commit log) it was
> copied from an existing test that has one. But what part of that file
> constituted original authorship? That code does nothing useful, it
> doesn't even link. All it does is construct objects and verify that
> the compiler deduced the correct type, which verifies that the library
> has defined the deduction guides correctly.
>
> Let's look at another one:
> libstdc++-v3/testsuite/20_util/unique_ptr/comparison/constexpr.cc
> What part of this is copyrightable? Is it where I create some
> variables, or performs a series of repetitive and redundant
> comparisons on them, or both?
> This could almost be machine generated, and I assert that it's not
> meaningful or useful or sensible to consider it as a copyrighted work.
> So I didn't bother putting the notices on this one.
>
> >  Or is the policy that for such file it is fine for the copyright to be 
> > held by the author (which is the default when no assignment is made)?  And 
> > a similar question applies to the license aspect also.
> >
> > I think I understand the intent, and that seems to make sense, but I'm 
> > wondering if it has been verified by the appropriate FSF IP lawyers.
>
> If there's a concern, why haven't they raised it for the compiler's
> own testsuite? Why should libstdc++ tests have copyright notices or
> GPL notices when gcc tests don't?
>
> I count 83 *.[cChm] files under gcc/testsuite with a GPL notice, out
> of some 64 THOUSAND files. The number with FSF copyright notices is
> around 1100, e.g. gcc/testsuite/gcc.target/sparc/ultrasp2.c is
> copyright FSF, but that seems ludicrous (yes, I know it says it's
> simplified from another file which is copyright FSF, but so what ... a
> left shift operation is not copyrightable).



[committed] libstdc++: Do not use #include inside push visibility scope [PR99871]

2022-05-06 Thread Jonathan Wakely via Gcc-patches
Tested powerpc64le-linux, pushed to trunk.

-- >8 --

libstdc++-v3/ChangeLog:

PR libstdc++/99871
* include/bits/specfun.h: Use visibility attribute on namespace,
instead of pragma push/pop.
* libsupc++/compare: Likewise.
* libsupc++/exception: Likewise.
* libsupc++/exception.h: Likewise.
* libsupc++/exception_ptr.h: Likewise.
* libsupc++/initializer_list: Likewise.
* libsupc++/nested_exception.h: Likewise.
---
 libstdc++-v3/include/bits/specfun.h   | 4 
 libstdc++-v3/libsupc++/compare| 6 +-
 libstdc++-v3/libsupc++/exception  | 8 ++--
 libstdc++-v3/libsupc++/exception.h| 6 +-
 libstdc++-v3/libsupc++/exception_ptr.h| 6 +-
 libstdc++-v3/libsupc++/initializer_list   | 6 +-
 libstdc++-v3/libsupc++/nested_exception.h | 8 +---
 7 files changed, 7 insertions(+), 37 deletions(-)

diff --git a/libstdc++-v3/include/bits/specfun.h 
b/libstdc++-v3/include/bits/specfun.h
index cac350217b8..a12b04b3ea7 100644
--- a/libstdc++-v3/include/bits/specfun.h
+++ b/libstdc++-v3/include/bits/specfun.h
@@ -30,8 +30,6 @@
 #ifndef _GLIBCXX_BITS_SPECFUN_H
 #define _GLIBCXX_BITS_SPECFUN_H 1
 
-#pragma GCC visibility push(default)
-
 #include 
 
 #define __STDCPP_MATH_SPEC_FUNCS__ 201003L
@@ -1385,6 +1383,4 @@ _GLIBCXX_END_NAMESPACE_VERSION
 } // namespace __gnu_cxx
 #endif // __STRICT_ANSI__
 
-#pragma GCC visibility pop
-
 #endif // _GLIBCXX_BITS_SPECFUN_H
diff --git a/libstdc++-v3/libsupc++/compare b/libstdc++-v3/libsupc++/compare
index e9cf9139def..066867e9ce1 100644
--- a/libstdc++-v3/libsupc++/compare
+++ b/libstdc++-v3/libsupc++/compare
@@ -34,15 +34,13 @@
 
 #if __cplusplus > 201703L && __cpp_impl_three_way_comparison >= 201907L
 
-#pragma GCC visibility push(default)
-
 #include 
 
 #if __cpp_lib_concepts
 # define __cpp_lib_three_way_comparison 201907L
 #endif
 
-namespace std
+namespace std _GLIBCXX_VISIBILITY(default)
 {
   // [cmp.categories], comparison category types
 
@@ -1239,8 +1237,6 @@ namespace std
 #endif // concepts
 } // namespace std
 
-#pragma GCC visibility pop
-
 #endif // C++20
 
 #endif // _COMPARE
diff --git a/libstdc++-v3/libsupc++/exception b/libstdc++-v3/libsupc++/exception
index ae2b0dd7f78..24c91292a16 100644
--- a/libstdc++-v3/libsupc++/exception
+++ b/libstdc++-v3/libsupc++/exception
@@ -32,14 +32,12 @@
 
 #pragma GCC system_header
 
-#pragma GCC visibility push(default)
-
 #include 
 #include 
 
 extern "C++" {
 
-namespace std
+namespace std _GLIBCXX_VISIBILITY(default)
 {
   /** @addtogroup exceptions
*  @{
@@ -135,7 +133,7 @@ namespace std
   /// @} group exceptions
 } // namespace std
 
-namespace __gnu_cxx
+namespace __gnu_cxx _GLIBCXX_VISIBILITY(default)
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
@@ -162,8 +160,6 @@ _GLIBCXX_END_NAMESPACE_VERSION
 
 } // extern "C++"
 
-#pragma GCC visibility pop
-
 #if (__cplusplus >= 201103L)
 #include 
 #include 
diff --git a/libstdc++-v3/libsupc++/exception.h 
b/libstdc++-v3/libsupc++/exception.h
index 7d905184c64..1df02bddc1b 100644
--- a/libstdc++-v3/libsupc++/exception.h
+++ b/libstdc++-v3/libsupc++/exception.h
@@ -33,13 +33,11 @@
 
 #pragma GCC system_header
 
-#pragma GCC visibility push(default)
-
 #include 
 
 extern "C++" {
 
-namespace std
+namespace std _GLIBCXX_VISIBILITY(default)
 {
   /**
* @defgroup exceptions Exceptions
@@ -82,6 +80,4 @@ namespace std
 
 }
 
-#pragma GCC visibility pop
-
 #endif
diff --git a/libstdc++-v3/libsupc++/exception_ptr.h 
b/libstdc++-v3/libsupc++/exception_ptr.h
index 6433f059e9c..21c53f686ba 100644
--- a/libstdc++-v3/libsupc++/exception_ptr.h
+++ b/libstdc++-v3/libsupc++/exception_ptr.h
@@ -31,8 +31,6 @@
 #ifndef _EXCEPTION_PTR_H
 #define _EXCEPTION_PTR_H
 
-#pragma GCC visibility push(default)
-
 #include 
 #include 
 #include 
@@ -51,7 +49,7 @@
 
 extern "C++" {
 
-namespace std 
+namespace std _GLIBCXX_VISIBILITY(default)
 {
   class type_info;
 
@@ -283,6 +281,4 @@ namespace std
 
 } // extern "C++"
 
-#pragma GCC visibility pop
-
 #endif
diff --git a/libstdc++-v3/libsupc++/initializer_list 
b/libstdc++-v3/libsupc++/initializer_list
index 79d32b219b4..efa1f164cde 100644
--- a/libstdc++-v3/libsupc++/initializer_list
+++ b/libstdc++-v3/libsupc++/initializer_list
@@ -36,11 +36,9 @@
 # include 
 #else // C++0x
 
-#pragma GCC visibility push(default)
-
 #include 
 
-namespace std
+namespace std _GLIBCXX_VISIBILITY(default)
 {
   /// initializer_list
   template
@@ -102,8 +100,6 @@ namespace std
 { return __ils.end(); }
 }
 
-#pragma GCC visibility pop
-
 #endif // C++11
 
 #endif // _INITIALIZER_LIST
diff --git a/libstdc++-v3/libsupc++/nested_exception.h 
b/libstdc++-v3/libsupc++/nested_exception.h
index 6f0d5399708..002a54e9fef 100644
--- a/libstdc++-v3/libsupc++/nested_exception.h
+++ b/libstdc++-v3/libsupc++/nested_exception.h
@@ -30,18 +30,15 @@
 #ifndef _GLIBCXX_NESTED_EXCEPTION_H
 #define _GLIBCXX_NESTED_EXCEPTION_H 1
 
-#pragma GCC visibility push(default)
-
 #if __

[committed] libstdc++: Do not include in

2022-05-06 Thread Jonathan Wakely via Gcc-patches
Tested powerpc64le-linux, pushed to trunk.

-- >8 --

This avoids polluting the global namespace with the "abi" namespace
alias.

libstdc++-v3/ChangeLog:

* include/std/stacktrace: Do not include .
(__cxa_demangle): Declare.
---
 libstdc++-v3/include/std/stacktrace | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/std/stacktrace 
b/libstdc++-v3/include/std/stacktrace
index 98ce9231150..8e6c79a2f4f 100644
--- a/libstdc++-v3/include/std/stacktrace
+++ b/libstdc++-v3/include/std/stacktrace
@@ -38,7 +38,6 @@
 #include 
 #include 
 #include 
-#include 
 
 struct __glibcxx_backtrace_state;
 struct __glibcxx_backtrace_simple_data;
@@ -70,6 +69,13 @@ __glibcxx_backtrace_syminfo(__glibcxx_backtrace_state*, 
uintptr_t addr,
void*);
 }
 
+namespace __cxxabiv1
+{
+  extern "C" char*
+  __cxa_demangle(const char* __mangled_name, char* __output_buffer,
+size_t* __length, int* __status);
+}
+
 namespace std _GLIBCXX_VISIBILITY(default)
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
-- 
2.34.1



[committed] libstdc++: Fix test that fails on Solaris [PR104731]

2022-05-06 Thread Jonathan Wakely via Gcc-patches
On Solaris the dirent::d_name member is a single char, causing this test
to fail with warnings about buffer overflow. Change the test to use a
union with additional space for writing a string to the d_name member.

libstdc++-v3/ChangeLog:

PR libstdc++/104731
* testsuite/27_io/filesystem/iterators/error_reporting.cc:
Use a trailing char array as storage for dirent::d_name.
---
 .../filesystem/iterators/error_reporting.cc   | 35 ---
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git 
a/libstdc++-v3/testsuite/27_io/filesystem/iterators/error_reporting.cc 
b/libstdc++-v3/testsuite/27_io/filesystem/iterators/error_reporting.cc
index 1f297a731a3..1c8ea4c9249 100644
--- a/libstdc++-v3/testsuite/27_io/filesystem/iterators/error_reporting.cc
+++ b/libstdc++-v3/testsuite/27_io/filesystem/iterators/error_reporting.cc
@@ -28,35 +28,44 @@
 
 int choice;
 
-struct dirent global_dirent;
-
 extern "C" struct dirent* readdir(DIR*)
 {
+  // On some targets dirent::d_name is very small, but the OS allocates
+  // a trailing char array after the dirent struct. Emulate that here.
+  union State
+  {
+struct dirent d;
+char buf[sizeof(struct dirent) + 16] = {};
+  };
+
+  static State state;
+  char* d_name = state.buf + offsetof(struct dirent, d_name);
+
   switch (choice)
   {
   case 1:
-global_dirent.d_ino = 999;
+state.d.d_ino = 999;
 #if defined _GLIBCXX_HAVE_STRUCT_DIRENT_D_TYPE && defined DT_REG
-global_dirent.d_type = DT_REG;
+state.d.d_type = DT_REG;
 #endif
-global_dirent.d_reclen = 0;
-std::char_traits::copy(global_dirent.d_name, "file", 5);
+state.d.d_reclen = 0;
+std::char_traits::copy(d_name, "file", 5);
 choice = 0;
-return &global_dirent;
+return &state.d;
   case 2:
-global_dirent.d_ino = 111;
+state.d.d_ino = 111;
 #if defined _GLIBCXX_HAVE_STRUCT_DIRENT_D_TYPE && defined DT_DIR
-global_dirent.d_type = DT_DIR;
+state.d.d_type = DT_DIR;
 #endif
-global_dirent.d_reclen = 60;
-std::char_traits::copy(global_dirent.d_name, "subdir", 7);
+state.d.d_reclen = 60;
+std::char_traits::copy(d_name, "subdir", 7);
 choice = 1;
-return &global_dirent;
+return &state.d;
   default:
 errno = EIO;
 return nullptr;
   }
-  return &global_dirent;
+  return &state.d;
 }
 
 void
-- 
2.34.1



[patch][gcc13][i386][pr101891]Adjust -fzero-call-used-regs to always use XOR

2022-05-06 Thread Qing Zhao via Gcc-patches
Hi,

As Kees requested in this PR:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101891

=

Currently -fzero-call-used-regs will use a pattern of:

XOR regA,regA
MOV regA,regB
MOV regA,regC
...
RET

However, this introduces both a register ordering dependency (e.g. the CPU 
cannot clear regB without clearing regA first), and while greatly reduces 
available ROP gadgets, it does technically leave a set of "MOV" ROP gadgets at 
the end of functions (e.g. "MOV regA,regC; RET").

Please switch to always using XOR:

XOR regA,regA
XOR regB,regB
XOR regC,regC
...
RET

===

This patch switches all MOV to XOR on i386.

Bootstrapped and regression tested on x86_64-linux-gnu.

Okay for gcc13? 

Thanks.

Qing

==



0001-change-mov-to-xor-fix-all-testing-cases.patch
Description: 0001-change-mov-to-xor-fix-all-testing-cases.patch


Re: [PATCH] libsanitizer: cherry-pick commit b226894d475b from upstream

2022-05-06 Thread H.J. Lu via Gcc-patches
On Thu, May 5, 2022 at 2:02 PM H.J. Lu  wrote:
>
> cherry-pick:
>
> b226894d475b [sanitizer] [sanitizer] Correct GetTls for x32
> ---
>  libsanitizer/sanitizer_common/sanitizer_linux_libcdep.cpp | 4 
>  1 file changed, 4 insertions(+)
>
> diff --git a/libsanitizer/sanitizer_common/sanitizer_linux_libcdep.cpp 
> b/libsanitizer/sanitizer_common/sanitizer_linux_libcdep.cpp
> index d966d857a76..620267cdd02 100644
> --- a/libsanitizer/sanitizer_common/sanitizer_linux_libcdep.cpp
> +++ b/libsanitizer/sanitizer_common/sanitizer_linux_libcdep.cpp
> @@ -462,7 +462,11 @@ static void GetTls(uptr *addr, uptr *size) {
>  #elif SANITIZER_GLIBC && defined(__x86_64__)
>// For aarch64 and x86-64, use an O(1) approach which requires relatively
>// precise ThreadDescriptorSize. g_tls_size was initialized in InitTlsSize.
> +#  if SANITIZER_X32
> +  asm("mov %%fs:8,%0" : "=r"(*addr));
> +#  else
>asm("mov %%fs:16,%0" : "=r"(*addr));
> +#  endif
>*size = g_tls_size;
>*addr -= *size;
>*addr += ThreadDescriptorSize();
> --
> 2.35.1
>

I am backporting this to GCC 12.

-- 
H.J.


Re: [PATCH] x86: Add missing .note.GNU-stack to assembly source

2022-05-06 Thread H.J. Lu via Gcc-patches
On Mon, May 2, 2022 at 11:37 AM H.J. Lu  wrote:
>
> On Fri, Apr 29, 2022 at 10:38 AM H.J. Lu  wrote:
> >
> > Add .note.GNU-stack assembly source to avoid linker warning:
> >
> > ld: warning: /tmp/ccPZSZ7Z.o: missing .note.GNU-stack section implies 
> > executable stack
> > ld: NOTE: This behaviour is deprecated and will be removed in a future 
> > version of the linker
> > FAIL: gcc.target/i386/iamcu/test_3_element_struct_and_unions.c compilation, 
> >  -O0
> >
> > PR testsuite/105433
> > * gcc.target/i386/iamcu/asm-support.S: Add .note.GNU-stack.
> > * gcc.target/x86_64/abi/asm-support.S: Likewise.
> > * gcc.target/x86_64/abi/avx/asm-support.S: Likewise.
> > * gcc.target/x86_64/abi/avx512f/asm-support.S: Likewise.
> > * gcc.target/x86_64/abi/avx512fp16/asm-support.S: Likewise.
> > * gcc.target/x86_64/abi/avx512fp16/m256h/asm-support.S: Likewise.
> > * gcc.target/x86_64/abi/avx512fp16/m512h/asm-support.S: Likewise.
> > * gcc.target/x86_64/abi/ms-sysv/do-test.S: Likewise.
> > ---
> >  gcc/testsuite/gcc.target/i386/iamcu/asm-support.S| 1 +
> >  gcc/testsuite/gcc.target/x86_64/abi/asm-support.S| 1 +
> >  gcc/testsuite/gcc.target/x86_64/abi/avx/asm-support.S| 1 +
> >  gcc/testsuite/gcc.target/x86_64/abi/avx512f/asm-support.S| 1 +
> >  gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/asm-support.S | 1 +
> >  .../gcc.target/x86_64/abi/avx512fp16/m256h/asm-support.S | 1 +
> >  .../gcc.target/x86_64/abi/avx512fp16/m512h/asm-support.S | 1 +
> >  gcc/testsuite/gcc.target/x86_64/abi/ms-sysv/do-test.S| 1 +
> >  8 files changed, 8 insertions(+)
> >
> > diff --git a/gcc/testsuite/gcc.target/i386/iamcu/asm-support.S 
> > b/gcc/testsuite/gcc.target/i386/iamcu/asm-support.S
> > index b4a4a140e54..db08f52a34f 100644
> > --- a/gcc/testsuite/gcc.target/i386/iamcu/asm-support.S
> > +++ b/gcc/testsuite/gcc.target/i386/iamcu/asm-support.S
> > @@ -300,3 +300,4 @@ iamcu_noprintf:
> > .align 4
> >  .LCiamcu_noprintf1:
> > .long   1132527616
> > +   .section.note.GNU-stack,"",@progbits
> > diff --git a/gcc/testsuite/gcc.target/x86_64/abi/asm-support.S 
> > b/gcc/testsuite/gcc.target/x86_64/abi/asm-support.S
> > index 7a8ed03d119..2f8d3a09c6b 100644
> > --- a/gcc/testsuite/gcc.target/x86_64/abi/asm-support.S
> > +++ b/gcc/testsuite/gcc.target/x86_64/abi/asm-support.S
> > @@ -82,3 +82,4 @@ snapshot_ret:
> > .comm   xmm_regs,256,32
> > .comm   x87_regs,128,32
> > .comm   volatile_var,8,8
> > +   .section.note.GNU-stack,"",@progbits
> > diff --git a/gcc/testsuite/gcc.target/x86_64/abi/avx/asm-support.S 
> > b/gcc/testsuite/gcc.target/x86_64/abi/avx/asm-support.S
> > index 73a59191d6d..77b3480ac32 100644
> > --- a/gcc/testsuite/gcc.target/x86_64/abi/avx/asm-support.S
> > +++ b/gcc/testsuite/gcc.target/x86_64/abi/avx/asm-support.S
> > @@ -79,3 +79,4 @@ snapshot_ret:
> > .comm   ymm_regs,512,32
> > .comm   x87_regs,128,32
> > .comm   volatile_var,8,8
> > +   .section.note.GNU-stack,"",@progbits
> > diff --git a/gcc/testsuite/gcc.target/x86_64/abi/avx512f/asm-support.S 
> > b/gcc/testsuite/gcc.target/x86_64/abi/avx512f/asm-support.S
> > index 0ef82876dd9..2e3306c44cb 100644
> > --- a/gcc/testsuite/gcc.target/x86_64/abi/avx512f/asm-support.S
> > +++ b/gcc/testsuite/gcc.target/x86_64/abi/avx512f/asm-support.S
> > @@ -95,3 +95,4 @@ snapshot_ret:
> > .comm   zmm_regs,2048,64
> > .comm   x87_regs,128,32
> > .comm   volatile_var,8,8
> > +   .section.note.GNU-stack,"",@progbits
> > diff --git a/gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/asm-support.S 
> > b/gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/asm-support.S
> > index 7849acd2649..0793acf048b 100644
> > --- a/gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/asm-support.S
> > +++ b/gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/asm-support.S
> > @@ -79,3 +79,4 @@ snapshot_ret:
> > .comm   xmm_regs,256,32
> > .comm   x87_regs,128,32
> > .comm   volatile_var,8,8
> > +   .section.note.GNU-stack,"",@progbits
> > diff --git 
> > a/gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/m256h/asm-support.S 
> > b/gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/m256h/asm-support.S
> > index 73a59191d6d..77b3480ac32 100644
> > --- a/gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/m256h/asm-support.S
> > +++ b/gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/m256h/asm-support.S
> > @@ -79,3 +79,4 @@ snapshot_ret:
> > .comm   ymm_regs,512,32
> > .comm   x87_regs,128,32
> > .comm   volatile_var,8,8
> > +   .section.note.GNU-stack,"",@progbits
> > diff --git 
> > a/gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/m512h/asm-support.S 
> > b/gcc/testsuite/gcc.target/x86_64/abi/avx512fp16/m512h/asm-support.S
> > index 0ef82876dd9..2e3306c44cb 100644
> > --- a/gc

Re: [PATCH] lto-plugin: add support for feature detection

2022-05-06 Thread Alexander Monakov



On Thu, 5 May 2022, Martin Liška wrote:

> On 5/5/22 12:52, Alexander Monakov wrote:
> > Feels a bit weird to ask, but before entertaining such an API extension,
> > can we step back and understand the v3 variant of get_symbols? It is not
> > documented, and from what little I saw I did not get the "motivation" for
> > its existence (what it is doing that couldn't be done with the v2 api).
> 
> Please see here:
> https://github.com/rui314/mold/issues/181#issuecomment-1037927757

Thanks. I've also re-read [1] and [2] which provided some relevant ideas.

[1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86490
[2] https://sourceware.org/bugzilla/show_bug.cgi?id=23411


OK, so the crux of the issue is that sometimes the linker needs to feed the
compiler plugin with LTO .o files extracted from static archives. This is
not really obvious, because normally .a archives have an index that enumerates
symbols defined/used by its .o files, and even during LTO the linker can simply
consult the index to find out which members to extract.  In theory, at least.

The theory breaks in the following cases:

 - ld.bfd and common symbols (I wonder if weak/comdat code is also affected?):
 archive index does not indicate which definitions are common, so ld.bfd
 extracts the member and feeds it to the plugin to find out;

 - ld.gold and emulated archives via --start-lib a.o b.o ... --end-lib: here
 there's no index to consult and ld.gold feeds each .o to the plugin.

In those cases it may happen that the linker extracts an .o file that would
not be extracted during non-LTO link, and if that happens, the linker needs to
inform the plugin. This is not the same as marking each symbol from spuriously
extracted .o file as PREEMPTED when the .o file has constructors (the plugin
will assume the constructors are kept while the linker needs to discard them).

So get_symbols_v3 allows the linker to discard an LTO .o file to solve this.

In the absence of get_symbols_v3, mold tries to ensure correctness by restarting
itself while appending a list of .o files to be discarded to its command line.

I wonder if mold can invoke plugin cleanup callback to solve this without
restarting.

(also, hm, it seems to confirm my idea that LTO .o files should have had the
correct symbol table so normal linker algorithms would work)

Hopefully this was useful.
Alexander


Re: [PATCH] x86: Add missing .note.GNU-stack to assembly source

2022-05-06 Thread Rainer Orth
Hi H.J,

> On Mon, May 2, 2022 at 11:37 AM H.J. Lu  wrote:
>>
>> On Fri, Apr 29, 2022 at 10:38 AM H.J. Lu  wrote:
>> >
>> > Add .note.GNU-stack assembly source to avoid linker warning:
>> >
>> > ld: warning: /tmp/ccPZSZ7Z.o: missing .note.GNU-stack section implies 
>> > executable stack
>> > ld: NOTE: This behaviour is deprecated and will be removed in a future 
>> > version of the linker
>> > FAIL: gcc.target/i386/iamcu/test_3_element_struct_and_unions.c 
>> > compilation,  -O0
>> >
>> > PR testsuite/105433
>> > * gcc.target/i386/iamcu/asm-support.S: Add .note.GNU-stack.
>> > * gcc.target/x86_64/abi/asm-support.S: Likewise.
>> > * gcc.target/x86_64/abi/avx/asm-support.S: Likewise.
>> > * gcc.target/x86_64/abi/avx512f/asm-support.S: Likewise.
>> > * gcc.target/x86_64/abi/avx512fp16/asm-support.S: Likewise.
>> > * gcc.target/x86_64/abi/avx512fp16/m256h/asm-support.S: Likewise.
>> > * gcc.target/x86_64/abi/avx512fp16/m512h/asm-support.S: Likewise.
>> > * gcc.target/x86_64/abi/ms-sysv/do-test.S: Likewise.
[...]
> I am backporting this to release branches.

Wait: please fix PR target/105472 first rather than causing massive
breakage on release branches, too.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


[PATCH] aarch64: remove useless GPF_TF_F16 iterator

2022-05-06 Thread Christophe Lyon via Gcc-patches
This iterator is not used anymore and is a leftover from previous
cleanup (r10-5890-gabbe1ed2735517).

2022-04-28  Christophe Lyon  

gcc/
* config/aarch64/iterators.md (GPF_TF_F16): Delete.
---
 gcc/config/aarch64/iterators.md | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index e72fdf35a82..88af964de7e 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -63,9 +63,6 @@ (define_mode_iterator GPF_HF [HF SF DF])
 ;; Iterator for all 16-bit scalar floating point modes (HF, BF)
 (define_mode_iterator HFBF [HF BF])
 
-;; Iterator for all scalar floating point modes (HF, SF, DF and TF)
-(define_mode_iterator GPF_TF_F16 [HF SF DF TF])
-
 ;; Iterator for all scalar floating point modes suitable for moving, including
 ;; special BF type (HF, SF, DF, TF and BF)
 (define_mode_iterator GPF_TF_F16_MOV [HF BF SF DF TF])
-- 
2.25.1



[PATCH] c++: constexpr init of union sub-aggr w/ base [PR105491]

2022-05-06 Thread Patrick Palka via Gcc-patches
Here ever since r10-7313-gb599bf9d6d1e18, reduced_constant_expression_p
in C++11/14 is rejecting the marked sub-aggregate initializer (of type S)

  W w = {.D.2445={.s={.D.2387={.m=0}, .b=0}}}
 ^

ultimately because said initializer has CONSTRUCTOR_NO_CLEARING set, and
so the function proceeds to verify that all fields of S are initialized.
And before C++17 we don't expect to see base class fields (since
next_initializable_field skips over them), so the base class initializer
causes r_c_e_p to return false.  The base class initializer comes from
the constructor call S::S(int).

The reason this is caused by r10-7313-gb599bf9d6d1e18 is because in that
commit we began using CONSTRUCTOR_NO_CLEARING to also track whether we're
in the middle of activating a union member.  This overloaded use of the flag
affects clear_no_implicit_zero, which recurses into sub-aggregate
initializers only if the outer initializer has CONSTRUCTOR_NO_CLEARING
set.  After that commit, the outer union initializer above no longer has
the flag set at this point and so clear_no_implicit_zero no longer clears
CONSTRUCTOR_NO_CLEARING for the marked inner initializer.

This patch fixes this by restoring the recursive behavior of
clear_no_implicit_zero for union initializers.  Arguably
we could improve reduced_constant_expression_p to accept the marked
initializer in C++11/14 even if it has CONSTRUCTOR_NO_CLEARING set, but
adjusting clear_no_implicit_zero seems safer to backport.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk/12/11/10?

PR c++/105491

gcc/cp/ChangeLog:

* constexpr.cc (clear_no_implicit_zero): Recurse into a union
initializer even if CONSTRUCTOR_NO_CLEARING is already cleared.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/constexpr-union7.C: New test.
* g++.dg/cpp0x/constexpr-union7a.C: New test.
* g++.dg/cpp2a/constinit17.C: New test.
---
 gcc/cp/constexpr.cc   |  7 +-
 gcc/testsuite/g++.dg/cpp0x/constexpr-union7.C | 17 +
 .../g++.dg/cpp0x/constexpr-union7a.C  | 15 
 gcc/testsuite/g++.dg/cpp2a/constinit17.C  | 24 +++
 4 files changed, 62 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/constexpr-union7.C
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/constexpr-union7a.C
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/constinit17.C

diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index 9b1e71857fc..75fecbcbcb7 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -1886,7 +1886,12 @@ cxx_eval_internal_function (const constexpr_ctx *ctx, 
tree t,
 static void
 clear_no_implicit_zero (tree ctor)
 {
-  if (CONSTRUCTOR_NO_CLEARING (ctor))
+  if (CONSTRUCTOR_NO_CLEARING (ctor)
+  /* For a union initializer, the flag could already be cleared but not
+necessarily yet for its sub-aggregates, since for unions the flag is
+also used by cxx_eval_store_expression to track whether we're in the
+middle of activating one of its members.  */
+  || TREE_CODE (TREE_TYPE (ctor)) == UNION_TYPE)
 {
   CONSTRUCTOR_NO_CLEARING (ctor) = false;
   for (auto &e: CONSTRUCTOR_ELTS (ctor))
diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-union7.C 
b/gcc/testsuite/g++.dg/cpp0x/constexpr-union7.C
new file mode 100644
index 000..b3147d9db50
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-union7.C
@@ -0,0 +1,17 @@
+// PR c++/105491
+// { dg-do compile { target c++11 } }
+
+struct V {
+  int m = 0;
+};
+struct S : V {
+  constexpr S(int) : b() { }
+  bool b;
+};
+struct W {
+  constexpr W() : s(0) { }
+  union {
+S s;
+  };
+};
+constexpr W w;
diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-union7a.C 
b/gcc/testsuite/g++.dg/cpp0x/constexpr-union7a.C
new file mode 100644
index 000..b676e7d1748
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-union7a.C
@@ -0,0 +1,15 @@
+// PR c++/105491
+// { dg-do compile { target c++11 } }
+
+struct V {
+  int m = 0;
+};
+struct S : V {
+  constexpr S(int) : b() { }
+  bool b;
+};
+union W {
+  constexpr W() : s(0) { }
+  S s;
+};
+constexpr W w;
diff --git a/gcc/testsuite/g++.dg/cpp2a/constinit17.C 
b/gcc/testsuite/g++.dg/cpp2a/constinit17.C
new file mode 100644
index 000..6431654ac85
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/constinit17.C
@@ -0,0 +1,24 @@
+// PR c++/105491
+// { dg-do compile { target c++11 } }
+
+class Message {
+  virtual int GetMetadata();
+};
+class ProtobufCFileOptions : Message {
+public:
+  constexpr ProtobufCFileOptions(int);
+  bool no_generate_;
+  bool const_strings_;
+  bool use_oneof_field_name_;
+  bool gen_pack_helpers_;
+  bool gen_init_helpers_;
+};
+constexpr ProtobufCFileOptions::ProtobufCFileOptions(int)
+: no_generate_(), const_strings_(), use_oneof_field_name_(),
+  gen_pack_helpers_(), gen_init_helpers_() {}
+struct ProtobufCFileOptionsDefaultTypeInternal {
+  con

RE: [PATCH] aarch64: remove useless GPF_TF_F16 iterator

2022-05-06 Thread Kyrylo Tkachov via Gcc-patches



> -Original Message-
> From: Gcc-patches  bounces+kyrylo.tkachov=arm@gcc.gnu.org> On Behalf Of Christophe
> Lyon via Gcc-patches
> Sent: Friday, May 6, 2022 4:19 PM
> To: gcc-patches@gcc.gnu.org
> Subject: [PATCH] aarch64: remove useless GPF_TF_F16 iterator
> 
> This iterator is not used anymore and is a leftover from previous
> cleanup (r10-5890-gabbe1ed2735517).

Ok. This would count as obvious, in my opinion.
Thanks,
Kyrill

> 
> 2022-04-28  Christophe Lyon  
> 
>   gcc/
>   * config/aarch64/iterators.md (GPF_TF_F16): Delete.
> ---
>  gcc/config/aarch64/iterators.md | 3 ---
>  1 file changed, 3 deletions(-)
> 
> diff --git a/gcc/config/aarch64/iterators.md
> b/gcc/config/aarch64/iterators.md
> index e72fdf35a82..88af964de7e 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -63,9 +63,6 @@ (define_mode_iterator GPF_HF [HF SF DF])
>  ;; Iterator for all 16-bit scalar floating point modes (HF, BF)
>  (define_mode_iterator HFBF [HF BF])
> 
> -;; Iterator for all scalar floating point modes (HF, SF, DF and TF)
> -(define_mode_iterator GPF_TF_F16 [HF SF DF TF])
> -
>  ;; Iterator for all scalar floating point modes suitable for moving, 
> including
>  ;; special BF type (HF, SF, DF, TF and BF)
>  (define_mode_iterator GPF_TF_F16_MOV [HF BF SF DF TF])
> --
> 2.25.1



[PATCH v2] Enable match.pd dumping with -fdump-tree-original

2022-05-06 Thread Alex Coplan via Gcc-patches
This is a respin of:
https://gcc.gnu.org/pipermail/gcc-patches/2022-March/592307.html
that implements Richard's suggestion around the cgraph.cc change.
Otherwise the patch is as before.

Bootstrapped and regtested on aarch64-linux-gnu, OK for trunk?

Thanks,
Alex

--

I noticed that, while the C/C++ frontends invoke the GENERIC match.pd
simplifications to do early folding, the debug output from
generic-match.cc does not appear in the -fdump-tree-original output,
even with -fdump-tree-original-folding or -fdump-tree-original-all. This
patch fixes that.

For example, before the patch, for the following code:

int a[2];
void bar ();
void f()
{
if ((unsigned long)(a + 1) == 0)
bar ();
}

on AArch64 at -O0, -fdump-tree-original-all would give:

;; Function f (null)
;; enabled by -tree-original


{
  if (0)
{
  bar ();
}
}

After the patch, we get:

Applying pattern match.pd:3774, generic-match.cc:24535
Matching expression match.pd:146, generic-match.cc:23
Applying pattern match.pd:5638, generic-match.cc:13388

;; Function f (null)
;; enabled by -tree-original


{
  if (0)
{
  bar ();
}
}

The reason we don't get the match.pd output as it stands, is that the
original dump is treated specially in c-opts.cc: it gets its own state
which is independent from that used by other dump files in the compiler.
Like most of the compiler, the generated generic-match.cc has code of
the form:

  if (dump_file && (dump_flags & TDF_FOLDING))
fprintf (dump_file, ...);

But, as it stands, -fdump-tree-original has its own FILE * and flags in
c-opts.cc (original_dump_{file,flags}) and never touches the global
dump_{file,flags} (managed by dumpfile.{h,cc}). This patch adjusts the
code in c-opts.cc to use the main dump infrastructure used by the rest
of the compiler, instead of treating the original dump specially.

We take the opportunity to make a small refactor: the code in
c-gimplify.cc:c_genericize can, with this change, use the global dump
infrastructure to get the original dump file and flags instead of using
the bespoke get_dump_info function implemented in c-opts.cc. With this
change, we remove the only use of get_dump_info, so this can be removed.

Note that we also fix a leak of the original dump file in
c_common_parse_file. I originally thought it might be possible to
achieve this with only one static call to dump_finish () (by simply
moving it earlier in the loop), but unfortunately the dump file is
required to be open while c_parse_final_cleanups runs, as we (e.g.)
perform some template instantiations here for C++, which need to appear
in the original dump file.

We adjust cgraph_node::get_create to avoid introducing noise in the
original dump file: without this, these "Introduced new external node"
lines start appearing in the original dump files, which breaks tests
that do a scan-tree-dump-times on the original dump looking for a
certain function name.

gcc/c-family/ChangeLog:

* c-common.h (get_dump_info): Delete.
* c-gimplify.cc (c_genericize): Get TDI_original dump file info
from the global dump_manager instead of the (now obsolete)
get_dump_info.
* c-opts.cc (original_dump_file): Delete.
(original_dump_flags): Delete.
(c_common_parse_file): Switch to using global dump_manager to
manage the original dump file; fix leak of dump file.
(get_dump_info): Delete.

gcc/ChangeLog:

* cgraph.cc (cgraph_node::get_create): Don't dump if the current
symtab state is PARSING.
diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index 52a85bfb783..b829cdbfe28 100644
--- a/gcc/c-family/c-common.h
+++ b/gcc/c-family/c-common.h
@@ -950,7 +950,6 @@ extern bool c_common_post_options (const char **);
 extern bool c_common_init (void);
 extern void c_common_finish (void);
 extern void c_common_parse_file (void);
-extern FILE *get_dump_info (int, dump_flags_t *);
 extern alias_set_type c_common_get_alias_set (tree);
 extern void c_register_builtin_type (tree, const char*);
 extern bool c_promoting_integer_type_p (const_tree);
diff --git a/gcc/c-family/c-gimplify.cc b/gcc/c-family/c-gimplify.cc
index a00b0a02dcc..a6f26c9b0d3 100644
--- a/gcc/c-family/c-gimplify.cc
+++ b/gcc/c-family/c-gimplify.cc
@@ -40,6 +40,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "dumpfile.h"
 #include "c-ubsan.h"
 #include "tree-nested.h"
+#include "context.h"
 
 /*  The gimplification pass converts the language-dependent trees
 (ld-trees) emitted by the parser into language-independent trees
@@ -552,6 +553,7 @@ c_genericize_control_r (tree *stmt_p, int *walk_subtrees, 
void *data)
 void
 c_genericize (tree fndecl)
 {
+  dump_file_info *dfi;
   FILE *dump_orig;
   dump_flags_t local_dump_flags;
   struct cgraph_node *cgn;
@@ -581,7 +583,9 @@ c_genericize (tree fndecl)
  do_warn_duplicated_branches_r, NULL);
 
   /* Dump the C-specific tree IR.  */
-  dump_orig = get_dum

Re: [PATCH] c++: constexpr init of union sub-aggr w/ base [PR105491]

2022-05-06 Thread Jason Merrill via Gcc-patches

On 5/6/22 11:22, Patrick Palka wrote:

Here ever since r10-7313-gb599bf9d6d1e18, reduced_constant_expression_p
in C++11/14 is rejecting the marked sub-aggregate initializer (of type S)

   W w = {.D.2445={.s={.D.2387={.m=0}, .b=0}}}
  ^

ultimately because said initializer has CONSTRUCTOR_NO_CLEARING set, and
so the function proceeds to verify that all fields of S are initialized.
And before C++17 we don't expect to see base class fields (since
next_initializable_field skips over them), so the base class initializer
causes r_c_e_p to return false.


That seems like the primary bug.  I guess r_c_e_p shouldn't be using 
next_initializable_field.  Really that function should only be used for 
aggregates.



The base class initializer comes from
the constructor call S::S(int).

The reason this is caused by r10-7313-gb599bf9d6d1e18 is because in that
commit we began using CONSTRUCTOR_NO_CLEARING to also track whether we're
in the middle of activating a union member.  This overloaded use of the flag
affects clear_no_implicit_zero, which recurses into sub-aggregate
initializers only if the outer initializer has CONSTRUCTOR_NO_CLEARING
set.


Is that really overloaded?  In both union and non-union cases, it 
indicates that the object is not completely initialized.



After that commit, the outer union initializer above no longer has
the flag set at this point and so clear_no_implicit_zero no longer clears
CONSTRUCTOR_NO_CLEARING for the marked inner initializer.


Why wasn't it cleared for the inner initializer as part of that evaluation?


This patch fixes this by restoring the recursive behavior of
clear_no_implicit_zero for union initializers.  Arguably
we could improve reduced_constant_expression_p to accept the marked
initializer in C++11/14 even if it has CONSTRUCTOR_NO_CLEARING set, but
adjusting clear_no_implicit_zero seems safer to backport.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk/12/11/10?

PR c++/105491

gcc/cp/ChangeLog:

* constexpr.cc (clear_no_implicit_zero): Recurse into a union
initializer even if CONSTRUCTOR_NO_CLEARING is already cleared.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/constexpr-union7.C: New test.
* g++.dg/cpp0x/constexpr-union7a.C: New test.
* g++.dg/cpp2a/constinit17.C: New test.
---
  gcc/cp/constexpr.cc   |  7 +-
  gcc/testsuite/g++.dg/cpp0x/constexpr-union7.C | 17 +
  .../g++.dg/cpp0x/constexpr-union7a.C  | 15 
  gcc/testsuite/g++.dg/cpp2a/constinit17.C  | 24 +++
  4 files changed, 62 insertions(+), 1 deletion(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp0x/constexpr-union7.C
  create mode 100644 gcc/testsuite/g++.dg/cpp0x/constexpr-union7a.C
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/constinit17.C

diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index 9b1e71857fc..75fecbcbcb7 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -1886,7 +1886,12 @@ cxx_eval_internal_function (const constexpr_ctx *ctx, 
tree t,
  static void
  clear_no_implicit_zero (tree ctor)
  {
-  if (CONSTRUCTOR_NO_CLEARING (ctor))
+  if (CONSTRUCTOR_NO_CLEARING (ctor)
+  /* For a union initializer, the flag could already be cleared but not
+necessarily yet for its sub-aggregates, since for unions the flag is
+also used by cxx_eval_store_expression to track whether we're in the
+middle of activating one of its members.  */
+  || TREE_CODE (TREE_TYPE (ctor)) == UNION_TYPE)
  {
CONSTRUCTOR_NO_CLEARING (ctor) = false;
for (auto &e: CONSTRUCTOR_ELTS (ctor))
diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-union7.C 
b/gcc/testsuite/g++.dg/cpp0x/constexpr-union7.C
new file mode 100644
index 000..b3147d9db50
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-union7.C
@@ -0,0 +1,17 @@
+// PR c++/105491
+// { dg-do compile { target c++11 } }
+
+struct V {
+  int m = 0;
+};
+struct S : V {
+  constexpr S(int) : b() { }
+  bool b;
+};
+struct W {
+  constexpr W() : s(0) { }
+  union {
+S s;
+  };
+};
+constexpr W w;
diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-union7a.C 
b/gcc/testsuite/g++.dg/cpp0x/constexpr-union7a.C
new file mode 100644
index 000..b676e7d1748
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-union7a.C
@@ -0,0 +1,15 @@
+// PR c++/105491
+// { dg-do compile { target c++11 } }
+
+struct V {
+  int m = 0;
+};
+struct S : V {
+  constexpr S(int) : b() { }
+  bool b;
+};
+union W {
+  constexpr W() : s(0) { }
+  S s;
+};
+constexpr W w;
diff --git a/gcc/testsuite/g++.dg/cpp2a/constinit17.C 
b/gcc/testsuite/g++.dg/cpp2a/constinit17.C
new file mode 100644
index 000..6431654ac85
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/constinit17.C
@@ -0,0 +1,24 @@
+// PR c++/105491
+// { dg-do compile { target c++11 } }
+
+class Message {
+  virtual int GetMetadata();
+};
+class ProtobufCFileOptions : Mess

Re: [patch][gcc13][i386][pr101891]Adjust -fzero-call-used-regs to always use XOR

2022-05-06 Thread Uros Bizjak via Gcc-patches
On Fri, May 6, 2022 at 4:29 PM Qing Zhao  wrote:
>
> Hi,
>
> As Kees requested in this PR:
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101891
>
> =
>
> Currently -fzero-call-used-regs will use a pattern of:
>
> XOR regA,regA
> MOV regA,regB
> MOV regA,regC
> ...
> RET
>
> However, this introduces both a register ordering dependency (e.g. the CPU 
> cannot clear regB without clearing regA first), and while greatly reduces 
> available ROP gadgets, it does technically leave a set of "MOV" ROP gadgets 
> at the end of functions (e.g. "MOV regA,regC; RET").
>
> Please switch to always using XOR:
>
> XOR regA,regA
> XOR regB,regB
> XOR regC,regC
> ...
> RET
>
> ===
>
> This patch switches all MOV to XOR on i386.
>
> Bootstrapped and regression tested on x86_64-linux-gnu.
>
> Okay for gcc13?
>
> Thanks.
>
> Qing
>
> ==

> gcc/ChangeLog:
>
> * config/i386/i386.cc (zero_all_mm_registers): Use SET to zero instead
> of MOV for zeroing scratch registers.
> (ix86_zero_call_used_regs): Likewise.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/zero-scratch-regs-1.c: Add -fno-stack-protector
> -fno-PIC.
> * gcc.target/i386/zero-scratch-regs-10.c: Adjust mov to xor.
> * gcc.target/i386/zero-scratch-regs-13.c: Add -msse.
> * gcc.target/i386/zero-scratch-regs-14.c: Adjust mov to xor.
> * gcc.target/i386/zero-scratch-regs-15.c: Add -fno-stack-protector
> -fno-PIC.
> * gcc.target/i386/zero-scratch-regs-16.c: Likewise.
> * gcc.target/i386/zero-scratch-regs-17.c: Likewise.
> * gcc.target/i386/zero-scratch-regs-18.c: Add -fno-stack-protector
> -fno-PIC, adjust mov to xor.
> * gcc.target/i386/zero-scratch-regs-19.c: Add -fno-stack-protector
> -fno-PIC.
> * gcc.target/i386/zero-scratch-regs-2.c: Adjust mov to xor.
> * gcc.target/i386/zero-scratch-regs-20.c: Add -msse.
> * gcc.target/i386/zero-scratch-regs-21.c: Add -fno-stack-protector
> -fno-PIC, Adjust mov to xor.
> * gcc.target/i386/zero-scratch-regs-22.c: Adjust mov to xor.
> * gcc.target/i386/zero-scratch-regs-23.c: Likewise.
> * gcc.target/i386/zero-scratch-regs-26.c: Likewise.
> * gcc.target/i386/zero-scratch-regs-27.c: Likewise.
> * gcc.target/i386/zero-scratch-regs-28.c: Likewise.
> * gcc.target/i386/zero-scratch-regs-3.c: Add -fno-stack-protector.
> * gcc.target/i386/zero-scratch-regs-31.c: Adjust mov to xor.
> * gcc.target/i386/zero-scratch-regs-4.c: Add -fno-stack-protector
> -fno-PIC.
> * gcc.target/i386/zero-scratch-regs-5.c: Adjust mov to xor.
> * gcc.target/i386/zero-scratch-regs-6.c: Add -fno-stack-protector.
> * gcc.target/i386/zero-scratch-regs-7.c: Likewise.
> * gcc.target/i386/zero-scratch-regs-8.c: Adjust mov to xor.
> * gcc.target/i386/zero-scratch-regs-9.c: Add -fno-stack-protector.

Please use something like the attached (functionally equivalent) patch
for the last hunk of your patch.

Also, if possible, please use V2SImode as a generic MMX mode instead
of V4HImode.

OK with the above changes.

Thanks,
Uros.
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index b16df5b183e..87220278d33 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -3923,59 +3923,32 @@ ix86_zero_call_used_regs (HARD_REG_SET 
need_zeroed_hardregs)
 
   SET_HARD_REG_BIT (zeroed_hardregs, regno);
 
-  rtx reg, tmp, zero_rtx;
   machine_mode mode = zero_call_used_regno_mode (regno);
 
-  reg = gen_rtx_REG (mode, regno);
-  zero_rtx = CONST0_RTX (mode);
+  rtx reg = gen_rtx_REG (mode, regno);
+  rtx tmp = gen_rtx_SET (reg, CONST0_RTX (mode));
 
-  if (mode == SImode)
-   if (zero_gpr == NULL_RTX)
- {
-   zero_gpr = reg;
-   tmp = gen_rtx_SET (reg, zero_rtx);
-   if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
- {
-   rtx clob = gen_rtx_CLOBBER (VOIDmode,
-   gen_rtx_REG (CCmode,
-FLAGS_REG));
-   tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
-tmp,
-clob));
- }
-   emit_insn (tmp);
- }
-   else
- emit_move_insn (reg, zero_gpr);
-  else if (mode == V4SFmode)
-   if (zero_vector == NULL_RTX)
- {
-   zero_vector = reg;
-   tmp = gen_rtx_SET (reg, zero_rtx);
-   emit_insn (tmp);
- }
-   else
- emit_move_insn (reg, zero_vector);
-  else if (mode == HImode)
-   if (zero_mask == NULL_RTX)
- {
-   zero_mask = reg;
-   tmp = gen_rtx_SET (reg, zero_rtx);
-   emit_insn (tmp);
- }
-   else
- emit_move_insn (reg, zero_mask);
-  else if (mode == V4HImode)
-   if (zero_mmx == NULL_RTX)
- {
-   zero_mmx = reg;
-   tmp = gen_rtx_SET (reg, zero_rtx);
-   emit_insn (tmp);
- }
-   else
-

Re: [PATCH] c++: constexpr init of union sub-aggr w/ base [PR105491]

2022-05-06 Thread Marek Polacek via Gcc-patches
On Fri, May 06, 2022 at 11:56:30AM -0400, Jason Merrill via Gcc-patches wrote:
> On 5/6/22 11:22, Patrick Palka wrote:
> > Here ever since r10-7313-gb599bf9d6d1e18, reduced_constant_expression_p
> > in C++11/14 is rejecting the marked sub-aggregate initializer (of type S)
> > 
> >W w = {.D.2445={.s={.D.2387={.m=0}, .b=0}}}
> >   ^
> > 
> > ultimately because said initializer has CONSTRUCTOR_NO_CLEARING set, and
> > so the function proceeds to verify that all fields of S are initialized.
> > And before C++17 we don't expect to see base class fields (since
> > next_initializable_field skips over them), so the base class initializer
> > causes r_c_e_p to return false.
> 
> That seems like the primary bug.  I guess r_c_e_p shouldn't be using
> next_initializable_field.  Really that function should only be used for
> aggregates.

Can we please make a note to this effect in the n_i_f comment?

Thanks,
Marek



Re: [PATCH v2] libstdc++: ppc: conditionalize vsx-only simd intrinsics

2022-05-06 Thread Segher Boessenkool
On Mon, May 02, 2022 at 11:00:02PM -0300, Alexandre Oliva wrote:
> On May  2, 2022, Segher Boessenkool  wrote:
> > Send full patches always please.
> 
> I'll try to remember that.  In case I fail, I hope you won't mind too
> much reminding me.
> 
> (You'd also asked me not to send patches as followups, but...  revised
> versions of a patch still belong in the same thread, right?)

No.  This makes it much harder to keep versions apart, even worse if you
use patchwork or similar.

The mail with the patch should be the head of a mail thread (or just
below the head if that is a cover mail), and everything below that is
discussion related to that patch.  The mail with the patch should be
ready to be committed as-is, so it should not have mangled whitespace
or encoding, should have proper commit message, etc.  This a) makes it
possible to review what will actually be committed, and b) it makes it
possible for someone else to commit it (and that includes patch test
systems, often mistakenly called CI or CD, which are completely
different things).

> libstdc++'s bits/simd.h section for PPC (Altivec) defines various

(Spelling: PowerPC, AltiVec.  But this isn't about AltiVec really
anyway?)

> intrinsic vector types that are only available along with VSX: 64-bit
> long double, double, (un)signed long long, and 64-bit (un)signed long.

> +#if defined __VSX__ || __LONG_WIDTH__ == 32
>  _GLIBCXX_SIMD_PPC_INTRIN(signed long);
>  _GLIBCXX_SIMD_PPC_INTRIN(unsigned long);
> +#endif

Is __LONG_WIDTH__ the right macro to use here?  Nothing else in
libstdc++v3 uses it.  "__CHAR_BIT__ * __SIZEOF_LONG__" is the usual
thing to do.  Is __LONG_WIDTH__ always defined anyway?

> @@ -2450,10 +2456,11 @@ template 
>  static constexpr bool _S_is_ldouble = is_same_v<_Tp, long double>;
>  // allow _Tp == long double with -mlong-double-64
>  static_assert(!(_S_is_ldouble && sizeof(long double) > sizeof(double)),
> -   "no __intrinsic_type support for long double on PPC");
> +   "no __intrinsic_type support for 128-bit floating point on 
> PPC");

You might do s/PPC/PowerPC/ at the same time.

Okay for trunk with __LONG_WIDTH__ dealt with.  Okay for the branches
a week or so after that.  Thanks!


Segher


Re: [PATCH, OpenMP] Implement uses_allocators clause for target regions

2022-05-06 Thread Tobias Burnus

Hi Chung-Lin,

thanks for the patch – and some comments from my side.

On 06.05.22 15:20, Chung-Lin Tang wrote:

For user defined allocator handles, this allows target regions to assign
memory space and traits to allocators, and automatically calls
omp_init/destroy_allocator() in the beginning/end of the target region.


Can you please also handle the new clause in Fortran's dump-parse-tree.cc?

I did see some split handling in C, but not in Fortran; do you also need
to update gfc_split_omp_clauses in Fortran's trans-openmp.cc?

Actually, glancing at the testcases, no combined construct (like
"omp target parallel") is used, I think that would be useful because of ↑.


+/* OpenMP 5.2:
+   uses_allocators ( allocator-list )

That's not completely true: uses_allocators is OpenMP 5.1.
However, 5.1 only supports (for non-predefined allocators):
   uses_allocators( allocator(traits) )
while OpenMP 5.2 added modifiers:
   uses_allocators( traits(...), memspace(...) : allocator )
and deprecated the 5.1 'allocator(traits)'. (Scheduled for removal in OMP 6.0)

The advantage of 5.2 syntax is that a memory space can be defined.

BTW: This makes uses_allocators the first OpenMP 5.2 feature which
will make it into GCC :-)


gcc/fortran/openmp.cc:

+  if (gfc_get_symbol ("omp_allocator_handle_kind", NULL, &sym)
+  || !sym->value
+  || sym->value->expr_type != EXPR_CONSTANT
+  || sym->value->ts.type != BT_INTEGER)
+{
+  gfc_error ("OpenMP % constant not found by "
+  "% clause at %C");
+  goto error;
+}
+  allocator_handle_kind = sym;

I think you rather want to use
  gfc_find_symbol ("omp_...", NULL, true, &sym)
  || sym == NULL
where true is for parent_flag to search also the parent namespace.
(The function returns 1 if the symbol is ambiguous, 0 otherwise -
including 0 + sym == NULL when the symbol could not be found.)

  || sym->attr.flavor != FL_PARAMETER
  || sym->ts.type != BT_INTEGER
  || sym->attr.dimension

Looks cleaner than to access sym->value. The attr.dimension is just
to make sure the user did not smuggle an array into this.
(Invalid as omp_... is a reserved namespace but users will still do
this and some are good in finding ICE as hobby.)

 * * *

However, I fear that will fail for the following two examples (both untested):

  use omp_lib, my_kind = omp_allocator_handle_kind
  integer(my_kind) :: my_allocator

as this gives 'my_kind' in the symtree->name (while symtree->n.sym->name is 
"omp_...").
Hence, by searching the symtree for 'omp_...' the symbol will not be found.


It will likely also fail for the following more realistic example:

module m
  use omp_lib
  implicit none
  private
  integer(omp_allocator_handle_kind), public :: my_allocator
  type(omp_alloctrait), public, parameter :: my_traits(*) = [...]
end module

subroutine foo
  use m
  use omp_lib, only: omp_alloctrait
  implicit none
  ! currently, same scope is required - makes sense for C and 'const' but
  ! not for Fortran's parameters; restriction might be lifted/clarified in
  ! next OpenMP version:
  type(omp_alloctrait), parameter :: traits_array(*) = my_traits
  integer :: A(200)
  A = 0
  !$omp target uses_allocators(my_allocator(traits_array)) allocate(my_allocator:A) firstprivate(A)
 ...
  !$omp end target
end

In this case, omp_allocator_handle_kind is not in the namespace of 'foo'
but the code should be still valid. Thus, an alternative would be to hard-code
the value - as done for the depobj. As we have:

integer, parameter :: omp_allocator_handle_kind = c_intptr_t
integer, parameter :: omp_memspace_handle_kind = c_intptr_t

that would be
   sym->ts.type == BT_INTEGER
   sym->ts.kind == gfc_index_integer_kind
for the allocator variable and the memspace kind.

However, I grant that neither example is very typical. The second one is more
natural – such a code will very likely be written in the real world. But not
with uses_allocators but rather with "!$omp requires dynamic_allocators" and
omp_init_allocator().

Thoughts?

* * *

gcc/fortran/openmp.cc

+  if (++i > 2)
+ {
+   gfc_error ("Only two modifiers are allowed on % "
+  "clause at %C");
+   goto error;
+ }
+


Is this really needed? There is a check for multiple traits and multiple 
memspace
Thus, 'trait(),memspace(),trait()' is already handled and
'trait(),something' gives a break and will lead to an error as in that case
a ':' and not ',something' is expected.


+  if (gfc_match_char ('(') == MATCH_YES)
+ {
+   if (memspace_seen || traits_seen)
+ {
+   gfc_error ("Modifiers cannot be used with legacy "
+  "array syntax at %C");

I wouldn't use the term 'array syntax' to denote
  uses_allocators(allocator (alloc_array) )
How about:
  error: "Using both modifiers and allocator variable with traits argument"

(And I think 'deprecated' is better than 'legacy', if we really want to use it.)



+   if (traits_sym->ts.ty

Re: [PATCH] c++: constexpr init of union sub-aggr w/ base [PR105491]

2022-05-06 Thread Patrick Palka via Gcc-patches
On Fri, 6 May 2022, Jason Merrill wrote:

> On 5/6/22 11:22, Patrick Palka wrote:
> > Here ever since r10-7313-gb599bf9d6d1e18, reduced_constant_expression_p
> > in C++11/14 is rejecting the marked sub-aggregate initializer (of type S)
> > 
> >W w = {.D.2445={.s={.D.2387={.m=0}, .b=0}}}
> >   ^
> > 
> > ultimately because said initializer has CONSTRUCTOR_NO_CLEARING set, and
> > so the function proceeds to verify that all fields of S are initialized.
> > And before C++17 we don't expect to see base class fields (since
> > next_initializable_field skips over them), so the base class initializer
> > causes r_c_e_p to return false.
> 
> That seems like the primary bug.  I guess r_c_e_p shouldn't be using
> next_initializable_field.  Really that function should only be used for
> aggregates.

I see, I'll try replacing it in r_c_e_p.  Would that be in addition to
or instead of the clear_no_implicit_zero approach?

> 
> > The base class initializer comes from
> > the constructor call S::S(int).
> > 
> > The reason this is caused by r10-7313-gb599bf9d6d1e18 is because in that
> > commit we began using CONSTRUCTOR_NO_CLEARING to also track whether we're
> > in middle of activating a union member.  This overloaded use of the flag
> > affects clear_no_implicit_zero, which recurses into sub-aggregate
> > initializers only if the outer initializer has CONSTRUCTOR_NO_CLEARING
> > set.
> 
> Is that really overloaded?  In both union and non-union cases, it indicates
> that the object is not completely initialized.

Ah yes, makes sense.  More accurately the immediate clearing of
CONSTRUCTOR_NO_CLEARING after activating a union member at the end of
cxx_eval_store_expression is what affects clear_no_implicit_zero later.

> 
> > After that commit, the outer union initializer above no longer has
> > the flag set at this point and so clear_no_implicit_zero no longer clears
> > CONSTRUCTOR_NO_CLEARING for the marked inner initializer.
> 
> Why wasn't it cleared for the inner initializer as part of that evaluation?

Looks like the inner initializer {.D.2387={.m=0}, .b=0} is formed during
the subobject constructor call:

  V::V (&((struct S *) this)->D.2120);

after the evaluation of which, 'result' in cxx_eval_call_expression is NULL
(presumably because it's a CALL_EXPR, not AGGR_INIT_EXPR?):

  /* This can be null for a subobject constructor call, in
 which case what we care about is the initialization
 side-effects rather than the value.  We could get at the
 value by evaluating *this, but we don't bother; there's
 no need to put such a call in the hash table.  */
  result = lval ? ctx->object : ctx->ctor;

so we end up not calling clear_no_implicit_zero for the inner initializer
directly.  We only call clear_no_implicit_zero after evaluating the
AGGR_INIT_EXPR for outermost initializer (of type W).

> 
> > This patch fixes this by restoring the recursive behavior of
> > clear_no_implicit_zero for union initializers.  Arguably
> > we could improve reduced_constant_expression_p to accept the marked
> > initializer in C++11/14 even if it has CONSTRUCTOR_NO_CLEARING set, but
> > adjusting clear_no_implicit_zero seems safer to backport.
> > 
> > Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
> > trunk/12/11/10?
> > 
> > PR c++/105491
> > 
> > gcc/cp/ChangeLog:
> > 
> > * constexpr.cc (clear_no_implicit_zero): Recurse into a union
> > initializer even if CONSTRUCTOR_NO_CLEARING is already cleared.
> > 
> > gcc/testsuite/ChangeLog:
> > 
> > * g++.dg/cpp0x/constexpr-union7.C: New test.
> > * g++.dg/cpp0x/constexpr-union7a.C: New test.
> > * g++.dg/cpp2a/constinit17.C: New test.
> > ---
> >   gcc/cp/constexpr.cc   |  7 +-
> >   gcc/testsuite/g++.dg/cpp0x/constexpr-union7.C | 17 +
> >   .../g++.dg/cpp0x/constexpr-union7a.C  | 15 
> >   gcc/testsuite/g++.dg/cpp2a/constinit17.C  | 24 +++
> >   4 files changed, 62 insertions(+), 1 deletion(-)
> >   create mode 100644 gcc/testsuite/g++.dg/cpp0x/constexpr-union7.C
> >   create mode 100644 gcc/testsuite/g++.dg/cpp0x/constexpr-union7a.C
> >   create mode 100644 gcc/testsuite/g++.dg/cpp2a/constinit17.C
> > 
> > diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
> > index 9b1e71857fc..75fecbcbcb7 100644
> > --- a/gcc/cp/constexpr.cc
> > +++ b/gcc/cp/constexpr.cc
> > @@ -1886,7 +1886,12 @@ cxx_eval_internal_function (const constexpr_ctx *ctx,
> > tree t,
> >   static void
> >   clear_no_implicit_zero (tree ctor)
> >   {
> > -  if (CONSTRUCTOR_NO_CLEARING (ctor))
> > +  if (CONSTRUCTOR_NO_CLEARING (ctor)
> > +  /* For a union initializer, the flag could already be cleared but not
> > +necessarily yet for its sub-aggregates, since for unions the flag is
> > +also used by cxx_eval_store_expression to track whether we're in the
> > +middle of activating one of its members.  */
> > +  || TREE_CODE (TR

Re: [patch][gcc13][i386][pr101891]Adjust -fzero-call-used-regs to always use XOR

2022-05-06 Thread Qing Zhao via Gcc-patches


> On May 6, 2022, at 10:58 AM, Uros Bizjak  wrote:
> 
> On Fri, May 6, 2022 at 4:29 PM Qing Zhao  wrote:
>> 
>> Hi,
>> 
>> As Kees requested in this PR: 
>> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101891
>> 
>> =
>> 
>> Currently -fzero-call-used-regs will use a pattern of:
>> 
>> XOR regA,regA
>> MOV regA,regB
>> MOV regA,regC
>> ...
>> RET
>> 
>> However, this introduces both a register ordering dependency (e.g. the CPU 
>> cannot clear regB without clearing regA first), and while it greatly reduces 
>> available ROP gadgets, it does technically leave a set of "MOV" ROP gadgets 
>> at the end of functions (e.g. "MOV regA,regC; RET").
>> 
>> Please switch to always using XOR:
>> 
>> XOR regA,regA
>> XOR regB,regB
>> XOR regC,regC
>> ...
>> RET
>> 
>> ===
>> 
>> This patch switches all MOV to XOR on i386.
>> 
>> Bootstrapped and regression tested on x86_64-linux-gnu.
>> 
>> Okay for gcc13?
>> 
>> Thanks.
>> 
>> Qing
>> 
>> ==
> 
>> gcc/ChangeLog:
>> 
>> * config/i386/i386.cc (zero_all_mm_registers): Use SET to zero instead
>> of MOV for zeroing scratch registers.
>> (ix86_zero_call_used_regs): Likewise.
>> 
>> gcc/testsuite/ChangeLog:
>> 
>> * gcc.target/i386/zero-scratch-regs-1.c: Add -fno-stack-protector
>> -fno-PIC.
>> * gcc.target/i386/zero-scratch-regs-10.c: Adjust mov to xor.
>> * gcc.target/i386/zero-scratch-regs-13.c: Add -msse.
>> * gcc.target/i386/zero-scratch-regs-14.c: Adjust mov to xor.
>> * gcc.target/i386/zero-scratch-regs-15.c: Add -fno-stack-protector
>> -fno-PIC.
>> * gcc.target/i386/zero-scratch-regs-16.c: Likewise.
>> * gcc.target/i386/zero-scratch-regs-17.c: Likewise.
>> * gcc.target/i386/zero-scratch-regs-18.c: Add -fno-stack-protector
>> -fno-PIC, adjust mov to xor.
>> * gcc.target/i386/zero-scratch-regs-19.c: Add -fno-stack-protector
>> -fno-PIC.
>> * gcc.target/i386/zero-scratch-regs-2.c: Adjust mov to xor.
>> * gcc.target/i386/zero-scratch-regs-20.c: Add -msse.
>> * gcc.target/i386/zero-scratch-regs-21.c: Add -fno-stack-protector
>> -fno-PIC, Adjust mov to xor.
>> * gcc.target/i386/zero-scratch-regs-22.c: Adjust mov to xor.
>> * gcc.target/i386/zero-scratch-regs-23.c: Likewise.
>> * gcc.target/i386/zero-scratch-regs-26.c: Likewise.
>> * gcc.target/i386/zero-scratch-regs-27.c: Likewise.
>> * gcc.target/i386/zero-scratch-regs-28.c: Likewise.
>> * gcc.target/i386/zero-scratch-regs-3.c: Add -fno-stack-protector.
>> * gcc.target/i386/zero-scratch-regs-31.c: Adjust mov to xor.
>> * gcc.target/i386/zero-scratch-regs-4.c: Add -fno-stack-protector
>> -fno-PIC.
>> * gcc.target/i386/zero-scratch-regs-5.c: Adjust mov to xor.
>> * gcc.target/i386/zero-scratch-regs-6.c: Add -fno-stack-protector.
>> * gcc.target/i386/zero-scratch-regs-7.c: Likewise.
>> * gcc.target/i386/zero-scratch-regs-8.c: Adjust mov to xor.
>> * gcc.target/i386/zero-scratch-regs-9.c: Add -fno-stack-protector.
> 
> Please use something like the attached (functionally equivalent) patch
> for the last hunk of your patch.

Sure, I will update the code.
> 
> Also, if possible, please use V2SImode as a generic MMX mode instead
> of V4HImode.
What’s the major purpose of this change? 

thanks.

Qing
> 
> OK with the above changes.
> 
> Thanks,
> Uros.
> 



Re: Ping #5: [PATCH, V4] Eliminate power8 fusion options, use power8 tuning, PR target/102059

2022-05-06 Thread Peter Bergner via Gcc-patches
On 5/5/22 4:27 PM, Segher Boessenkool wrote:
> On Thu, May 05, 2022 at 02:59:07PM -0500, Peter Bergner wrote:
>> On 5/5/22 2:35 PM, Segher Boessenkool wrote:
>>> Just an unconditional
>>>
>>>   callee_isa &= ~OPTION_MASK_P8_FUSION;
>>>   explicit_isa &= ~OPTION_MASK_P8_FUSION;
>>>
>>> will do, no?  That is fine since these options should never have been
>>> used to determine if anything can be inlined, in the first place.
>>>
>>> A patch like that is pre-approved, even for trunk.
>>
>> That works for me!  I will apply this directly to GCC 10 and regtest and
>> push if clean so we can unblock our customer.
>>
>> As for trunk, GCC 12 & 11, I think we can wait for the backport of Mike's
>> patch that removes the option altogether.
> 
> Please put it on trunk and 12 and 11 as well.  To keep things sane.

Ok, pushed to trunk.  I'll work on testing and committing the backports.
Thanks!

Peter





Re: [PATCH v2] libstdc++: ppc: conditionalize vsx-only simd intrinsics

2022-05-06 Thread Jonathan Wakely via Gcc-patches
On Fri, 6 May 2022 at 17:17, Segher Boessenkool wrote:
> > +#if defined __VSX__ || __LONG_WIDTH__ == 32
> >  _GLIBCXX_SIMD_PPC_INTRIN(signed long);
> >  _GLIBCXX_SIMD_PPC_INTRIN(unsigned long);
> > +#endif
>
> Is __LONG_WIDTH__ the right macro to use here?  Nothing else in
> libstdc++v3 uses it.  "__CHAR_BIT__ * __SIZEOF_LONG__" is the usual
> thing to do.  Is __LONG_WIDTH__ always defined anyway?

Presumably it could be simply __SIZEOF_LONG__ == 4 if this is
PowerPC-specific code where CHAR_BIT==8 is always true?

We don't need to consider hypothetical targets where CHAR_BIT!=8 if we
already know the target is some version of PowerPC.


Re: [PATCH] c++: constexpr init of union sub-aggr w/ base [PR105491]

2022-05-06 Thread Patrick Palka via Gcc-patches
On Fri, 6 May 2022, Patrick Palka wrote:

> On Fri, 6 May 2022, Jason Merrill wrote:
> 
> > On 5/6/22 11:22, Patrick Palka wrote:
> > > Here ever since r10-7313-gb599bf9d6d1e18, reduced_constant_expression_p
> > > in C++11/14 is rejecting the marked sub-aggregate initializer (of type S)
> > > 
> > >W w = {.D.2445={.s={.D.2387={.m=0}, .b=0}}}
> > >   ^
> > > 
> > > ultimately because said initializer has CONSTRUCTOR_NO_CLEARING set, and
> > > so the function proceeds to verify that all fields of S are initialized.
> > > And before C++17 we don't expect to see base class fields (since
> > > > next_initializable_field skips over them), so the base class initializer
> > > causes r_c_e_p to return false.
> > 
> > That seems like the primary bug.  I guess r_c_e_p shouldn't be using
> > next_initializable_field.  Really that function should only be used for
> > aggregates.
> 
> I see, I'll try replacing it in r_c_e_p.  Would that be in addition to
> or instead of the clear_no_implicit_zero approach?

I'm testing the following, which uses a custom predicate instead of
next_initializable_field in r_c_e_p.

-- >8 --

gcc/cp/ChangeLog:

* constexpr.cc (reduced_constant_expression_p): Replace use of
next_initializable_field with custom predicate.  Refactor to
remove the use of goto.
* decl.cc (next_initializable_field): Skip over vptr fields.
Document that this function is to be used only for aggregate
classes.
---
 gcc/cp/constexpr.cc | 65 ++---
 gcc/cp/decl.cc  | 15 +--
 2 files changed, 44 insertions(+), 36 deletions(-)

diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index 9b1e71857fc..d1cd556591c 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -3016,7 +3016,6 @@ reduced_constant_expression_p (tree t)
 
 case CONSTRUCTOR:
   /* And we need to handle PTRMEM_CST wrapped in a CONSTRUCTOR.  */
-  tree field;
   if (CONSTRUCTOR_NO_CLEARING (t))
{
  if (TREE_CODE (TREE_TYPE (t)) == VECTOR_TYPE)
@@ -3041,42 +3040,54 @@ reduced_constant_expression_p (tree t)
}
  if (find_array_ctor_elt (t, max) == -1)
return false;
- goto ok;
}
- else if (cxx_dialect >= cxx20
-  && TREE_CODE (TREE_TYPE (t)) == UNION_TYPE)
+ else if (TREE_CODE (TREE_TYPE (t)) == UNION_TYPE)
{
- if (CONSTRUCTOR_NELTS (t) == 0)
+ if (CONSTRUCTOR_NELTS (t) != 1)
/* An initialized union has a constructor element.  */
return false;
- /* And it only initializes one member.  */
- field = NULL_TREE;
+ if (!reduced_constant_expression_p (CONSTRUCTOR_ELT (t, 
0)->value))
+   return false;
}
  else
-   field = next_initializable_field (TYPE_FIELDS (TREE_TYPE (t)));
+   {
+ /* Similar to the predicate used in next_initializable_field,
+except that we additionally skip over empty types (for which
+we don't require an initializer), and we always consider base
+class fields (not just in C++17 mode) and vptr fields.  */
+ auto ignorable_field_p = [] (tree field) {
+   if (!field)
+ return false;
+   return (TREE_CODE (field) != FIELD_DECL
+   || DECL_UNNAMED_BIT_FIELD (field)
+   || (DECL_ARTIFICIAL (field)
+   && !DECL_FIELD_IS_BASE (field)
+   && !DECL_VIRTUAL_P (field))
+   || is_really_empty_class (TREE_TYPE (field),
+ /*ignore_vptr*/false));
+ };
+ tree field = TYPE_FIELDS (TREE_TYPE (t));
+ for (auto &e: CONSTRUCTOR_ELTS (t))
+   {
+ if (!reduced_constant_expression_p (e.value))
+   return false;
+ while (e.index != field && ignorable_field_p (field))
+   field = DECL_CHAIN (field);
+ if (e.index == field)
+   field = DECL_CHAIN (field);
+ else
+   return false;
+   }
+ while (ignorable_field_p (field))
+   field = DECL_CHAIN (field);
+ if (field)
+   return false;
+   }
}
   else
-   field = NULL_TREE;
-  for (auto &e: CONSTRUCTOR_ELTS (t))
-   {
- /* If VAL is null, we're in the middle of initializing this
-element.  */
+   for (auto &e: CONSTRUCTOR_ELTS (t))
  if (!reduced_constant_expression_p (e.value))
return false;
- /* Empty class field may or may not have an initializer.  */
- for (; field && e.index != field;
-  field = next_initializable_fie

Re: [PATCH] [PR100106] Reject unaligned subregs when strict alignment is required

2022-05-06 Thread Vladimir Makarov via Gcc-patches



On 2022-05-05 02:52, Alexandre Oliva wrote:


Regstrapped on x86_64-linux-gnu and ppc64le-linux-gnu, also tested
targeting ppc- and ppc64-vx7r2.  Ok to install?

I am ok with the modified version of the patch.  It looks reasonable to 
me and I support its commit.


But I think I can not approve the patch formally as emit-rtl.cc is out 
of my jurisdiction and validate_subreg is used in many places besides RA.


Sorry, Alex, some global reviewer should do this.


for  gcc/ChangeLog

PR target/100106
* emit-rtl.cc (validate_subreg): Reject a SUBREG of a MEM that
requires stricter alignment than MEM's.

for  gcc/testsuite/ChangeLog

PR target/100106
* gcc.target/powerpc/pr100106-sa.c: New.
---
  gcc/emit-rtl.cc|3 +++
  gcc/testsuite/gcc.target/powerpc/pr100106-sa.c |4 
  2 files changed, 7 insertions(+)
  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr100106-sa.c

diff --git a/gcc/emit-rtl.cc b/gcc/emit-rtl.cc
index 1e02ae254d012..642e47eada0d7 100644
--- a/gcc/emit-rtl.cc
+++ b/gcc/emit-rtl.cc
@@ -982,6 +982,9 @@ validate_subreg (machine_mode omode, machine_mode imode,
  
return subreg_offset_representable_p (regno, imode, offset, omode);

  }
+  else if (reg && MEM_P (reg)
+  && STRICT_ALIGNMENT && MEM_ALIGN (reg) < GET_MODE_ALIGNMENT (omode))
+return false;
  
/* The outer size must be ordered wrt the register size, otherwise

   we wouldn't know at compile time how many registers the outer
diff --git a/gcc/testsuite/gcc.target/powerpc/pr100106-sa.c 
b/gcc/testsuite/gcc.target/powerpc/pr100106-sa.c
new file mode 100644
index 0..6cc29595c8b25
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr100106-sa.c
@@ -0,0 +1,4 @@
+/* { dg-do compile { target { ilp32 } } } */
+/* { dg-options "-mcpu=604 -O -mstrict-align" } */
+
+#include "../../gcc.c-torture/compile/pr100106.c"






Go patch committed: Reject duplicate bool keys in a map literal

2022-05-06 Thread Ian Lance Taylor via Gcc-patches
This patch to the Go frontend rejects duplicate bool keys in a map
literal.  This is hardly a common case, but we may as well get it
right.  This is for https://go.dev/issue/35945 and
https://go.dev/issue/28104.  Bootstrapped and ran Go testsuite on
x86_64-pc-linux-gnu.  Committed to mainline.

Ian
374b3c936d62c8b6e7c607fdf6e84a83748e85c7
diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE
index ef20a0aafd6..4559551ab7b 100644
--- a/gcc/go/gofrontend/MERGE
+++ b/gcc/go/gofrontend/MERGE
@@ -1,4 +1,4 @@
-70ca85f08edf63f46c87d540fa99c45e2903edc2
+fbadca004b1e09db177c8e071706841038d1dd64
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
diff --git a/gcc/go/gofrontend/expressions.cc b/gcc/go/gofrontend/expressions.cc
index 1b3b3bf135e..dce48e03bf4 100644
--- a/gcc/go/gofrontend/expressions.cc
+++ b/gcc/go/gofrontend/expressions.cc
@@ -17266,6 +17266,8 @@ Composite_literal_expression::lower_map(Gogo* gogo, 
Named_object* function,
   Location location = this->location();
   Unordered_map(unsigned int, std::vector) st;
   Unordered_map(unsigned int, std::vector) nt;
+  bool saw_false = false;
+  bool saw_true = false;
   if (this->vals_ != NULL)
 {
   if (!this->has_keys_)
@@ -17300,6 +17302,7 @@ Composite_literal_expression::lower_map(Gogo* gogo, 
Named_object* function,
continue;
  std::string sval;
  Numeric_constant nval;
+ bool bval;
  if ((*p)->string_constant_value(&sval)) // Check string keys.
{
  unsigned int h = Gogo::hash_string(sval, 0);
@@ -17373,6 +17376,19 @@ Composite_literal_expression::lower_map(Gogo* gogo, 
Named_object* function,
  mit->second.push_back(*p);
}
}
+ else if ((*p)->boolean_constant_value(&bval))
+   {
+ if ((bval && saw_true) || (!bval && saw_false))
+   {
+ go_error_at((*p)->location(),
+ "duplicate key in map literal");
+ return Expression::make_error(location);
+   }
+ if (bval)
+   saw_true = true;
+ else
+   saw_false = true;
+   }
}
 }
 


Re: [PATCH] c++: constexpr init of union sub-aggr w/ base [PR105491]

2022-05-06 Thread Jason Merrill via Gcc-patches

On 5/6/22 14:00, Patrick Palka wrote:

On Fri, 6 May 2022, Patrick Palka wrote:


On Fri, 6 May 2022, Jason Merrill wrote:


On 5/6/22 11:22, Patrick Palka wrote:

Here ever since r10-7313-gb599bf9d6d1e18, reduced_constant_expression_p
in C++11/14 is rejecting the marked sub-aggregate initializer (of type S)

W w = {.D.2445={.s={.D.2387={.m=0}, .b=0}}}
   ^

ultimately because said initializer has CONSTRUCTOR_NO_CLEARING set, and
so the function proceeds to verify that all fields of S are initialized.
And before C++17 we don't expect to see base class fields (since
next_initializable_field skips over them), so the base class initializer
causes r_c_e_p to return false.


That seems like the primary bug.  I guess r_c_e_p shouldn't be using
next_initializable_field.  Really that function should only be used for
aggregates.


I see, I'll try replacing it in r_c_e_p.  Would that be in addition to
or instead of the clear_no_implicit_zero approach?


I'm testing the following, which uses a custom predicate instead of
next_initializable_field in r_c_e_p.


Let's make it a public predicate, not internal to r_c_e_p.  Maybe it 
could be next_subobject_field, and the current next_initializable_field 
change to next_aggregate_field?



Looks like the inner initializer {.D.2387={.m=0}, .b=0} is formed during
the subobject constructor call:

  V::V (&((struct S *) this)->D.2120);

after the evaluation of which, 'result' in cxx_eval_call_expression is NULL
(presumably because it's a CALL_EXPR, not AGGR_INIT_EXPR?):

  /* This can be null for a subobject constructor call, in
 which case what we care about is the initialization
 side-effects rather than the value.  We could get at the
 value by evaluating *this, but we don't bother; there's
 no need to put such a call in the hash table.  */
  result = lval ? ctx->object : ctx->ctor;

so we end up not calling clear_no_implicit_zero for the inner initializer
directly.  We only call clear_no_implicit_zero after evaluating the
AGGR_INIT_EXPR for outermost initializer (of type W).


Maybe for constructors we could call it on ctx->ctor instead of result, 
or call r_c_e_p in C++20+?


It does seem dubious that we would clear the flag on an outer ctor when 
it's still set on an inner ctor, should probably add an assert somewhere.


Jason



Re: [PATCH v3] libstdc++: ppc: conditionalize vsx-only simd intrinsics

2022-05-06 Thread Alexandre Oliva via Gcc-patches
On May  6, 2022, Jonathan Wakely  wrote:

> On Fri, 6 May 2022 at 17:17, Segher Boessenkool wrote:
>> > +#if defined __VSX__ || __LONG_WIDTH__ == 32
>> >  _GLIBCXX_SIMD_PPC_INTRIN(signed long);
>> >  _GLIBCXX_SIMD_PPC_INTRIN(unsigned long);
>> > +#endif
>> 
>> Is __LONG_WIDTH__ the right macro to use here?  Nothing else in
>> libstdc++v3 uses it.  "__CHAR_BIT__ * __SIZEOF_LONG__" is the usual
>> thing to do.  Is __LONG_WIDTH__ always defined anyway?

> Presumably it could be simply __SIZEOF_LONG__ == 4 if this is
> PowerPC-specific code where CHAR_BIT==8 is always true?

SGTM.  Here's the adjusted patch I'm checking in momentarily, trunk
first, then gcc-12 and gcc-11 after a week or so.  Thanks,


libstdc++: ppc: conditionalize vsx-only simd intrinsics

libstdc++'s bits/simd.h section for PowerPC, guarded by __ALTIVEC__,
defines various intrinsic vector types that are only available with
__VSX__: 64-bit long double, double, (un)signed long long, and 64-bit
(un)signed long.

experimental/simd/standard_abi_usable{,_2}.cc tests error out
reporting the unmet requirements when the target cpu doesn't enable
VSX.  Make the reported intrinsic types conditional on __VSX__ so
that <experimental/simd> can be used on PowerPC variants that do not
support VSX.


for  libstdc++-v3/ChangeLog

* include/experimental/bits/simd.h [__ALTIVEC__]: Require VSX
for double, long long, and 64-bit long intrinsic types.
[__ALTIVEC__] (__intrinsic_type): Mention 128-bit in
preexisting long double diagnostic, adjust no-VSX double
diagnostic to cover 64-bit long double as well.
---
 libstdc++-v3/include/experimental/bits/simd.h |   13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/libstdc++-v3/include/experimental/bits/simd.h 
b/libstdc++-v3/include/experimental/bits/simd.h
index 82e9841195e1d..b0226fa4c5304 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -2430,17 +2430,23 @@ template 
   template <>  
\
 struct __intrinsic_type_impl<_Tp> { using type = __vector _Tp; }
 _GLIBCXX_SIMD_PPC_INTRIN(float);
+#ifdef __VSX__
 _GLIBCXX_SIMD_PPC_INTRIN(double);
+#endif
 _GLIBCXX_SIMD_PPC_INTRIN(signed char);
 _GLIBCXX_SIMD_PPC_INTRIN(unsigned char);
 _GLIBCXX_SIMD_PPC_INTRIN(signed short);
 _GLIBCXX_SIMD_PPC_INTRIN(unsigned short);
 _GLIBCXX_SIMD_PPC_INTRIN(signed int);
 _GLIBCXX_SIMD_PPC_INTRIN(unsigned int);
+#if defined __VSX__ || __SIZEOF_LONG__ == 4
 _GLIBCXX_SIMD_PPC_INTRIN(signed long);
 _GLIBCXX_SIMD_PPC_INTRIN(unsigned long);
+#endif
+#ifdef __VSX__
 _GLIBCXX_SIMD_PPC_INTRIN(signed long long);
 _GLIBCXX_SIMD_PPC_INTRIN(unsigned long long);
+#endif
 #undef _GLIBCXX_SIMD_PPC_INTRIN
 
 template 
@@ -2450,10 +2456,11 @@ template 
 static constexpr bool _S_is_ldouble = is_same_v<_Tp, long double>;
 // allow _Tp == long double with -mlong-double-64
 static_assert(!(_S_is_ldouble && sizeof(long double) > sizeof(double)),
- "no __intrinsic_type support for long double on PPC");
+ "no __intrinsic_type support for 128-bit floating point on 
PowerPC");
 #ifndef __VSX__
-static_assert(!is_same_v<_Tp, double>,
- "no __intrinsic_type support for double on PPC w/o VSX");
+static_assert(!(is_same_v<_Tp, double>
+   || (_S_is_ldouble && sizeof(long double) == 
sizeof(double))),
+ "no __intrinsic_type support for 64-bit floating point on 
PowerPC w/o VSX");
 #endif
 using type =
   typename __intrinsic_type_impl<


-- 
Alexandre Oliva, happy hacker            https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


Re: [PATCH] c++: constexpr init of union sub-aggr w/ base [PR105491]

2022-05-06 Thread Patrick Palka via Gcc-patches
On Fri, 6 May 2022, Jason Merrill wrote:

> On 5/6/22 14:00, Patrick Palka wrote:
> > On Fri, 6 May 2022, Patrick Palka wrote:
> > 
> > > On Fri, 6 May 2022, Jason Merrill wrote:
> > > 
> > > > On 5/6/22 11:22, Patrick Palka wrote:
> > > > > Here ever since r10-7313-gb599bf9d6d1e18,
> > > > > reduced_constant_expression_p
> > > > > in C++11/14 is rejecting the marked sub-aggregate initializer (of type
> > > > > S)
> > > > > 
> > > > > W w = {.D.2445={.s={.D.2387={.m=0}, .b=0}}}
> > > > >^
> > > > > 
> > > > > ultimately because said initializer has CONSTRUCTOR_NO_CLEARING set,
> > > > > and
> > > > > so the function proceeds to verify that all fields of S are
> > > > > initialized.
> > > > > And before C++17 we don't expect to see base class fields (since
> > > > > > next_initializable_field skips over them), so the base class
> > > > > initializer
> > > > > causes r_c_e_p to return false.
> > > > 
> > > > That seems like the primary bug.  I guess r_c_e_p shouldn't be using
> > > > next_initializable_field.  Really that function should only be used for
> > > > aggregates.
> > > 
> > > I see, I'll try replacing it in r_c_e_p.  Would that be in addition to
> > > or instead of the clear_no_implicit_zero approach?
> > 
> > I'm testing the following, which uses a custom predicate instead of
> > next_initializable_field in r_c_e_p.
> 
> Let's make it a public predicate, not internal to r_c_e_p.  Maybe it could be
> next_subobject_field, and the current next_initializable_field change to
> next_aggregate_field?

Will do.

> 
> > Looks like the inner initializer {.D.2387={.m=0}, .b=0} is formed during
> > the subobject constructor call:
> > 
> >   V::V (&((struct S *) this)->D.2120);
> > 
> > after the evaluation of which, 'result' in cxx_eval_call_expression is NULL
> > (presumably because it's a CALL_EXPR, not AGGR_INIT_EXPR?):
> > 
> >   /* This can be null for a subobject constructor call, in
> >  which case what we care about is the initialization
> >  side-effects rather than the value.  We could get at the
> >  value by evaluating *this, but we don't bother; there's
> >  no need to put such a call in the hash table.  */
> >   result = lval ? ctx->object : ctx->ctor;
> > 
> > so we end up not calling clear_no_implicit_zero for the inner initializer
> > directly.  We only call clear_no_implicit_zero after evaluating the
> > AGGR_INIT_EXPR for outermost initializer (of type W).
> 
> Maybe for constructors we could call it on ctx->ctor instead of result, or
> call r_c_e_p in C++20+?

But both ctx->ctor and ->object are NULL during a subobject constructor
call (since we apparently clear these fields when entering a
STATEMENT_LIST):

So I tried instead obtaining the constructor by evaluating new_obj via

--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -2993,6 +2988,9 @@ cxx_eval_call_expression (const constexpr_ctx *ctx, tree 
t,
  in order to detect reading an unitialized object in constexpr instead
  of value-initializing it.  (reduced_constant_expression_p is expected to
  take care of clearing the flag.)  */
+  if (new_obj && DECL_CONSTRUCTOR_P (fun))
+result = cxx_eval_constant_expression (ctx, new_obj, /*lval=*/false,
+  non_constant_p, overflow_p);
   if (TREE_CODE (result) == CONSTRUCTOR
   && (cxx_dialect < cxx20
  || !DECL_CONSTRUCTOR_P (fun)))

but that seems to break e.g. g++.dg/cpp2a/constexpr-init12.C because
after the subobject constructor call

  S::S (&((struct W *) this)->s, NON_LVALUE_EXPR <8>);

the constructor for the subobject a.s in new_obj is still completely
missing (I suppose because S::S doesn't initialize any of its members)
so trying to obtain it causes us to complain too soon from
cxx_eval_component_reference:

constexpr-init12.C:16:24:   in ‘constexpr’ expansion of ‘W(42)’
constexpr-init12.C:10:22:   in ‘constexpr’ expansion of 
‘((W*)this)->W::s.S::S(8)’
constexpr-init12.C:16:24: error: accessing uninitialized member ‘W::s’
   16 | constexpr auto a = W(42); // { dg-error "not a constant expression" }
  |^

> 
> It does seem dubious that we would clear the flag on an outer ctor when it's
> still set on an inner ctor, should probably add an assert somewhere.

Makes sense, not sure where the best place would be..

> 
> Jason
> 
> 


Re: [PATCH] c++: constexpr init of union sub-aggr w/ base [PR105491]

2022-05-06 Thread Patrick Palka via Gcc-patches
On Fri, 6 May 2022, Patrick Palka wrote:

> On Fri, 6 May 2022, Jason Merrill wrote:
> 
> > On 5/6/22 14:00, Patrick Palka wrote:
> > > On Fri, 6 May 2022, Patrick Palka wrote:
> > > 
> > > > On Fri, 6 May 2022, Jason Merrill wrote:
> > > > 
> > > > > On 5/6/22 11:22, Patrick Palka wrote:
> > > > > > Here ever since r10-7313-gb599bf9d6d1e18,
> > > > > > reduced_constant_expression_p
> > > > > > in C++11/14 is rejecting the marked sub-aggregate initializer (of 
> > > > > > type
> > > > > > S)
> > > > > > 
> > > > > > W w = {.D.2445={.s={.D.2387={.m=0}, .b=0}}}
> > > > > >^
> > > > > > 
> > > > > > ultimately because said initializer has CONSTRUCTOR_NO_CLEARING set,
> > > > > > and
> > > > > > so the function proceeds to verify that all fields of S are
> > > > > > initialized.
> > > > > > And before C++17 we don't expect to see base class fields (since
> > > > > > > next_initializable_field skips over them), so the base class
> > > > > > initializer
> > > > > > causes r_c_e_p to return false.
> > > > > 
> > > > > That seems like the primary bug.  I guess r_c_e_p shouldn't be using
> > > > > next_initializable_field.  Really that function should only be used 
> > > > > for
> > > > > aggregates.
> > > > 
> > > > I see, I'll try replacing it in r_c_e_p.  Would that be in addition to
> > > > or instead of the clear_no_implicit_zero approach?
> > > 
> > > I'm testing the following, which uses a custom predicate instead of
> > > next_initializable_field in r_c_e_p.
> > 
> > Let's make it a public predicate, not internal to r_c_e_p.  Maybe it could 
> > be
> > next_subobject_field, and the current next_initializable_field change to
> > next_aggregate_field?
> 
> Will do.
> 
> > 
> > > Looks like the inner initializer {.D.2387={.m=0}, .b=0} is formed during
> > > the subobject constructor call:
> > > 
> > >   V::V (&((struct S *) this)->D.2120);
> > > 
> > > after the evaluation of which, 'result' in cxx_eval_call_expression is 
> > > NULL
> > > (presumably because it's a CALL_EXPR, not AGGR_INIT_EXPR?):
> > > 
> > >   /* This can be null for a subobject constructor call, in
> > >  which case what we care about is the initialization
> > >  side-effects rather than the value.  We could get at the
> > >  value by evaluating *this, but we don't bother; there's
> > >  no need to put such a call in the hash table.  */
> > >   result = lval ? ctx->object : ctx->ctor;
> > > 
> > > so we end up not calling clear_no_implicit_zero for the inner initializer
> > > directly.  We only call clear_no_implicit_zero after evaluating the
> > > AGGR_INIT_EXPR for outermost initializer (of type W).
> > 
> > Maybe for constructors we could call it on ctx->ctor instead of result, or
> > call r_c_e_p in C++20+?
> 
> But both ctx->ctor and ->object are NULL during a subobject constructor
> call (since we apparently clear these fields when entering a
> STATEMENT_LIST):
> 
> So I tried instead obtaining the constructor by evaluating new_obj via
> 
> --- a/gcc/cp/constexpr.cc
> +++ b/gcc/cp/constexpr.cc
> @@ -2993,6 +2988,9 @@ cxx_eval_call_expression (const constexpr_ctx *ctx, 
> tree t,
>   in order to detect reading an unitialized object in constexpr instead
>   of value-initializing it.  (reduced_constant_expression_p is expected to
>   take care of clearing the flag.)  */
> +  if (new_obj && DECL_CONSTRUCTOR_P (fun))
> +result = cxx_eval_constant_expression (ctx, new_obj, /*lval=*/false,
> +  non_constant_p, overflow_p);
>if (TREE_CODE (result) == CONSTRUCTOR
>&& (cxx_dialect < cxx20
>   || !DECL_CONSTRUCTOR_P (fun)))
> 
> but that seems to break e.g. g++.dg/cpp2a/constexpr-init12.C because
> after the subobject constructor call
> 
>   S::S (&((struct W *) this)->s, NON_LVALUE_EXPR <8>);
> 
> the constructor for the subobject a.s in new_obj is still completely
> missing (I suppose because S::S doesn't initialize any of its members)
> so trying to obtain it causes us to complain too soon from
> cxx_eval_component_reference:
> 
> constexpr-init12.C:16:24:   in ‘constexpr’ expansion of ‘W(42)’
> constexpr-init12.C:10:22:   in ‘constexpr’ expansion of 
> ‘((W*)this)->W::s.S::S(8)’
> constexpr-init12.C:16:24: error: accessing uninitialized member ‘W::s’
>16 | constexpr auto a = W(42); // { dg-error "not a constant expression" }
>   |^
> 
> > 
> > It does seem dubious that we would clear the flag on an outer ctor when it's
> > still set on an inner ctor, should probably add an assert somewhere.
> 
> Makes sense, not sure where the best place would be..

On second thought, if I'm understanding your suggestion correctly, I
don't think we can generally enforce such a property for
CONSTRUCTOR_NO_CLEARING, given how cxx_eval_store_expression uses it for
unions:

  union U {
struct { int x, y; } a;
  } u;
  u.a.x = 0;

Here after evaluating the assignment, the outer ctor for the union will
have CONST

Re: [PATCH] c++: constexpr init of union sub-aggr w/ base [PR105491]

2022-05-06 Thread Jason Merrill via Gcc-patches

On 5/6/22 16:10, Patrick Palka wrote:

On Fri, 6 May 2022, Patrick Palka wrote:


On Fri, 6 May 2022, Jason Merrill wrote:


On 5/6/22 14:00, Patrick Palka wrote:

On Fri, 6 May 2022, Patrick Palka wrote:


On Fri, 6 May 2022, Jason Merrill wrote:


On 5/6/22 11:22, Patrick Palka wrote:

Here ever since r10-7313-gb599bf9d6d1e18,
reduced_constant_expression_p
in C++11/14 is rejecting the marked sub-aggregate initializer (of type
S)

 W w = {.D.2445={.s={.D.2387={.m=0}, .b=0}}}
^

ultimately because said initializer has CONSTRUCTOR_NO_CLEARING set,
and
so the function proceeds to verify that all fields of S are
initialized.
And before C++17 we don't expect to see base class fields (since
next_initializable_field skips over them), so the base class
initializer
causes r_c_e_p to return false.


That seems like the primary bug.  I guess r_c_e_p shouldn't be using
next_initializable_field.  Really that function should only be used for
aggregates.


I see, I'll try replacing it in r_c_e_p.  Would that be in addition to
or instead of the clear_no_implicit_zero approach?


I'm testing the following, which uses a custom predicate instead of
next_initializable_field in r_c_e_p.


Let's make it a public predicate, not internal to r_c_e_p.  Maybe it could be
next_subobject_field, and the current next_initializable_field change to
next_aggregate_field?


Will do.




Looks like the inner initializer {.D.2387={.m=0}, .b=0} is formed during
the subobject constructor call:

   V::V (&((struct S *) this)->D.2120);

after the evaluation of which, 'result' in cxx_eval_call_expression is NULL
(presumably because it's a CALL_EXPR, not AGGR_INIT_EXPR?):

   /* This can be null for a subobject constructor call, in
  which case what we care about is the initialization
  side-effects rather than the value.  We could get at the
  value by evaluating *this, but we don't bother; there's
  no need to put such a call in the hash table.  */
   result = lval ? ctx->object : ctx->ctor;

so we end up not calling clear_no_implicit_zero for the inner initializer
directly.  We only call clear_no_implicit_zero after evaluating the
AGGR_INIT_EXPR for outermost initializer (of type W).


Maybe for constructors we could call it on ctx->ctor instead of result, or
call r_c_e_p in C++20+?


But both ctx->ctor and ->object are NULL during a subobject constructor
call (since we apparently clear these fields when entering a
STATEMENT_LIST):

So I tried instead obtaining the constructor by evaluating new_obj via

--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -2993,6 +2988,9 @@ cxx_eval_call_expression (const constexpr_ctx *ctx, tree 
t,
   in order to detect reading an unitialized object in constexpr instead
   of value-initializing it.  (reduced_constant_expression_p is expected to
   take care of clearing the flag.)  */
+  if (new_obj && DECL_CONSTRUCTOR_P (fun))
+result = cxx_eval_constant_expression (ctx, new_obj, /*lval=*/false,
+  non_constant_p, overflow_p);
if (TREE_CODE (result) == CONSTRUCTOR
&& (cxx_dialect < cxx20
   || !DECL_CONSTRUCTOR_P (fun)))

but that seems to break e.g. g++.dg/cpp2a/constexpr-init12.C because
after the subobject constructor call

   S::S (&((struct W *) this)->s, NON_LVALUE_EXPR <8>);

the constructor for the subobject a.s in new_obj is still completely
missing (I suppose because S::S doesn't initialize any of its members)
so trying to obtain it causes us to complain too soon from
cxx_eval_component_reference:

constexpr-init12.C:16:24:   in ‘constexpr’ expansion of ‘W(42)’
constexpr-init12.C:10:22:   in ‘constexpr’ expansion of 
‘((W*)this)->W::s.S::S(8)’
constexpr-init12.C:16:24: error: accessing uninitialized member ‘W::s’
16 | constexpr auto a = W(42); // { dg-error "not a constant expression" }
   |^



It does seem dubious that we would clear the flag on an outer ctor when it's
still set on an inner ctor, should probably add an assert somewhere.


Makes sense, not sure where the best place would be..


On second thought, if I'm understanding your suggestion correctly, I
don't think we can generally enforce such a property for
CONSTRUCTOR_NO_CLEARING, given how cxx_eval_store_expression uses it for
unions:

   union U {
 struct { int x, y; } a;
   } u;
   u.a.x = 0;

Here after evaluating the assignment, the outer ctor for the union will
have CONSTRUCTOR_NO_CLEARING cleared to indicate we finished activating
the union member, but the inner ctor is certainly not fully initialized
so it'll have CONSTRUCTOR_NO_CLEARING set still.


Why clear the flag on the union before the inner ctor is fully 
initialized, if the intent is to prevent changing the active member 
during initialization?


In the loop over ctors in cxx_eval_store_expression, I'd think if we 
encounter a CONSTRUCTOR_NO_CLEARING ctor along the way, we shouldn't 
clear the flag on an outer ctor

[pushed] c++: empty base constexpr adjustment [PR105245]

2022-05-06 Thread Jason Merrill via Gcc-patches
While looking at PR105245 in stage 4, I wanted to reorganize the code a bit,
but it seemed prudent to defer that to stage 1.

Tested x86_64-pc-linux-gnu, applying to trunk.

PR c++/105245
PR c++/100111

gcc/cp/ChangeLog:

* constexpr.cc (cxx_eval_store_expression): Reorganize empty base
handling.
---
 gcc/cp/constexpr.cc | 69 +++--
 1 file changed, 35 insertions(+), 34 deletions(-)

diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index 9b1e71857fc..6c204ab2265 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -5718,6 +5718,7 @@ cxx_eval_store_expression (const constexpr_ctx *ctx, tree 
t,
   releasing_vec ctors, indexes;
   auto_vec index_pos_hints;
   bool activated_union_member_p = false;
+  bool empty_base = false;
   while (!refs->is_empty ())
 {
   if (*valp == NULL_TREE)
@@ -5759,7 +5760,7 @@ cxx_eval_store_expression (const constexpr_ctx *ctx, tree 
t,
   no_zero_init = CONSTRUCTOR_NO_CLEARING (*valp);
 
   enum tree_code code = TREE_CODE (type);
-  type = refs->pop();
+  tree reftype = refs->pop();
   tree index = refs->pop();
 
   if (code == RECORD_TYPE && is_empty_field (index))
@@ -5768,7 +5769,12 @@ cxx_eval_store_expression (const constexpr_ctx *ctx, 
tree t,
   fields, which confuses the middle-end.  The code below will notice
   that we don't have a CONSTRUCTOR for our inner target and just
   return init.  */
-   break;
+   {
+ empty_base = true;
+ break;
+   }
+
+  type = reftype;
 
   if (code == UNION_TYPE && CONSTRUCTOR_NELTS (*valp)
  && CONSTRUCTOR_ELT (*valp, 0)->index != index)
@@ -5902,44 +5908,41 @@ cxx_eval_store_expression (const constexpr_ctx *ctx, 
tree t,
}
 }
 
+  if (*non_constant_p)
+return t;
+
   /* Don't share a CONSTRUCTOR that might be changed later.  */
   init = unshare_constructor (init);
 
-  if (*valp && TREE_CODE (*valp) == CONSTRUCTOR
-  && TREE_CODE (init) == CONSTRUCTOR)
+  gcc_checking_assert (!*valp || (same_type_ignoring_top_level_qualifiers_p
+ (TREE_TYPE (*valp), type)));
+  if (empty_base || !(same_type_ignoring_top_level_qualifiers_p
+ (TREE_TYPE (init), type)))
 {
-  /* An outer ctx->ctor might be pointing to *valp, so replace
-its contents.  */
-  if (!same_type_ignoring_top_level_qualifiers_p (TREE_TYPE (init),
- TREE_TYPE (*valp)))
-   {
- /* For initialization of an empty base, the original target will be
-  *(base*)this, evaluation of which resolves to the object
-  argument, which has the derived type rather than the base type.  In
-  this situation, just evaluate the initializer and return, since
-  there's no actual data to store.  */
- gcc_assert (is_empty_class (TREE_TYPE (init)));
- return lval ? target : init;
-   }
-  CONSTRUCTOR_ELTS (*valp) = CONSTRUCTOR_ELTS (init);
-  TREE_CONSTANT (*valp) = TREE_CONSTANT (init);
-  TREE_SIDE_EFFECTS (*valp) = TREE_SIDE_EFFECTS (init);
-  CONSTRUCTOR_NO_CLEARING (*valp)
-   = CONSTRUCTOR_NO_CLEARING (init);
-}
-  else if (TREE_CODE (init) == CONSTRUCTOR
-  && !same_type_ignoring_top_level_qualifiers_p (TREE_TYPE (init),
- type))
-{
-  /* See above on initialization of empty bases.  */
-  gcc_assert (is_empty_class (TREE_TYPE (init)) && !lval);
+  /* For initialization of an empty base, the original target will be
+   *(base*)this, evaluation of which resolves to the object
+   argument, which has the derived type rather than the base type.  In
+   this situation, just evaluate the initializer and return, since
+   there's no actual data to store, and we didn't build a CONSTRUCTOR.  */
+  empty_base = true;
+  gcc_assert (is_empty_class (TREE_TYPE (init)));
   if (!*valp)
{
  /* But do make sure we have something in *valp.  */
  *valp = build_constructor (type, nullptr);
  CONSTRUCTOR_NO_CLEARING (*valp) = no_zero_init;
}
-  return init;
+}
+  else if (*valp && TREE_CODE (*valp) == CONSTRUCTOR
+  && TREE_CODE (init) == CONSTRUCTOR)
+{
+  /* An outer ctx->ctor might be pointing to *valp, so replace
+its contents.  */
+  CONSTRUCTOR_ELTS (*valp) = CONSTRUCTOR_ELTS (init);
+  TREE_CONSTANT (*valp) = TREE_CONSTANT (init);
+  TREE_SIDE_EFFECTS (*valp) = TREE_SIDE_EFFECTS (init);
+  CONSTRUCTOR_NO_CLEARING (*valp)
+   = CONSTRUCTOR_NO_CLEARING (init);
 }
   else
 *valp = init;
@@ -5958,7 +5961,7 @@ cxx_eval_store_expression (const constexpr_ctx *ctx, tree 
t,
   constructor of a delegating constructor).  Leave it up to the
   caller that set 'this' to set TREE_READONLY app

[PATCH] Guard against applying scale with 0 denominator

2022-05-06 Thread Eugene Rozenfeld via Gcc-patches
Calling count.apply_scale with a 0 denominator causes an assert.
This change guards against that.

Tested on x86_64-pc-linux-gnu.

gcc/ChangeLog:
* tree-vect-loop-manip.cc (vect_do_peeling): Guard against applying
scale with 0 denominator.
---
 gcc/tree-vect-loop-manip.cc | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 1d4337eb261..db54ae69e45 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -2989,10 +2989,11 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, 
tree nitersm1,
 get lost if we scale down to 0.  */
  basic_block *bbs = get_loop_body (epilog);
  for (unsigned int i = 0; i < epilog->num_nodes; i++)
-   bbs[i]->count = bbs[i]->count.apply_scale
-(bbs[i]->count,
- bbs[i]->count.apply_probability
-   (prob_vector));
+   if (bbs[i]->count.nonzero_p ())
+ bbs[i]->count = bbs[i]->count.apply_scale
+  (bbs[i]->count,
+   bbs[i]->count.apply_probability
+ (prob_vector));
  free (bbs);
}

--
2.25.1


Re: [PATCH] c++: constexpr init of union sub-aggr w/ base [PR105491]

2022-05-06 Thread Patrick Palka via Gcc-patches
On Fri, 6 May 2022, Jason Merrill wrote:

> On 5/6/22 16:10, Patrick Palka wrote:
> > On Fri, 6 May 2022, Patrick Palka wrote:
> > 
> > > On Fri, 6 May 2022, Jason Merrill wrote:
> > > 
> > > > On 5/6/22 14:00, Patrick Palka wrote:
> > > > > On Fri, 6 May 2022, Patrick Palka wrote:
> > > > > 
> > > > > > On Fri, 6 May 2022, Jason Merrill wrote:
> > > > > > 
> > > > > > > On 5/6/22 11:22, Patrick Palka wrote:
> > > > > > > > Here ever since r10-7313-gb599bf9d6d1e18,
> > > > > > > > reduced_constant_expression_p
> > > > > > > > in C++11/14 is rejecting the marked sub-aggregate initializer
> > > > > > > > (of type
> > > > > > > > S)
> > > > > > > > 
> > > > > > > >  W w = {.D.2445={.s={.D.2387={.m=0}, .b=0}}}
> > > > > > > > ^
> > > > > > > > 
> > > > > > > > ultimately because said initializer has CONSTRUCTOR_NO_CLEARING
> > > > > > > > set,
> > > > > > > > and
> > > > > > > > so the function proceeds to verify that all fields of S are
> > > > > > > > initialized.
> > > > > > > > And before C++17 we don't expect to see base class fields (since
> > > > > > > > > next_initializable_field skips over them), so the base class
> > > > > > > > initializer
> > > > > > > > causes r_c_e_p to return false.
> > > > > > > 
> > > > > > > That seems like the primary bug.  I guess r_c_e_p shouldn't be
> > > > > > > using
> > > > > > > next_initializable_field.  Really that function should only be
> > > > > > > used for
> > > > > > > aggregates.
> > > > > > 
> > > > > > I see, I'll try replacing it in r_c_e_p.  Would that be in addition
> > > > > > to
> > > > > > or instead of the clear_no_implicit_zero approach?
> > > > > 
> > > > > I'm testing the following, which uses a custom predicate instead of
> > > > > next_initializable_field in r_c_e_p.
> > > > 
> > > > Let's make it a public predicate, not internal to r_c_e_p.  Maybe it
> > > > could be
> > > > next_subobject_field, and the current next_initializable_field change to
> > > > next_aggregate_field?
> > > 
> > > Will do.
> > > 
> > > > 
> > > > > Looks like the inner initializer {.D.2387={.m=0}, .b=0} is formed
> > > > > during
> > > > > the subobject constructor call:
> > > > > 
> > > > >V::V (&((struct S *) this)->D.2120);
> > > > > 
> > > > > after the evaluation of which, 'result' in cxx_eval_call_expression is
> > > > > NULL
> > > > > (presumably because it's a CALL_EXPR, not AGGR_INIT_EXPR?):
> > > > > 
> > > > >/* This can be null for a subobject constructor call, in
> > > > >   which case what we care about is the initialization
> > > > >   side-effects rather than the value.  We could get at the
> > > > >   value by evaluating *this, but we don't bother; there's
> > > > >   no need to put such a call in the hash table.  */
> > > > >result = lval ? ctx->object : ctx->ctor;
> > > > > 
> > > > > so we end up not calling clear_no_implicit_zero for the inner
> > > > > initializer
> > > > > directly.  We only call clear_no_implicit_zero after evaluating the
> > > > > AGGR_INIT_EXPR for outermost initializer (of type W).
> > > > 
> > > > Maybe for constructors we could call it on ctx->ctor instead of result,
> > > > or
> > > > call r_c_e_p in C++20+?
> > > 
> > > But both ctx->ctor and ->object are NULL during a subobject constructor
> > > call (since we apparently clear these fields when entering a
> > > STATEMENT_LIST):
> > > 
> > > So I tried instead obtaining the constructor by evaluating new_obj via
> > > 
> > > --- a/gcc/cp/constexpr.cc
> > > +++ b/gcc/cp/constexpr.cc
> > > @@ -2993,6 +2988,9 @@ cxx_eval_call_expression (const constexpr_ctx *ctx,
> > > tree t,
> > >in order to detect reading an unitialized object in constexpr
> > > instead
> > >of value-initializing it.  (reduced_constant_expression_p is
> > > expected to
> > >take care of clearing the flag.)  */
> > > +  if (new_obj && DECL_CONSTRUCTOR_P (fun))
> > > +result = cxx_eval_constant_expression (ctx, new_obj, /*lval=*/false,
> > > +  non_constant_p, overflow_p);
> > > if (TREE_CODE (result) == CONSTRUCTOR
> > > && (cxx_dialect < cxx20
> > >|| !DECL_CONSTRUCTOR_P (fun)))
> > > 
> > > but that seems to break e.g. g++.dg/cpp2a/constexpr-init12.C because
> > > after the subobject constructor call
> > > 
> > >S::S (&((struct W *) this)->s, NON_LVALUE_EXPR <8>);
> > > 
> > > the constructor for the subobject a.s in new_obj is still completely
> > > missing (I suppose because S::S doesn't initialize any of its members)
> > > so trying to obtain it causes us to complain too soon from
> > > cxx_eval_component_reference:
> > > 
> > > constexpr-init12.C:16:24:   in ‘constexpr’ expansion of ‘W(42)’
> > > constexpr-init12.C:10:22:   in ‘constexpr’ expansion of
> > > ‘((W*)this)->W::s.S::S(8)’
> > > constexpr-init12.C:16:24: error: accessing uninitialized member ‘W::s’
> > > 16 | constexpr auto a = W(42); // { dg-error "not a constant
> > > expression" }
> > >  

Re: [ping2][PATCH 0/8][RFC] Support BTF decl_tag and type_tag annotations

2022-05-06 Thread David Faust via Gcc-patches




On 5/5/22 16:00, Yonghong Song wrote:



On 5/4/22 10:03 AM, David Faust wrote:



On 5/3/22 15:32, Joseph Myers wrote:

On Mon, 2 May 2022, David Faust via Gcc-patches wrote:


Consider the following example:

     #define __typetag1 __attribute__((btf_type_tag("tag1")))
     #define __typetag2 __attribute__((btf_type_tag("tag2")))
     #define __typetag3 __attribute__((btf_type_tag("tag3")))

     int __typetag1 * __typetag2 __typetag3 * g;

The expected behavior is that 'g' is "a pointer with tags 'tag2' and
'tag3',
to a pointer with tag 'tag1' to an int". i.e.:


That's not a correct expectation for either GNU __attribute__ or C2x [[]]
attribute syntax.  In either syntax, __typetag2 __typetag3 should
apply to
the type to which g points, not to g or its type, just as if you had a
type qualifier there.  You'd need to put the attributes (or qualifier)
after the *, not before, to make them apply to the pointer type.  See
"Attribute Syntax" in the GCC manual for how the syntax is defined for
GNU
attributes and deduce in turn, for each subsequence of the tokens
matching
the syntax for some kind of declarator, what the type for "T D1" would be
as defined there and in the C standard, as deduced from the type for
"T D"
for a sub-declarator D.

But GCC's attribute parsing produces a variable 'g' which is "a pointer with
tag 'tag1' to a pointer with tags 'tag2' and 'tag3' to an int", i.e.


In GNU syntax, __typetag1 applies to the declaration, whereas in C2x
syntax it applies to int.  Again, if you wanted it to apply to the
pointer
type it would need to go after the * not before.

If you are concerned with the fine details of what construct an attribute
appertains to, I recommend using C2x syntax not GNU syntax.



Joseph, thank you! This is very helpful. My understanding of the syntax
was not correct.

(Actually, I made a bad mistake in paraphrasing this example from the
discussion of it in the series cover letter. But, the reason why it is
incorrect is the same.)


Yonghong, is the specific ordering an expectation in BPF programs or
other users of the tags?


This is probably a wording issue. We have been saying that tags only
apply to pointers. We should probably say that they only apply to the pointee.

$ cat t.c
int const *ptr;

the llvm ir debuginfo:

!5 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !6, size: 64)
!6 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !7)
!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)

We could replace 'const' with a tag like below:

int __attribute__((btf_type_tag("tag"))) *ptr;

!5 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !6, size: 64,
annotations: !7)
!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!7 = !{!8}
!8 = !{!"btf_type_tag", !"tag"}

In the above IR, we generate annotations on the pointer_type because
we didn't invent a new DI type to encode btf_type_tag. But it is
totally okay to have IR that looks like

!5 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64)
!11 = !DIBtfTypeTagType(..., baseType: !6, name: !"Tag")
!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)


OK, thanks.

There is still the question of why the DWARF generated for this case 
that I have been concerned about:


  int __typetag1 * __typetag2 __typetag3 * g;

differs between GCC (with this series) and clang. After studying it, I found
that GCC is handling the attributes exactly as described in the Attribute
Syntax portion of the GCC manual, where the GNU syntax is defined. I do
not think there is any problem here.


So the difference in DWARF suggests to me that clang is not handling the 
GNU attribute syntax in this particular case correctly, since it seems 
to be associating __typetag2 and __typetag3 to g's type rather than the 
type to which it points.


I am not sure whether for the use purposes of the tags this difference 
is very important, but it is worth noting.



As Joseph suggested, it may be better to encourage users of these tags 
to use the C2x attribute syntax if they are concerned with precisely 
which construct the tag applies to.


This would also be a way around any issues in handling the attributes 
due to the GNU syntax.


I tried a few test cases using C2x syntax BTF type tags with a clang-15 
build, but ran into some issues (in particular, some of the tag 
attributes being ignored altogether). I couldn't find confirmation 
whether C2x attribute syntax is fully supported in clang yet, so maybe 
this isn't expected to work. Do you know whether the C2x syntax is fully 
supported in clang yet?






This example comes from my testing against clang to check that the BTF
generated by both toolchains is compatible. In this case we get
different results when using the GNU attribute syntax.


To avoid confusion, here is the full example (from the cover letter).
The difference in the results is clear in the DWARF.


Consider the following example:

   #define __typetag1 __attribute__((btf_type_tag("type-tag-1")))
 

[committed 2/2] libstdc++: Simplify std::normal_distribution equality operator

2022-05-06 Thread Jonathan Wakely via Gcc-patches
Tested powerpc64le-linux, pushed to trunk.

-- >8 --

libstdc++-v3/ChangeLog:

* include/bits/random.tcc (operator==): Only check
normal_distribution::_M_saved_available once.
* testsuite/26_numerics/random/normal_distribution/operators/equal.cc:
Check equality after state changes.
* testsuite/26_numerics/random/pr60037-neg.cc: Adjust dg-error
lineno.
---
 libstdc++-v3/include/bits/random.tcc  | 10 +-
 .../normal_distribution/operators/equal.cc| 20 +++
 .../26_numerics/random/pr60037-neg.cc |  2 +-
 3 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/libstdc++-v3/include/bits/random.tcc 
b/libstdc++-v3/include/bits/random.tcc
index 87a16a21336..cb1d3675783 100644
--- a/libstdc++-v3/include/bits/random.tcc
+++ b/libstdc++-v3/include/bits/random.tcc
@@ -1907,15 +1907,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 {
   if (__d1._M_param == __d2._M_param
  && __d1._M_saved_available == __d2._M_saved_available)
-   {
- if (__d1._M_saved_available
- && __d1._M_saved == __d2._M_saved)
-   return true;
- else if(!__d1._M_saved_available)
-   return true;
- else
-   return false;
-   }
+   return __d1._M_saved_available ? __d1._M_saved == __d2._M_saved : true;
   else
return false;
 }
diff --git 
a/libstdc++-v3/testsuite/26_numerics/random/normal_distribution/operators/equal.cc
 
b/libstdc++-v3/testsuite/26_numerics/random/normal_distribution/operators/equal.cc
index a3435232961..81534e95797 100644
--- 
a/libstdc++-v3/testsuite/26_numerics/random/normal_distribution/operators/equal.cc
+++ 
b/libstdc++-v3/testsuite/26_numerics/random/normal_distribution/operators/equal.cc
@@ -34,8 +34,28 @@ test01()
   VERIFY( !(u == v) );
 }
 
+void
+test02()
+{
+  std::normal_distribution u(5.0, 2.0), v(u);
+  VERIFY( u == v );
+  u.reset();
+  VERIFY( u == v );
+
+  std::minstd_rand0 g1, g2;
+  (void) u(g1); // u._M_saved_available = true
+  VERIFY( !(u == v) );
+  (void) v(g2); // v._M_saved_available = true
+  VERIFY( u == v );
+  u.reset();// u._M_saved_available = false
+  VERIFY( !(u == v) );
+  v.reset();// v._M_saved_available = false
+  VERIFY( u == v );
+}
+
 int main()
 {
   test01();
+  test02();
   return 0;
 }
diff --git a/libstdc++-v3/testsuite/26_numerics/random/pr60037-neg.cc 
b/libstdc++-v3/testsuite/26_numerics/random/pr60037-neg.cc
index 3ab9c44232e..c58f480640f 100644
--- a/libstdc++-v3/testsuite/26_numerics/random/pr60037-neg.cc
+++ b/libstdc++-v3/testsuite/26_numerics/random/pr60037-neg.cc
@@ -12,4 +12,4 @@ auto x = std::generate_canonical

[committed 1/2] libstdc++: Fix deserialization for std::normal_distribution [PR105502]

2022-05-06 Thread Jonathan Wakely via Gcc-patches
Tested powerpc64le-linux, pushed to trunk. Backports to all branches
needed.

-- >8 --

This fixes a regression in std::normal_distribution deserialization that
caused the object to be left unchanged if the __state_avail value read
from the stream was false.

libstdc++-v3/ChangeLog:

PR libstdc++/105502
* include/bits/random.tcc
(operator>>(basic_istream&, normal_distribution&)):
Update state when __state_avail is false.
* 
testsuite/26_numerics/random/normal_distribution/operators/serialize.cc:
Check that deserialized object equals serialized one.
---
 libstdc++-v3/include/bits/random.tcc  |  2 +-
 .../operators/serialize.cc| 36 ++-
 2 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/include/bits/random.tcc 
b/libstdc++-v3/include/bits/random.tcc
index 6c72e991007..87a16a21336 100644
--- a/libstdc++-v3/include/bits/random.tcc
+++ b/libstdc++-v3/include/bits/random.tcc
@@ -1961,7 +1961,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   bool __saved_avail;
   if (__is >> __mean >> __stddev >> __saved_avail)
{
- if (__saved_avail && (__is >> __x._M_saved))
+ if (!__saved_avail || (__is >> __x._M_saved))
{
  __x._M_saved_available = __saved_avail;
  __x.param(param_type(__mean, __stddev));
diff --git 
a/libstdc++-v3/testsuite/26_numerics/random/normal_distribution/operators/serialize.cc
 
b/libstdc++-v3/testsuite/26_numerics/random/normal_distribution/operators/serialize.cc
index 9d8f8278fb3..d4f9f374643 100644
--- 
a/libstdc++-v3/testsuite/26_numerics/random/normal_distribution/operators/serialize.cc
+++ 
b/libstdc++-v3/testsuite/26_numerics/random/normal_distribution/operators/serialize.cc
@@ -25,6 +25,7 @@
 
 #include 
 #include 
+#include 
 
 void
 test01()
@@ -37,10 +38,43 @@ test01()
   str << u;
 
   str >> v;
+  VERIFY( u == v );
+}
+
+void
+test_pr105502()
+{
+  // PR libstdc++/105502 std::normal_distribution deserialization issue
+  std::stringstream str;
+  std::normal_distribution<> d{1, 2}, d2;
+  std::minstd_rand0 g;
+  str << d;
+  VERIFY( str );
+  str >> d2;
+  VERIFY( str );
+  VERIFY( d == d2 );
+
+  (void) d(g); // sets d._M_saved_available = true
+  str.str("");
+  str.clear();
+  str << d;
+  VERIFY( str );
+  str >> d2;
+  VERIFY( str );
+  VERIFY( d == d2 );
+
+  (void) d(g); // sets d._M_saved_available = false
+  str.str("");
+  str.clear();
+  str << d;
+  VERIFY( str );
+  str >> d2;
+  VERIFY( str );
+  VERIFY( d == d2 );
 }
 
 int main()
 {
   test01();
-  return 0;
+  test_pr105502();
 }
-- 
2.34.1



Go patch committed: Remove Array_index_expression::is_lvalue_

2022-05-06 Thread Ian Lance Taylor via Gcc-patches
This patch to the Go frontend removes the is_lvalue_ field from
Array_index_expression.  As of https://go.dev/cl/77510
(https://gcc.gnu.org/pipermail/gcc-patches/2017-November/487697.html)
it is never used.  Bootstrapped and ran Go testsuite on
x86_64-pc-linux-gnu.  Committed to mainline.

Ian
55e0ea53ff782a6bf2ded7bed922388928dcf456
diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE
index 4559551ab7b..3ec315f6892 100644
--- a/gcc/go/gofrontend/MERGE
+++ b/gcc/go/gofrontend/MERGE
@@ -1,4 +1,4 @@
-fbadca004b1e09db177c8e071706841038d1dd64
+6a33e7e30c89edc12340dc470b44791bb1066feb
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
diff --git a/gcc/go/gofrontend/expressions.cc b/gcc/go/gofrontend/expressions.cc
index dce48e03bf4..734ecb9492e 100644
--- a/gcc/go/gofrontend/expressions.cc
+++ b/gcc/go/gofrontend/expressions.cc
@@ -7671,8 +7671,7 @@ Expression::comparison(Translate_context* context, Type* 
result_type,
  && left_type->array_type()->length() == NULL)
{
  Array_type* at = left_type->array_type();
-  bool is_lvalue = false;
-  left = at->get_value_pointer(context->gogo(), left, is_lvalue);
+  left = at->get_value_pointer(context->gogo(), left);
}
   else if (left_type->interface_type() != NULL)
{
@@ -9276,7 +9275,7 @@ Builtin_call_expression::flatten_append(Gogo* gogo, 
Named_object* function,
   Type* unsafe_ptr_type = Type::make_pointer_type(Type::make_void_type());
   Expression* a1 = Expression::make_type_descriptor(element_type, loc);
   Expression* a2 = Expression::make_temporary_reference(s1tmp, loc);
-  a2 = slice_type->array_type()->get_value_pointer(gogo, a2, false);
+  a2 = slice_type->array_type()->get_value_pointer(gogo, a2);
   a2 = Expression::make_cast(unsafe_ptr_type, a2, loc);
   Expression* a3 = Expression::make_temporary_reference(l1tmp, loc);
   Expression* a4 = Expression::make_temporary_reference(c1tmp, loc);
@@ -13848,9 +13847,8 @@ 
Array_index_expression::do_get_backend(Translate_context* context)
}
   else
{
- Expression* valptr =
-  array_type->get_value_pointer(gogo, this->array_,
-this->is_lvalue_);
+ Expression* valptr = array_type->get_value_pointer(gogo,
+this->array_);
  Bexpression* ptr = valptr->get_backend(context);
   ptr = gogo->backend()->pointer_offset_expression(ptr, start, loc);
 
@@ -13891,8 +13889,7 @@ 
Array_index_expression::do_get_backend(Translate_context* context)
   Bexpression* offset = gogo->backend()->conditional_expression(bfn, int_btype,
cond, zero,
start, loc);
-  Expression* valptr = array_type->get_value_pointer(gogo, this->array_,
- this->is_lvalue_);
+  Expression* valptr = array_type->get_value_pointer(gogo, this->array_);
   Bexpression* val = valptr->get_backend(context);
   val = gogo->backend()->pointer_offset_expression(val, offset, loc);
 
diff --git a/gcc/go/gofrontend/expressions.h b/gcc/go/gofrontend/expressions.h
index 92e8d8d96b4..707c19336d8 100644
--- a/gcc/go/gofrontend/expressions.h
+++ b/gcc/go/gofrontend/expressions.h
@@ -3055,7 +3055,7 @@ class Array_index_expression : public Expression
 Expression* end, Expression* cap, Location location)
 : Expression(EXPRESSION_ARRAY_INDEX, location),
   array_(array), start_(start), end_(end), cap_(cap), type_(NULL),
-  is_lvalue_(false), needs_bounds_check_(true), is_flattened_(false)
+  needs_bounds_check_(true), is_flattened_(false)
   { }
 
   // Return the array.
@@ -3087,18 +3087,6 @@ class Array_index_expression : public Expression
   end() const
   { return this->end_; }
 
-  // Return whether this array index expression appears in an lvalue
-  // (left hand side of assignment) context.
-  bool
-  is_lvalue() const
-  { return this->is_lvalue_; }
-
-  // Update this array index expression to indicate that it appears
-  // in a left-hand-side or lvalue context.
-  void
-  set_is_lvalue()
-  { this->is_lvalue_ = true; }
-
   void
   set_needs_bounds_check(bool b)
   { this->needs_bounds_check_ = b; }
@@ -3174,8 +3162,6 @@ class Array_index_expression : public Expression
   Expression* cap_;
   // The type of the expression.
   Type* type_;
-  // Whether expr appears in an lvalue context.
-  bool is_lvalue_;
   // Whether bounds check is needed.
   bool needs_bounds_check_;
   // Whether this has already been flattened.
diff --git a/gcc/go/gofrontend/types.cc b/gcc/go/gofrontend/types.cc
index 3de0bd3ae61..ef656705037 100644
--- a/gcc/go/gofrontend/types.cc
+++ b/gcc/go/gofrontend/types.cc
@@ -7815,7 +7815,7 @@ Array_type::finish_backend_element(Gogo* gogo)

RE: [PATCH] Reconstruct i386 testsuite with __builtin_cpu_supports

2022-05-06 Thread Jiang, Haochen via Gcc-patches
Hi Uros,

I understand that we always keep the old testcases there. It is always safe to 
do that.

But I have another question, if we add something new in one of the existing 
files in the future,
should we use __builtin_cpu_supports to keep the code clearer or stick to 
cpuids?

I believe __builtin_cpu_supports is the clearer way for a reader to 
understand the checks under the current circumstances.
So if we are going to use it for new code, why don't we change everything to the same way?

BRs,
Haochen 

-Original Message-
From: Uros Bizjak  
Sent: Friday, May 6, 2022 5:17 PM
To: Hongyu Wang 
Cc: Jiang, Haochen ; Liu, Hongtao 
; gcc-patches@gcc.gnu.org
Subject: Re: [PATCH] Reconstruct i386 testsuite with __builtin_cpu_supports

On Fri, May 6, 2022 at 11:00 AM Hongyu Wang  wrote:
>
> > I don't think *_os_support calls should be removed. IIRC,
> > __builtin_cpu_supports function checks if the feature is supported by
> > CPU, whereas *_os_supports calls check via xgetbv if OS supports
> > handling of new registers.
>
> avx_os_support is like
>
> avx_os_support (void)
> {
>   unsigned int eax, edx;
>   unsigned int ecx = XCR_XFEATURE_ENABLED_MASK;
>
>   __asm__ ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (ecx));
>
>   return (eax & (XSTATE_SSE | XSTATE_YMM)) == (XSTATE_SSE | XSTATE_YMM);
> }
>
> While in get_avaliable_features we have
>
> #define XCR_AVX_ENABLED_MASK \
>   (XSTATE_SSE | XSTATE_YMM)
>   if ((ecx & bit_OSXSAVE))
> {
>   /* Check if XMM, YMM, OPMASK, upper 256 bits of ZMM0-ZMM15 and
> ZMM16-ZMM31 states are supported by OSXSAVE.  */
>   unsigned int xcrlow;
>   unsigned int xcrhigh;
>   __asm__ (".byte 0x0f, 0x01, 0xd0" /* xgetbv  */
>: "=a" (xcrlow), "=d" (xcrhigh)
>: "c" (XCR_XFEATURE_ENABLED_MASK));
>   if ((xcrlow & XCR_AVX_ENABLED_MASK) == XCR_AVX_ENABLED_MASK)
> {
>   avx_usable = 1;
>
> So __builtin_cpu_supports already inherits same check

Indeed, thanks for the explanation.

OTOH, we don't change the existing tests (perhaps only dg- directives
when infrastructure improves), so I would leave the existing testcases
as they are. In future, new helper functions should be implemented
with __builtin_cpu_supports, but let's leave existing ones as they
are.

Uros.

> Uros Bizjak via Gcc-patches  于2022年5月6日周五 16:27写道:
> >
> > On Fri, May 6, 2022 at 9:57 AM Haochen Jiang  
> > wrote:
> > >
> > > Hi all,
> > >
> > > There are some check files in the i386 testsuite that were written before the 
> > > function __builtin_cpu_supports was introduced. All of them use 
> > > __get_cpuid_count. This patch aims to rework the i386 testsuite with 
> > > __builtin_cpu_supports so that we can have much clearer code.
> > >
> > > Regtested on x86_64-pc-linux-gnu. Ok for trunk?
> >
> > I don't think *_os_support calls should be removed. IIRC,
> > __builtin_cpu_supports function checks if the feature is supported by
> > CPU, whereas *_os_supports calls check via xgetbv if OS supports
> > handling of new registers.
> >
> > Uros.
> >
> > >
> > > Also, while writing this patch, I found some files in the testsuite that 
> > > might be useless currently. For example, the file 
> > > gcc/testsuite/gcc.target/i386/sse-os-support.h always returns 1. And 
> > > there are also some files that will no longer be included at all with this 
> > > patch. Should we remove those files when we have time?
> > >
> > > BRs,
> > > Haochen
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > > * gcc.target/i386/adx-check.h: Change bit check to
> > > __builtin_cpu_supports.
> > > * gcc.target/i386/aes-avx-check.h: Ditto.
> > > * gcc.target/i386/aes-check.h: Ditto.
> > > * gcc.target/i386/avx-check.h: Ditto.
> > > * gcc.target/i386/avx2-check.h: Ditto.
> > > * gcc.target/i386/avx512-check.h: Ditto.
> > > * gcc.target/i386/bmi-check.h: Ditto.
> > > * gcc.target/i386/bmi2-check.h: Ditto.
> > > * gcc.target/i386/f16c-check.h: Ditto.
> > > * gcc.target/i386/fma-check.h: Ditto.
> > > * gcc.target/i386/fma4-check.h: Ditto.
> > > * gcc.target/i386/lzcnt-check.h: Ditto.
> > > * gcc.target/i386/mmx-3dnow-check.h: Ditto.
> > > * gcc.target/i386/mmx-check.h: Ditto.
> > > * gcc.target/i386/pclmul-avx-check.h: Ditto.
> > > * gcc.target/i386/pclmul-check.h: Ditto.
> > > * gcc.target/i386/rtm-check.h: Ditto.
> > > * gcc.target/i386/sha-check.h: Ditto.
> > > * gcc.target/i386/sse-check.h: Ditto.
> > > * gcc.target/i386/sse2-check.h: Ditto.
> > > * gcc.target/i386/sse3-check.h: Ditto.
> > > * gcc.target/i386/sse4_1-check.h: Ditto.
> > > * gcc.target/i386/sse4_2-check.h: Ditto.
> > > * gcc.target/i386/sse4a-check.h: Ditto.
> > > * gcc.target/i386/ssse3-check.h: Ditto.
> > > * gcc.target/i386/xop-check.h: Ditto.
> > > ---
> > >  gcc/testsuite/gcc.target/i386/adx-check.h | 10 +---
> > >  gcc/testsuite/gcc.target/i386/aes-a

RE: [PATCH] [i386]Add combine splitter to transform pxor/pcmpeqb/pmovmskb/cmp 0xffff to ptest.

2022-05-06 Thread Jiang, Haochen via Gcc-patches
> -Original Message-
> From: Hongyu Wang 
> Sent: Friday, May 6, 2022 4:50 PM
> To: Jiang, Haochen 
> Cc: GCC Patches ; Liu, Hongtao
> 
> Subject: Re: [PATCH] [i386]Add combine splitter to transform
> pxor/pcmpeqb/pmovmskb/cmp 0xffff to ptest.
> 
> > +(define_split
> > +  [(set (reg:CCZ FLAGS_REG)
> > +   (compare:CCZ (unspec:SI
> > +   [(eq:VI1_AVX2
> > +   (match_operand:VI1_AVX2 0 "vector_operand")
> > +   (match_operand:VI1_AVX2 1 "const0_operand"))]
> > +   UNSPEC_MOVMSK)
> > +(match_operand 2 "const_int_operand")))]
> > +  "TARGET_SSE4_1 && ix86_match_ccmode (insn, CCmode)
> 
> It looks like set_src and set_dst are all CCZmode, do we really need
> ix86_match_ccmode?
> 
> > +  && (INTVAL (operands[2]) == (int) ())"
> 
> I think (int) convert is not needed for const, and INTVAL actually
> returns HOST_WIDE_INT

The (int) conversion is needed here, because we need 0xffffffff to become -1 in
this comparison.

Haochen.

> 
> > +#include 
> > +
> > +bool is_zero(__m128i x)
> 
> bool is not necessary here, we can use int and drop stdbool.
> 
> Haochen Jiang via Gcc-patches  于2022年5月6
> 日周五 16:01写道:
> >
> > Hi all,
> >
> > This patch aims to add a combine splitter to transform
> pxor/pcmpeqb/pmovmskb/cmp 0xffff to ptest.
> >
> > Regtested on x86_64-pc-linux-gnu. Ok for trunk?
> >
> > BRs,
> > Haochen
> >
> > gcc/ChangeLog:
> >
> > PR target/104371
> > * config/i386/sse.md: Add new define_mode_attr and define_split.
> >
> > gcc/testsuite/ChangeLog:
> >
> > PR target/104371
> > * gcc.target/i386/pr104371-1.c: New test.
> > * gcc.target/i386/pr104371-2.c: Ditto.
> > ---
> >  gcc/config/i386/sse.md | 19 +++
> >  gcc/testsuite/gcc.target/i386/pr104371-1.c | 14 ++
> >  gcc/testsuite/gcc.target/i386/pr104371-2.c | 14 ++
> >  3 files changed, 47 insertions(+)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr104371-1.c
> >  create mode 100755 gcc/testsuite/gcc.target/i386/pr104371-2.c
> >
> > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> > index 7b791def542..71afda73c8f 100644
> > --- a/gcc/config/i386/sse.md
> > +++ b/gcc/config/i386/sse.md
> > @@ -20083,6 +20083,25 @@
> > (set_attr "prefix" "maybe_vex")
> > (set_attr "mode" "SI")])
> >
> > +;; Optimize pxor/pcmpeqb/pmovmskb/cmp 0x to ptest.
> > +(define_mode_attr vi1avx2const
> > +  [(V32QI "0x") (V16QI "0x")])
> > +
> > +(define_split
> > +  [(set (reg:CCZ FLAGS_REG)
> > +   (compare:CCZ (unspec:SI
> > +   [(eq:VI1_AVX2
> > +   (match_operand:VI1_AVX2 0 "vector_operand")
> > +   (match_operand:VI1_AVX2 1 "const0_operand"))]
> > +   UNSPEC_MOVMSK)
> > +(match_operand 2 "const_int_operand")))]
> > +  "TARGET_SSE4_1 && ix86_match_ccmode (insn, CCmode)
> > +  && (INTVAL (operands[2]) == (int) ())"
> > +  [(set (reg:CC FLAGS_REG)
> > +   (unspec:CC [(match_dup 0)
> > +   (match_dup 0)]
> > +  UNSPEC_PTEST))])
> > +
> >  (define_expand "sse2_maskmovdqu"
> >[(set (match_operand:V16QI 0 "memory_operand")
> > (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
> > diff --git a/gcc/testsuite/gcc.target/i386/pr104371-1.c
> b/gcc/testsuite/gcc.target/i386/pr104371-1.c
> > new file mode 100644
> > index 000..df7c0b074e3
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr104371-1.c
> > @@ -0,0 +1,14 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -msse4" } */
> > +/* { dg-final { scan-assembler "ptest\[ \\t\]" } } */
> > +/* { dg-final { scan-assembler-not "pxor\[ \\t\]" } } */
> > +/* { dg-final { scan-assembler-not "pcmpeqb\[ \\t\]" } } */
> > +/* { dg-final { scan-assembler-not "pmovmskb\[ \\t\]" } } */
> > +
> > +#include 
> > +#include 
> > +
> > +bool is_zero(__m128i x)
> > +{
> > +  return _mm_movemask_epi8(_mm_cmpeq_epi8(x, _mm_setzero_si128()))
> == 0x;
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/pr104371-2.c
> b/gcc/testsuite/gcc.target/i386/pr104371-2.c
> > new file mode 100755
> > index 000..f0d0afd5897
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr104371-2.c
> > @@ -0,0 +1,14 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -mavx2" } */
> > +/* { dg-final { scan-assembler "vptest\[ \\t\]" } } */
> > +/* { dg-final { scan-assembler-not "vpxor\[ \\t\]" } } */
> > +/* { dg-final { scan-assembler-not "vpcmpeqb\[ \\t\]" } } */
> > +/* { dg-final { scan-assembler-not "vpmovmskb\[ \\t\]" } } */
> > +
> > +#include 
> > +#include 
> > +
> > +bool is_zero256(__m256i x)
> > +{
> > +  return _mm256_movemask_epi8(_mm256_cmpeq_epi8(x,
> _mm256_setzero_si256())) == 0x;
> > +}
> > --
> > 2.18.1
> >


RE: [PATCH] [i386]Add combine splitter to transform pxor/pcmpeqb/pmovmskb/cmp 0xffff to ptest.

2022-05-06 Thread Jiang, Haochen via Gcc-patches


> -Original Message-
> From: Uros Bizjak 
> Sent: Friday, May 6, 2022 4:59 PM
> To: Jiang, Haochen 
> Cc: gcc-patches@gcc.gnu.org; Liu, Hongtao 
> Subject: Re: [PATCH] [i386]Add combine splitter to transform
> pxor/pcmpeqb/pmovmskb/cmp 0xffff to ptest.
> 
> On Fri, May 6, 2022 at 10:01 AM Haochen Jiang 
> wrote:
> >
> > Hi all,
> >
> > This patch aims to add a combine splitter to transform 
> > pxor/pcmpeqb/pmovmskb/cmp 0xffff to ptest.
> >
> > Regtested on x86_64-pc-linux-gnu. Ok for trunk?
> >
> > BRs,
> > Haochen
> >
> > gcc/ChangeLog:
> >
> > PR target/104371
> > * config/i386/sse.md: Add new define_mode_attr and define_split.
> >
> > gcc/testsuite/ChangeLog:
> >
> > PR target/104371
> > * gcc.target/i386/pr104371-1.c: New test.
> > * gcc.target/i386/pr104371-2.c: Ditto.
> > ---
> >  gcc/config/i386/sse.md | 19 +++
> >  gcc/testsuite/gcc.target/i386/pr104371-1.c | 14 ++
> > gcc/testsuite/gcc.target/i386/pr104371-2.c | 14 ++
> >  3 files changed, 47 insertions(+)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr104371-1.c
> >  create mode 100755 gcc/testsuite/gcc.target/i386/pr104371-2.c
> >
> > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index
> > 7b791def542..71afda73c8f 100644
> > --- a/gcc/config/i386/sse.md
> > +++ b/gcc/config/i386/sse.md
> > @@ -20083,6 +20083,25 @@
> > (set_attr "prefix" "maybe_vex")
> > (set_attr "mode" "SI")])
> >
> > +;; Optimize pxor/pcmpeqb/pmovmskb/cmp 0x to ptest.
> > +(define_mode_attr vi1avx2const
> > +  [(V32QI "0x") (V16QI "0x")])
> > +
> > +(define_split
> > +  [(set (reg:CCZ FLAGS_REG)
> > +   (compare:CCZ (unspec:SI
> > +   [(eq:VI1_AVX2
> > +   (match_operand:VI1_AVX2 0 "vector_operand")
> > +   (match_operand:VI1_AVX2 1 "const0_operand"))]
> > +   UNSPEC_MOVMSK)
> > +(match_operand 2 "const_int_operand")))]
> > +  "TARGET_SSE4_1 && ix86_match_ccmode (insn, CCmode)
> 
> No need to use ix86_match_ccmode here, the pattern is already limited to
> CCZmode,
> 
> Uros.
> 

I have removed this condition in the new patch and also updated the testcase
according to Hongyu's review.

Is the patch Ok for trunk?

Haochen

> > +  && (INTVAL (operands[2]) == (int) ())"
> > +  [(set (reg:CC FLAGS_REG)
> > +   (unspec:CC [(match_dup 0)
> > +   (match_dup 0)]
> > +  UNSPEC_PTEST))])
> > +
> >  (define_expand "sse2_maskmovdqu"
> >[(set (match_operand:V16QI 0 "memory_operand")
> > (unspec:V16QI [(match_operand:V16QI 1 "register_operand") diff
> > --git a/gcc/testsuite/gcc.target/i386/pr104371-1.c
> > b/gcc/testsuite/gcc.target/i386/pr104371-1.c
> > new file mode 100644
> > index 000..df7c0b074e3
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr104371-1.c
> > @@ -0,0 +1,14 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -msse4" } */
> > +/* { dg-final { scan-assembler "ptest\[ \\t\]" } } */
> > +/* { dg-final { scan-assembler-not "pxor\[ \\t\]" } } */
> > +/* { dg-final { scan-assembler-not "pcmpeqb\[ \\t\]" } } */
> > +/* { dg-final { scan-assembler-not "pmovmskb\[ \\t\]" } } */
> > +
> > +#include 
> > +#include 
> > +
> > +bool is_zero(__m128i x)
> > +{
> > +  return _mm_movemask_epi8(_mm_cmpeq_epi8(x, _mm_setzero_si128()))
> ==
> > +0x; }
> > diff --git a/gcc/testsuite/gcc.target/i386/pr104371-2.c
> > b/gcc/testsuite/gcc.target/i386/pr104371-2.c
> > new file mode 100755
> > index 000..f0d0afd5897
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr104371-2.c
> > @@ -0,0 +1,14 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -mavx2" } */
> > +/* { dg-final { scan-assembler "vptest\[ \\t\]" } } */
> > +/* { dg-final { scan-assembler-not "vpxor\[ \\t\]" } } */
> > +/* { dg-final { scan-assembler-not "vpcmpeqb\[ \\t\]" } } */
> > +/* { dg-final { scan-assembler-not "vpmovmskb\[ \\t\]" } } */
> > +
> > +#include 
> > +#include 
> > +
> > +bool is_zero256(__m256i x)
> > +{
> > +  return _mm256_movemask_epi8(_mm256_cmpeq_epi8(x,
> > +_mm256_setzero_si256())) == 0x; }
> > --
> > 2.18.1
> >


0001-i386-Add-combine-splitter-to-transform-pxor-pcmpeqb-.patch
Description: 0001-i386-Add-combine-splitter-to-transform-pxor-pcmpeqb-.patch


Re: [PATCH] PR105169 Fix references to discarded sections

2022-05-06 Thread Giuliano Belinassi via Gcc-patches
Hi,

On Tue, 2022-04-19 at 10:11 +0200, Richard Biener wrote:
> On Thu, 14 Apr 2022, Giuliano Belinassi wrote:
> 
> > When -fpatchable-function-entry= is enabled, certain C++ code
> > fails to
> > link because of generated references to discarded sections in the
> > __patchable_function_entries section. This commit fixes this problem
> > by
> > putting those references in a COMDAT section.
> > 
> > Bootstrapped and regtested on x86_64 linux.
> > 
> > OK for Stage4?
> > 
> > 2022-04-13  Giuliano Belinassi  
> > 
> > PR c++/105169
> > * targhooks.cc (default_print_patchable_function_entry_1):
> > Handle COMDAT case.
> > * varasm.cc (handle_vtv_comdat_section): Rename to...
> > (switch_to_comdat_section): Generalize to also cover
> > __patchable_function_entry case.
> > (assemble_variable): Rename call from handle_vtv_comdat_section
> > to
> > switch_to_comdat_section.
> > (output_object_block): Same as above.
> > * varasm.h: Declare switch_to_comdat_section.
> > 
> > 2022-04-13  Giuliano Belinassi  
> > 
> > PR c++/105169
> > * g++.dg/modules/pr105169.h: New file.
> > * g++.dg/modules/pr105169_a.C: New test.
> > * g++.dg/modules/pr105169_b.C: New file.
> > 
> > Signed-off-by: Giuliano Belinassi 
> > ---
> >  gcc/targhooks.cc  |  8 ++--
> >  gcc/testsuite/ChangeLog   |  7 +++
> >  gcc/testsuite/g++.dg/modules/pr105169.h   | 22
> > 
> >  gcc/testsuite/g++.dg/modules/pr105169_a.C | 25
> > +++
> >  gcc/testsuite/g++.dg/modules/pr105169_b.C | 12 +++
> >  gcc/varasm.cc | 25 +
> > --
> >  gcc/varasm.h  |  1 +
> >  7 files changed, 87 insertions(+), 13 deletions(-)
> >  create mode 100644 gcc/testsuite/g++.dg/modules/pr105169.h
> >  create mode 100644 gcc/testsuite/g++.dg/modules/pr105169_a.C
> >  create mode 100644 gcc/testsuite/g++.dg/modules/pr105169_b.C
> > 
> > diff --git a/gcc/targhooks.cc b/gcc/targhooks.cc
> > index e22bc66a6c8..540460e7db9 100644
> > --- a/gcc/targhooks.cc
> > +++ b/gcc/targhooks.cc
> > @@ -1995,8 +1995,12 @@ default_print_patchable_function_entry_1
> > (FILE *file,
> >patch_area_number++;
> >ASM_GENERATE_INTERNAL_LABEL (buf, "LPFE",
> > patch_area_number);
> >  
> > -  switch_to_section (get_section
> > ("__patchable_function_entries",
> > - flags, current_function_decl));
> > +  section *sect = get_section ("__patchable_function_entries",
> > + flags, current_function_decl);
> > +  if (HAVE_COMDAT_GROUP && DECL_COMDAT_GROUP
> > (current_function_decl))
> > +   switch_to_comdat_section (sect, current_function_decl);
> 
> You are passing a decl here, but ...
> 
> > +  else
> > +   switch_to_section (sect);
> >assemble_align (POINTER_SIZE);
> >fputs (asm_op, file);
> >assemble_name_raw (file, buf);
> > diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
> > index 9ab7a178bf8..524a546a832 100644
> > --- a/gcc/testsuite/ChangeLog
> > +++ b/gcc/testsuite/ChangeLog
> > @@ -1,3 +1,10 @@
> > +2022-04-13  Giuliano Belinassi  
> > +
> > +   PR c++/105169
> > +   * g++.dg/modules/pr105169.h: New file.
> > +   * g++.dg/modules/pr105169_a.C: New test.
> > +   * g++.dg/modules/pr105169_b.C: New file.
> > +
> >  2022-04-12  Antoni Boucher  
> >  
> > PR jit/104293
> > diff --git a/gcc/testsuite/g++.dg/modules/pr105169.h
> > b/gcc/testsuite/g++.dg/modules/pr105169.h
> > new file mode 100644
> > index 000..a7e76270531
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.dg/modules/pr105169.h
> > @@ -0,0 +1,22 @@
> > +class IPXAddressClass
> > +{
> > +public:
> > +IPXAddressClass(void);
> > +};
> > +
> > +class WinsockInterfaceClass
> > +{
> > +
> > +public:
> > +WinsockInterfaceClass(void);
> > +
> > +virtual void Set_Broadcast_Address(void*){};
> > +
> > +virtual int Get_Protocol(void)
> > +{
> > +return 0;
> > +};
> > +
> > +protected:
> > +};
> > +
> > diff --git a/gcc/testsuite/g++.dg/modules/pr105169_a.C
> > b/gcc/testsuite/g++.dg/modules/pr105169_a.C
> > new file mode 100644
> > index 000..66dc4b7901f
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.dg/modules/pr105169_a.C
> > @@ -0,0 +1,25 @@
> > +/* { dg-module-do link } */
> > +/* { dg-options "-std=c++11 -fpatchable-function-entry=1 -O2" } */
> > +/* { dg-additional-options "-std=c++11 -fpatchable-function-
> > entry=1 -O2" } */
> > +
> > +/* This test is in the "modules" package because it supports
> > multiple files
> > +   linkage.  */
> > +
> > +#include "pr105169.h"
> > +
> > +WinsockInterfaceClass* PacketTransport;
> > +
> > +IPXAddressClass::IPXAddressClass(void)
> > +{
> > +}
> > +
> > +int function()
> > +{
> > +  return PacketTransport->Get_Protocol();
> > +}
> > +
> > +int main()
> > +{
> > +  IPXAddressClass ipxaddr;
> > +  return 0;
> > +}
> > diff --git 

[PATCH] Expand __builtin_memcmp_eq with ptest for OImode.

2022-05-06 Thread liuhongt via Gcc-patches
This is the adjusted patch, handling OImode only.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for trunk?

gcc/ChangeLog:

PR target/104610
* config/i386/i386-expand.cc (ix86_expand_branch): Use ptest
for OImode when code is EQ or NE.
* config/i386/sse.md (cbranch4): Extend to OImode.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr104610.c: New test.
---
 gcc/config/i386/i386-expand.cc   | 10 +-
 gcc/config/i386/sse.md   |  8 ++--
 gcc/testsuite/gcc.target/i386/pr104610.c | 15 +++
 3 files changed, 30 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr104610.c

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index bc806ffa283..c2f8776102c 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -2267,11 +2267,19 @@ ix86_expand_branch (enum rtx_code code, rtx op0, rtx 
op1, rtx label)
 
   /* Handle special case - vector comparsion with boolean result, transform
  it using ptest instruction.  */
-  if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
+  if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
+  || (mode == OImode && (code == EQ || code == NE)))
 {
   rtx flag = gen_rtx_REG (CCZmode, FLAGS_REG);
   machine_mode p_mode = GET_MODE_SIZE (mode) == 32 ? V4DImode : V2DImode;
 
+  if (mode == OImode)
+   {
+ op0 = lowpart_subreg (p_mode, force_reg (mode, op0), mode);
+ op1 = lowpart_subreg (p_mode, force_reg (mode, op1), mode);
+ mode = p_mode;
+   }
+
   gcc_assert (code == EQ || code == NE);
   /* Generate XOR since we can't check that one operand is zero vector.  */
   tmp = gen_reg_rtx (mode);
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 7b791def542..9514b8e0234 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -26034,10 +26034,14 @@ (define_expand "maskstore"
  (match_operand: 2 "register_operand")))]
   "TARGET_AVX512BW")
 
+(define_mode_iterator VI48_OI_AVX
+  [(V8SI "TARGET_AVX") (V4DI "TARGET_AVX") (OI "TARGET_AVX")
+   V4SI V2DI])
+
 (define_expand "cbranch4"
   [(set (reg:CC FLAGS_REG)
-   (compare:CC (match_operand:VI48_AVX 1 "register_operand")
-   (match_operand:VI48_AVX 2 "nonimmediate_operand")))
+   (compare:CC (match_operand:VI48_OI_AVX 1 "register_operand")
+   (match_operand:VI48_OI_AVX 2 "nonimmediate_operand")))
(set (pc) (if_then_else
   (match_operator 0 "bt_comparison_operator"
[(reg:CC FLAGS_REG) (const_int 0)])
diff --git a/gcc/testsuite/gcc.target/i386/pr104610.c 
b/gcc/testsuite/gcc.target/i386/pr104610.c
new file mode 100644
index 000..00866238bd7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr104610.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mmove-max=256 -mstore-max=256" } */
+/* { dg-final { scan-assembler-times {(?n)vptest.*ymm} 1 } } */
+/* { dg-final { scan-assembler-times {sete} 1 } } */
+/* { dg-final { scan-assembler-not {(?n)je.*L[0-9]} } } */
+/* { dg-final { scan-assembler-not {(?n)jne.*L[0-9]} } } */
+
+
+#include
+__attribute__((target("avx")))
+bool f256(char *a)
+{
+  char t[] = "0123456789012345678901234567890";
+  return __builtin_memcmp(a, &t[0], sizeof(t)) == 0;
+}
-- 
2.18.1