[gcc r15-1849] MIPS/testsuite: Fix umips-save-restore-1.c

2024-07-05 Thread YunQiang Su via Gcc-cvs
https://gcc.gnu.org/g:f1437b96029d78e72bd987997f5303e29ebbb9f0

commit r15-1849-gf1437b96029d78e72bd987997f5303e29ebbb9f0
Author: YunQiang Su 
Date:   Fri Jun 28 16:11:35 2024 +0800

MIPS/testsuite: Fix umips-save-restore-1.c

With some recent optimizations, -O1/-O2/-O3 can achieve almost the same
performance/size with stack load/store.  Thus lwm/swm will save/restore
fewer callee-saved registers.  In fact only $16 is saved with swm.

To be sure that this optimization does exist, let's add 2 more
function calls, so that lwm/swm becomes much more profitable.

If we add only one more call, -O1 will still use stack load/store.

gcc/testsuite
* gcc.target/mips/umips-save-restore-1.c: Be sure lwm/swm
are used for more callee-saved registers by adding
2 more function calls.

Diff:
---
 gcc/testsuite/gcc.target/mips/umips-save-restore-1.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.target/mips/umips-save-restore-1.c 
b/gcc/testsuite/gcc.target/mips/umips-save-restore-1.c
index ff1ea4b339a..0e2c4dcc844 100644
--- a/gcc/testsuite/gcc.target/mips/umips-save-restore-1.c
+++ b/gcc/testsuite/gcc.target/mips/umips-save-restore-1.c
@@ -7,12 +7,14 @@ int bar (int, int, int, int, int);
 MICROMIPS int
 foo (int n, int a, int b, int c, int d)
 {
-  int i, j;
+  int i, j, k, l;
 
   i = bar (n, a, b, c, d);
   j = bar (n, a, b, c, d);
-  return i + j;
+  k = bar (n, a, b, c, d);
+  l = bar (n, a, b, c, d);
+  return i + j + k + l;
 }
 
-/* { dg-final { scan-assembler "\tswm\t\\\$16-\\\$2(0|1),\\\$31" } } */
-/* { dg-final { scan-assembler "\tlwm\t\\\$16-\\\$2(0|1),\\\$31" } } */
+/* { dg-final { scan-assembler "\tswm\t\\\$16-\\\$2(2|3),\\\$31" } } */
+/* { dg-final { scan-assembler "\tlwm\t\\\$16-\\\$2(2|3),\\\$31" } } */


[gcc r15-1850] MIPS/testsuite: Add -mfpxx to call-clobbered-1.c

2024-07-05 Thread YunQiang Su via Gcc-cvs
https://gcc.gnu.org/g:e08ed5f1c98ea8de086f5c2d1e373aec6e195735

commit r15-1850-ge08ed5f1c98ea8de086f5c2d1e373aec6e195735
Author: YunQiang Su 
Date:   Fri Jun 28 10:08:38 2024 +0800

MIPS/testsuite: Add -mfpxx to call-clobbered-1.c

The scan-assembler-times rules only fit -mfp32 and -mfpxx.
The test fails if we are configured as FP64 by default, as that
has one fewer sdc1/ldc1 pair.

gcc/testsuite
* gcc.target/mips/call-clobbered-1.c: Add -mfpxx.

Diff:
---
 gcc/testsuite/gcc.target/mips/call-clobbered-1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/mips/call-clobbered-1.c 
b/gcc/testsuite/gcc.target/mips/call-clobbered-1.c
index 77294aa3c2d..8880ad13684 100644
--- a/gcc/testsuite/gcc.target/mips/call-clobbered-1.c
+++ b/gcc/testsuite/gcc.target/mips/call-clobbered-1.c
@@ -1,6 +1,6 @@
 /* Check that we handle call-clobbered FPRs correctly.  */
 /* { dg-skip-if "code quality test" { *-*-* } { "-O0" } { "" } } */
-/* { dg-options "isa>=2 -mabi=32 -mhard-float -ffixed-f0 -ffixed-f1 -ffixed-f2 
-ffixed-f3 -ffixed-f4 -ffixed-f5 -ffixed-f6 -ffixed-f7 -ffixed-f8 -ffixed-f9 
-ffixed-f10 -ffixed-f11 -ffixed-f12 -ffixed-f13 -ffixed-f14 -ffixed-f15 
-ffixed-f16 -ffixed-f17 -ffixed-f18 -ffixed-f19" } */
+/* { dg-options "isa>=2 -mabi=32 -mfpxx -mhard-float -ffixed-f0 -ffixed-f1 
-ffixed-f2 -ffixed-f3 -ffixed-f4 -ffixed-f5 -ffixed-f6 -ffixed-f7 -ffixed-f8 
-ffixed-f9 -ffixed-f10 -ffixed-f11 -ffixed-f12 -ffixed-f13 -ffixed-f14 
-ffixed-f15 -ffixed-f16 -ffixed-f17 -ffixed-f18 -ffixed-f19" } */
 
 void bar (void);
 double a;


[gcc r15-1851] Testsuite/MIPS: Fix msa.c: test7_v2f64, test7_v4f32, test43_v2i64

2024-07-05 Thread YunQiang Su via Gcc-cvs
https://gcc.gnu.org/g:33dfd6798aa3e5f8be58c8810f9814d57485fe12

commit r15-1851-g33dfd6798aa3e5f8be58c8810f9814d57485fe12
Author: YunQiang Su 
Date:   Thu Jun 27 18:05:30 2024 +0800

Testsuite/MIPS: Fix msa.c: test7_v2f64, test7_v4f32, test43_v2i64

BNEGI.W/D are used for test7_v2f64 and test7_v4f32 now.  It is
an improvement since we can save an instruction.

ILVR.D is used for test43_v2i64 now, instead of INSVE.D.

gcc/testsuite
* gcc.target/mips/msa.c: Fix test7_v2f64, test7_v4f32 and
test43_v2i64.

Diff:
---
 gcc/testsuite/gcc.target/mips/msa.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/gcc.target/mips/msa.c 
b/gcc/testsuite/gcc.target/mips/msa.c
index b741f35556f..62d0606dfef 100644
--- a/gcc/testsuite/gcc.target/mips/msa.c
+++ b/gcc/testsuite/gcc.target/mips/msa.c
@@ -90,8 +90,8 @@
 /* { dg-final { scan-assembler-times "test7_v8u16:.*subv.h.*test7_v8u16" 1 } } 
*/
 /* { dg-final { scan-assembler-times "test7_v4u32:.*subv.w.*test7_v4u32" 1 } } 
*/
 /* { dg-final { scan-assembler-times "test7_v2u64:.*subv.d.*test7_v2u64" 1 } } 
*/
-/* { dg-final { scan-assembler-times "test7_v4f32:.*fsub.w.*test7_v4f32" 1 } } 
*/
-/* { dg-final { scan-assembler-times "test7_v2f64:.*fsub.d.*test7_v2f64" 1 } } 
*/
+/* { dg-final { scan-assembler-times "test7_v4f32:.*bnegi.w.*test7_v4f32" 1 } 
} */
+/* { dg-final { scan-assembler-times "test7_v2f64:.*bnegi.d.*test7_v2f64" 1 } 
} */
 /* { dg-final { scan-assembler-times "test8_v16i8:.*xor.v.*test8_v16i8" 1 } } 
*/
 /* { dg-final { scan-assembler-times "test8_v8i16:.*xor.v.*test8_v8i16" 1 } } 
*/
 /* { dg-final { scan-assembler-times "test8_v4i32:.*xor.v.*test8_v4i32" 1 } } 
*/
@@ -401,7 +401,7 @@
 /* { dg-final { scan-assembler-times "test43_v16i8:.*insve.b.*test43_v16i8" 1 
} } */
 /* { dg-final { scan-assembler-times "test43_v8i16:.*insve.h.*test43_v8i16" 1 
} } */
 /* { dg-final { scan-assembler-times "test43_v4i32:.*insve.w.*test43_v4i32" 1 
} } */
-/* { dg-final { scan-assembler-times "test43_v2i64:.*insve.d.*test43_v2i64" 1 
} } */
+/* { dg-final { scan-assembler-times "test43_v2i64:.*ilvr.d.*test43_v2i64" 1 } 
} */
 /* { dg-final { scan-assembler-times "test44_v16i8:.*copy_s.b.*test44_v16i8" 1 
} } */
 /* { dg-final { scan-assembler-times "test44_v8i16:.*copy_s.h.*test44_v8i16" 1 
} } */
 /* { dg-final { scan-assembler-times "test44_v4i32:.*copy_s.w.*test44_v4i32" 1 
} } */


[gcc r15-1852] MIPS: Support more cases with alien mode of SHF.DF

2024-07-05 Thread YunQiang Su via Gcc-cvs
https://gcc.gnu.org/g:320c2ed4d2b4b007bab5ebf0078e6c730ad25d3e

commit r15-1852-g320c2ed4d2b4b007bab5ebf0078e6c730ad25d3e
Author: YunQiang Su 
Date:   Thu Jun 27 18:28:27 2024 +0800

MIPS: Support more cases with alien mode of SHF.DF

Currently, we support the cases that strictly fit for the instructions.
For example, for V16QImode, we only support shuffle like
(0 <= N0, N1, N2, N3 <= 3 here)
N0,    N1,    N2,    N3
N0+4,  N1+4,  N2+4,  N3+4
N0+8,  N1+8,  N2+8,  N3+8
N0+12, N1+12, N2+12, N3+12

In fact, we can support more cases by trying other SHF.DF
instructions that do not strictly fit the mode.

1) We can use SHF.H to support more cases for V16QImode:
(M0/M1/M2/M3 are 0 or 2 or 4 or 6)
M0,   M0+1, M1,   M1+1
M2,   M2+1, M3,   M3+1
M0+8, M0+9, M1+8, M1+9
M2+8, M2+9, M3+8, M3+9

2) We can use SHF.W to support some cases for V16QImode:
(M0/M1/M2/M3 are 0 or 4 or 8 or 12)
M0, M0+1,   M0+2,   M0+3
M1, M1+1,   M1+2,   M1+3
M2, M2+1,   M2+2,   M2+3
M3, M3+1,   M3+2,   M3+3

3) We can use SHF.W to support some cases for V8HImode:
(M0/M1/M2/M3 are 0 or 2 or 4 or 6)
M0, M0+1
M1, M1+1
M2, M2+1
M3, M3+1

4) We can also use SHF.W to swap the 2 parts of V2DF or V2DI.

gcc
* config/mips/mips-protos.h: New function mips_msa_shf_i8.
* config/mips/mips-msa.md(MSA_WHB_W): Not used anymore;
(msa_shf_): Use mips_msa_shf_i8.
* config/mips/mips.cc(mips_const_vector_shuffle_set_p):
Support more cases try to use alien mode instruction;
(mips_msa_shf_i8): New function to get the correct MSA SHF
instruction and IMM.

Diff:
---
 gcc/config/mips/mips-msa.md   |  35 +-
 gcc/config/mips/mips-protos.h |   1 +
 gcc/config/mips/mips.cc   | 149 ++
 3 files changed, 170 insertions(+), 15 deletions(-)

diff --git a/gcc/config/mips/mips-msa.md b/gcc/config/mips/mips-msa.md
index 0081b688ce9..377c63f0d35 100644
--- a/gcc/config/mips/mips-msa.md
+++ b/gcc/config/mips/mips-msa.md
@@ -125,9 +125,6 @@
 ;; Only floating-point modes.
 (define_mode_iterator FMSA [V2DF V4SF])
 
-;; Only used for immediate set shuffle elements instruction.
-(define_mode_iterator MSA_WHB_W [V4SI V8HI V16QI V4SF])
-
 ;; The attribute gives the integer vector mode with same size.
 (define_mode_attr VIMODE
   [(V2DF "V2DI")
@@ -2520,21 +2517,29 @@
(set_attr "mode" "")])
 
 (define_insn "msa_shf_"
-  [(set (match_operand:MSA_WHB_W 0 "register_operand" "=f")
-   (vec_select:MSA_WHB_W
- (match_operand:MSA_WHB_W 1 "register_operand" "f")
+  [(set (match_operand:MSA 0 "register_operand" "=f")
+   (vec_select:MSA
+ (match_operand:MSA 1 "register_operand" "f")
  (match_operand 2 "par_const_vector_shf_set_operand" "")))]
   "ISA_HAS_MSA"
 {
-  HOST_WIDE_INT val = 0;
-  unsigned int i;
-
-  /* We convert the selection to an immediate.  */
-  for (i = 0; i < 4; i++)
-val |= INTVAL (XVECEXP (operands[2], 0, i)) << (2 * i);
-
-  operands[2] = GEN_INT (val);
-  return "shf.\t%w0,%w1,%X2";
+  HOST_WIDE_INT rval = mips_msa_shf_i8 (operands);
+  /* 0b11100100 means that there is no shf needed at all.  This RTL
+ should be optimized out in some pass.  */
+  if ((rval & 0xff) == 0xe4)
+gcc_unreachable ();
+  operands[2] = GEN_INT (rval & 0xff);
+  switch (rval & 0xff00)
+  {
+  default: gcc_unreachable ();
+  case 0x400:
+return "shf.w\t%w0,%w1,%X2";
+  case 0x200:
+return "shf.h\t%w0,%w1,%X2";
+  case 0x100:
+return "shf.b\t%w0,%w1,%X2";
+  }
+  gcc_unreachable ();
 }
   [(set_attr "type" "simd_shf")
(set_attr "mode" "")])
diff --git a/gcc/config/mips/mips-protos.h b/gcc/config/mips/mips-protos.h
index 75f80984c03..90b4c87fdea 100644
--- a/gcc/config/mips/mips-protos.h
+++ b/gcc/config/mips/mips-protos.h
@@ -387,6 +387,7 @@ extern mulsidi3_gen_fn mips_mulsidi3_gen_fn (enum rtx_code);
 extern void mips_register_frame_header_opt (void);
 extern void mips_expand_vec_cond_expr (machine_mode, machine_mode, rtx *, 
bool);
 extern void mips_expand_vec_cmp_expr (rtx *);
+extern HOST_WIDE_INT mips_msa_shf_i8 (rtx *);
 
 extern void mips_emit_speculation_barrier_function (void);
 
diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index 7d4791157d1..6c797b62164 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -2079,6 +2079,72 @@ mips_const_vector_shuffle_set_p (rtx op, machine_mode 
mode)
   int nsets = nunits / 4;
   int set = 0;
   int i, j;
+  int val[4];
+  bool ok;
+
+  /* We support swapping 2 Doubleword part with shf.w.  */
+  if (ISA_HAS_MSA && (mode == V2DFmode || mode == V2DImode))
+{
+  if (!IN_RANGE (INTVAL (

[gcc r15-1853] i386: Refactor ssedoublemode

2024-07-05 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:319d3956b16b1270f27e9cbf749e881c4ff7dfb4

commit r15-1853-g319d3956b16b1270f27e9cbf749e881c4ff7dfb4
Author: Hu, Lin1 
Date:   Thu Jul 4 11:18:46 2024 +0800

i386: Refactor ssedoublemode

ssedoublemode's double should mean double type, like SI -> DI.
And we need to refactor some patterns to use <ssedoublevecmode>
instead of <ssedoublemode>.

gcc/ChangeLog:

* config/i386/sse.md (ssedoublemode): Remove mappings to twice
the number of same-sized elements. Add mappings to the same
number of double-sized elements.
(define_split for vec_concat_minus_plus): Change mode_attr from
ssedoublemode to ssedoublevecmode.
(define_split for vec_concat_plus_minus): Ditto.
(avx512dq_shuf_64x2_1):
Ditto.
(avx512f_shuf_64x2_1): Ditto.
(avx512vl_shuf_32x4_1): Ditto.
(avx512f_shuf_32x4_1): Ditto.

Diff:
---
 gcc/config/i386/sse.md | 19 +--
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index d71b0f2567e..bda66d5e121 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -808,13 +808,12 @@
(V8HI "v8si")   (V16HI "v16si") (V32HI "v32si")
(V4SI "v4di")   (V8SI "v8di")   (V16SI "v16di")])
 
+;; Map vector mode to the same number of double sized elements.
 (define_mode_attr ssedoublemode
-  [(V4SF "V8SF") (V8SF "V16SF") (V16SF "V32SF")
-   (V2DF "V4DF") (V4DF "V8DF") (V8DF "V16DF")
+  [(V4SF "V4DF") (V8SF "V8DF") (V16SF "V16DF")
(V16QI "V16HI") (V32QI "V32HI") (V64QI "V64HI")
(V8HI "V8SI") (V16HI "V16SI") (V32HI "V32SI")
-   (V4SI "V4DI") (V8SI "V16SI") (V16SI "V32SI")
-   (V4DI "V8DI") (V8DI "V16DI")])
+   (V4SI "V4DI") (V8SI "V8DI") (V16SI "V16DI")])
 
 (define_mode_attr ssebytemode
   [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")
@@ -3319,7 +3318,7 @@
 (define_split
   [(set (match_operand:VF_128_256 0 "register_operand")
(match_operator:VF_128_256 7 "addsub_vs_operator"
- [(vec_concat:
+ [(vec_concat:
 (minus:VF_128_256
   (match_operand:VF_128_256 1 "register_operand")
   (match_operand:VF_128_256 2 "vector_operand"))
@@ -3353,7 +3352,7 @@
 (define_split
   [(set (match_operand:VF_128_256 0 "register_operand")
(match_operator:VF_128_256 7 "addsub_vs_operator"
- [(vec_concat:
+ [(vec_concat:
 (plus:VF_128_256
   (match_operand:VF_128_256 1 "vector_operand")
   (match_operand:VF_128_256 2 "vector_operand"))
@@ -19869,7 +19868,7 @@
 (define_insn "avx512dq_shuf_64x2_1"
   [(set (match_operand:VI8F_256 0 "register_operand" "=x,v")
(vec_select:VI8F_256
- (vec_concat:
+ (vec_concat:
(match_operand:VI8F_256 1 "register_operand" "x,v")
(match_operand:VI8F_256 2 "nonimmediate_operand" "xjm,vm"))
  (parallel [(match_operand 3 "const_0_to_3_operand")
@@ -19922,7 +19921,7 @@
 (define_insn "avx512f_shuf_64x2_1"
   [(set (match_operand:V8FI 0 "register_operand" "=v")
(vec_select:V8FI
- (vec_concat:
+ (vec_concat:
(match_operand:V8FI 1 "register_operand" "v")
(match_operand:V8FI 2 "nonimmediate_operand" "vm"))
  (parallel [(match_operand 3 "const_0_to_7_operand")
@@ -20020,7 +20019,7 @@
 (define_insn "avx512vl_shuf_32x4_1"
   [(set (match_operand:VI4F_256 0 "register_operand" "=x,v")
(vec_select:VI4F_256
- (vec_concat:
+ (vec_concat:
(match_operand:VI4F_256 1 "register_operand" "x,v")
(match_operand:VI4F_256 2 "nonimmediate_operand" "xjm,vm"))
  (parallel [(match_operand 3 "const_0_to_7_operand")
@@ -20091,7 +20090,7 @@
 (define_insn "avx512f_shuf_32x4_1"
   [(set (match_operand:V16FI 0 "register_operand" "=v")
(vec_select:V16FI
- (vec_concat:
+ (vec_concat:
(match_operand:V16FI 1 "register_operand" "v")
(match_operand:V16FI 2 "nonimmediate_operand" "vm"))
  (parallel [(match_operand 3 "const_0_to_15_operand")


[gcc r15-1854] middle-end: Add debug functions to dump dominator tree in dot format

2024-07-05 Thread Alex Coplan via Gcc-cvs
https://gcc.gnu.org/g:ae07f62a70ee2d0fdd7d8786122ae6360cfd4ca9

commit r15-1854-gae07f62a70ee2d0fdd7d8786122ae6360cfd4ca9
Author: Alex Coplan 
Date:   Fri Jul 5 11:57:56 2024 +0100

middle-end: Add debug functions to dump dominator tree in dot format

This adds debug functions to dump the dominator tree in dot format.
There are two overloads: one which takes a FILE * and another which
takes a const char *fname and wraps the first with fopen/fclose for
convenience.

gcc/ChangeLog:

* dominance.cc (dot_dominance_tree): New.

Diff:
---
 gcc/dominance.cc | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/gcc/dominance.cc b/gcc/dominance.cc
index 0357210ed27..c14d997ded7 100644
--- a/gcc/dominance.cc
+++ b/gcc/dominance.cc
@@ -1658,6 +1658,36 @@ debug_dominance_info (enum cdi_direction dir)
   fprintf (stderr, "%i %i\n", bb->index, bb2->index);
 }
 
+/* Dump the dominance tree in direction DIR to the file F in dot form.
+   This allows easily visualizing the tree using graphviz.  */
+
+DEBUG_FUNCTION void
+dot_dominance_tree (FILE *f, enum cdi_direction dir)
+{
+  fprintf (f, "digraph {\n");
+  basic_block bb, idom;
+  FOR_EACH_BB_FN (bb, cfun)
+if ((idom = get_immediate_dominator (dir, bb)))
+  fprintf (f, "%i -> %i;\n", idom->index, bb->index);
+  fprintf (f, "}\n");
+}
+
+/* Convenience wrapper around the above that dumps the dominance tree in
+   direction DIR to the file at path FNAME in dot form.  */
+
+DEBUG_FUNCTION void
+dot_dominance_tree (const char *fname, enum cdi_direction dir)
+{
+  FILE *f = fopen (fname, "w");
+  if (f)
+{
+  dot_dominance_tree (f, dir);
+  fclose (f);
+}
+  else
+fprintf (stderr, "failed to open %s: %s\n", fname, xstrerror (errno));
+}
+
 /* Prints to stderr representation of the dominance tree (for direction DIR)
rooted in ROOT, indented by INDENT tabulators.  If INDENT_FIRST is false,
the first line of the output is not indented.  */


[gcc r15-1855] AArch64: remove aarch64_simd_vec_unpack_lo_

2024-07-05 Thread Tamar Christina via Gcc-cvs
https://gcc.gnu.org/g:6ff698106644af39da9e0eda51974fdcd111280d

commit r15-1855-g6ff698106644af39da9e0eda51974fdcd111280d
Author: Tamar Christina 
Date:   Fri Jul 5 12:09:21 2024 +0100

AArch64: remove aarch64_simd_vec_unpack_lo_

The fix for PR18127 reworked the uxtl to zip optimization.
In doing so it undid the changes in aarch64_simd_vec_unpack_lo_, which
now no longer matches aarch64_simd_vec_unpack_hi_.  It still works
because the RTL generated by aarch64_simd_vec_unpack_lo_ overlaps with
the general zero extend RTL, and because that one is listed before the
lo pattern, recog picks it instead.

This removes aarch64_simd_vec_unpack_lo_.

gcc/ChangeLog:

* config/aarch64/aarch64-simd.md
(aarch64_simd_vec_unpack_lo_): Remove.
(vec_unpack_lo__lo_"
-  [(set (match_operand: 0 "register_operand" "=w")
-(ANY_EXTEND: (vec_select:
-  (match_operand:VQW 1 "register_operand" "w")
-  (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
-   )))]
-  "TARGET_SIMD"
-  "xtl\t%0., %1."
-  [(set_attr "type" "neon_shift_imm_long")]
-)
-
 (define_insn_and_split "aarch64_simd_vec_unpack_hi_"
   [(set (match_operand: 0 "register_operand" "=w")
 (ANY_EXTEND: (vec_select:
@@ -1952,14 +1941,11 @@
 )
 
 (define_expand "vec_unpack_lo_"
-  [(match_operand: 0 "register_operand")
-   (ANY_EXTEND: (match_operand:VQW 1 "register_operand"))]
+  [(set (match_operand: 0 "register_operand")
+   (ANY_EXTEND: (match_operand:VQW 1 "register_operand")))]
   "TARGET_SIMD"
   {
-rtx p = aarch64_simd_vect_par_cnst_half (mode, , false);
-emit_insn (gen_aarch64_simd_vec_unpack_lo_ (operands[0],
- operands[1], p));
-DONE;
+operands[1] = lowpart_subreg (mode, operands[1], mode);
   }
 )
 
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 6b106a72e49..469eb938953 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -23188,7 +23188,8 @@ aarch64_gen_shareable_zero (machine_mode mode)
to split without that restriction and instead recombine shared zeros
if they turn out not to be worthwhile.  This would allow splits in
single-block functions and would also cope more naturally with
-   rematerialization.  */
+   rematerialization.  The downside of not doing this is that we lose the
+   optimizations for vector epilogues as well.  */
 
 bool
 aarch64_split_simd_shift_p (rtx_insn *insn)


[gcc r15-1856] AArch64: lower 2 reg TBL permutes with one zero register to 1 reg TBL.

2024-07-05 Thread Tamar Christina via Gcc-cvs
https://gcc.gnu.org/g:97fcfeac3dcc433b792711fd840b92fa3e860733

commit r15-1856-g97fcfeac3dcc433b792711fd840b92fa3e860733
Author: Tamar Christina 
Date:   Fri Jul 5 12:10:39 2024 +0100

AArch64: lower 2 reg TBL permutes with one zero register to 1 reg TBL.

When a two reg TBL is performed with one operand being a zero vector we can
instead use a single reg TBL and map the indices for accessing the zero
vector to an out of range constant.

On AArch64 out of range indices into a TBL have the defined semantics of
setting the element to zero.  Many uArches have a slower 2-reg TBL than
1-reg TBL.

Before this change we had:

typedef unsigned int v4si __attribute__ ((vector_size (16)));

v4si f1 (v4si a)
{
  v4si zeros = {0,0,0,0};
  return __builtin_shufflevector (a, zeros, 0, 5, 1, 6);
}

which generates:

f1:
mov v30.16b, v0.16b
movi v31.4s, 0
adrp x0, .LC0
ldr q0, [x0, #:lo12:.LC0]
tbl v0.16b, {v30.16b - v31.16b}, v0.16b
ret

.LC0:
.byte   0
.byte   1
.byte   2
.byte   3
.byte   20
.byte   21
.byte   22
.byte   23
.byte   4
.byte   5
.byte   6
.byte   7
.byte   24
.byte   25
.byte   26
.byte   27

and with the patch:

f1:
adrp x0, .LC0
ldr q31, [x0, #:lo12:.LC0]
tbl v0.16b, {v0.16b}, v31.16b
ret

.LC0:
.byte   0
.byte   1
.byte   2
.byte   3
.byte   -1
.byte   -1
.byte   -1
.byte   -1
.byte   4
.byte   5
.byte   6
.byte   7
.byte   -1
.byte   -1
.byte   -1
.byte   -1

This sequence is generated often by openmp and aside from the
strict performance impact of this change, it also gives better
register allocation as we no longer have the consecutive
register limitation.

gcc/ChangeLog:

* config/aarch64/aarch64.cc (struct expand_vec_perm_d): Add
zero_op0_p and zero_op1_p.
(aarch64_evpc_tbl): Implement register value remapping.
(aarch64_vectorize_vec_perm_const): Detect if operand is a zero dup
before it's forced to a reg.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/tbl_with_zero_1.c: New test.
* gcc.target/aarch64/tbl_with_zero_2.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64.cc  | 40 ++
 gcc/testsuite/gcc.target/aarch64/tbl_with_zero_1.c | 40 ++
 gcc/testsuite/gcc.target/aarch64/tbl_with_zero_2.c | 20 +++
 3 files changed, 94 insertions(+), 6 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 469eb938953..7f0cc47d0f0 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -25413,6 +25413,7 @@ struct expand_vec_perm_d
   unsigned int vec_flags;
   unsigned int op_vec_flags;
   bool one_vector_p;
+  bool zero_op0_p, zero_op1_p;
   bool testing_p;
 };
 
@@ -25909,13 +25910,38 @@ aarch64_evpc_tbl (struct expand_vec_perm_d *d)
   /* to_constant is safe since this routine is specific to Advanced SIMD
  vectors.  */
   unsigned int nelt = d->perm.length ().to_constant ();
+
+  /* If one register is the constant vector of 0 then we only need
+ a one reg TBL and we map any accesses to the vector of 0 to -1.  We can't
+ do this earlier since vec_perm_indices clamps elements to within range so
+ we can only do it during codegen.  */
+  if (d->zero_op0_p)
+d->op0 = d->op1;
+  else if (d->zero_op1_p)
+d->op1 = d->op0;
+
   for (unsigned int i = 0; i < nelt; ++i)
-/* If big-endian and two vectors we end up with a weird mixed-endian
-   mode on NEON.  Reverse the index within each word but not the word
-   itself.  to_constant is safe because we checked is_constant above.  */
-rperm[i] = GEN_INT (BYTES_BIG_ENDIAN
-   ? d->perm[i].to_constant () ^ (nelt - 1)
-   : d->perm[i].to_constant ());
+{
+  auto val = d->perm[i].to_constant ();
+
+  /* If we're selecting from a 0 vector, we can just use an out of range
+index instead.  */
+  if ((d->zero_op0_p && val < nelt) || (d->zero_op1_p && val >= nelt))
+   rperm[i] = constm1_rtx;
+  else
+   {
+ /* If we are remapping a zero register as the first parameter we need
+to adjust the indices of the non-zero register.  */
+ if (d->zero_op0_p)
+   val = val % nelt;
+
+ /* If big-endian and two vectors we end up with a

[gcc r15-1857] libstdc++: Use memchr to optimize std::find [PR88545]

2024-07-05 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:de19b516edbf919d31e9d22fdbf6066342d904a2

commit r15-1857-gde19b516edbf919d31e9d22fdbf6066342d904a2
Author: Jonathan Wakely 
Date:   Wed Jun 5 16:01:26 2024 +0100

libstdc++: Use memchr to optimize std::find [PR88545]

This optimizes std::find to use memchr when searching for an integer in
a range of bytes.

libstdc++-v3/ChangeLog:

PR libstdc++/88545
PR libstdc++/115040
* include/bits/cpp_type_traits.h (__can_use_memchr_for_find):
New variable template.
* include/bits/ranges_util.h (__find_fn): Use memchr when
possible.
* include/bits/stl_algo.h (find): Likewise.
* testsuite/25_algorithms/find/bytes.cc: New test.

Diff:
---
 libstdc++-v3/include/bits/cpp_type_traits.h|  13 ++
 libstdc++-v3/include/bits/ranges_util.h|  21 
 libstdc++-v3/include/bits/stl_algo.h   |  35 +-
 libstdc++-v3/testsuite/25_algorithms/find/bytes.cc | 135 +
 4 files changed, 202 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/include/bits/cpp_type_traits.h 
b/libstdc++-v3/include/bits/cpp_type_traits.h
index abe0c7603e3..4bfb4521e06 100644
--- a/libstdc++-v3/include/bits/cpp_type_traits.h
+++ b/libstdc++-v3/include/bits/cpp_type_traits.h
@@ -35,6 +35,10 @@
 #pragma GCC system_header
 
 #include 
+#include 
+#if __glibcxx_type_trait_variable_templates
+# include  // is_same_v, is_integral_v
+#endif
 
 //
 // This file provides some compile-time information about various types.
@@ -547,6 +551,15 @@ __INT_N(__GLIBCXX_TYPE_INT_N_3)
 { static constexpr bool __value = false; };
 #endif
 
+#if __glibcxx_type_trait_variable_templates
+  template
+constexpr bool __can_use_memchr_for_find
+// Can only use memchr to search for narrow characters and std::byte.
+  = __is_byte<_ValT>::__value
+   // And only if the value to find is an integer (or is also std::byte).
+ && (is_same_v<_Tp, _ValT> || is_integral_v<_Tp>);
+#endif
+
   //
   // Move iterator type
   //
diff --git a/libstdc++-v3/include/bits/ranges_util.h 
b/libstdc++-v3/include/bits/ranges_util.h
index 9b79c3a229d..186acae4f70 100644
--- a/libstdc++-v3/include/bits/ranges_util.h
+++ b/libstdc++-v3/include/bits/ranges_util.h
@@ -34,6 +34,7 @@
 # include 
 # include 
 # include 
+# include  // __can_use_memchr_for_find
 
 #ifdef __glibcxx_ranges
 namespace std _GLIBCXX_VISIBILITY(default)
@@ -494,6 +495,26 @@ namespace ranges
   operator()(_Iter __first, _Sent __last,
 const _Tp& __value, _Proj __proj = {}) const
   {
+   if constexpr (is_same_v<_Proj, identity>)
+ if constexpr(__can_use_memchr_for_find, _Tp>)
+   if constexpr (sized_sentinel_for<_Sent, _Iter>)
+ if constexpr (contiguous_iterator<_Iter>)
+   if (!is_constant_evaluated())
+ {
+   if (static_cast>(__value) != __value)
+ return __last;
+
+   auto __n = __last - __first;
+   if (__n > 0)
+ {
+   const int __ival = static_cast(__value);
+   const void* __p0 = std::to_address(__first);
+   if (auto __p1 = __builtin_memchr(__p0, __ival, __n))
+ __n = (const char*)__p1 - (const char*)__p0;
+ }
+   return __first + __n;
+ }
+
while (__first != __last
&& !(std::__invoke(__proj, *__first) == __value))
  ++__first;
diff --git a/libstdc++-v3/include/bits/stl_algo.h 
b/libstdc++-v3/include/bits/stl_algo.h
index 1a996aa61da..45c3b591326 100644
--- a/libstdc++-v3/include/bits/stl_algo.h
+++ b/libstdc++-v3/include/bits/stl_algo.h
@@ -3838,14 +3838,45 @@ _GLIBCXX_BEGIN_NAMESPACE_ALGO
   template
 _GLIBCXX20_CONSTEXPR
 inline _InputIterator
-find(_InputIterator __first, _InputIterator __last,
-const _Tp& __val)
+find(_InputIterator __first, _InputIterator __last, const _Tp& __val)
 {
   // concept requirements
   __glibcxx_function_requires(_InputIteratorConcept<_InputIterator>)
   __glibcxx_function_requires(_EqualOpConcept<
typename iterator_traits<_InputIterator>::value_type, _Tp>)
   __glibcxx_requires_valid_range(__first, __last);
+
+#if __cpp_if_constexpr && __glibcxx_type_trait_variable_templates
+  using _ValT = typename iterator_traits<_InputIterator>::value_type;
+  if constexpr (__can_use_memchr_for_find<_ValT, _Tp>)
+   {
+ // If converting the value to the 1-byte value_type alters its value,
+ // then it would not be found by std::find using equality comparison.
+ // We need to check this here, because otherwise something like
+ // memchr("a", 'a'+256, 1) would give a false positive match.
+ if (!(static_cast<_ValT>(__val) == __val))
+   retu

[gcc r15-1858] libstdc++: Use RAII in

2024-07-05 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:6025256d47b36fed455cc4f8dd4becf53208bbae

commit r15-1858-g6025256d47b36fed455cc4f8dd4becf53208bbae
Author: Jonathan Wakely 
Date:   Thu Jun 20 15:53:13 2024 +0100

libstdc++: Use RAII in <bits/stl_uninitialized.h>

This adds an _UninitDestroyGuard class template, similar to
ranges::_DestroyGuard used in <bits/ranges_uninitialized.h>. This allows
us to remove all the try-catch blocks and rethrows, because any required
cleanup gets done in the guard destructor.

libstdc++-v3/ChangeLog:

* include/bits/stl_uninitialized.h (_UninitDestroyGuard): New
class template and partial specialization.
(__do_uninit_copy, __do_uninit_fill, __do_uninit_fill_n)
(__uninitialized_copy_a, __uninitialized_fill_a)
(__uninitialized_fill_n_a, __uninitialized_copy_move)
(__uninitialized_move_copy, __uninitialized_fill_move)
(__uninitialized_move_fill, __uninitialized_default_1)
(__uninitialized_default_n_a, __uninitialized_default_novalue_1)
(__uninitialized_default_novalue_n_1, __uninitialized_copy_n)
(__uninitialized_copy_n_pair): Use it.

Diff:
---
 libstdc++-v3/include/bits/stl_uninitialized.h | 365 +++---
 1 file changed, 156 insertions(+), 209 deletions(-)

diff --git a/libstdc++-v3/include/bits/stl_uninitialized.h 
b/libstdc++-v3/include/bits/stl_uninitialized.h
index 3c405d8fbe8..a9965f26269 100644
--- a/libstdc++-v3/include/bits/stl_uninitialized.h
+++ b/libstdc++-v3/include/bits/stl_uninitialized.h
@@ -107,24 +107,70 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 __is_trivial(T) && __is_assignable(T&, U)
 #endif
 
+  template
+struct _UninitDestroyGuard
+{
+  _GLIBCXX20_CONSTEXPR
+  explicit
+  _UninitDestroyGuard(_ForwardIterator& __first, _Alloc& __a)
+  : _M_first(__first), _M_cur(__builtin_addressof(__first)), _M_alloc(__a)
+  { }
+
+  _GLIBCXX20_CONSTEXPR
+  ~_UninitDestroyGuard()
+  {
+   if (__builtin_expect(_M_cur != 0, 0))
+ std::_Destroy(_M_first, *_M_cur, _M_alloc);
+  }
+
+  _GLIBCXX20_CONSTEXPR
+  void release() { _M_cur = 0; }
+
+private:
+  _ForwardIterator const _M_first;
+  _ForwardIterator* _M_cur;
+  _Alloc& _M_alloc;
+
+  _UninitDestroyGuard(const _UninitDestroyGuard&);
+};
+
+  template
+struct _UninitDestroyGuard<_ForwardIterator, void>
+{
+  _GLIBCXX20_CONSTEXPR
+  explicit
+  _UninitDestroyGuard(_ForwardIterator& __first)
+  : _M_first(__first), _M_cur(__builtin_addressof(__first))
+  { }
+
+  _GLIBCXX20_CONSTEXPR
+  ~_UninitDestroyGuard()
+  {
+   if (__builtin_expect(_M_cur != 0, 0))
+ std::_Destroy(_M_first, *_M_cur);
+  }
+
+  _GLIBCXX20_CONSTEXPR
+  void release() { _M_cur = 0; }
+
+  _ForwardIterator const _M_first;
+  _ForwardIterator* _M_cur;
+
+private:
+  _UninitDestroyGuard(const _UninitDestroyGuard&);
+};
+
   template
 _GLIBCXX20_CONSTEXPR
 _ForwardIterator
 __do_uninit_copy(_InputIterator __first, _InputIterator __last,
 _ForwardIterator __result)
 {
-  _ForwardIterator __cur = __result;
-  __try
-   {
- for (; __first != __last; ++__first, (void)++__cur)
-   std::_Construct(std::__addressof(*__cur), *__first);
- return __cur;
-   }
-  __catch(...)
-   {
- std::_Destroy(__result, __cur);
- __throw_exception_again;
-   }
+  _UninitDestroyGuard<_ForwardIterator> __guard(__result);
+  for (; __first != __last; ++__first, (void)++__result)
+   std::_Construct(std::__addressof(*__result), *__first);
+  __guard.release();
+  return __result;
 }
 
   template
@@ -192,17 +238,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 __do_uninit_fill(_ForwardIterator __first, _ForwardIterator __last,
 const _Tp& __x)
 {
-  _ForwardIterator __cur = __first;
-  __try
-   {
- for (; __cur != __last; ++__cur)
-   std::_Construct(std::__addressof(*__cur), __x);
-   }
-  __catch(...)
-   {
- std::_Destroy(__first, __cur);
- __throw_exception_again;
-   }
+  _UninitDestroyGuard<_ForwardIterator> __guard(__first);
+  for (; __first != __last; ++__first)
+   std::_Construct(std::__addressof(*__first), __x);
+  __guard.release();
 }
 
   template
@@ -260,18 +299,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 _ForwardIterator
 __do_uninit_fill_n(_ForwardIterator __first, _Size __n, const _Tp& __x)
 {
-  _ForwardIterator __cur = __first;
-  __try
-   {
- for (; __n > 0; --__n, (void) ++__cur)
-   std::_Construct(std::__addressof(*__cur), __x);
- return __cur;
-   }
-  __catch(...)
-   {
- std::_Destroy(__first, __cur);
- __throw_exception_again;
-   }
+  _UninitDestroyGuard<_ForwardIterator> __guard(__first);
+ 

[gcc r15-1859] libstdc++: Add dg-error for new -Wdelete-incomplete diagnostics [PR115747]

2024-07-05 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:f63896ff5a4fa4fe451f9ec7f16026eb97ed8e6d

commit r15-1859-gf63896ff5a4fa4fe451f9ec7f16026eb97ed8e6d
Author: Jonathan Wakely 
Date:   Fri Jul 5 12:16:46 2024 +0100

libstdc++: Add dg-error for new -Wdelete-incomplete diagnostics [PR115747]

Since r15-1794-gbeb7a418aaef2e the -Wdelete-incomplete diagnostic is a
permerror instead of a (suppressed in system headers) warning. Add
dg-error directives.

libstdc++-v3/ChangeLog:

PR c++/115747
* testsuite/tr1/2_general_utilities/shared_ptr/cons/43820_neg.cc:
Add dg-error for new C++26 diagnostics.

Diff:
---
 .../testsuite/tr1/2_general_utilities/shared_ptr/cons/43820_neg.cc | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/libstdc++-v3/testsuite/tr1/2_general_utilities/shared_ptr/cons/43820_neg.cc b/libstdc++-v3/testsuite/tr1/2_general_utilities/shared_ptr/cons/43820_neg.cc
index d4cb45d0e06..a4c99ca1775 100644
--- a/libstdc++-v3/testsuite/tr1/2_general_utilities/shared_ptr/cons/43820_neg.cc
+++ b/libstdc++-v3/testsuite/tr1/2_general_utilities/shared_ptr/cons/43820_neg.cc
@@ -39,6 +39,9 @@ void test01()
   // { dg-error "incomplete" "" { target *-*-* } 600 }
 }
 
+// { dg-error "-Wdelete-incomplete" "" { target c++26 } 283 }
+// { dg-error "-Wdelete-incomplete" "" { target c++26 } 305 }
+
 // Ignore additional diagnostic given with -Wsystem-headers:
 // { dg-prune-output "has incomplete type" }
 // { dg-prune-output "possible problem detected" }


[gcc r15-1860] AVR: target/87376 - Use nop_general_operand for DImode inputs.

2024-07-05 Thread Georg-Johann Lay via Gcc-cvs
https://gcc.gnu.org/g:23a0935262d6817097406578b1c70563f424804b

commit r15-1860-g23a0935262d6817097406578b1c70563f424804b
Author: Georg-Johann Lay 
Date:   Fri Jul 5 13:22:12 2024 +0200

AVR: target/87376 - Use nop_general_operand for DImode inputs.

The avr-dimode.md expanders have code like emit_move_insn(acc_a, operands[1])
where acc_a is a hard register and operands[1] might be a non-generic
address-space memory reference.  Such loads may clobber hard regs since
some of them are implemented as libgcc calls /and/ 64-bit moves are
expanded as eight byte-moves, so that acc_a or acc_b might be clobbered
by such a load.

This patch simply denies non-generic address-space references by using
nop_general_operand for all avr-dimode.md input predicates.
With the patch, all memory loads that require library calls are issued
before the expander codes from avr-dimode.md are run.

PR target/87376
gcc/
* config/avr/avr-dimode.md: Use "nop_general_operand" instead
of "general_operand" as predicate for all input operands.

gcc/testsuite/
* gcc.target/avr/torture/pr87376.c: New test.

Diff:
---
 gcc/config/avr/avr-dimode.md   | 26 +--
 gcc/testsuite/gcc.target/avr/torture/pr87376.c | 60 ++
 2 files changed, 73 insertions(+), 13 deletions(-)

diff --git a/gcc/config/avr/avr-dimode.md b/gcc/config/avr/avr-dimode.md
index 4b74e77e5e5..c357213e211 100644
--- a/gcc/config/avr/avr-dimode.md
+++ b/gcc/config/avr/avr-dimode.md
@@ -62,8 +62,8 @@
 ;; "addta3" "adduta3"
 (define_expand "add3"
   [(parallel [(match_operand:ALL8 0 "general_operand" "")
-  (match_operand:ALL8 1 "general_operand" "")
-  (match_operand:ALL8 2 "general_operand" "")])]
+  (match_operand:ALL8 1 "nop_general_operand")
+  (match_operand:ALL8 2 "nop_general_operand")])]
   "avr_have_dimode"
   {
 rtx acc_a = gen_rtx_REG (mode, ACC_A);
@@ -178,8 +178,8 @@
 ;; "subta3" "subuta3"
 (define_expand "sub3"
   [(parallel [(match_operand:ALL8 0 "general_operand" "")
-  (match_operand:ALL8 1 "general_operand" "")
-  (match_operand:ALL8 2 "general_operand" "")])]
+  (match_operand:ALL8 1 "nop_general_operand")
+  (match_operand:ALL8 2 "nop_general_operand")])]
   "avr_have_dimode"
   {
 rtx acc_a = gen_rtx_REG (mode, ACC_A);
@@ -259,8 +259,8 @@
 
 (define_expand "3"
   [(set (match_operand:ALL8S 0 "general_operand" "")
-(ss_addsub:ALL8S (match_operand:ALL8S 1 "general_operand" "")
- (match_operand:ALL8S 2 "general_operand" "")))]
+(ss_addsub:ALL8S (match_operand:ALL8S 1 "nop_general_operand")
+ (match_operand:ALL8S 2 "nop_general_operand")))]
   "avr_have_dimode"
   {
 rtx acc_a = gen_rtx_REG (mode, ACC_A);
@@ -332,8 +332,8 @@
 
 (define_expand "3"
   [(set (match_operand:ALL8U 0 "general_operand" "")
-(us_addsub:ALL8U (match_operand:ALL8U 1 "general_operand" "")
- (match_operand:ALL8U 2 "general_operand" "")))]
+(us_addsub:ALL8U (match_operand:ALL8U 1 "nop_general_operand")
+ (match_operand:ALL8U 2 "nop_general_operand")))]
   "avr_have_dimode"
   {
 rtx acc_a = gen_rtx_REG (mode, ACC_A);
@@ -405,7 +405,7 @@
 
 (define_expand "negdi2"
   [(parallel [(match_operand:DI 0 "general_operand" "")
-  (match_operand:DI 1 "general_operand" "")])]
+  (match_operand:DI 1 "nop_general_operand")])]
   "avr_have_dimode"
   {
 rtx acc_a = gen_rtx_REG (DImode, ACC_A);
@@ -602,8 +602,8 @@
 ;; "ashluta3"  "ashruta3"  "lshruta3"  "rotluta3"
 (define_expand "3"
   [(parallel [(match_operand:ALL8 0 "general_operand" "")
-  (di_shifts:ALL8 (match_operand:ALL8 1 "general_operand" "")
-  (match_operand:QI 2 "general_operand" ""))])]
+  (di_shifts:ALL8 (match_operand:ALL8 1 "nop_general_operand")
+  (match_operand:QI 2 "nop_general_operand"))])]
   "avr_have_dimode"
   {
 rtx acc_a = gen_rtx_REG (mode, ACC_A);
@@ -648,8 +648,8 @@
 ;; "mulsidi3"
 (define_expand "mulsidi3"
   [(parallel [(match_operand:DI 0 "register_operand" "")
-  (match_operand:SI 1 "general_operand" "")
-  (match_operand:SI 2 "general_operand" "")
+  (match_operand:SI 1 "nop_general_operand")
+  (match_operand:SI 2 "nop_general_operand")
   ;; Just to mention the iterator 
   (clobber (any_extend:SI (match_dup 1)))])]
   "avr_have_dimode
diff --git a/gcc/testsuite/gcc.target/avr/torture/pr87376.c b/gcc/testsuite/gcc.target/avr/torture/pr87376.c
new file mode 100644
index 000..c31a4a9dda5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/avr/torture/pr87376.c
@@ -0,0 +1,60 @@
+/* { dg-do run { target { ! avr_tiny } } } */
+/* { dg-additional-opt

[gcc r14-10382] AVR: target/87376 - Use nop_general_operand for DImode inputs.

2024-07-05 Thread Georg-Johann Lay via Gcc-cvs
https://gcc.gnu.org/g:9f5620aed3976716e20e13c285dade56578d5bab

commit r14-10382-g9f5620aed3976716e20e13c285dade56578d5bab
Author: Georg-Johann Lay 
Date:   Fri Jul 5 13:22:12 2024 +0200

AVR: target/87376 - Use nop_general_operand for DImode inputs.

The avr-dimode.md expanders have code like emit_move_insn(acc_a, operands[1])
where acc_a is a hard register and operands[1] might be a non-generic
address-space memory reference.  Such loads may clobber hard regs since
some of them are implemented as libgcc calls /and/ 64-bit moves are
expanded as eight byte-moves, so that acc_a or acc_b might be clobbered
by such a load.

This patch simply denies non-generic address-space references by using
nop_general_operand for all avr-dimode.md input predicates.
With the patch, all memory loads that require library calls are issued
before the expander codes from avr-dimode.md are run.

PR target/87376
gcc/
* config/avr/avr-dimode.md: Use "nop_general_operand" instead
of "general_operand" as predicate for all input operands.

gcc/testsuite/
* gcc.target/avr/torture/pr87376.c: New test.

(cherry picked from commit 23a0935262d6817097406578b1c70563f424804b)

Diff:
---
 gcc/config/avr/avr-dimode.md   | 26 +--
 gcc/testsuite/gcc.target/avr/torture/pr87376.c | 60 ++
 2 files changed, 73 insertions(+), 13 deletions(-)

diff --git a/gcc/config/avr/avr-dimode.md b/gcc/config/avr/avr-dimode.md
index 4b74e77e5e5..c357213e211 100644
--- a/gcc/config/avr/avr-dimode.md
+++ b/gcc/config/avr/avr-dimode.md
@@ -62,8 +62,8 @@
 ;; "addta3" "adduta3"
 (define_expand "add3"
   [(parallel [(match_operand:ALL8 0 "general_operand" "")
-  (match_operand:ALL8 1 "general_operand" "")
-  (match_operand:ALL8 2 "general_operand" "")])]
+  (match_operand:ALL8 1 "nop_general_operand")
+  (match_operand:ALL8 2 "nop_general_operand")])]
   "avr_have_dimode"
   {
 rtx acc_a = gen_rtx_REG (mode, ACC_A);
@@ -178,8 +178,8 @@
 ;; "subta3" "subuta3"
 (define_expand "sub3"
   [(parallel [(match_operand:ALL8 0 "general_operand" "")
-  (match_operand:ALL8 1 "general_operand" "")
-  (match_operand:ALL8 2 "general_operand" "")])]
+  (match_operand:ALL8 1 "nop_general_operand")
+  (match_operand:ALL8 2 "nop_general_operand")])]
   "avr_have_dimode"
   {
 rtx acc_a = gen_rtx_REG (mode, ACC_A);
@@ -259,8 +259,8 @@
 
 (define_expand "3"
   [(set (match_operand:ALL8S 0 "general_operand" "")
-(ss_addsub:ALL8S (match_operand:ALL8S 1 "general_operand" "")
- (match_operand:ALL8S 2 "general_operand" "")))]
+(ss_addsub:ALL8S (match_operand:ALL8S 1 "nop_general_operand")
+ (match_operand:ALL8S 2 "nop_general_operand")))]
   "avr_have_dimode"
   {
 rtx acc_a = gen_rtx_REG (mode, ACC_A);
@@ -332,8 +332,8 @@
 
 (define_expand "3"
   [(set (match_operand:ALL8U 0 "general_operand" "")
-(us_addsub:ALL8U (match_operand:ALL8U 1 "general_operand" "")
- (match_operand:ALL8U 2 "general_operand" "")))]
+(us_addsub:ALL8U (match_operand:ALL8U 1 "nop_general_operand")
+ (match_operand:ALL8U 2 "nop_general_operand")))]
   "avr_have_dimode"
   {
 rtx acc_a = gen_rtx_REG (mode, ACC_A);
@@ -405,7 +405,7 @@
 
 (define_expand "negdi2"
   [(parallel [(match_operand:DI 0 "general_operand" "")
-  (match_operand:DI 1 "general_operand" "")])]
+  (match_operand:DI 1 "nop_general_operand")])]
   "avr_have_dimode"
   {
 rtx acc_a = gen_rtx_REG (DImode, ACC_A);
@@ -602,8 +602,8 @@
 ;; "ashluta3"  "ashruta3"  "lshruta3"  "rotluta3"
 (define_expand "3"
   [(parallel [(match_operand:ALL8 0 "general_operand" "")
-  (di_shifts:ALL8 (match_operand:ALL8 1 "general_operand" "")
-  (match_operand:QI 2 "general_operand" ""))])]
+  (di_shifts:ALL8 (match_operand:ALL8 1 "nop_general_operand")
+  (match_operand:QI 2 "nop_general_operand"))])]
   "avr_have_dimode"
   {
 rtx acc_a = gen_rtx_REG (mode, ACC_A);
@@ -648,8 +648,8 @@
 ;; "mulsidi3"
 (define_expand "mulsidi3"
   [(parallel [(match_operand:DI 0 "register_operand" "")
-  (match_operand:SI 1 "general_operand" "")
-  (match_operand:SI 2 "general_operand" "")
+  (match_operand:SI 1 "nop_general_operand")
+  (match_operand:SI 2 "nop_general_operand")
   ;; Just to mention the iterator 
   (clobber (any_extend:SI (match_dup 1)))])]
   "avr_have_dimode
diff --git a/gcc/testsuite/gcc.target/avr/torture/pr87376.c b/gcc/testsuite/gcc.target/avr/torture/pr87376.c
new file mode 100644
index 000..c31a4a9dda5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/avr/torture/pr87376.c
@@ -0,0 

[gcc r13-8893] AVR: target/87376 - Use nop_general_operand for DImode inputs.

2024-07-05 Thread Georg-Johann Lay via Gcc-cvs
https://gcc.gnu.org/g:4a49d212b54ab152737e3209b7f80af3a80966b8

commit r13-8893-g4a49d212b54ab152737e3209b7f80af3a80966b8
Author: Georg-Johann Lay 
Date:   Fri Jul 5 13:22:12 2024 +0200

AVR: target/87376 - Use nop_general_operand for DImode inputs.

The avr-dimode.md expanders have code like emit_move_insn(acc_a, operands[1])
where acc_a is a hard register and operands[1] might be a non-generic
address-space memory reference.  Such loads may clobber hard regs since
some of them are implemented as libgcc calls /and/ 64-bit moves are
expanded as eight byte-moves, so that acc_a or acc_b might be clobbered
by such a load.

This patch simply denies non-generic address-space references by using
nop_general_operand for all avr-dimode.md input predicates.
With the patch, all memory loads that require library calls are issued
before the expander codes from avr-dimode.md are run.

PR target/87376
gcc/
* config/avr/avr-dimode.md: Use "nop_general_operand" instead
of "general_operand" as predicate for all input operands.

gcc/testsuite/
* gcc.target/avr/torture/pr87376.c: New test.

(cherry picked from commit 23a0935262d6817097406578b1c70563f424804b)

Diff:
---
 gcc/config/avr/avr-dimode.md   | 26 +--
 gcc/testsuite/gcc.target/avr/torture/pr87376.c | 60 ++
 2 files changed, 73 insertions(+), 13 deletions(-)

diff --git a/gcc/config/avr/avr-dimode.md b/gcc/config/avr/avr-dimode.md
index 91f0d395761..ba444be1b33 100644
--- a/gcc/config/avr/avr-dimode.md
+++ b/gcc/config/avr/avr-dimode.md
@@ -62,8 +62,8 @@
 ;; "addta3" "adduta3"
 (define_expand "add3"
   [(parallel [(match_operand:ALL8 0 "general_operand" "")
-  (match_operand:ALL8 1 "general_operand" "")
-  (match_operand:ALL8 2 "general_operand" "")])]
+  (match_operand:ALL8 1 "nop_general_operand")
+  (match_operand:ALL8 2 "nop_general_operand")])]
   "avr_have_dimode"
   {
 rtx acc_a = gen_rtx_REG (mode, ACC_A);
@@ -178,8 +178,8 @@
 ;; "subta3" "subuta3"
 (define_expand "sub3"
   [(parallel [(match_operand:ALL8 0 "general_operand" "")
-  (match_operand:ALL8 1 "general_operand" "")
-  (match_operand:ALL8 2 "general_operand" "")])]
+  (match_operand:ALL8 1 "nop_general_operand")
+  (match_operand:ALL8 2 "nop_general_operand")])]
   "avr_have_dimode"
   {
 rtx acc_a = gen_rtx_REG (mode, ACC_A);
@@ -259,8 +259,8 @@
 
 (define_expand "3"
   [(set (match_operand:ALL8S 0 "general_operand" "")
-(ss_addsub:ALL8S (match_operand:ALL8S 1 "general_operand" "")
- (match_operand:ALL8S 2 "general_operand" "")))]
+(ss_addsub:ALL8S (match_operand:ALL8S 1 "nop_general_operand")
+ (match_operand:ALL8S 2 "nop_general_operand")))]
   "avr_have_dimode"
   {
 rtx acc_a = gen_rtx_REG (mode, ACC_A);
@@ -332,8 +332,8 @@
 
 (define_expand "3"
   [(set (match_operand:ALL8U 0 "general_operand" "")
-(us_addsub:ALL8U (match_operand:ALL8U 1 "general_operand" "")
- (match_operand:ALL8U 2 "general_operand" "")))]
+(us_addsub:ALL8U (match_operand:ALL8U 1 "nop_general_operand")
+ (match_operand:ALL8U 2 "nop_general_operand")))]
   "avr_have_dimode"
   {
 rtx acc_a = gen_rtx_REG (mode, ACC_A);
@@ -405,7 +405,7 @@
 
 (define_expand "negdi2"
   [(parallel [(match_operand:DI 0 "general_operand" "")
-  (match_operand:DI 1 "general_operand" "")])]
+  (match_operand:DI 1 "nop_general_operand")])]
   "avr_have_dimode"
   {
 rtx acc_a = gen_rtx_REG (DImode, ACC_A);
@@ -602,8 +602,8 @@
 ;; "ashluta3"  "ashruta3"  "lshruta3"  "rotluta3"
 (define_expand "3"
   [(parallel [(match_operand:ALL8 0 "general_operand" "")
-  (di_shifts:ALL8 (match_operand:ALL8 1 "general_operand" "")
-  (match_operand:QI 2 "general_operand" ""))])]
+  (di_shifts:ALL8 (match_operand:ALL8 1 "nop_general_operand")
+  (match_operand:QI 2 "nop_general_operand"))])]
   "avr_have_dimode"
   {
 rtx acc_a = gen_rtx_REG (mode, ACC_A);
@@ -648,8 +648,8 @@
 ;; "mulsidi3"
 (define_expand "mulsidi3"
   [(parallel [(match_operand:DI 0 "register_operand" "")
-  (match_operand:SI 1 "general_operand" "")
-  (match_operand:SI 2 "general_operand" "")
+  (match_operand:SI 1 "nop_general_operand")
+  (match_operand:SI 2 "nop_general_operand")
   ;; Just to mention the iterator 
   (clobber (any_extend:SI (match_dup 1)))])]
   "avr_have_dimode
diff --git a/gcc/testsuite/gcc.target/avr/torture/pr87376.c b/gcc/testsuite/gcc.target/avr/torture/pr87376.c
new file mode 100644
index 000..c31a4a9dda5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/avr/torture/pr87376.c
@@ -0,0 +

[gcc r12-10602] AVR: target/87376 - Use nop_general_operand for DImode inputs.

2024-07-05 Thread Georg-Johann Lay via Gcc-cvs
https://gcc.gnu.org/g:5f4a60c43d5cd805add6529b4528c35893c283ae

commit r12-10602-g5f4a60c43d5cd805add6529b4528c35893c283ae
Author: Georg-Johann Lay 
Date:   Fri Jul 5 13:22:12 2024 +0200

AVR: target/87376 - Use nop_general_operand for DImode inputs.

The avr-dimode.md expanders have code like emit_move_insn(acc_a, operands[1])
where acc_a is a hard register and operands[1] might be a non-generic
address-space memory reference.  Such loads may clobber hard regs since
some of them are implemented as libgcc calls /and/ 64-bit moves are
expanded as eight byte-moves, so that acc_a or acc_b might be clobbered
by such a load.

This patch simply denies non-generic address-space references by using
nop_general_operand for all avr-dimode.md input predicates.
With the patch, all memory loads that require library calls are issued
before the expander codes from avr-dimode.md are run.

PR target/87376
gcc/
* config/avr/avr-dimode.md: Use "nop_general_operand" instead
of "general_operand" as predicate for all input operands.

gcc/testsuite/
* gcc.target/avr/torture/pr87376.c: New test.

(cherry picked from commit 23a0935262d6817097406578b1c70563f424804b)

Diff:
---
 gcc/config/avr/avr-dimode.md   | 26 +--
 gcc/testsuite/gcc.target/avr/torture/pr87376.c | 60 ++
 2 files changed, 73 insertions(+), 13 deletions(-)

diff --git a/gcc/config/avr/avr-dimode.md b/gcc/config/avr/avr-dimode.md
index 28e97da0516..78a7a745d34 100644
--- a/gcc/config/avr/avr-dimode.md
+++ b/gcc/config/avr/avr-dimode.md
@@ -62,8 +62,8 @@
 ;; "addta3" "adduta3"
 (define_expand "add3"
   [(parallel [(match_operand:ALL8 0 "general_operand" "")
-  (match_operand:ALL8 1 "general_operand" "")
-  (match_operand:ALL8 2 "general_operand" "")])]
+  (match_operand:ALL8 1 "nop_general_operand")
+  (match_operand:ALL8 2 "nop_general_operand")])]
   "avr_have_dimode"
   {
 rtx acc_a = gen_rtx_REG (mode, ACC_A);
@@ -178,8 +178,8 @@
 ;; "subta3" "subuta3"
 (define_expand "sub3"
   [(parallel [(match_operand:ALL8 0 "general_operand" "")
-  (match_operand:ALL8 1 "general_operand" "")
-  (match_operand:ALL8 2 "general_operand" "")])]
+  (match_operand:ALL8 1 "nop_general_operand")
+  (match_operand:ALL8 2 "nop_general_operand")])]
   "avr_have_dimode"
   {
 rtx acc_a = gen_rtx_REG (mode, ACC_A);
@@ -259,8 +259,8 @@
 
 (define_expand "3"
   [(set (match_operand:ALL8S 0 "general_operand" "")
-(ss_addsub:ALL8S (match_operand:ALL8S 1 "general_operand" "")
- (match_operand:ALL8S 2 "general_operand" "")))]
+(ss_addsub:ALL8S (match_operand:ALL8S 1 "nop_general_operand")
+ (match_operand:ALL8S 2 "nop_general_operand")))]
   "avr_have_dimode"
   {
 rtx acc_a = gen_rtx_REG (mode, ACC_A);
@@ -332,8 +332,8 @@
 
 (define_expand "3"
   [(set (match_operand:ALL8U 0 "general_operand" "")
-(us_addsub:ALL8U (match_operand:ALL8U 1 "general_operand" "")
- (match_operand:ALL8U 2 "general_operand" "")))]
+(us_addsub:ALL8U (match_operand:ALL8U 1 "nop_general_operand")
+ (match_operand:ALL8U 2 "nop_general_operand")))]
   "avr_have_dimode"
   {
 rtx acc_a = gen_rtx_REG (mode, ACC_A);
@@ -405,7 +405,7 @@
 
 (define_expand "negdi2"
   [(parallel [(match_operand:DI 0 "general_operand" "")
-  (match_operand:DI 1 "general_operand" "")])]
+  (match_operand:DI 1 "nop_general_operand")])]
   "avr_have_dimode"
   {
 rtx acc_a = gen_rtx_REG (DImode, ACC_A);
@@ -602,8 +602,8 @@
 ;; "ashluta3"  "ashruta3"  "lshruta3"  "rotluta3"
 (define_expand "3"
   [(parallel [(match_operand:ALL8 0 "general_operand" "")
-  (di_shifts:ALL8 (match_operand:ALL8 1 "general_operand" "")
-  (match_operand:QI 2 "general_operand" ""))])]
+  (di_shifts:ALL8 (match_operand:ALL8 1 "nop_general_operand")
+  (match_operand:QI 2 "nop_general_operand"))])]
   "avr_have_dimode"
   {
 rtx acc_a = gen_rtx_REG (mode, ACC_A);
@@ -648,8 +648,8 @@
 ;; "mulsidi3"
 (define_expand "mulsidi3"
   [(parallel [(match_operand:DI 0 "register_operand" "")
-  (match_operand:SI 1 "general_operand" "")
-  (match_operand:SI 2 "general_operand" "")
+  (match_operand:SI 1 "nop_general_operand")
+  (match_operand:SI 2 "nop_general_operand")
   ;; Just to mention the iterator 
   (clobber (any_extend:SI (match_dup 1)))])]
   "avr_have_dimode
diff --git a/gcc/testsuite/gcc.target/avr/torture/pr87376.c b/gcc/testsuite/gcc.target/avr/torture/pr87376.c
new file mode 100644
index 000..c31a4a9dda5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/avr/torture/pr87376.c
@@ -0,0 

[gcc r14-10383] Arm: Fix disassembly error in Thumb-1 relaxed load/store [PR115188]

2024-07-05 Thread Wilco Dijkstra via Gcc-cvs
https://gcc.gnu.org/g:100d353e545564931efaac90a089a4e8f3d42e6e

commit r14-10383-g100d353e545564931efaac90a089a4e8f3d42e6e
Author: Wilco Dijkstra 
Date:   Tue Jul 2 17:37:04 2024 +0100

Arm: Fix disassembly error in Thumb-1 relaxed load/store [PR115188]

A Thumb-1 memory operand allows single-register LDMIA/STMIA. This doesn't get
printed as LDR/STR with writeback in unified syntax, resulting in strange
assembler errors if writeback is selected.  To work around this, use the 'Uw'
constraint that blocks writeback.  Also use a new 'mem_and_no_t1_wback_op'
which is a general memory operand that disallows writeback in Thumb-1.
A few other patterns were using 'm' for Thumb-1 in a similar way, update these
to also use 'mem_and_no_t1_wback_op' and 'Uw'.

gcc:
PR target/115188
* config/arm/arm.md (unaligned_loadsi): Use 'Uw' constraint and
'mem_and_no_t1_wback_op'.
(unaligned_loadhiu): Likewise.
(unaligned_storesi): Likewise.
(unaligned_storehi): Likewise.
* config/arm/predicates.md (mem_and_no_t1_wback_op): Add new predicate.
* config/arm/sync.md (arm_atomic_load): Use 'Uw' constraint.
(arm_atomic_store): Likewise.

gcc/testsuite:
PR target/115188
* gcc.target/arm/pr115188.c: Add new test.

(cherry picked from commit d04c5537f5ae4a3acd3f5135347d7e2d8c218811)

Diff:
---
 gcc/config/arm/arm.md   |  8 
 gcc/config/arm/predicates.md|  5 +
 gcc/config/arm/sync.md  |  4 ++--
 gcc/testsuite/gcc.target/arm/pr115188.c | 10 ++
 4 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 1fd00146ca9..13a8fbf7a14 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -5011,7 +5011,7 @@
 
 (define_insn "unaligned_loadsi"
   [(set (match_operand:SI 0 "s_register_operand" "=l,l,r")
-   (unspec:SI [(match_operand:SI 1 "memory_operand" "m,Uw,m")]
+   (unspec:SI [(match_operand:SI 1 "mem_and_no_t1_wback_op" "Uw,Uw,m")]
   UNSPEC_UNALIGNED_LOAD))]
   "unaligned_access"
   "@
@@ -5041,7 +5041,7 @@
 (define_insn "unaligned_loadhiu"
   [(set (match_operand:SI 0 "s_register_operand" "=l,l,r")
(zero_extend:SI
- (unspec:HI [(match_operand:HI 1 "memory_operand" "m,Uw,m")]
+ (unspec:HI [(match_operand:HI 1 "mem_and_no_t1_wback_op" "Uw,Uw,m")]
 UNSPEC_UNALIGNED_LOAD)))]
   "unaligned_access"
   "@
@@ -5066,7 +5066,7 @@
(set_attr "type" "store_8")])
 
 (define_insn "unaligned_storesi"
-  [(set (match_operand:SI 0 "memory_operand" "=m,Uw,m")
+  [(set (match_operand:SI 0 "mem_and_no_t1_wback_op" "=Uw,Uw,m")
(unspec:SI [(match_operand:SI 1 "s_register_operand" "l,l,r")]
   UNSPEC_UNALIGNED_STORE))]
   "unaligned_access"
@@ -5081,7 +5081,7 @@
(set_attr "type" "store_4")])
 
 (define_insn "unaligned_storehi"
-  [(set (match_operand:HI 0 "memory_operand" "=m,Uw,m")
+  [(set (match_operand:HI 0 "mem_and_no_t1_wback_op" "=Uw,Uw,m")
(unspec:HI [(match_operand:HI 1 "s_register_operand" "l,l,r")]
   UNSPEC_UNALIGNED_STORE))]
   "unaligned_access"
diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
index 4994c0c57d6..197054b6118 100644
--- a/gcc/config/arm/predicates.md
+++ b/gcc/config/arm/predicates.md
@@ -907,3 +907,8 @@
 ;; A special predicate that doesn't match a particular mode.
 (define_special_predicate "arm_any_register_operand"
   (match_code "reg"))
+
+;; General memory operand that disallows Thumb-1 POST_INC.
+(define_predicate "mem_and_no_t1_wback_op"
+  (and (match_operand 0 "memory_operand")
+   (match_test "!(TARGET_THUMB1 && GET_CODE (XEXP (op, 0)) == POST_INC)")))
diff --git a/gcc/config/arm/sync.md b/gcc/config/arm/sync.md
index df8dbe170ca..0a8347fc598 100644
--- a/gcc/config/arm/sync.md
+++ b/gcc/config/arm/sync.md
@@ -65,7 +65,7 @@
 (define_insn "arm_atomic_load"
   [(set (match_operand:QHSI 0 "register_operand" "=r,l")
 (unspec_volatile:QHSI
-  [(match_operand:QHSI 1 "memory_operand" "m,m")]
+  [(match_operand:QHSI 1 "mem_and_no_t1_wback_op" "m,Uw")]
   VUNSPEC_LDR))]
   ""
   "ldr\t%0, %1"
@@ -81,7 +81,7 @@
 )
 
 (define_insn "arm_atomic_store"
-  [(set (match_operand:QHSI 0 "memory_operand" "=m,m")
+  [(set (match_operand:QHSI 0 "mem_and_no_t1_wback_op" "=m,Uw")
 (unspec_volatile:QHSI
   [(match_operand:QHSI 1 "register_operand" "r,l")]
   VUNSPEC_STR))]
diff --git a/gcc/testsuite/gcc.target/arm/pr115188.c b/gcc/testsuite/gcc.target/arm/pr115188.c
new file mode 100644
index 000..9a4022b5679
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/pr115188.c
@@ -0,0 +1,10 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_arch_v6m_ok }
+/* { dg-options "-O2" } */
+/* { dg-add-options arm_arch_v6m } */
+
+void init (int

[gcc r12-10603] AArch64: Fix strict-align cpymem/setmem [PR103100]

2024-07-05 Thread Wilco Dijkstra via Gcc-cvs
https://gcc.gnu.org/g:b9d16d8361a9e3a82a2f21e759e760d235d43322

commit r12-10603-gb9d16d8361a9e3a82a2f21e759e760d235d43322
Author: Wilco Dijkstra 
Date:   Wed Oct 25 16:28:04 2023 +0100

AArch64: Fix strict-align cpymem/setmem [PR103100]

The cpymemdi/setmemdi implementation doesn't fully support strict alignment.
Block the expansion if the alignment is less than 16 with STRICT_ALIGNMENT.
Clean up the condition when to use MOPS.

gcc/ChangeLog/
PR target/103100
* config/aarch64/aarch64.md (cpymemdi): Remove pattern condition.
(setmemdi): Likewise.
* config/aarch64/aarch64.cc (aarch64_expand_cpymem): Support
strict-align.  Cleanup condition for using MOPS.
(aarch64_expand_setmem): Likewise.

(cherry picked from commit 318f5232cfb3e0c9694889565e1f5424d0354463)

Diff:
---
 gcc/config/aarch64/aarch64.cc | 52 ++-
 gcc/config/aarch64/aarch64.md |  4 ++--
 2 files changed, 24 insertions(+), 32 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index f8082c4035e..cd2f4053a1a 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -24782,27 +24782,23 @@ aarch64_expand_cpymem (rtx *operands)
   int mode_bits;
   rtx dst = operands[0];
   rtx src = operands[1];
+  unsigned align = UINTVAL (operands[3]);
   rtx base;
   machine_mode cur_mode = BLKmode;
+  bool size_p = optimize_function_for_size_p (cfun);
 
-  /* Variable-sized memcpy can go through the MOPS expansion if available.  */
-  if (!CONST_INT_P (operands[2]))
+  /* Variable-sized or strict-align copies may use the MOPS expansion.  */
+  if (!CONST_INT_P (operands[2]) || (STRICT_ALIGNMENT && align < 16))
 return aarch64_expand_cpymem_mops (operands);
 
-  unsigned HOST_WIDE_INT size = INTVAL (operands[2]);
-
-  /* Try to inline up to 256 bytes or use the MOPS threshold if available.  */
-  unsigned HOST_WIDE_INT max_copy_size
-= TARGET_MOPS ? aarch64_mops_memcpy_size_threshold : 256;
+  unsigned HOST_WIDE_INT size = UINTVAL (operands[2]);
 
-  bool size_p = optimize_function_for_size_p (cfun);
+  /* Try to inline up to 256 bytes.  */
+  unsigned max_copy_size = 256;
+  unsigned mops_threshold = aarch64_mops_memcpy_size_threshold;
 
-  /* Large constant-sized cpymem should go through MOPS when possible.
- It should be a win even for size optimization in the general case.
- For speed optimization the choice between MOPS and the SIMD sequence
- depends on the size of the copy, rather than number of instructions,
- alignment etc.  */
-  if (size > max_copy_size)
+  /* Large copies use MOPS when available or a library call.  */
+  if (size > max_copy_size || (TARGET_MOPS && size > mops_threshold))
 return aarch64_expand_cpymem_mops (operands);
 
   int copy_bits = 256;
@@ -24966,12 +24962,13 @@ aarch64_expand_setmem (rtx *operands)
   unsigned HOST_WIDE_INT len;
   rtx dst = operands[0];
   rtx val = operands[2], src;
+  unsigned align = UINTVAL (operands[3]);
   rtx base;
   machine_mode cur_mode = BLKmode, next_mode;
 
-  /* If we don't have SIMD registers or the size is variable use the MOPS
- inlined sequence if possible.  */
-  if (!CONST_INT_P (operands[1]) || !TARGET_SIMD)
+  /* Variable-sized or strict-align memset may use the MOPS expansion.  */
+  if (!CONST_INT_P (operands[1]) || !TARGET_SIMD
+  || (STRICT_ALIGNMENT && align < 16))
 return aarch64_expand_setmem_mops (operands);
 
   bool size_p = optimize_function_for_size_p (cfun);
@@ -24979,10 +24976,13 @@ aarch64_expand_setmem (rtx *operands)
   /* Default the maximum to 256-bytes when considering only libcall vs
  SIMD broadcast sequence.  */
   unsigned max_set_size = 256;
+  unsigned mops_threshold = aarch64_mops_memset_size_threshold;
 
-  len = INTVAL (operands[1]);
-  if (len > max_set_size && !TARGET_MOPS)
-return false;
+  len = UINTVAL (operands[1]);
+
+  /* Large memset uses MOPS when available or a library call.  */
+  if (len > max_set_size || (TARGET_MOPS && len > mops_threshold))
+return aarch64_expand_setmem_mops (operands);
 
   int cst_val = !!(CONST_INT_P (val) && (INTVAL (val) != 0));
   /* The MOPS sequence takes:
@@ -24995,12 +24995,6 @@ aarch64_expand_setmem (rtx *operands)
  the arguments + 1 for the call.  */
   unsigned libcall_cost = 4;
 
-  /* Upper bound check.  For large constant-sized setmem use the MOPS sequence
- when available.  */
-  if (TARGET_MOPS
-  && len >= (unsigned HOST_WIDE_INT) aarch64_mops_memset_size_threshold)
-return aarch64_expand_setmem_mops (operands);
-
   /* Attempt a sequence with a vector broadcast followed by stores.
  Count the number of operations involved to see if it's worth it
  against the alternatives.  A simple counter simd_ops on the
@@ -25042,10 +25036,8 @@ aarch64_expand_setmem (rtx *operands)
   simd_ops++;
   n -= mode_bits;
 
-  /* Do ce

[gcc r15-1861] RISC-V: Use tu policy for first-element vec_set [PR115725].

2024-07-05 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:acc3b703c05debc6276451f9daae5d0ffc797eac

commit r15-1861-gacc3b703c05debc6276451f9daae5d0ffc797eac
Author: Robin Dapp 
Date:   Mon Jul 1 13:37:17 2024 +0200

RISC-V: Use tu policy for first-element vec_set [PR115725].

This patch changes the tail policy for vmv.s.x from ta to tu.
By default the bug does not show up with qemu because qemu's
current vmv.s.x implementation always uses the tail-undisturbed
policy.  With a local qemu version that overwrites the tail
with ones when the tail-agnostic policy is specified, the bug
shows.

gcc/ChangeLog:

* config/riscv/autovec.md: Add TU policy.
* config/riscv/riscv-protos.h (enum insn_type): Define
SCALAR_MOVE_MERGED_OP_TU.

gcc/testsuite/ChangeLog:

PR target/115725

* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c: Adjust
test expectation.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-4.c: Ditto.

Diff:
---
 gcc/config/riscv/autovec.md  |  3 ++-
 gcc/config/riscv/riscv-protos.h  |  4 
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c   | 12 
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-2.c   | 12 
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-3.c   | 12 
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-4.c   | 12 
 6 files changed, 22 insertions(+), 33 deletions(-)

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 66d70f678a6..0fb6316a2cf 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -1341,7 +1341,8 @@
 {
   rtx ops[] = {operands[0], operands[0], operands[1]};
   riscv_vector::emit_nonvlmax_insn (code_for_pred_broadcast (mode),
-   riscv_vector::SCALAR_MOVE_MERGED_OP, ops, CONST1_RTX (Pmode));
+   riscv_vector::SCALAR_MOVE_MERGED_OP_TU,
+   ops, CONST1_RTX (Pmode));
 }
   else
 {
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index a8b76173fa0..abf6e34b5cc 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -524,6 +524,10 @@ enum insn_type : unsigned int
   SCALAR_MOVE_MERGED_OP = HAS_DEST_P | HAS_MASK_P | USE_ONE_TRUE_MASK_P
  | HAS_MERGE_P | TDEFAULT_POLICY_P | MDEFAULT_POLICY_P
  | UNARY_OP_P,
+
+  SCALAR_MOVE_MERGED_OP_TU = HAS_DEST_P | HAS_MASK_P | USE_ONE_TRUE_MASK_P
+ | HAS_MERGE_P | TU_POLICY_P | MDEFAULT_POLICY_P
+ | UNARY_OP_P,
 };
 
 enum vlmul_type
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c
index ecb160933d6..99b0f625c83 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c
@@ -64,14 +64,10 @@ typedef double vnx2df __attribute__((vector_size (16)));
 TEST_ALL1 (VEC_SET)
 TEST_ALL_VAR1 (VEC_SET_VAR1)
 
-/* { dg-final { scan-assembler-times 
{vset[i]*vli\s+[a-z0-9,]+,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
-/* { dg-final { scan-assembler-times 
{vset[i]*vli\s+[a-z0-9,]+,\s*e8,\s*m1,\s*tu,\s*ma} 5 } } */
-/* { dg-final { scan-assembler-times 
{vset[i]*vli\s+[a-z0-9,]+,\s*e16,\s*m1,\s*ta,\s*ma} 2 } } */
-/* { dg-final { scan-assembler-times 
{vset[i]*vli\s+[a-z0-9,]+,\s*e16,\s*m1,\s*tu,\s*ma} 6 } } */
-/* { dg-final { scan-assembler-times 
{vset[i]*vli\s+[a-z0-9,]+,\s*e32,\s*m1,\s*ta,\s*ma} 2 } } */
-/* { dg-final { scan-assembler-times 
{vset[i]*vli\s+[a-z0-9,]+,\s*e32,\s*m1,\s*tu,\s*ma} 6 } } */
-/* { dg-final { scan-assembler-times 
{vset[i]*vli\s+[a-z0-9,]+,\s*e64,\s*m1,\s*ta,\s*ma} 2 } } */
-/* { dg-final { scan-assembler-times 
{vset[i]*vli\s+[a-z0-9,]+,\s*e64,\s*m1,\s*tu,\s*ma} 4 } } */
+/* { dg-final { scan-assembler-times 
{vset[i]*vli\s+[a-z0-9,]+,\s*e8,\s*m1,\s*tu,\s*ma} 6 } } */
+/* { dg-final { scan-assembler-times 
{vset[i]*vli\s+[a-z0-9,]+,\s*e16,\s*m1,\s*tu,\s*ma} 8 } } */
+/* { dg-final { scan-assembler-times 
{vset[i]*vli\s+[a-z0-9,]+,\s*e32,\s*m1,\s*tu,\s*ma} 8 } } */
+/* { dg-final { scan-assembler-times 
{vset[i]*vli\s+[a-z0-9,]+,\s*e64,\s*m1,\s*tu,\s*ma} 6 } } */
 
 /* { dg-final { scan-assembler-times {\tvmv.v.x} 13 } } */
 /* { dg-final { scan-assembler-times {\tvfmv.v.f} 8 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-2.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-2.c
index 194abff77cc..64a40308eb1 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-2.c
+++ b/

[gcc r15-1862] MAINTAINERS: Fix order in DCO

2024-07-05 Thread Filip Kastl via Gcc-cvs
https://gcc.gnu.org/g:4da5dc4be81b2797943fea44b0d40ac04700baee

commit r15-1862-g4da5dc4be81b2797943fea44b0d40ac04700baee
Author: Filip Kastl 
Date:   Fri Jul 5 15:17:58 2024 +0200

MAINTAINERS: Fix order in DCO

ChangeLog:

* MAINTAINERS: Fix order in Contributing under the DCO.

Signed-off-by: Filip Kastl 

Diff:
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index b4739f29107..762b91256c4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -766,6 +766,7 @@ Robin Dapp  

 Robin Dapp 
 Michal Jires   
 Matthias Kretz 
+Prathamesh Kulkarni
 Tim Lange  
 Jeff Law   
 Jeff Law   
@@ -791,4 +792,3 @@ Jonathan Wakely 

 Alexander Westbrooks   
 Chung-Ju Wu
 Pengxuan Zheng 
-Prathamesh Kulkarni


[gcc r15-1863] libgccjit: Add support for the type bfloat16

2024-07-05 Thread Antoni Boucher via Gcc-cvs
https://gcc.gnu.org/g:1c314247aab43aaa278ecc51d666f8c5896d8bbb

commit r15-1863-g1c314247aab43aaa278ecc51d666f8c5896d8bbb
Author: Antoni Boucher 
Date:   Thu Nov 16 10:59:22 2023 -0500

libgccjit: Add support for the type bfloat16

gcc/jit/ChangeLog:

PR jit/112574
* docs/topics/types.rst: Document GCC_JIT_TYPE_BFLOAT16.
* jit-common.h: Update NUM_GCC_JIT_TYPES.
* jit-playback.cc (get_tree_node_for_type): Support bfloat16.
* jit-recording.cc (recording::memento_of_get_type::get_size,
recording::memento_of_get_type::dereference,
recording::memento_of_get_type::is_int,
recording::memento_of_get_type::is_signed,
recording::memento_of_get_type::is_float,
recording::memento_of_get_type::is_bool): Support bfloat16.
* libgccjit.h (enum gcc_jit_types): Add GCC_JIT_TYPE_BFLOAT16.

gcc/testsuite/ChangeLog:

PR jit/112574
* jit.dg/all-non-failing-tests.h: New test test-bfloat16.c.
* jit.dg/test-types.c: Test GCC_JIT_TYPE_BFLOAT16.
* jit.dg/test-bfloat16.c: New test.

Diff:
---
 gcc/jit/docs/topics/types.rst|  2 ++
 gcc/jit/jit-common.h |  2 +-
 gcc/jit/jit-playback.cc  |  6 +
 gcc/jit/jit-recording.cc | 11 +
 gcc/jit/libgccjit.h  |  4 ++-
 gcc/testsuite/jit.dg/all-non-failing-tests.h |  3 +++
 gcc/testsuite/jit.dg/test-bfloat16.c | 37 
 gcc/testsuite/jit.dg/test-types.c|  4 +++
 8 files changed, 67 insertions(+), 2 deletions(-)

diff --git a/gcc/jit/docs/topics/types.rst b/gcc/jit/docs/topics/types.rst
index bb51f037b7e..6a7a35280ed 100644
--- a/gcc/jit/docs/topics/types.rst
+++ b/gcc/jit/docs/topics/types.rst
@@ -113,6 +113,8 @@ Standard types
- C99's ``__int128_t``
  * - :c:data:`GCC_JIT_TYPE_FLOAT`
-
+ * - :c:data:`GCC_JIT_TYPE_BFLOAT16`
+   - C's ``__bfloat16``
  * - :c:data:`GCC_JIT_TYPE_DOUBLE`
-
  * - :c:data:`GCC_JIT_TYPE_LONG_DOUBLE`
diff --git a/gcc/jit/jit-common.h b/gcc/jit/jit-common.h
index 1e335878b56..afb41763e46 100644
--- a/gcc/jit/jit-common.h
+++ b/gcc/jit/jit-common.h
@@ -36,7 +36,7 @@ along with GCC; see the file COPYING3.  If not see
 #endif
 #endif
 
-const int NUM_GCC_JIT_TYPES = GCC_JIT_TYPE_INT128_T + 1;
+const int NUM_GCC_JIT_TYPES = GCC_JIT_TYPE_BFLOAT16 + 1;
 
 /* This comment is included by the docs.
 
diff --git a/gcc/jit/jit-playback.cc b/gcc/jit/jit-playback.cc
index b3f54da24ab..1b5445d6101 100644
--- a/gcc/jit/jit-playback.cc
+++ b/gcc/jit/jit-playback.cc
@@ -281,6 +281,12 @@ get_tree_node_for_type (enum gcc_jit_types type_)
 
 case GCC_JIT_TYPE_FLOAT:
   return float_type_node;
+case GCC_JIT_TYPE_BFLOAT16:
+#ifndef HAVE_BFmode
+  add_error (NULL, "gcc_jit_types value unsupported on this target: %i",
+type_);
+#endif
+  return bfloat16_type_node;
 case GCC_JIT_TYPE_DOUBLE:
   return double_type_node;
 case GCC_JIT_TYPE_LONG_DOUBLE:
diff --git a/gcc/jit/jit-recording.cc b/gcc/jit/jit-recording.cc
index 43a6795f8f3..cc7f529c9e8 100644
--- a/gcc/jit/jit-recording.cc
+++ b/gcc/jit/jit-recording.cc
@@ -2418,6 +2418,10 @@ recording::memento_of_get_type::get_size ()
   m = targetm.c.mode_for_floating_type (TI_FLOAT_TYPE);
   size = GET_MODE_PRECISION (m).to_constant ();
   break;
+#ifdef HAVE_BFmode
+case GCC_JIT_TYPE_BFLOAT16:
+  return GET_MODE_UNIT_SIZE (BFmode);
+#endif
 case GCC_JIT_TYPE_DOUBLE:
   m = targetm.c.mode_for_floating_type (TI_DOUBLE_TYPE);
   size = GET_MODE_PRECISION (m).to_constant ();
@@ -2479,6 +2483,7 @@ recording::memento_of_get_type::dereference ()
 case GCC_JIT_TYPE_INT64_T:
 case GCC_JIT_TYPE_INT128_T:
 case GCC_JIT_TYPE_FLOAT:
+case GCC_JIT_TYPE_BFLOAT16:
 case GCC_JIT_TYPE_DOUBLE:
 case GCC_JIT_TYPE_LONG_DOUBLE:
 case GCC_JIT_TYPE_COMPLEX_FLOAT:
@@ -2543,6 +2548,7 @@ recording::memento_of_get_type::is_int () const
   return true;
 
 case GCC_JIT_TYPE_FLOAT:
+case GCC_JIT_TYPE_BFLOAT16:
 case GCC_JIT_TYPE_DOUBLE:
 case GCC_JIT_TYPE_LONG_DOUBLE:
   return false;
@@ -2601,6 +2607,7 @@ recording::memento_of_get_type::is_signed () const
 case GCC_JIT_TYPE_UINT128_T:
 
 case GCC_JIT_TYPE_FLOAT:
+case GCC_JIT_TYPE_BFLOAT16:
 case GCC_JIT_TYPE_DOUBLE:
 case GCC_JIT_TYPE_LONG_DOUBLE:
 
@@ -2660,6 +2667,7 @@ recording::memento_of_get_type::is_float () const
   return false;
 
 case GCC_JIT_TYPE_FLOAT:
+case GCC_JIT_TYPE_BFLOAT16:
 case GCC_JIT_TYPE_DOUBLE:
 case GCC_JIT_TYPE_LONG_DOUBLE:
   return true;
@@ -2723,6 +2731,7 @@ recording::memento_of_get_type::is_bool () const
   return false;
 
 case GCC_JIT_TYPE_FLOAT:
+case GCC_JIT_TYPE_BFLOAT16:
 case GCC_JIT_TYPE_DOUBLE:
 

[gcc r15-1864] libgccjit: Allow comparing array types

2024-07-05 Thread Antoni Boucher via Gcc-cvs
https://gcc.gnu.org/g:533f807e17034b20c586eeb480c989a42869bb36

commit r15-1864-g533f807e17034b20c586eeb480c989a42869bb36
Author: Antoni Boucher 
Date:   Tue Jan 2 16:04:10 2024 -0500

libgccjit: Allow comparing array types

gcc/jit/ChangeLog:

* jit-common.h: Add array_type class.
* jit-recording.h (type::dyn_cast_array_type,
memento_of_get_aligned::dyn_cast_array_type,
array_type::dyn_cast_array_type, array_type::is_same_type_as):
New methods.

gcc/testsuite/ChangeLog:

* jit.dg/test-types.c: Add array type comparison to the test.

Diff:
---
 gcc/jit/jit-common.h  |  1 +
 gcc/jit/jit-recording.h   | 17 +
 gcc/testsuite/jit.dg/test-types.c |  5 +
 3 files changed, 23 insertions(+)

diff --git a/gcc/jit/jit-common.h b/gcc/jit/jit-common.h
index afb41763e46..655d94e0bab 100644
--- a/gcc/jit/jit-common.h
+++ b/gcc/jit/jit-common.h
@@ -118,6 +118,7 @@ namespace recording {
 class struct_;
class union_;
   class vector_type;
+  class array_type;
 class field;
   class bitfield;
 class fields;
diff --git a/gcc/jit/jit-recording.h b/gcc/jit/jit-recording.h
index cce25f1fc07..abd4f6f8bb3 100644
--- a/gcc/jit/jit-recording.h
+++ b/gcc/jit/jit-recording.h
@@ -560,6 +560,7 @@ public:
   virtual function_type *as_a_function_type() { gcc_unreachable (); return 
NULL; }
   virtual struct_ *dyn_cast_struct () { return NULL; }
   virtual vector_type *dyn_cast_vector_type () { return NULL; }
+  virtual array_type *dyn_cast_array_type () { return NULL; }
 
   /* Is it typesafe to copy to this type from rtype?  */
   virtual bool accepts_writes_from (type *rtype)
@@ -829,6 +830,11 @@ public:
 
   void replay_into (replayer *) final override;
 
+  array_type *dyn_cast_array_type () final override
+  {
+return m_other_type->dyn_cast_array_type ();
+  }
+
 private:
   string * make_debug_string () final override;
   void write_reproducer (reproducer &r) final override;
@@ -895,6 +901,17 @@ class array_type : public type
 
   type *dereference () final override;
 
+  bool is_same_type_as (type *other) final override
+  {
+array_type *other_array_type = other->dyn_cast_array_type ();
+if (!other_array_type)
+  return false;
+return m_num_elements == other_array_type->m_num_elements
+  && m_element_type->is_same_type_as (other_array_type->m_element_type);
+  }
+
+  array_type *dyn_cast_array_type () final override { return this; }
+
   bool is_int () const final override { return false; }
   bool is_float () const final override { return false; }
   bool is_bool () const final override { return false; }
diff --git a/gcc/testsuite/jit.dg/test-types.c 
b/gcc/testsuite/jit.dg/test-types.c
index f51252e0da0..bfdb76383c5 100644
--- a/gcc/testsuite/jit.dg/test-types.c
+++ b/gcc/testsuite/jit.dg/test-types.c
@@ -496,4 +496,9 @@ verify_code (gcc_jit_context *ctxt, gcc_jit_result *result)
 #ifdef HAVE_BFmode
   CHECK_VALUE (gcc_jit_type_get_size (gcc_jit_context_get_type (ctxt, 
GCC_JIT_TYPE_BFLOAT16)), sizeof (__bfloat16));
 #endif
+
+  gcc_jit_type *int_type = gcc_jit_context_get_type (ctxt, GCC_JIT_TYPE_INT);
+  gcc_jit_type *array_type1 = gcc_jit_context_new_array_type (ctxt, NULL, 
int_type, 2);
+  gcc_jit_type *array_type2 = gcc_jit_context_new_array_type (ctxt, NULL, 
int_type, 2);
+  CHECK (gcc_jit_compatible_types (array_type1, array_type2));
 }


[gcc r15-1865] Arm: Fix ldrd offset range [PR115153]

2024-07-05 Thread Wilco Dijkstra via Gcc-cvs
https://gcc.gnu.org/g:44e5ecfd261afe72aa04eba4bf1a9ec782579cab

commit r15-1865-g44e5ecfd261afe72aa04eba4bf1a9ec782579cab
Author: Wilco Dijkstra 
Date:   Fri Jul 5 17:31:25 2024 +0100

Arm: Fix ldrd offset range [PR115153]

The valid offset range of LDRD in arm_legitimate_index_p is increased to
-1024..1020 if NEON is enabled since VALID_NEON_DREG_MODE includes DImode.
Fix this by moving the LDRD check earlier.

gcc:
PR target/115153
* config/arm/arm.cc (arm_legitimate_index_p): Move LDRD case before
NEON.
(thumb2_legitimate_index_p): Update comments.
(output_move_neon): Use DFmode for vldr/vstr and non-checking
adjust_address.

gcc/testsuite:
PR target/115153
* gcc.target/arm/pr115153.c: Add new test.
* lib/target-supports.exp: Add arm_arch_v7ve_neon target support.

Diff:
---
 gcc/config/arm/arm.cc   | 59 +
 gcc/testsuite/gcc.target/arm/pr115153.c | 16 +
 gcc/testsuite/lib/target-supports.exp   |  2 ++
 3 files changed, 48 insertions(+), 29 deletions(-)

diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index bb9c7c3b5c4..459b7e648ab 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -8858,6 +8858,28 @@ arm_legitimate_index_p (machine_mode mode, rtx index, 
RTX_CODE outer,
&& INTVAL (index) > -1024
&& (INTVAL (index) & 3) == 0);
 
+  if (arm_address_register_rtx_p (index, strict_p)
+  && (GET_MODE_SIZE (mode) <= 4))
+return 1;
+
+  /* This handles DFmode only if !TARGET_HARD_FLOAT.  */
+  if (mode == DImode || mode == DFmode)
+{
+  if (code == CONST_INT)
+   {
+ HOST_WIDE_INT val = INTVAL (index);
+
+ /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
+If vldr is selected it uses arm_coproc_mem_operand.  */
+ if (TARGET_LDRD)
+   return val > -256 && val < 256;
+ else
+   return val > -4096 && val < 4092;
+   }
+
+  return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
+}
+
   /* For quad modes, we restrict the constant offset to be slightly less
  than what the instruction format permits.  We do this because for
  quad mode moves, we will actually decompose them into two separate
@@ -8870,7 +8892,7 @@ arm_legitimate_index_p (machine_mode mode, rtx index, 
RTX_CODE outer,
&& (INTVAL (index) & 3) == 0);
 
   /* We have no such constraint on double mode offsets, so we permit the
- full range of the instruction format.  */
+ full range of the instruction format.  Note DImode is included here.  */
   if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
 return (code == CONST_INT
&& INTVAL (index) < 1024
@@ -8883,27 +8905,6 @@ arm_legitimate_index_p (machine_mode mode, rtx index, 
RTX_CODE outer,
&& INTVAL (index) > -1024
&& (INTVAL (index) & 3) == 0);
 
-  if (arm_address_register_rtx_p (index, strict_p)
-  && (GET_MODE_SIZE (mode) <= 4))
-return 1;
-
-  if (mode == DImode || mode == DFmode)
-{
-  if (code == CONST_INT)
-   {
- HOST_WIDE_INT val = INTVAL (index);
-
- /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
-If vldr is selected it uses arm_coproc_mem_operand.  */
- if (TARGET_LDRD)
-   return val > -256 && val < 256;
- else
-   return val > -4096 && val < 4092;
-   }
-
-  return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
-}
-
   if (GET_MODE_SIZE (mode) <= 4
   && ! (arm_arch4
&& (mode == HImode
@@ -9006,7 +9007,7 @@ thumb2_legitimate_index_p (machine_mode mode, rtx index, 
int strict_p)
&& (INTVAL (index) & 3) == 0);
 
   /* We have no such constraint on double mode offsets, so we permit the
- full range of the instruction format.  */
+ full range of the instruction format.  Note DImode is included here.  */
   if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
 return (code == CONST_INT
&& INTVAL (index) < 1024
@@ -9017,6 +9018,7 @@ thumb2_legitimate_index_p (machine_mode mode, rtx index, 
int strict_p)
   && (GET_MODE_SIZE (mode) <= 4))
 return 1;
 
+  /* This handles DImode if !TARGET_NEON, and DFmode if !TARGET_VFP_BASE.  */
   if (mode == DImode || mode == DFmode)
 {
   if (code == CONST_INT)
@@ -20865,10 +20867,9 @@ output_move_neon (rtx *operands)
int overlap = -1;
for (i = 0; i < nregs; i++)
  {
-   /* We're only using DImode here because it's a convenient
-  size.  */
-   ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
-   ops[1] = adjust_address (mem, DImode, 8 * i);
+   /* Use DFmode for vldr/vstr.  */
+   ops[0] = gen_rtx_REG (DFmode, REGNO (reg) + 2 * i);
+   ops[1] = adjust_address_nv (mem, DFmode, 8 * i);

[gcc r15-1866] Fortran: switch test to use issignaling() built-in

2024-07-05 Thread François-Xavier Coudert via Gcc-cvs
https://gcc.gnu.org/g:eec30733bba305b02ba3c368289ef935f17c87e6

commit r15-1866-geec30733bba305b02ba3c368289ef935f17c87e6
Author: Francois-Xavier Coudert 
Date:   Sat Jul 6 00:02:03 2024 +0200

Fortran: switch test to use issignaling() built-in

The macro may not be present in all libc's, but the built-in
is always available.

gcc/testsuite/ChangeLog:

* gfortran.dg/ieee/signaling_2.f90: Adjust test.
* gfortran.dg/ieee/signaling_2_c.c: Adjust test.

Diff:
---
 gcc/testsuite/gfortran.dg/ieee/signaling_2.f90 |  3 ---
 gcc/testsuite/gfortran.dg/ieee/signaling_2_c.c | 10 +++---
 2 files changed, 3 insertions(+), 10 deletions(-)

diff --git a/gcc/testsuite/gfortran.dg/ieee/signaling_2.f90 
b/gcc/testsuite/gfortran.dg/ieee/signaling_2.f90
index 03b04c783eb..79a85edefd1 100644
--- a/gcc/testsuite/gfortran.dg/ieee/signaling_2.f90
+++ b/gcc/testsuite/gfortran.dg/ieee/signaling_2.f90
@@ -1,9 +1,6 @@
 ! { dg-do run { target { ! ia32 } } }
 ! x87 / x86-32 ABI is unsuitable for signaling NaNs
 !
-! { dg-require-effective-target issignaling } */
-! The companion C source needs access to the issignaling macro.
-!
 ! { dg-additional-sources signaling_2_c.c }
 ! { dg-additional-options "-w" }
 ! The -w option is needed to make cc1 not report a warning for
diff --git a/gcc/testsuite/gfortran.dg/ieee/signaling_2_c.c 
b/gcc/testsuite/gfortran.dg/ieee/signaling_2_c.c
index ea7fc0467bd..dde09638c6f 100644
--- a/gcc/testsuite/gfortran.dg/ieee/signaling_2_c.c
+++ b/gcc/testsuite/gfortran.dg/ieee/signaling_2_c.c
@@ -1,8 +1,4 @@
-#define _GNU_SOURCE
-#include 
-#include 
-
-int isnansf (float x)   { return issignaling (x) ? 1 : 0; }
-int isnans  (double x)  { return issignaling (x) ? 1 : 0; }
-int isnansl (long double x) { return issignaling (x) ? 1 : 0; }
+int isnansf (float x)   { return __builtin_issignaling (x) ? 1 : 0; }
+int isnans  (double x)  { return __builtin_issignaling (x) ? 1 : 0; }
+int isnansl (long double x) { return __builtin_issignaling (x) ? 1 : 0; }


[gcc r15-1867] x86, Darwin: Fix bootstrap for 32b multilibs/hosts.

2024-07-05 Thread Iain D Sandoe via Gcc-cvs
https://gcc.gnu.org/g:807e36d76e5105015afe0cf20e9a8837bb550f4b

commit r15-1867-g807e36d76e5105015afe0cf20e9a8837bb550f4b
Author: Iain Sandoe 
Date:   Fri Jul 5 09:26:40 2024 +0100

x86, Darwin: Fix bootstrap for 32b multilibs/hosts.

r15-1735-ge62ea4fb8ffcab06ddd  contained changes that altered the
codegen for 32b Darwin (whether hosted on 64b or as 32b host) such
that the per function picbase load is called multiple times in some
cases.  Darwin's back end is not expecting this (and indeed some of
the handling depends on a single instance).

This fixes the issue by marking those instructions as not copyable
(as suggested by Andrew Pinski).

The change is Darwin-specific.

gcc/ChangeLog:

* config/i386/i386.cc (ix86_cannot_copy_insn_p): New.
(TARGET_CANNOT_COPY_INSN_P): New.

Signed-off-by: Iain Sandoe 

Diff:
---
 gcc/config/i386/i386.cc | 23 +++
 1 file changed, 23 insertions(+)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 99def8d4a77..f75250f79de 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -27025,6 +27025,29 @@ ix86_libm_function_max_error (unsigned cfn, 
machine_mode mode,
 #undef TARGET_LIBM_FUNCTION_MAX_ERROR
 #define TARGET_LIBM_FUNCTION_MAX_ERROR ix86_libm_function_max_error
 
+#if TARGET_MACHO
+static bool
+ix86_cannot_copy_insn_p (rtx_insn *insn)
+{
+  if (TARGET_64BIT)
+return false;
+
+  rtx set = single_set (insn);
+  if (set)
+{
+  rtx src = SET_SRC (set);
+  if (GET_CODE (src) == UNSPEC
+ && XINT (src, 1) == UNSPEC_SET_GOT)
+   return true;
+}
+  return false;
+}
+
+#undef TARGET_CANNOT_COPY_INSN_P
+#define TARGET_CANNOT_COPY_INSN_P ix86_cannot_copy_insn_p
+
+#endif
+
 #if CHECKING_P
 #undef TARGET_RUN_TARGET_SELFTESTS
 #define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests


[gcc r15-1869] PR target/115751: Avoid force_reg in ix86_expand_ternlog.

2024-07-05 Thread Roger Sayle via Gcc-cvs
https://gcc.gnu.org/g:9a7e3f57e1ab8e6e4cf5ea3c0998aa50c6220579

commit r15-1869-g9a7e3f57e1ab8e6e4cf5ea3c0998aa50c6220579
Author: Roger Sayle 
Date:   Sat Jul 6 05:24:39 2024 +0100

PR target/115751: Avoid force_reg in ix86_expand_ternlog.

This patch fixes a problem with splitting of complex AVX512 ternlog
instructions on x86_64.  A recent change allows the ternlog pattern
to have multiple mem-like operands prior to reload, by emitting any
"reloads" as necessary during split1, before register allocation.
The issue is that this code calls force_reg to place the mem-like
operand into a register, but unfortunately the vec_duplicate (broadcast)
form of operands supported by ternlog isn't considered a "general_operand",
i.e. supported by all instructions.  This mismatch triggers an ICE in
the middle-end's force_reg, even though the x86 supports loading these
vec_duplicate operands into a vector register in a single (move)
instruction.

This patch resolves this problem by replacing force_reg with calls
to gen_reg_rtx and emit_move_insn (as the i386 backend, unlike the
middle-end, knows these will be recognized by recog).

2024-07-06  Roger Sayle  

gcc/ChangeLog
PR target/115751
* config/i386/i386-expand.cc (ix86_expand_ternlog): Avoid use of
force_reg to "reload" non-register operands, as these may contain
vec_duplicate (broadcast) operands that aren't supported by
force_reg.  Use (safer) gen_reg_rtx and emit_move_insn instead.

Diff:
---
 gcc/config/i386/i386-expand.cc | 15 +--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index a773b45bf03..bf79e59f811 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -26050,14 +26050,25 @@ ix86_expand_ternlog (machine_mode mode, rtx op0, rtx 
op1, rtx op2, int idx,
   break;
 }
 
-  tmp0 = register_operand (op0, mode) ? op0 : force_reg (mode, op0);
+  if (!register_operand (op0, mode))
+{
+  /* We can't use force_reg (mode, op0).  */
+  tmp0 = gen_reg_rtx (GET_MODE (op0));
+  emit_move_insn (tmp0,op0);
+}
+  else
+tmp0 = op0;
   if (GET_MODE (tmp0) != mode)
 tmp0 = gen_lowpart (mode, tmp0);
 
   if (!op1 || rtx_equal_p (op0, op1))
 tmp1 = copy_rtx (tmp0);
   else if (!register_operand (op1, mode))
-tmp1 = force_reg (mode, op1);
+{
+  /* We can't use force_reg (mode, op1).  */
+  tmp1 = gen_reg_rtx (GET_MODE (op1));
+  emit_move_insn (tmp1, op1);
+}
   else
 tmp1 = op1;
   if (GET_MODE (tmp1) != mode)