[gcc r15-4397] Support andn_optab for x86

2024-10-16 Thread Lili Cui via Gcc-cvs
https://gcc.gnu.org/g:70f59d2a1c51bde085d8fc7df002918851e76c9c

commit r15-4397-g70f59d2a1c51bde085d8fc7df002918851e76c9c
Author: Cui, Lili 
Date:   Thu Oct 17 08:50:38 2024 +0800

Support andn_optab for x86

Add new andn pattern to match the new optab added by
r15-1890-gf379596e0ba99d. Only enable 64-bit, 128-bit and
256-bit vector ANDN; x86-64 has mask mov instructions when
AVX512 is enabled.

gcc/ChangeLog:

* config/i386/sse.md (andn3): New.
* config/i386/mmx.md (andn3): New.

gcc/testsuite/ChangeLog:

* g++.target/i386/vect-cmp.C: New test.

Diff:
---
 gcc/config/i386/mmx.md   |  7 +++
 gcc/config/i386/sse.md   |  7 +++
 gcc/testsuite/g++.target/i386/vect-cmp.C | 23 +++
 3 files changed, 37 insertions(+)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 9d2a82c598e5..ef4ed8b501a1 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -4467,6 +4467,13 @@
   operands[0] = lowpart_subreg (V16QImode, operands[0], mode);
 })
 
+(define_expand "andn3"
+  [(set (match_operand:MMXMODEI 0 "register_operand")
+(and:MMXMODEI
+  (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand"))
+  (match_operand:MMXMODEI 2 "register_operand")))]
+  "TARGET_SSE2")
+
 (define_insn "mmx_andnot3"
   [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x,v")
(and:MMXMODEI
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index a45b50ad7324..7be313346677 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -18438,6 +18438,13 @@
  (match_operand:VI_AVX2 2 "vector_operand")))]
   "TARGET_SSE2")
 
+(define_expand "andn3"
+  [(set (match_operand:VI 0 "register_operand")
+   (and:VI
+ (not:VI (match_operand:VI 2 "register_operand"))
+ (match_operand:VI 1 "register_operand")))]
+  "TARGET_SSE2")
+
 (define_expand "_andnot3_mask"
   [(set (match_operand:VI48_AVX512VL 0 "register_operand")
(vec_merge:VI48_AVX512VL
diff --git a/gcc/testsuite/g++.target/i386/vect-cmp.C 
b/gcc/testsuite/g++.target/i386/vect-cmp.C
new file mode 100644
index ..c154474fa51c
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/vect-cmp.C
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v3 -fdump-tree-optimized" } */
+
+#define vect8 __attribute__((vector_size(8) ))
+#define vect16 __attribute__((vector_size(16) ))
+#define vect32 __attribute__((vector_size(32) ))
+
+vect8 int bar0 (vect8 float a, vect8 float b, vect8 int c)
+{
+  return (a > b) ? 0 : c;
+}
+
+vect16 int bar1 (vect16 float a, vect16 float b, vect16 int c)
+{
+  return (a > b) ? 0 : c;
+}
+
+vect32 int bar2 (vect32 float a, vect32 float b, vect32 int c)
+{
+  return (a > b) ? 0 : c;
+}
+
+/* { dg-final { scan-tree-dump-times ".BIT_ANDN " 3 "optimized" { target { ! 
ia32 } } } } */


[gcc r15-5666] Optimize 128-bit vector permutation with pand, pandn and por.

2024-11-25 Thread Lili Cui via Gcc-cvs
https://gcc.gnu.org/g:60b708a9c878aff9a76ec0d446ae63e6527327a6

commit r15-5666-g60b708a9c878aff9a76ec0d446ae63e6527327a6
Author: Cui, Lili 
Date:   Tue Nov 26 15:10:23 2024 +0800

Optimize 128-bit vector permutation with pand, pandn and por.

This patch introduces a new subroutine in ix86_expand_vec_perm_const_1.
On x86, use mixed constant permutation for V8HImode and V16QImode when
SSE2 is supported. This patch handles certain vector shuffle operations
more efficiently using pand, pandn, and por. This change is intended to
improve assembly code generation for configurations that support SSE2.

gcc/ChangeLog:

PR target/116675
* config/i386/i386-expand.cc (expand_vec_perm_pand_pandn_por):
New subroutine.
(ix86_expand_vec_perm_const_1): Call expand_vec_perm_pand_pandn_por.

gcc/testsuite/ChangeLog:

PR target/116675
* gcc.target/i386/pr116675.c: New test.

Diff:
---
 gcc/config/i386/i386-expand.cc   | 50 +
 gcc/testsuite/gcc.target/i386/pr116675.c | 75 
 2 files changed, 125 insertions(+)

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index f8dcce465e9a..2eb619725047 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -23102,6 +23102,53 @@ expand_vec_perm_vpshufb2_vpermq_even_odd (struct 
expand_vec_perm_d *d)
   return true;
 }
 
+/* A subroutine of ix86_expand_vec_perm_const_1. Try to implement a
+   permutation (which is a bland) with and, andnot and or when pshufb is not 
available.
+
+   It handles case:
+   __builtin_shufflevector (v1, v2, 0, 9, 2, 11, 4, 13, 6, 15);
+   __builtin_shufflevector (v1, v2, 8, 1, 2, 11, 4, 13, 6, 15);
+
+   An element[i] must be chosen between op0[i] and op1[i] to satisfy the
+   requirement.
+ */
+
+static bool
+expand_vec_perm_pand_pandn_por (struct expand_vec_perm_d *d)
+{
+  rtx rperm[16], vperm;
+  unsigned int i, nelt = d->nelt;
+
+  if (!TARGET_SSE2
+  || d->one_operand_p
+  || (d->vmode != V16QImode && d->vmode != V8HImode))
+return false;
+
+  if (d->perm[0] != 0)
+return false;
+
+  /* The dest[i] must select an element between op0[i] and op1[i].  */
+  for (i = 1; i < nelt; i++)
+if ((d->perm[i] % nelt) != i)
+  return false;
+
+  if (d->testing_p)
+ return true;
+
+  /* Generates a blend mask for the operators AND and ANDNOT.  */
+  machine_mode inner_mode = GET_MODE_INNER (d->vmode);
+  for (i = 0; i < nelt; i++)
+rperm[i] = (d->perm[i] <  nelt) ? CONSTM1_RTX (inner_mode)
+  : CONST0_RTX (inner_mode);
+
+  vperm = gen_rtx_CONST_VECTOR (d->vmode, gen_rtvec_v (nelt, rperm));
+  vperm = force_reg (d->vmode, vperm);
+
+  ix86_expand_sse_movcc (d->target, vperm, d->op0, d->op1);
+
+  return true;
+}
+
 /* Implement permutation with pslldq + psrldq + por when pshufb is not
available.  */
 static bool
@@ -24161,6 +24208,9 @@ ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d 
*d)
   if (expand_vec_perm_psrlw_psllw_por (d))
 return true;
 
+  if (expand_vec_perm_pand_pandn_por (d))
+return true;
+
   /* Try sequences of four instructions.  */
 
   if (expand_vec_perm_even_odd_trunc (d))
diff --git a/gcc/testsuite/gcc.target/i386/pr116675.c 
b/gcc/testsuite/gcc.target/i386/pr116675.c
new file mode 100644
index ..e463dd8415f5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr116675.c
@@ -0,0 +1,75 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse2 -mno-ssse3" } */
+/* { dg-final { scan-assembler-times "pand" 4 } } */
+/* { dg-final { scan-assembler-times "pandn" 4 } } */
+/* { dg-final { scan-assembler-times "por" 4 } } */
+
+#include 
+
+__attribute__((noinline, noclone, target("sse2")))
+static __v8hi foo1 (__v8hi a, __v8hi b)
+{
+  return __builtin_shufflevector (a, b, 0, 9, 2, 11, 4, 13, 6, 15);
+}
+
+__attribute__((noinline, noclone, target("sse2")))
+static __v8hi foo2 (__v8hi a, __v8hi b)
+{
+  return __builtin_shufflevector (a, b, 8, 9, 2, 3, 4, 13, 14, 15);
+}
+
+__attribute__((noinline, noclone, target("sse2")))
+static __v16qi foo3 (__v16qi a, __v16qi b)
+{
+  return __builtin_shufflevector (a, b, 0, 17, 2, 19, 4, 21, 6, 23,
+ 8, 25, 10, 27, 12, 29, 14, 31);
+}
+
+__attribute__((noinline, noclone, target("sse2")))
+static __v16qi foo4 (__v16qi a, __v16qi b)
+{
+  return __builtin_shufflevector (a, b, 0, 1, 2, 3, 4, 21, 6, 23,
+8, 25, 10, 27,12,29,14,31);
+}
+
+__attribute__((noinline, noclone)) void
+compare_v8hi (__v8hi a,  __v8hi b)
+{
+  for (int i = 0; i < 8; i++) 
+if (a[i] != b[i]) 
+  __builtin_abort ();
+}
+
+__attribute__((noinline, noclone)) void
+compare_v16qi (__v16qi a,  __v16qi b)
+{
+  for (int i = 0; i < 16; i++)
+if (a[i] != b[i])
+  __builtin_abort ();
+}
+
+int main (void)
+{
+  __v8hi s1, s2, s3, s4, s5, s6;
+  __v16qi s7, s8, s9, s10, s11, s

[gcc r16-1569] x86: Fix shrink wrap separate ICE under -fstack-clash-protection [PR120697]

2025-06-18 Thread Lili Cui via Gcc-cvs
https://gcc.gnu.org/g:1f2e4058e57c68b5ea91ab2bac469d5e57b6ff46

commit r16-1569-g1f2e4058e57c68b5ea91ab2bac469d5e57b6ff46
Author: Lili Cui 
Date:   Thu Jun 19 08:39:54 2025 +0800

x86: Fix shrink wrap separate ICE under -fstack-clash-protection [PR120697]

gcc/ChangeLog:

PR target/120697
* config/i386/i386.cc (ix86_expand_prologue):
Remove 3 assertions and associated code.

gcc/testsuite/ChangeLog:

PR target/120697
* gcc.target/i386/stack-clash-protection.c: New test.

Diff:
---
 gcc/config/i386/i386.cc   | 14 +-
 .../gcc.target/i386/stack-clash-protection.c  | 19 +++
 2 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 9bf198c7416c..77853297a2fa 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -9234,10 +9234,9 @@ ix86_expand_prologue (void)
 the stack frame saving one cycle of the prologue.  However, avoid
 doing this if we have to probe the stack; at least on x86_64 the
 stack probe can turn into a call that clobbers a red zone location. */
-  else if ((ix86_using_red_zone ()
+  else if (ix86_using_red_zone ()
&& (! TARGET_STACK_PROBE
|| frame.stack_pointer_offset < CHECK_STACK_LIMIT))
-  || crtl->shrink_wrapped_separate)
{
  HOST_WIDE_INT allocate_offset;
  if (crtl->shrink_wrapped_separate)
@@ -9253,11 +9252,6 @@ ix86_expand_prologue (void)
 
  ix86_emit_save_regs_using_mov (frame.reg_save_offset);
  int_registers_saved = true;
-
- if (ix86_using_red_zone ()
- && (! TARGET_STACK_PROBE
- || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
-   cfun->machine->red_zone_used = true;
}
 }
 
@@ -9377,8 +9371,6 @@ ix86_expand_prologue (void)
   && flag_stack_clash_protection
   && !ix86_target_stack_probe ())
 {
-  gcc_assert (!crtl->shrink_wrapped_separate);
-
   ix86_adjust_stack_and_probe (allocate, int_registers_saved, false);
   allocate = 0;
 }
@@ -9389,8 +9381,6 @@ ix86_expand_prologue (void)
 {
   const HOST_WIDE_INT probe_interval = get_probe_interval ();
 
-  gcc_assert (!crtl->shrink_wrapped_separate);
-
   if (STACK_CHECK_MOVING_SP)
{
  if (crtl->is_leaf
@@ -9447,8 +9437,6 @@ ix86_expand_prologue (void)
   else if (!ix86_target_stack_probe ()
   || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
 {
-  gcc_assert (!crtl->shrink_wrapped_separate);
-
   pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
 GEN_INT (-allocate), -1,
 m->fs.cfa_reg == stack_pointer_rtx);
diff --git a/gcc/testsuite/gcc.target/i386/stack-clash-protection.c 
b/gcc/testsuite/gcc.target/i386/stack-clash-protection.c
new file mode 100644
index ..5be28cb3ac7b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/stack-clash-protection.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-clash-protection" } */
+
+int flag;
+void open();
+int getChar();
+typedef enum { QUOTE } CharType;
+typedef enum { UNQ } State;
+CharType getCharType();
+void expand() {
+  open();
+  if (flag)
+return;
+  int ch = getChar();
+  State nextState = getCharType();
+  if (nextState)
+while (ch)
+  ;
+}


[gcc r16-1551] x86: Enable separate shrink wrapping

2025-06-17 Thread Lili Cui via Gcc-cvs
https://gcc.gnu.org/g:2c30f828e4507863713cff44cd30c88aa7f27865

commit r16-1551-g2c30f828e4507863713cff44cd30c88aa7f27865
Author: Lili Cui 
Date:   Tue Jun 17 21:39:38 2025 +0800

x86: Enable separate shrink wrapping

This commit implements the target macros (TARGET_SHRINK_WRAP_*) that
enable separate shrink wrapping for function prologues/epilogues in
x86.

When performing separate shrink wrapping, we use mov instead of
push/pop: push/pop makes the rsp adjustment more complicated to
handle and may lose performance, whereas mov has a small impact on
code size but guarantees performance.

Using mov means we need to use sub/add to maintain the stack frame. In
some special cases, we need to use lea to avoid affecting EFLAGS.

For example, a sub inserted between test, je and jle would change
EFLAGS, so lea should be used there instead.

foo:
        xorl    %eax, %eax
        testl   %edi, %edi
        je      .L11
        sub     $16, %rsp  --> leaq    -16(%rsp), %rsp
        movq    %r13, 8(%rsp)
        movl    $1, %r13d
        jle     .L4

Tested against SPEC CPU 2017, this change always has a net-positive
effect on the dynamic instruction count.  See the following table for
the breakdown on how this reduces the number of dynamic instructions
per workload on a like-for-like (with/without this commit):

instruction count   base        with commit (commit-base)/commit
502.gcc_r   98666845943 96891561634 -1.80%
526.blender_r   6.21226E+11 6.12992E+11 -1.33%
520.omnetpp_r   1.1241E+11  1.11093E+11 -1.17%
500.perlbench_r 1271558717  1263268350  -0.65%
523.xalancbmk_r 2.20103E+11 2.18836E+11 -0.58%
531.deepsjeng_r 2.73591E+11 2.72114E+11 -0.54%
500.perlbench_r 64195557393 63881512409 -0.49%
541.leela_r 2.99097E+11 2.98245E+11 -0.29%
548.exchange2_r 1.27976E+11 1.27784E+11 -0.15%
527.cam4_r  88981458425 7334679 -0.11%
554.roms_r  2.60072E+11 2.59809E+11 -0.10%

Collected SPEC CPU 2017 performance on ZNVER5, EMR and ICELAKE. No
performance regression was observed.

For O2 multi-copy:
511.povray_r improved by 2.8% on ZNVER5.
511.povray_r improved by 4% on EMR.
511.povray_r improved by 3.3% ~ 4.6% on ICELAKE.

gcc/ChangeLog:

* config/i386/i386-protos.h (ix86_get_separate_components):
New function.
(ix86_components_for_bb): Likewise.
(ix86_disqualify_components): Likewise.
(ix86_emit_prologue_components): Likewise.
(ix86_emit_epilogue_components): Likewise.
(ix86_set_handled_components): Likewise.
* config/i386/i386.cc (save_regs_using_push_pop):
Split from ix86_compute_frame_layout.
(ix86_compute_frame_layout):
Use save_regs_using_push_pop.
(pro_epilogue_adjust_stack):
Use gen_pro_epilogue_adjust_stack_add_nocc.
(ix86_expand_prologue): Add some assertions and adjust
the stack frame at the beginning of the prolog for shrink
wrapping separate.
(ix86_emit_save_regs_using_mov):
Skip registers that are wrapped separately.
(ix86_emit_restore_regs_using_mov): Likewise.
(ix86_expand_epilogue): Add some assertions and set
restore_regs_via_mov to true for shrink wrapping separate.
(ix86_get_separate_components): New function.
(ix86_components_for_bb): Likewise.
(ix86_disqualify_components): Likewise.
(ix86_emit_prologue_components): Likewise.
(ix86_emit_epilogue_components): Likewise.
(ix86_set_handled_components): Likewise.
(TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS): Define.
(TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB): Likewise.
(TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS): Likewise.
(TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS): Likewise.
(TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS): Likewise.
(TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS): Likewise.
* config/i386/i386.h (struct machine_function): Add
reg_is_wrapped_separately array for register wrapping
information.
* config/i386/i386.md
(@pro_epilogue_adjust_stack_add_nocc): New.

gcc/testsuite/ChangeLog:

* gcc.target/x86_64/abi/callabi/leaf-2.c: Adjust the test.
* gcc.target/i386/interrupt-16.c: Likewise.
* gfortran.dg/guality/arg1.f90: Likewise.
* gcc.target/i386/avx10_2-comibf-1.c: Likewise.
* g++.target/i386/shrink_wrap_separa

[gcc r16-1640] Fix shrink wrap separate ICE for mingw [PR120741]

2025-06-23 Thread Lili Cui via Gcc-cvs
https://gcc.gnu.org/g:4b739c020a90dfe2569a292c44b2293a94d4bff5

commit r16-1640-g4b739c020a90dfe2569a292c44b2293a94d4bff5
Author: Lili Cui 
Date:   Tue Jun 24 10:49:43 2025 +0800

Fix shrink wrap separate ICE for mingw [PR120741]

gcc/ChangeLog:

PR target/120741
* config/i386/i386.cc (ix86_expand_prologue):
Remove 1 assertion.

gcc/testsuite/ChangeLog:

PR target/120741
* gcc.target/i386/pr120741.c: New test.
* gcc.target/i386/shrink-wrap-separate-mingw.c: Likewise.

Diff:
---
 gcc/config/i386/i386.cc|  2 --
 gcc/testsuite/gcc.target/i386/pr120741.c   | 22 ++
 .../gcc.target/i386/shrink-wrap-separate-mingw.c   | 22 ++
 3 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index fc3105919f45..84081ab12670 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -9443,8 +9443,6 @@ ix86_expand_prologue (void)
 }
   else
 {
-  gcc_assert (!crtl->shrink_wrapped_separate);
-
   rtx eax = gen_rtx_REG (Pmode, AX_REG);
   rtx r10 = NULL;
   const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
diff --git a/gcc/testsuite/gcc.target/i386/pr120741.c 
b/gcc/testsuite/gcc.target/i386/pr120741.c
new file mode 100644
index ..b59a58c48b89
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr120741.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mstack-arg-probe" } */
+
+short __mingw_swformat_format;
+__builtin_va_list __mingw_swformat_arg;
+int __mingw_swformat_fc;
+typedef struct {
+  void *fp;
+  int bch[1024];
+} _IFP;
+void __mingw_swformat(_IFP *s) {
+  if (s->fp)
+while (__mingw_swformat_format)
+  if (__mingw_swformat_fc == 'A')
+   *__builtin_va_arg(__mingw_swformat_arg, double *) = 0;
+}
+void
+__mingw_vswscanf (void)
+{
+  _IFP ifp;
+  __mingw_swformat(&ifp);
+}
diff --git a/gcc/testsuite/gcc.target/i386/shrink-wrap-separate-mingw.c 
b/gcc/testsuite/gcc.target/i386/shrink-wrap-separate-mingw.c
new file mode 100644
index ..58635e49647a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/shrink-wrap-separate-mingw.c
@@ -0,0 +1,22 @@
+/* { dg-do compile { target *-*-mingw* *-*-cygwin* } } */
+/* { dg-options "-std=gnu99 -O2" } */
+
+short __mingw_swformat_format;
+__builtin_va_list __mingw_swformat_arg;
+int __mingw_swformat_fc;
+typedef struct {
+  void *fp;
+  int bch[1024];
+} _IFP;
+void __mingw_swformat(_IFP *s) {
+  if (s->fp)
+while (__mingw_swformat_format)
+  if (__mingw_swformat_fc == 'A')
+*__builtin_va_arg(__mingw_swformat_arg, double *) = 0;
+}
+void
+__mingw_vswscanf (void)
+{
+  _IFP ifp;
+  __mingw_swformat(&ifp);
+}


[gcc r16-1757] Relax the testcase check for Solaris [PR120818]

2025-06-28 Thread Lili Cui via Gcc-cvs
https://gcc.gnu.org/g:e7fb2459b00cde4fb14062076df29320efafdb98

commit r16-1757-ge7fb2459b00cde4fb14062076df29320efafdb98
Author: Lili Cui 
Date:   Sat Jun 28 06:19:19 2025 -0700

Relax the testcase check for Solaris [PR120818]

gcc/testsuite/ChangeLog:

PR target/120818
* g++.target/i386/shrink_wrap_separate.C: Relax the check.

Diff:
---
 gcc/testsuite/g++.target/i386/shrink_wrap_separate.C | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/gcc/testsuite/g++.target/i386/shrink_wrap_separate.C 
b/gcc/testsuite/g++.target/i386/shrink_wrap_separate.C
index 294dccde5d31..b924fd02ca94 100644
--- a/gcc/testsuite/g++.target/i386/shrink_wrap_separate.C
+++ b/gcc/testsuite/g++.target/i386/shrink_wrap_separate.C
@@ -21,5 +21,4 @@ bool k() {
   b *n;
  return h(l->g, n);
 }
-/* { dg-final { scan-rtl-dump "The components we wrap separately are \\\[sep 3 
4\\\]" "pro_and_epilogue" { target { ia32 } } } } */
-/* { dg-final { scan-rtl-dump "The components we wrap separately are \\\[sep 
40 41 42 43\\\]" "pro_and_epilogue" { target { ! ia32 } } } } */
+/* { dg-final { scan-rtl-dump "The components we wrap separately are" 
"pro_and_epilogue" } } */