[AArch64] Merge stores of D register values of different modes

Jackson Woodruff Wed, 06 Sep 2017 03:53:28 -0700

Hi all,

This patch merges loads and stores from D-registers that are of different modes.


Code like this:

    typedef int __attribute__((vector_size(8))) vec;
    struct pair
    {
      vec v;
      double d;
    }

    void
    assign (struct pair *p, vec v)
    {
      p->v = v;
      p->d = 1.0;
    }

Now generates a single `stp` instruction, whereas previously it generated two `str` instructions. Likewise for loads.

I have taken the opportunity to merge some of the patterns into a single pattern. Previously, we had different patterns for DI, DF, SI, SF modes. The patch uses the new iterators to reduce these to two patterns.



This patch also merges storing of double zero values with
long integer values:

    struct pair
    {
      long long l;
      double d;
    }

    void
    foo (struct pair *p)
    {
      p->l = 10;
      p->d = 0.0;
    }

Now generates a single store pair instruction rather than two `str` instructions.


Bootstrap and testsuite run OK. OK for trunk?

Jackson

gcc/

2017-07-21  Jackson Woodruff  <[email protected]>

        * config/aarch64/aarch64.md: New patterns to generate stp
        and ldp.
        * config/aarch64/aarch64-ldpstp.md: Modified peephole
        for different mode ldpstp and added peephole for merge zero
        stores. Likewise for loads.
        * config/aarch64/aarch64.c (aarch64_operands_ok_for_ldpstp):
        Added size check.
        (aarch64_gen_store_pair): Rename calls to match new patterns.
        (aarch64_gen_load_pair): Rename calls to match new patterns.
        * config/aarch64/aarch64-simd.md (store_pair<mode>): Updated
        pattern to match two modes.
        (store_pair_sw, store_pair_dw): New patterns to generate stp for
        single words and double words.
        (load_pair_sw, load_pair_dw): Likewise.
        (store_pair_sf, store_pair_df, store_pair_si, store_pair_di):
        Removed.
        (load_pair_sf, load_pair_df, load_pair_si, load_pair_di):
        Removed.
        * config/aarch64/iterators.md: New mode iterators for
        types in d registers and duplicate DX and SX modes.
        New iterator for DI, DF, SI, SF.
        * config/aarch64/predicates.md (aarch64_reg_zero_or_fp_zero):
        New.


gcc/testsuite/

2017-07-21  Jackson Woodruff  <[email protected]>

        * gcc.target/aarch64/ldp_stp_6.c: New.
        * gcc.target/aarch64/ldp_stp_7.c: New.
        * gcc.target/aarch64/ldp_stp_8.c: New.

diff --git a/gcc/config/aarch64/aarch64-ldpstp.md 
b/gcc/config/aarch64/aarch64-ldpstp.md
index 
e8dda42c2dd1e30c4607c67a2156ff7813bd89ea..14e860d258e548d4118d957675f8bdbb74615337
 100644
--- a/gcc/config/aarch64/aarch64-ldpstp.md
+++ b/gcc/config/aarch64/aarch64-ldpstp.md
@@ -99,10 +99,10 @@
 })
 
 (define_peephole2
-  [(set (match_operand:VD 0 "register_operand" "")
-       (match_operand:VD 1 "aarch64_mem_pair_operand" ""))
-   (set (match_operand:VD 2 "register_operand" "")
-       (match_operand:VD 3 "memory_operand" ""))]
+  [(set (match_operand:DREG 0 "register_operand" "")
+       (match_operand:DREG 1 "aarch64_mem_pair_operand" ""))
+   (set (match_operand:DREG2 2 "register_operand" "")
+       (match_operand:DREG2 3 "memory_operand" ""))]
   "aarch64_operands_ok_for_ldpstp (operands, true, <MODE>mode)"
   [(parallel [(set (match_dup 0) (match_dup 1))
              (set (match_dup 2) (match_dup 3))])]
@@ -119,11 +119,12 @@
 })
 
 (define_peephole2
-  [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "")
-       (match_operand:VD 1 "register_operand" ""))
-   (set (match_operand:VD 2 "memory_operand" "")
-       (match_operand:VD 3 "register_operand" ""))]
-  "TARGET_SIMD && aarch64_operands_ok_for_ldpstp (operands, false, <MODE>mode)"
+  [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "")
+       (match_operand:DREG 1 "register_operand" ""))
+   (set (match_operand:DREG2 2 "memory_operand" "")
+       (match_operand:DREG2 3 "register_operand" ""))]
+  "TARGET_SIMD
+   && aarch64_operands_ok_for_ldpstp (operands, false, <DREG:MODE>mode)"
   [(parallel [(set (match_dup 0) (match_dup 1))
              (set (match_dup 2) (match_dup 3))])]
 {
@@ -138,7 +139,6 @@
     }
 })
 
-
 ;; Handle sign/zero extended consecutive load/store.
 
 (define_peephole2
@@ -181,6 +181,30 @@
     }
 })
 
+;; Handle storing of a floating point zero.
+;; We can match modes that won't work for a stp instruction
+;; as aarch64_operands_ok_for_ldpstp checks that the modes are
+;; compatible.
+(define_peephole2
+  [(set (match_operand:DSX 0 "aarch64_mem_pair_operand" "")
+       (match_operand:DSX 1 "aarch64_reg_zero_or_fp_zero" ""))
+   (set (match_operand:<FCVT_TARGET> 2 "memory_operand" "")
+       (match_operand:<FCVT_TARGET> 3 "aarch64_reg_zero_or_fp_zero" ""))]
+  "aarch64_operands_ok_for_ldpstp (operands, false, DImode)"
+  [(parallel [(set (match_dup 0) (match_dup 1))
+             (set (match_dup 2) (match_dup 3))])]
+{
+  rtx base, offset_1, offset_2;
+
+  extract_base_offset_in_addr (operands[0], &base, &offset_1);
+  extract_base_offset_in_addr (operands[2], &base, &offset_2);
+  if (INTVAL (offset_1) > INTVAL (offset_2))
+    {
+      std::swap (operands[0], operands[2]);
+      std::swap (operands[1], operands[3]);
+    }
+})
+
 ;; Handle consecutive load/store whose offset is out of the range
 ;; supported by ldp/ldpsw/stp.  We firstly adjust offset in a scratch
 ;; register, then merge them into ldp/ldpsw/stp by using the adjusted
diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index 
f3e084f8778d70c82823b92fa80ff96021ad26db..34f321a117cb96211a69119939fc518504bbf1a4
 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -172,11 +172,11 @@
   [(set_attr "type" "neon_store1_1reg<q>")]
 )
 
-(define_insn "load_pair<mode>"
-  [(set (match_operand:VD 0 "register_operand" "=w")
-       (match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
-   (set (match_operand:VD 2 "register_operand" "=w")
-       (match_operand:VD 3 "memory_operand" "m"))]
+(define_insn "load_pair<DREG:mode><DREG2:mode>"
+  [(set (match_operand:DREG 0 "register_operand" "=w")
+       (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
+   (set (match_operand:DREG2 2 "register_operand" "=w")
+       (match_operand:DREG2 3 "memory_operand" "m"))]
   "TARGET_SIMD
    && rtx_equal_p (XEXP (operands[3], 0),
                   plus_constant (Pmode,
@@ -186,11 +186,11 @@
   [(set_attr "type" "neon_ldp")]
 )
 
-(define_insn "store_pair<mode>"
-  [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
-       (match_operand:VD 1 "register_operand" "w"))
-   (set (match_operand:VD 2 "memory_operand" "=m")
-       (match_operand:VD 3 "register_operand" "w"))]
+(define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
+  [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
+       (match_operand:DREG 1 "register_operand" "w"))
+   (set (match_operand:DREG2 2 "memory_operand" "=m")
+       (match_operand:DREG2 3 "register_operand" "w"))]
   "TARGET_SIMD
    && rtx_equal_p (XEXP (operands[2], 0),
                   plus_constant (Pmode,
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 
28c4e0e64766060851c0c7cd6b86995fae25353d..a3bd1b1180903703d33ca822d06afc74f1748c44
 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -3179,10 +3179,10 @@ aarch64_gen_store_pair (machine_mode mode, rtx mem1, 
rtx reg1, rtx mem2,
   switch (mode)
     {
     case DImode:
-      return gen_store_pairdi (mem1, reg1, mem2, reg2);
+      return gen_store_pair_dw_DIDI (mem1, reg1, mem2, reg2);
 
     case DFmode:
-      return gen_store_pairdf (mem1, reg1, mem2, reg2);
+      return gen_store_pair_dw_DFDF (mem1, reg1, mem2, reg2);
 
     default:
       gcc_unreachable ();
@@ -3199,10 +3199,10 @@ aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx 
mem1, rtx reg2,
   switch (mode)
     {
     case DImode:
-      return gen_load_pairdi (reg1, mem1, reg2, mem2);
+      return gen_load_pair_dw_DIDI (reg1, mem1, reg2, mem2);
 
     case DFmode:
-      return gen_load_pairdf (reg1, mem1, reg2, mem2);
+      return gen_load_pair_dw_DFDF (reg1, mem1, reg2, mem2);
 
     default:
       gcc_unreachable ();
@@ -14712,6 +14712,11 @@ aarch64_operands_ok_for_ldpstp (rtx *operands, bool 
load,
   if (!rtx_equal_p (base_1, base_2))
     return false;
 
+  /* Check that the operands are of the same size.  */
+  if (GET_MODE_SIZE (GET_MODE (mem_1))
+      != GET_MODE_SIZE (GET_MODE (mem_2)))
+    return false;
+
   offval_1 = INTVAL (offset_1);
   offval_2 = INTVAL (offset_2);
   msize = GET_MODE_SIZE (mode);
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 
c1bca07308d84f50a6fa5af116f0fa20589882db..46affe8c63a58bd60b993349555e81c4c5008113
 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1220,141 +1220,76 @@
 
 ;; Operands 1 and 3 are tied together by the final condition; so we allow
 ;; fairly lax checking on the second memory operation.
-(define_insn "load_pairsi"
-  [(set (match_operand:SI 0 "register_operand" "=r,*w")
-       (match_operand:SI 1 "aarch64_mem_pair_operand" "Ump,Ump"))
-   (set (match_operand:SI 2 "register_operand" "=r,*w")
-       (match_operand:SI 3 "memory_operand" "m,m"))]
-  "rtx_equal_p (XEXP (operands[3], 0),
-               plus_constant (Pmode,
-                              XEXP (operands[1], 0),
-                              GET_MODE_SIZE (SImode)))"
+(define_insn "load_pair_sw_<SX:MODE><SX2:MODE>"
+  [(set (match_operand:SX 0 "register_operand" "=r,w")
+       (match_operand:SX 1 "aarch64_mem_pair_operand" "Ump,Ump"))
+   (set (match_operand:SX2 2 "register_operand" "=r,w")
+       (match_operand:SX2 3 "memory_operand" "m,m"))]
+   "rtx_equal_p (XEXP (operands[3], 0),
+                plus_constant (Pmode,
+                               XEXP (operands[1], 0),
+                               GET_MODE_SIZE (<MODE>mode)))"
   "@
-   ldp\\t%w0, %w2, %1
-   ldp\\t%s0, %s2, %1"
+  ldp\t%w0, %w2, %1
+  ldp\t%s0, %s2, %1"
   [(set_attr "type" "load2,neon_load1_2reg")
    (set_attr "fp" "*,yes")]
 )
 
-(define_insn "load_pairdi"
-  [(set (match_operand:DI 0 "register_operand" "=r,*w")
-       (match_operand:DI 1 "aarch64_mem_pair_operand" "Ump,Ump"))
-   (set (match_operand:DI 2 "register_operand" "=r,*w")
-       (match_operand:DI 3 "memory_operand" "m,m"))]
-  "rtx_equal_p (XEXP (operands[3], 0),
-               plus_constant (Pmode,
-                              XEXP (operands[1], 0),
-                              GET_MODE_SIZE (DImode)))"
+;; Loading different modes that can still be merged
+(define_insn "load_pair_dw_<DX:MODE><DX2:MODE>"
+  [(set (match_operand:DX 0 "register_operand" "=r,w")
+       (match_operand:DX 1 "aarch64_mem_pair_operand" "Ump,Ump"))
+   (set (match_operand:DX2 2 "register_operand" "=r,w")
+       (match_operand:DX2 3 "memory_operand" "m,m"))]
+   "rtx_equal_p (XEXP (operands[3], 0),
+                plus_constant (Pmode,
+                               XEXP (operands[1], 0),
+                               GET_MODE_SIZE (<MODE>mode)))"
   "@
-   ldp\\t%x0, %x2, %1
-   ldp\\t%d0, %d2, %1"
+  ldp\t%x0, %x2, %1
+  ldp\t%d0, %d2, %1"
   [(set_attr "type" "load2,neon_load1_2reg")
    (set_attr "fp" "*,yes")]
 )
 
 
+
 ;; Operands 0 and 2 are tied together by the final condition; so we allow
 ;; fairly lax checking on the second memory operation.
-(define_insn "store_pairsi"
-  [(set (match_operand:SI 0 "aarch64_mem_pair_operand" "=Ump,Ump")
-       (match_operand:SI 1 "aarch64_reg_or_zero" "rZ,*w"))
-   (set (match_operand:SI 2 "memory_operand" "=m,m")
-       (match_operand:SI 3 "aarch64_reg_or_zero" "rZ,*w"))]
-  "rtx_equal_p (XEXP (operands[2], 0),
-               plus_constant (Pmode,
-                              XEXP (operands[0], 0),
-                              GET_MODE_SIZE (SImode)))"
+(define_insn "store_pair_sw_<SX:MODE><SX2:MODE>"
+  [(set (match_operand:SX 0 "aarch64_mem_pair_operand" "=Ump,Ump")
+       (match_operand:SX 1 "aarch64_reg_zero_or_fp_zero" "rYZ,w"))
+   (set (match_operand:SX2 2 "memory_operand" "=m,m")
+       (match_operand:SX2 3 "aarch64_reg_zero_or_fp_zero" "rYZ,w"))]
+   "rtx_equal_p (XEXP (operands[2], 0),
+                plus_constant (Pmode,
+                               XEXP (operands[0], 0),
+                               GET_MODE_SIZE (<MODE>mode)))"
   "@
-   stp\\t%w1, %w3, %0
-   stp\\t%s1, %s3, %0"
+  stp\t%w1, %w3, %0
+  stp\t%s1, %s3, %0"
   [(set_attr "type" "store2,neon_store1_2reg")
    (set_attr "fp" "*,yes")]
 )
 
-(define_insn "store_pairdi"
-  [(set (match_operand:DI 0 "aarch64_mem_pair_operand" "=Ump,Ump")
-       (match_operand:DI 1 "aarch64_reg_or_zero" "rZ,*w"))
-   (set (match_operand:DI 2 "memory_operand" "=m,m")
-       (match_operand:DI 3 "aarch64_reg_or_zero" "rZ,*w"))]
-  "rtx_equal_p (XEXP (operands[2], 0),
-               plus_constant (Pmode,
-                              XEXP (operands[0], 0),
-                              GET_MODE_SIZE (DImode)))"
+;; Storing different modes that can still be merged
+(define_insn "store_pair_dw_<DX:MODE><DX2:MODE>"
+  [(set (match_operand:DX 0 "aarch64_mem_pair_operand" "=Ump,Ump")
+       (match_operand:DX 1 "aarch64_reg_zero_or_fp_zero" "rYZ,w"))
+   (set (match_operand:DX2 2 "memory_operand" "=m,m")
+       (match_operand:DX2 3 "aarch64_reg_zero_or_fp_zero" "rYZ,w"))]
+   "rtx_equal_p (XEXP (operands[2], 0),
+                plus_constant (Pmode,
+                               XEXP (operands[0], 0),
+                               GET_MODE_SIZE (<MODE>mode)))"
   "@
-   stp\\t%x1, %x3, %0
-   stp\\t%d1, %d3, %0"
+  stp\t%x1, %x3, %0
+  stp\t%d1, %d3, %0"
   [(set_attr "type" "store2,neon_store1_2reg")
    (set_attr "fp" "*,yes")]
 )
 
-;; Operands 1 and 3 are tied together by the final condition; so we allow
-;; fairly lax checking on the second memory operation.
-(define_insn "load_pairsf"
-  [(set (match_operand:SF 0 "register_operand" "=w,*r")
-       (match_operand:SF 1 "aarch64_mem_pair_operand" "Ump,Ump"))
-   (set (match_operand:SF 2 "register_operand" "=w,*r")
-       (match_operand:SF 3 "memory_operand" "m,m"))]
-  "rtx_equal_p (XEXP (operands[3], 0),
-               plus_constant (Pmode,
-                              XEXP (operands[1], 0),
-                              GET_MODE_SIZE (SFmode)))"
-  "@
-   ldp\\t%s0, %s2, %1
-   ldp\\t%w0, %w2, %1"
-  [(set_attr "type" "neon_load1_2reg,load2")
-   (set_attr "fp" "yes,*")]
-)
-
-(define_insn "load_pairdf"
-  [(set (match_operand:DF 0 "register_operand" "=w,*r")
-       (match_operand:DF 1 "aarch64_mem_pair_operand" "Ump,Ump"))
-   (set (match_operand:DF 2 "register_operand" "=w,*r")
-       (match_operand:DF 3 "memory_operand" "m,m"))]
-  "rtx_equal_p (XEXP (operands[3], 0),
-               plus_constant (Pmode,
-                              XEXP (operands[1], 0),
-                              GET_MODE_SIZE (DFmode)))"
-  "@
-   ldp\\t%d0, %d2, %1
-   ldp\\t%x0, %x2, %1"
-  [(set_attr "type" "neon_load1_2reg,load2")
-   (set_attr "fp" "yes,*")]
-)
-
-;; Operands 0 and 2 are tied together by the final condition; so we allow
-;; fairly lax checking on the second memory operation.
-(define_insn "store_pairsf"
-  [(set (match_operand:SF 0 "aarch64_mem_pair_operand" "=Ump,Ump")
-       (match_operand:SF 1 "aarch64_reg_or_fp_zero" "w,*rY"))
-   (set (match_operand:SF 2 "memory_operand" "=m,m")
-       (match_operand:SF 3 "aarch64_reg_or_fp_zero" "w,*rY"))]
-  "rtx_equal_p (XEXP (operands[2], 0),
-               plus_constant (Pmode,
-                              XEXP (operands[0], 0),
-                              GET_MODE_SIZE (SFmode)))"
-  "@
-   stp\\t%s1, %s3, %0
-   stp\\t%w1, %w3, %0"
-  [(set_attr "type" "neon_store1_2reg,store2")
-   (set_attr "fp" "yes,*")]
-)
-
-(define_insn "store_pairdf"
-  [(set (match_operand:DF 0 "aarch64_mem_pair_operand" "=Ump,Ump")
-       (match_operand:DF 1 "aarch64_reg_or_fp_zero" "w,*rY"))
-   (set (match_operand:DF 2 "memory_operand" "=m,m")
-       (match_operand:DF 3 "aarch64_reg_or_fp_zero" "w,*rY"))]
-  "rtx_equal_p (XEXP (operands[2], 0),
-               plus_constant (Pmode,
-                              XEXP (operands[0], 0),
-                              GET_MODE_SIZE (DFmode)))"
-  "@
-   stp\\t%d1, %d3, %0
-   stp\\t%x1, %x3, %0"
-  [(set_attr "type" "neon_store1_2reg,store2")
-   (set_attr "fp" "yes,*")]
-)
-
 ;; Load pair with post-index writeback.  This is primarily used in function
 ;; epilogues.
 (define_insn "loadwb_pair<GPI:mode>_<P:mode>"
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 
cceb57525c7aa44933419bd317b1f03a7b76f4c4..6147d93f56649cbc9fe577a433bca610e476ab2c
 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -69,6 +69,12 @@
 ;; Double vector modes.
 (define_mode_iterator VD [V8QI V4HI V4HF V2SI V2SF])
 
+;; All modes stored in registers d0-d31.
+(define_mode_iterator DREG [V8QI V4HI V4HF V2SI V2SF DF])
+
+;; Copy of the above.
+(define_mode_iterator DREG2 [V8QI V4HI V4HF V2SI V2SF DF])
+
 ;; vector, 64-bit container, all integer modes
 (define_mode_iterator VD_BHSI [V8QI V4HI V2SI])
 
@@ -235,6 +241,18 @@
 ;; Double scalar modes
 (define_mode_iterator DX [DI DF])
 
+;; Duplicate of the above
+(define_mode_iterator DX2 [DI DF])
+
+;; Single scalar modes
+(define_mode_iterator SX [SI SF])
+
+;; Duplicate of the above
+(define_mode_iterator SX2 [SI SF])
+
+;; Single and double integer and float modes
+(define_mode_iterator DSX [DF DI SF SI])
+
 ;; Modes available for <f>mul lane operations.
 (define_mode_iterator VMUL [V4HI V8HI V2SI V4SI
                            (V4HF "TARGET_SIMD_F16INST")
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 
11243c4ce00aa7d16a886bb24b01180801c68f4e..ee6e050dd839c329baa05bdfe878b786f1def969
 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -62,6 +62,10 @@
        (and (match_code "const_double")
             (match_test "aarch64_float_const_zero_rtx_p (op)"))))
 
+(define_predicate "aarch64_reg_zero_or_fp_zero"
+  (ior (match_operand 0 "aarch64_reg_or_fp_zero")
+       (match_operand 0 "aarch64_reg_or_zero")))
+
 (define_predicate "aarch64_reg_zero_or_m1_or_1"
   (and (match_code "reg,subreg,const_int")
        (ior (match_operand 0 "register_operand")
diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_6.c 
b/gcc/testsuite/gcc.target/aarch64/ldp_stp_6.c
new file mode 100644
index 
0000000000000000000000000000000000000000..2d982f3389b668f2042d48ba3db04e619fd999f3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_6.c
@@ -0,0 +1,20 @@
+/* { dg-options "-O2" } */
+
+typedef float __attribute__ ((vector_size (8))) vec;
+
+struct pair
+{
+  vec e1;
+  double e2;
+};
+
+vec tmp;
+
+void
+stp (struct pair *p)
+{
+  p->e1 = tmp;
+  p->e2 = 1.0;
+
+  /* { dg-final { scan-assembler "stp\td\[0-9\]+, d\[0-9\]+, 
\\\[x\[0-9\]+\\\]" } } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_7.c 
b/gcc/testsuite/gcc.target/aarch64/ldp_stp_7.c
new file mode 100644
index 
0000000000000000000000000000000000000000..06607de6b3e36a4d759d915a9f7880284391aa08
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_7.c
@@ -0,0 +1,47 @@
+/* { dg-options "-O2" } */
+
+struct pair
+{
+  double a;
+  long int b;
+};
+
+void
+stp (struct pair *p)
+{
+  p->a = 0.0;
+  p->b = 1;
+}
+
+/* { dg-final { scan-assembler "stp\txzr, x\[0-9\]+, \\\[x\[0-9\]+\\\]" } } */
+
+void
+stp2 (struct pair *p)
+{
+  p->a = 0.0;
+  p->b = 0;
+}
+
+struct reverse_pair
+{
+  long int a;
+  double b;
+};
+
+void
+stp_reverse (struct reverse_pair *p)
+{
+  p->a = 1;
+  p->b = 0.0;
+}
+
+/* { dg-final { scan-assembler "stp\tx\[0-9\]+, xzr, \\\[x\[0-9\]+\\\]" } } */
+
+void
+stp_reverse2 (struct reverse_pair *p)
+{
+  p->a = 0;
+  p->b = 0.0;
+}
+
+/* { dg-final { scan-assembler-times "stp\txzr, xzr, \\\[x\[0-9\]+\\\]" 2 } } 
*/
diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_8.c 
b/gcc/testsuite/gcc.target/aarch64/ldp_stp_8.c
new file mode 100644
index 
0000000000000000000000000000000000000000..1a47e233814e564d549245683a4e59fdb422bdad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_8.c
@@ -0,0 +1,30 @@
+/* { dg-options "-O2" } */
+
+typedef float __attribute__ ((vector_size (8))) fvec;
+typedef int __attribute__ ((vector_size (8))) ivec;
+
+struct pair
+{
+  double a;
+  fvec b;
+};
+
+void ldp (double *a, fvec *b, struct pair *p)
+{
+  *a = p->a;
+  *b = p->b;
+}
+
+struct vec_pair
+{
+  fvec a;
+  ivec b;
+};
+
+void ldp2 (fvec *a, ivec *b, struct vec_pair *p)
+{
+  *a = p->a;
+  *b = p->b;
+}
+
+/* { dg-final { scan-assembler-times "ldp\td\[0-9\], d\[0-9\]+, 
\\\[x\[0-9\]+\\\]" 2 } } */

[AArch64] Merge stores of D register values of different modes

Reply via email to