Hi all,
This patch merges loads and stores from D-registers that are of
different modes.
Code like this:
typedef int __attribute__((vector_size(8))) vec;
struct pair
{
vec v;
double d;
};
void
assign (struct pair *p, vec v)
{
p->v = v;
p->d = 1.0;
}
now generates a single `stp` instruction, whereas previously it generated two
`str` instructions. The same applies to loads, which are merged into `ldp`.
I have taken the opportunity to merge some of the patterns into a single
pattern. Previously, we had different patterns for DI, DF, SI, SF modes.
The patch uses the new iterators to reduce these to two patterns.
This patch also merges a store of a floating-point zero with an adjacent
store of a long integer value:
struct pair
{
long long l;
double d;
};
void
foo (struct pair *p)
{
p->l = 10;
p->d = 0.0;
}
now generates a single `stp` (store pair) instruction rather than two `str`
instructions.
Bootstrap and testsuite run OK. OK for trunk?
Jackson
gcc/
2017-07-21 Jackson Woodruff <jackson.woodr...@arm.com>
* config/aarch64/aarch64.md: New patterns to generate stp
and ldp.
* config/aarch64/aarch64-ldpstp.md: Modified peephole
for different mode ldpstp and added peephole for merge zero
stores. Likewise for loads.
* config/aarch64/aarch64.c (aarch64_operands_ok_for_ldpstp):
Added size check.
(aarch64_gen_store_pair): Rename calls to match new patterns.
(aarch64_gen_load_pair): Rename calls to match new patterns.
* config/aarch64/aarch64-simd.md (store_pair<mode>): Updated
pattern to match two modes.
(store_pair_sw, store_pair_dw): New patterns to generate stp for
single words and double words.
(load_pair_sw, load_pair_dw): Likewise.
(store_pair_sf, store_pair_df, store_pair_si, store_pair_di):
Removed.
(load_pair_sf, load_pair_df, load_pair_si, load_pair_di):
Removed.
* config/aarch64/iterators.md: New mode iterators for
types in d registers and duplicate DX and SX modes.
New iterator for DI, DF, SI, SF.
* config/aarch64/predicates.md (aarch64_reg_zero_or_fp_zero):
New.
gcc/testsuite/
2017-07-21 Jackson Woodruff <jackson.woodr...@arm.com>
* gcc.target/aarch64/ldp_stp_6.c: New.
* gcc.target/aarch64/ldp_stp_7.c: New.
* gcc.target/aarch64/ldp_stp_8.c: New.
diff --git a/gcc/config/aarch64/aarch64-ldpstp.md b/gcc/config/aarch64/aarch64-ldpstp.md
index e8dda42c2dd1e30c4607c67a2156ff7813bd89ea..14e860d258e548d4118d957675f8bdbb74615337 100644
--- a/gcc/config/aarch64/aarch64-ldpstp.md
+++ b/gcc/config/aarch64/aarch64-ldpstp.md
@@ -99,10 +99,10 @@
})
(define_peephole2
- [(set (match_operand:VD 0 "register_operand" "")
- (match_operand:VD 1 "aarch64_mem_pair_operand" ""))
- (set (match_operand:VD 2 "register_operand" "")
- (match_operand:VD 3 "memory_operand" ""))]
+ [(set (match_operand:DREG 0 "register_operand" "")
+ (match_operand:DREG 1 "aarch64_mem_pair_operand" ""))
+ (set (match_operand:DREG2 2 "register_operand" "")
+ (match_operand:DREG2 3 "memory_operand" ""))]
"aarch64_operands_ok_for_ldpstp (operands, true, <MODE>mode)"
[(parallel [(set (match_dup 0) (match_dup 1))
(set (match_dup 2) (match_dup 3))])]
@@ -119,11 +119,12 @@
})
(define_peephole2
- [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "")
- (match_operand:VD 1 "register_operand" ""))
- (set (match_operand:VD 2 "memory_operand" "")
- (match_operand:VD 3 "register_operand" ""))]
- "TARGET_SIMD && aarch64_operands_ok_for_ldpstp (operands, false, <MODE>mode)"
+ [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "")
+ (match_operand:DREG 1 "register_operand" ""))
+ (set (match_operand:DREG2 2 "memory_operand" "")
+ (match_operand:DREG2 3 "register_operand" ""))]
+ "TARGET_SIMD
+ && aarch64_operands_ok_for_ldpstp (operands, false, <DREG:MODE>mode)"
[(parallel [(set (match_dup 0) (match_dup 1))
(set (match_dup 2) (match_dup 3))])]
{
@@ -138,7 +139,6 @@
}
})
-
;; Handle sign/zero extended consecutive load/store.
(define_peephole2
@@ -181,6 +181,30 @@
}
})
+;; Handle storing of a floating point zero.
+;; We can match modes that won't work for a stp instruction
+;; as aarch64_operands_ok_for_ldpstp checks that the modes are
+;; compatible.
+(define_peephole2
+ [(set (match_operand:DSX 0 "aarch64_mem_pair_operand" "")
+ (match_operand:DSX 1 "aarch64_reg_zero_or_fp_zero" ""))
+ (set (match_operand:<FCVT_TARGET> 2 "memory_operand" "")
+ (match_operand:<FCVT_TARGET> 3 "aarch64_reg_zero_or_fp_zero" ""))]
+ "aarch64_operands_ok_for_ldpstp (operands, false, DImode)"
+ [(parallel [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 2) (match_dup 3))])]
+{
+ rtx base, offset_1, offset_2;
+
+ extract_base_offset_in_addr (operands[0], &base, &offset_1);
+ extract_base_offset_in_addr (operands[2], &base, &offset_2);
+ if (INTVAL (offset_1) > INTVAL (offset_2))
+ {
+ std::swap (operands[0], operands[2]);
+ std::swap (operands[1], operands[3]);
+ }
+})
+
;; Handle consecutive load/store whose offset is out of the range
;; supported by ldp/ldpsw/stp. We firstly adjust offset in a scratch
;; register, then merge them into ldp/ldpsw/stp by using the adjusted
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index f3e084f8778d70c82823b92fa80ff96021ad26db..34f321a117cb96211a69119939fc518504bbf1a4 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -172,11 +172,11 @@
[(set_attr "type" "neon_store1_1reg<q>")]
)
-(define_insn "load_pair<mode>"
- [(set (match_operand:VD 0 "register_operand" "=w")
- (match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
- (set (match_operand:VD 2 "register_operand" "=w")
- (match_operand:VD 3 "memory_operand" "m"))]
+(define_insn "load_pair<DREG:mode><DREG2:mode>"
+ [(set (match_operand:DREG 0 "register_operand" "=w")
+ (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
+ (set (match_operand:DREG2 2 "register_operand" "=w")
+ (match_operand:DREG2 3 "memory_operand" "m"))]
"TARGET_SIMD
&& rtx_equal_p (XEXP (operands[3], 0),
plus_constant (Pmode,
@@ -186,11 +186,11 @@
[(set_attr "type" "neon_ldp")]
)
-(define_insn "store_pair<mode>"
- [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
- (match_operand:VD 1 "register_operand" "w"))
- (set (match_operand:VD 2 "memory_operand" "=m")
- (match_operand:VD 3 "register_operand" "w"))]
+(define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
+ [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
+ (match_operand:DREG 1 "register_operand" "w"))
+ (set (match_operand:DREG2 2 "memory_operand" "=m")
+ (match_operand:DREG2 3 "register_operand" "w"))]
"TARGET_SIMD
&& rtx_equal_p (XEXP (operands[2], 0),
plus_constant (Pmode,
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 28c4e0e64766060851c0c7cd6b86995fae25353d..a3bd1b1180903703d33ca822d06afc74f1748c44 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -3179,10 +3179,10 @@ aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
switch (mode)
{
case DImode:
- return gen_store_pairdi (mem1, reg1, mem2, reg2);
+ return gen_store_pair_dw_DIDI (mem1, reg1, mem2, reg2);
case DFmode:
- return gen_store_pairdf (mem1, reg1, mem2, reg2);
+ return gen_store_pair_dw_DFDF (mem1, reg1, mem2, reg2);
default:
gcc_unreachable ();
@@ -3199,10 +3199,10 @@ aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
switch (mode)
{
case DImode:
- return gen_load_pairdi (reg1, mem1, reg2, mem2);
+ return gen_load_pair_dw_DIDI (reg1, mem1, reg2, mem2);
case DFmode:
- return gen_load_pairdf (reg1, mem1, reg2, mem2);
+ return gen_load_pair_dw_DFDF (reg1, mem1, reg2, mem2);
default:
gcc_unreachable ();
@@ -14712,6 +14712,11 @@ aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
if (!rtx_equal_p (base_1, base_2))
return false;
+ /* Check that the operands are of the same size. */
+ if (GET_MODE_SIZE (GET_MODE (mem_1))
+ != GET_MODE_SIZE (GET_MODE (mem_2)))
+ return false;
+
offval_1 = INTVAL (offset_1);
offval_2 = INTVAL (offset_2);
msize = GET_MODE_SIZE (mode);
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index c1bca07308d84f50a6fa5af116f0fa20589882db..46affe8c63a58bd60b993349555e81c4c5008113 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1220,141 +1220,76 @@
;; Operands 1 and 3 are tied together by the final condition; so we allow
;; fairly lax checking on the second memory operation.
-(define_insn "load_pairsi"
- [(set (match_operand:SI 0 "register_operand" "=r,*w")
- (match_operand:SI 1 "aarch64_mem_pair_operand" "Ump,Ump"))
- (set (match_operand:SI 2 "register_operand" "=r,*w")
- (match_operand:SI 3 "memory_operand" "m,m"))]
- "rtx_equal_p (XEXP (operands[3], 0),
- plus_constant (Pmode,
- XEXP (operands[1], 0),
- GET_MODE_SIZE (SImode)))"
+(define_insn "load_pair_sw_<SX:MODE><SX2:MODE>"
+ [(set (match_operand:SX 0 "register_operand" "=r,w")
+ (match_operand:SX 1 "aarch64_mem_pair_operand" "Ump,Ump"))
+ (set (match_operand:SX2 2 "register_operand" "=r,w")
+ (match_operand:SX2 3 "memory_operand" "m,m"))]
+ "rtx_equal_p (XEXP (operands[3], 0),
+ plus_constant (Pmode,
+ XEXP (operands[1], 0),
+ GET_MODE_SIZE (<MODE>mode)))"
"@
- ldp\\t%w0, %w2, %1
- ldp\\t%s0, %s2, %1"
+ ldp\t%w0, %w2, %1
+ ldp\t%s0, %s2, %1"
[(set_attr "type" "load2,neon_load1_2reg")
(set_attr "fp" "*,yes")]
)
-(define_insn "load_pairdi"
- [(set (match_operand:DI 0 "register_operand" "=r,*w")
- (match_operand:DI 1 "aarch64_mem_pair_operand" "Ump,Ump"))
- (set (match_operand:DI 2 "register_operand" "=r,*w")
- (match_operand:DI 3 "memory_operand" "m,m"))]
- "rtx_equal_p (XEXP (operands[3], 0),
- plus_constant (Pmode,
- XEXP (operands[1], 0),
- GET_MODE_SIZE (DImode)))"
+;; Loading different modes that can still be merged
+(define_insn "load_pair_dw_<DX:MODE><DX2:MODE>"
+ [(set (match_operand:DX 0 "register_operand" "=r,w")
+ (match_operand:DX 1 "aarch64_mem_pair_operand" "Ump,Ump"))
+ (set (match_operand:DX2 2 "register_operand" "=r,w")
+ (match_operand:DX2 3 "memory_operand" "m,m"))]
+ "rtx_equal_p (XEXP (operands[3], 0),
+ plus_constant (Pmode,
+ XEXP (operands[1], 0),
+ GET_MODE_SIZE (<MODE>mode)))"
"@
- ldp\\t%x0, %x2, %1
- ldp\\t%d0, %d2, %1"
+ ldp\t%x0, %x2, %1
+ ldp\t%d0, %d2, %1"
[(set_attr "type" "load2,neon_load1_2reg")
(set_attr "fp" "*,yes")]
)
+
;; Operands 0 and 2 are tied together by the final condition; so we allow
;; fairly lax checking on the second memory operation.
-(define_insn "store_pairsi"
- [(set (match_operand:SI 0 "aarch64_mem_pair_operand" "=Ump,Ump")
- (match_operand:SI 1 "aarch64_reg_or_zero" "rZ,*w"))
- (set (match_operand:SI 2 "memory_operand" "=m,m")
- (match_operand:SI 3 "aarch64_reg_or_zero" "rZ,*w"))]
- "rtx_equal_p (XEXP (operands[2], 0),
- plus_constant (Pmode,
- XEXP (operands[0], 0),
- GET_MODE_SIZE (SImode)))"
+(define_insn "store_pair_sw_<SX:MODE><SX2:MODE>"
+ [(set (match_operand:SX 0 "aarch64_mem_pair_operand" "=Ump,Ump")
+ (match_operand:SX 1 "aarch64_reg_zero_or_fp_zero" "rYZ,w"))
+ (set (match_operand:SX2 2 "memory_operand" "=m,m")
+ (match_operand:SX2 3 "aarch64_reg_zero_or_fp_zero" "rYZ,w"))]
+ "rtx_equal_p (XEXP (operands[2], 0),
+ plus_constant (Pmode,
+ XEXP (operands[0], 0),
+ GET_MODE_SIZE (<MODE>mode)))"
"@
- stp\\t%w1, %w3, %0
- stp\\t%s1, %s3, %0"
+ stp\t%w1, %w3, %0
+ stp\t%s1, %s3, %0"
[(set_attr "type" "store2,neon_store1_2reg")
(set_attr "fp" "*,yes")]
)
-(define_insn "store_pairdi"
- [(set (match_operand:DI 0 "aarch64_mem_pair_operand" "=Ump,Ump")
- (match_operand:DI 1 "aarch64_reg_or_zero" "rZ,*w"))
- (set (match_operand:DI 2 "memory_operand" "=m,m")
- (match_operand:DI 3 "aarch64_reg_or_zero" "rZ,*w"))]
- "rtx_equal_p (XEXP (operands[2], 0),
- plus_constant (Pmode,
- XEXP (operands[0], 0),
- GET_MODE_SIZE (DImode)))"
+;; Storing different modes that can still be merged
+(define_insn "store_pair_dw_<DX:MODE><DX2:MODE>"
+ [(set (match_operand:DX 0 "aarch64_mem_pair_operand" "=Ump,Ump")
+ (match_operand:DX 1 "aarch64_reg_zero_or_fp_zero" "rYZ,w"))
+ (set (match_operand:DX2 2 "memory_operand" "=m,m")
+ (match_operand:DX2 3 "aarch64_reg_zero_or_fp_zero" "rYZ,w"))]
+ "rtx_equal_p (XEXP (operands[2], 0),
+ plus_constant (Pmode,
+ XEXP (operands[0], 0),
+ GET_MODE_SIZE (<MODE>mode)))"
"@
- stp\\t%x1, %x3, %0
- stp\\t%d1, %d3, %0"
+ stp\t%x1, %x3, %0
+ stp\t%d1, %d3, %0"
[(set_attr "type" "store2,neon_store1_2reg")
(set_attr "fp" "*,yes")]
)
-;; Operands 1 and 3 are tied together by the final condition; so we allow
-;; fairly lax checking on the second memory operation.
-(define_insn "load_pairsf"
- [(set (match_operand:SF 0 "register_operand" "=w,*r")
- (match_operand:SF 1 "aarch64_mem_pair_operand" "Ump,Ump"))
- (set (match_operand:SF 2 "register_operand" "=w,*r")
- (match_operand:SF 3 "memory_operand" "m,m"))]
- "rtx_equal_p (XEXP (operands[3], 0),
- plus_constant (Pmode,
- XEXP (operands[1], 0),
- GET_MODE_SIZE (SFmode)))"
- "@
- ldp\\t%s0, %s2, %1
- ldp\\t%w0, %w2, %1"
- [(set_attr "type" "neon_load1_2reg,load2")
- (set_attr "fp" "yes,*")]
-)
-
-(define_insn "load_pairdf"
- [(set (match_operand:DF 0 "register_operand" "=w,*r")
- (match_operand:DF 1 "aarch64_mem_pair_operand" "Ump,Ump"))
- (set (match_operand:DF 2 "register_operand" "=w,*r")
- (match_operand:DF 3 "memory_operand" "m,m"))]
- "rtx_equal_p (XEXP (operands[3], 0),
- plus_constant (Pmode,
- XEXP (operands[1], 0),
- GET_MODE_SIZE (DFmode)))"
- "@
- ldp\\t%d0, %d2, %1
- ldp\\t%x0, %x2, %1"
- [(set_attr "type" "neon_load1_2reg,load2")
- (set_attr "fp" "yes,*")]
-)
-
-;; Operands 0 and 2 are tied together by the final condition; so we allow
-;; fairly lax checking on the second memory operation.
-(define_insn "store_pairsf"
- [(set (match_operand:SF 0 "aarch64_mem_pair_operand" "=Ump,Ump")
- (match_operand:SF 1 "aarch64_reg_or_fp_zero" "w,*rY"))
- (set (match_operand:SF 2 "memory_operand" "=m,m")
- (match_operand:SF 3 "aarch64_reg_or_fp_zero" "w,*rY"))]
- "rtx_equal_p (XEXP (operands[2], 0),
- plus_constant (Pmode,
- XEXP (operands[0], 0),
- GET_MODE_SIZE (SFmode)))"
- "@
- stp\\t%s1, %s3, %0
- stp\\t%w1, %w3, %0"
- [(set_attr "type" "neon_store1_2reg,store2")
- (set_attr "fp" "yes,*")]
-)
-
-(define_insn "store_pairdf"
- [(set (match_operand:DF 0 "aarch64_mem_pair_operand" "=Ump,Ump")
- (match_operand:DF 1 "aarch64_reg_or_fp_zero" "w,*rY"))
- (set (match_operand:DF 2 "memory_operand" "=m,m")
- (match_operand:DF 3 "aarch64_reg_or_fp_zero" "w,*rY"))]
- "rtx_equal_p (XEXP (operands[2], 0),
- plus_constant (Pmode,
- XEXP (operands[0], 0),
- GET_MODE_SIZE (DFmode)))"
- "@
- stp\\t%d1, %d3, %0
- stp\\t%x1, %x3, %0"
- [(set_attr "type" "neon_store1_2reg,store2")
- (set_attr "fp" "yes,*")]
-)
-
;; Load pair with post-index writeback. This is primarily used in function
;; epilogues.
(define_insn "loadwb_pair<GPI:mode>_<P:mode>"
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index cceb57525c7aa44933419bd317b1f03a7b76f4c4..6147d93f56649cbc9fe577a433bca610e476ab2c 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -69,6 +69,12 @@
;; Double vector modes.
(define_mode_iterator VD [V8QI V4HI V4HF V2SI V2SF])
+;; All modes stored in registers d0-d31.
+(define_mode_iterator DREG [V8QI V4HI V4HF V2SI V2SF DF])
+
+;; Copy of the above.
+(define_mode_iterator DREG2 [V8QI V4HI V4HF V2SI V2SF DF])
+
;; vector, 64-bit container, all integer modes
(define_mode_iterator VD_BHSI [V8QI V4HI V2SI])
@@ -235,6 +241,18 @@
;; Double scalar modes
(define_mode_iterator DX [DI DF])
+;; Duplicate of the above
+(define_mode_iterator DX2 [DI DF])
+
+;; Single scalar modes
+(define_mode_iterator SX [SI SF])
+
+;; Duplicate of the above
+(define_mode_iterator SX2 [SI SF])
+
+;; Single and double integer and float modes
+(define_mode_iterator DSX [DF DI SF SI])
+
;; Modes available for <f>mul lane operations.
(define_mode_iterator VMUL [V4HI V8HI V2SI V4SI
(V4HF "TARGET_SIMD_F16INST")
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 11243c4ce00aa7d16a886bb24b01180801c68f4e..ee6e050dd839c329baa05bdfe878b786f1def969 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -62,6 +62,10 @@
(and (match_code "const_double")
(match_test "aarch64_float_const_zero_rtx_p (op)"))))
+(define_predicate "aarch64_reg_zero_or_fp_zero"
+ (ior (match_operand 0 "aarch64_reg_or_fp_zero")
+ (match_operand 0 "aarch64_reg_or_zero")))
+
(define_predicate "aarch64_reg_zero_or_m1_or_1"
(and (match_code "reg,subreg,const_int")
(ior (match_operand 0 "register_operand")
diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_6.c b/gcc/testsuite/gcc.target/aarch64/ldp_stp_6.c
new file mode 100644
index 0000000000000000000000000000000000000000..2d982f3389b668f2042d48ba3db04e619fd999f3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_6.c
@@ -0,0 +1,20 @@
+/* { dg-options "-O2" } */
+
+typedef float __attribute__ ((vector_size (8))) vec;
+
+struct pair
+{
+ vec e1;
+ double e2;
+};
+
+vec tmp;
+
+void
+stp (struct pair *p)
+{
+ p->e1 = tmp;
+ p->e2 = 1.0;
+
+ /* { dg-final { scan-assembler "stp\td\[0-9\]+, d\[0-9\]+, \\\[x\[0-9\]+\\\]" } } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_7.c b/gcc/testsuite/gcc.target/aarch64/ldp_stp_7.c
new file mode 100644
index 0000000000000000000000000000000000000000..06607de6b3e36a4d759d915a9f7880284391aa08
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_7.c
@@ -0,0 +1,47 @@
+/* { dg-options "-O2" } */
+
+struct pair
+{
+ double a;
+ long int b;
+};
+
+void
+stp (struct pair *p)
+{
+ p->a = 0.0;
+ p->b = 1;
+}
+
+/* { dg-final { scan-assembler "stp\txzr, x\[0-9\]+, \\\[x\[0-9\]+\\\]" } } */
+
+void
+stp2 (struct pair *p)
+{
+ p->a = 0.0;
+ p->b = 0;
+}
+
+struct reverse_pair
+{
+ long int a;
+ double b;
+};
+
+void
+stp_reverse (struct reverse_pair *p)
+{
+ p->a = 1;
+ p->b = 0.0;
+}
+
+/* { dg-final { scan-assembler "stp\tx\[0-9\]+, xzr, \\\[x\[0-9\]+\\\]" } } */
+
+void
+stp_reverse2 (struct reverse_pair *p)
+{
+ p->a = 0;
+ p->b = 0.0;
+}
+
+/* { dg-final { scan-assembler-times "stp\txzr, xzr, \\\[x\[0-9\]+\\\]" 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_8.c b/gcc/testsuite/gcc.target/aarch64/ldp_stp_8.c
new file mode 100644
index 0000000000000000000000000000000000000000..1a47e233814e564d549245683a4e59fdb422bdad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_8.c
@@ -0,0 +1,30 @@
+/* { dg-options "-O2" } */
+
+typedef float __attribute__ ((vector_size (8))) fvec;
+typedef int __attribute__ ((vector_size (8))) ivec;
+
+struct pair
+{
+ double a;
+ fvec b;
+};
+
+void ldp (double *a, fvec *b, struct pair *p)
+{
+ *a = p->a;
+ *b = p->b;
+}
+
+struct vec_pair
+{
+ fvec a;
+ ivec b;
+};
+
+void ldp2 (fvec *a, ivec *b, struct vec_pair *p)
+{
+ *a = p->a;
+ *b = p->b;
+}
+
+/* { dg-final { scan-assembler-times "ldp\td\[0-9\], d\[0-9\]+, \\\[x\[0-9\]+\\\]" 2 } } */