Use LDP/STP for large SIMD struct types, since they support useful immediate
offsets and are typically faster than LD1/ST1.
This removes the differences between little- and big-endian code generation
and allows LDP/STP to be used without an UNSPEC.
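As an illustration (a hypothetical example, mirroring the updated
simd-abi-8.c test below), copying a 64-byte SIMD struct on little-endian can
now be expanded with LDP/STP of Q registers instead of LD1/ST1:

  #include <arm_neon.h>

  /* Hypothetical example: with this patch the copy below is expected to be
     expanded as two ldp/stp pairs of Q registers (e.g. ldp q30, q31, [x1])
     on little-endian as well as big-endian, rather than ld1/st1.  */
  void
  copy (int64x2x4_t *dst, int64x2x4_t *src)
  {
    *dst = *src;
  }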
Passes regress and bootstrap, OK for commit?
gcc:
	* config/aarch64/aarch64.cc (aarch64_classify_address): Treat SIMD
	structs identically in little- and big-endian.
	* config/aarch64/aarch64-simd.md (aarch64_mov<mode>): Remove VSTRUCT
	instructions.
	(aarch64_be_mov<mode>): Allow little-endian, rename to aarch64_mov<mode>.
	(aarch64_be_movoi): Allow little-endian, rename to aarch64_movoi.
	(aarch64_be_movci): Allow little-endian, rename to aarch64_movci.
	(aarch64_be_movxi): Allow little-endian, rename to aarch64_movxi.
	Remove big-endian special case in define_split variants.

gcc/testsuite:
	* gcc.target/aarch64/torture/simd-abi-8.c: Update to check for LDP/STP.
---
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 16b7445d9f72f77a98ab262e21fd24e6cc97eba0..bb8b6963fd5117be82afe6ccd7154ae5302c3691 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -7917,32 +7917,6 @@
[(set_attr "type" "neon_store1_4reg<q>")]
)
-(define_insn "*aarch64_mov<mode>"
- [(set (match_operand:VSTRUCT_QD 0 "aarch64_simd_nonimmediate_operand")
- (match_operand:VSTRUCT_QD 1 "aarch64_simd_general_operand"))]
- "TARGET_SIMD && !BYTES_BIG_ENDIAN
- && (register_operand (operands[0], <MODE>mode)
- || register_operand (operands[1], <MODE>mode))"
- {@ [ cons: =0 , 1 ; attrs: type , length ]
- [ w , w ; multiple , <insn_count> ] #
- [ Utv , w ; neon_store<nregs>_<nregs>reg_q , 4 ] st1\t{%S1.<Vtype> - %<Vendreg>1.<Vtype>}, %0
- [ w , Utv ; neon_load<nregs>_<nregs>reg_q , 4 ] ld1\t{%S0.<Vtype> - %<Vendreg>0.<Vtype>}, %1
- }
-)
-
-(define_insn "*aarch64_mov<mode>"
- [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand")
- (match_operand:VSTRUCT 1 "aarch64_simd_general_operand"))]
- "TARGET_SIMD && !BYTES_BIG_ENDIAN
- && (register_operand (operands[0], <MODE>mode)
- || register_operand (operands[1], <MODE>mode))"
- {@ [ cons: =0 , 1 ; attrs: type , length ]
- [ w , w ; multiple , <insn_count> ] #
- [ Utv , w ; neon_store<nregs>_<nregs>reg_q , 4 ] st1\t{%S1.16b - %<Vendreg>1.16b}, %0
- [ w , Utv ; neon_load<nregs>_<nregs>reg_q , 4 ] ld1\t{%S0.16b - %<Vendreg>0.16b}, %1
- }
-)
-
(define_insn "*aarch64_movv8di"
[(set (match_operand:V8DI 0 "nonimmediate_operand" "=r,m,r")
(match_operand:V8DI 1 "general_operand" " r,r,m"))]
@@ -7972,11 +7946,10 @@
[(set_attr "type" "neon_store1_1reg<q>")]
)
-(define_insn "*aarch64_be_mov<mode>"
+(define_insn "*aarch64_mov<mode>"
[(set (match_operand:VSTRUCT_2D 0 "nonimmediate_operand")
(match_operand:VSTRUCT_2D 1 "general_operand"))]
"TARGET_FLOAT
- && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
&& (register_operand (operands[0], <MODE>mode)
|| register_operand (operands[1], <MODE>mode))"
{@ [ cons: =0 , 1 ; attrs: type , length ]
@@ -7986,11 +7959,10 @@
}
)
-(define_insn "*aarch64_be_mov<mode>"
+(define_insn "*aarch64_mov<mode>"
[(set (match_operand:VSTRUCT_2Q 0 "nonimmediate_operand")
(match_operand:VSTRUCT_2Q 1 "general_operand"))]
"TARGET_FLOAT
- && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
&& (register_operand (operands[0], <MODE>mode)
|| register_operand (operands[1], <MODE>mode))"
{@ [ cons: =0 , 1 ; attrs: type , arch , length ]
@@ -8000,11 +7972,10 @@
}
)
-(define_insn "*aarch64_be_movoi"
+(define_insn "*aarch64_movoi"
[(set (match_operand:OI 0 "nonimmediate_operand")
(match_operand:OI 1 "general_operand"))]
"TARGET_FLOAT
- && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
&& (register_operand (operands[0], OImode)
|| register_operand (operands[1], OImode))"
{@ [ cons: =0 , 1 ; attrs: type , arch , length ]
@@ -8014,11 +7985,10 @@
}
)
-(define_insn "*aarch64_be_mov<mode>"
+(define_insn "*aarch64_mov<mode>"
[(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand" "=w,o,w")
(match_operand:VSTRUCT_3QD 1 "general_operand" " w,w,o"))]
"TARGET_FLOAT
- && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
&& (register_operand (operands[0], <MODE>mode)
|| register_operand (operands[1], <MODE>mode))"
"#"
@@ -8027,11 +7997,10 @@
(set_attr "length" "12,8,8")]
)
-(define_insn "*aarch64_be_movci"
+(define_insn "*aarch64_movci"
[(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
(match_operand:CI 1 "general_operand" " w,w,o"))]
"TARGET_FLOAT
- && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
&& (register_operand (operands[0], CImode)
|| register_operand (operands[1], CImode))"
"#"
@@ -8040,11 +8009,10 @@
(set_attr "length" "12,8,8")]
)
-(define_insn "*aarch64_be_mov<mode>"
+(define_insn "*aarch64_mov<mode>"
[(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand" "=w,o,w")
(match_operand:VSTRUCT_4QD 1 "general_operand" " w,w,o"))]
"TARGET_FLOAT
- && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
&& (register_operand (operands[0], <MODE>mode)
|| register_operand (operands[1], <MODE>mode))"
"#"
@@ -8053,11 +8021,10 @@
(set_attr "length" "16,8,8")]
)
-(define_insn "*aarch64_be_movxi"
+(define_insn "*aarch64_movxi"
[(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
(match_operand:XI 1 "general_operand" " w,w,o"))]
"TARGET_FLOAT
- && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
&& (register_operand (operands[0], XImode)
|| register_operand (operands[1], XImode))"
"#"
@@ -8094,11 +8061,8 @@
{
if (register_operand (operands[0], <MODE>mode)
&& register_operand (operands[1], <MODE>mode))
- {
- aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 3);
- DONE;
- }
- else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
+ aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 3);
+ else
{
int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
@@ -8116,10 +8080,8 @@
operands[1],
<MODE>mode,
2 * elt_size)));
- DONE;
}
- else
- FAIL;
+ DONE;
})
(define_split
@@ -8130,11 +8092,8 @@
{
if (register_operand (operands[0], CImode)
&& register_operand (operands[1], CImode))
- {
- aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
- DONE;
- }
- else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
+ aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
+ else
{
emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
simplify_gen_subreg (OImode, operands[1], CImode, 0));
@@ -8144,10 +8103,8 @@
gen_lowpart (V16QImode,
simplify_gen_subreg (TImode, operands[1],
CImode, 32)));
- DONE;
}
- else
- FAIL;
+ DONE;
})
(define_split
@@ -8158,11 +8115,8 @@
{
if (register_operand (operands[0], <MODE>mode)
&& register_operand (operands[1], <MODE>mode))
- {
- aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 4);
- DONE;
- }
- else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
+ aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 4);
+ else
{
int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
@@ -8174,10 +8128,8 @@
<MODE>mode, 2 * elt_size),
simplify_gen_subreg (pair_mode, operands[1],
<MODE>mode, 2 * elt_size));
- DONE;
}
- else
- FAIL;
+ DONE;
})
(define_split
@@ -8188,20 +8140,15 @@
{
if (register_operand (operands[0], XImode)
&& register_operand (operands[1], XImode))
- {
- aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
- DONE;
- }
- else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
+ aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
+ else
{
emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
simplify_gen_subreg (OImode, operands[1], XImode, 0));
emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
simplify_gen_subreg (OImode, operands[1], XImode, 32));
- DONE;
}
- else
- FAIL;
+ DONE;
})
(define_split
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 8751d8741b8888a7c4ff24b2cd1fb3848c3f8e06..284e0a8cfa55b5d5e2dfac459fd3eb59acf6cb75 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -10439,7 +10439,7 @@ aarch64_classify_address (struct aarch64_address_info *info,
unsigned int vec_flags = aarch64_classify_vector_mode (mode);
vec_flags &= ~VEC_PARTIAL;
- /* On BE, we use load/store pair for all large int mode load/stores.
+ /* We use load/store pair for all large int mode load/stores.
TI/TF/TDmode may also use a load/store pair. */
bool advsimd_struct_p = (vec_flags == (VEC_ADVSIMD | VEC_STRUCT));
bool load_store_pair_p = (type == ADDR_QUERY_LDP_STP
@@ -10447,8 +10447,7 @@ aarch64_classify_address (struct aarch64_address_info *info,
|| mode == TImode
|| mode == TFmode
|| mode == TDmode
- || ((!TARGET_SIMD || BYTES_BIG_ENDIAN)
- && advsimd_struct_p));
+ || advsimd_struct_p);
/* If we are dealing with ADDR_QUERY_LDP_STP_N that means the incoming mode
corresponds to the actual size of the memory being loaded/stored and the
mode of the corresponding addressing mode is half of that. */
@@ -10482,14 +10481,6 @@ aarch64_classify_address (struct aarch64_address_info *info,
&& (code != REG && code != PLUS))
return false;
- /* On LE, for AdvSIMD, don't support anything other than POST_INC or
- REG addressing. */
- if (advsimd_struct_p
- && TARGET_SIMD
- && !BYTES_BIG_ENDIAN
- && (code != POST_INC && code != REG))
- return false;
-
gcc_checking_assert (GET_MODE (x) == VOIDmode
|| SCALAR_INT_MODE_P (GET_MODE (x)));
diff --git a/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-8.c b/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-8.c
index 2b278caaa4e5815869cf35e88c121d79a7b1d647..e8871c7f22134de8ced14a5a8d9669c33efda4f8 100644
--- a/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-8.c
+++ b/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-8.c
@@ -17,7 +17,5 @@ g (int64x2x4_t *ptr)
*ptr = save;
}
-/* { dg-final { scan-assembler-times {\tld1\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tst1\t} 1 } } */
-/* { dg-final { scan-assembler-not {\tld[pr]\tq} } } */
-/* { dg-final { scan-assembler-not {\tst[pr]\tq} } } */
+/* { dg-final { scan-assembler {\tld[pr]\tq} } } */
+/* { dg-final { scan-assembler {\tst[pr]\tq} } } */