https://gcc.gnu.org/g:27d9b6d312678c7b9b09104eb0f48dc46e0f8ca2
commit r15-6085-g27d9b6d312678c7b9b09104eb0f48dc46e0f8ca2
Author: Wilco Dijkstra <wdijk...@ip-10-252-53-150.eu-west-1.compute.internal>
Date:   Fri May 10 17:13:40 2024 +0000

    AArch64: Use LDP/STP for large struct types

    Use LDP/STP for large struct types as they have useful immediate offsets
    and are typically faster.  This removes differences between little and
    big endian and allows use of LDP/STP without UNSPEC.

    gcc:
            * config/aarch64/aarch64.cc (aarch64_classify_address):
            Treat SIMD structs identically in little- and big-endian.
            * config/aarch64/aarch64-simd.md (aarch64_mov<mode>): Remove
            VSTRUCT instructions.
            (aarch64_be_mov<mode>): Allow little-endian, rename to
            aarch64_mov<mode>.
            (aarch64_be_movoi): Allow little-endian, rename to aarch64_movoi.
            (aarch64_be_movci): Allow little-endian, rename to aarch64_movci.
            (aarch64_be_movxi): Allow little-endian, rename to aarch64_movxi.
            Remove big-endian special case in define_split variants.

    gcc/testsuite:
            * gcc.target/aarch64/torture/simd-abi-8.c: Update to check for
            LDP/STP.
Diff: --- gcc/config/aarch64/aarch64-simd.md | 91 +++++----------------- gcc/config/aarch64/aarch64.cc | 13 +--- .../gcc.target/aarch64/torture/simd-abi-8.c | 6 +- 3 files changed, 23 insertions(+), 87 deletions(-) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 6228065c9f28..7959cca520a6 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -7873,32 +7873,6 @@ [(set_attr "type" "neon_store1_4reg<q>")] ) -(define_insn "*aarch64_mov<mode>" - [(set (match_operand:VSTRUCT_QD 0 "aarch64_simd_nonimmediate_operand") - (match_operand:VSTRUCT_QD 1 "aarch64_simd_general_operand"))] - "TARGET_SIMD && !BYTES_BIG_ENDIAN - && (register_operand (operands[0], <MODE>mode) - || register_operand (operands[1], <MODE>mode))" - {@ [ cons: =0 , 1 ; attrs: type , length ] - [ w , w ; multiple , <insn_count> ] # - [ Utv , w ; neon_store<nregs>_<nregs>reg_q , 4 ] st1\t{%S1.<Vtype> - %<Vendreg>1.<Vtype>}, %0 - [ w , Utv ; neon_load<nregs>_<nregs>reg_q , 4 ] ld1\t{%S0.<Vtype> - %<Vendreg>0.<Vtype>}, %1 - } -) - -(define_insn "*aarch64_mov<mode>" - [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand") - (match_operand:VSTRUCT 1 "aarch64_simd_general_operand"))] - "TARGET_SIMD && !BYTES_BIG_ENDIAN - && (register_operand (operands[0], <MODE>mode) - || register_operand (operands[1], <MODE>mode))" - {@ [ cons: =0 , 1 ; attrs: type , length ] - [ w , w ; multiple , <insn_count> ] # - [ Utv , w ; neon_store<nregs>_<nregs>reg_q , 4 ] st1\t{%S1.16b - %<Vendreg>1.16b}, %0 - [ w , Utv ; neon_load<nregs>_<nregs>reg_q , 4 ] ld1\t{%S0.16b - %<Vendreg>0.16b}, %1 - } -) - (define_insn "*aarch64_movv8di" [(set (match_operand:V8DI 0 "nonimmediate_operand" "=r,m,r") (match_operand:V8DI 1 "general_operand" " r,r,m"))] @@ -7928,11 +7902,10 @@ [(set_attr "type" "neon_store1_1reg<q>")] ) -(define_insn "*aarch64_be_mov<mode>" +(define_insn "*aarch64_mov<mode>" [(set (match_operand:VSTRUCT_2D 0 "nonimmediate_operand") 
(match_operand:VSTRUCT_2D 1 "general_operand"))] "TARGET_FLOAT - && (!TARGET_SIMD || BYTES_BIG_ENDIAN) && (register_operand (operands[0], <MODE>mode) || register_operand (operands[1], <MODE>mode))" {@ [ cons: =0 , 1 ; attrs: type , length ] @@ -7942,11 +7915,10 @@ } ) -(define_insn "*aarch64_be_mov<mode>" +(define_insn "*aarch64_mov<mode>" [(set (match_operand:VSTRUCT_2Q 0 "nonimmediate_operand") (match_operand:VSTRUCT_2Q 1 "general_operand"))] "TARGET_FLOAT - && (!TARGET_SIMD || BYTES_BIG_ENDIAN) && (register_operand (operands[0], <MODE>mode) || register_operand (operands[1], <MODE>mode))" {@ [ cons: =0 , 1 ; attrs: type , arch , length ] @@ -7956,11 +7928,10 @@ } ) -(define_insn "*aarch64_be_movoi" +(define_insn "*aarch64_movoi" [(set (match_operand:OI 0 "nonimmediate_operand") (match_operand:OI 1 "general_operand"))] "TARGET_FLOAT - && (!TARGET_SIMD || BYTES_BIG_ENDIAN) && (register_operand (operands[0], OImode) || register_operand (operands[1], OImode))" {@ [ cons: =0 , 1 ; attrs: type , arch , length ] @@ -7970,11 +7941,10 @@ } ) -(define_insn "*aarch64_be_mov<mode>" +(define_insn "*aarch64_mov<mode>" [(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand" "=w,o,w") (match_operand:VSTRUCT_3QD 1 "general_operand" " w,w,o"))] "TARGET_FLOAT - && (!TARGET_SIMD || BYTES_BIG_ENDIAN) && (register_operand (operands[0], <MODE>mode) || register_operand (operands[1], <MODE>mode))" "#" @@ -7983,11 +7953,10 @@ (set_attr "length" "12,8,8")] ) -(define_insn "*aarch64_be_movci" +(define_insn "*aarch64_movci" [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w") (match_operand:CI 1 "general_operand" " w,w,o"))] "TARGET_FLOAT - && (!TARGET_SIMD || BYTES_BIG_ENDIAN) && (register_operand (operands[0], CImode) || register_operand (operands[1], CImode))" "#" @@ -7996,11 +7965,10 @@ (set_attr "length" "12,8,8")] ) -(define_insn "*aarch64_be_mov<mode>" +(define_insn "*aarch64_mov<mode>" [(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand" "=w,o,w") 
(match_operand:VSTRUCT_4QD 1 "general_operand" " w,w,o"))] "TARGET_FLOAT - && (!TARGET_SIMD || BYTES_BIG_ENDIAN) && (register_operand (operands[0], <MODE>mode) || register_operand (operands[1], <MODE>mode))" "#" @@ -8009,11 +7977,10 @@ (set_attr "length" "16,8,8")] ) -(define_insn "*aarch64_be_movxi" +(define_insn "*aarch64_movxi" [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w") (match_operand:XI 1 "general_operand" " w,w,o"))] "TARGET_FLOAT - && (!TARGET_SIMD || BYTES_BIG_ENDIAN) && (register_operand (operands[0], XImode) || register_operand (operands[1], XImode))" "#" @@ -8050,11 +8017,8 @@ { if (register_operand (operands[0], <MODE>mode) && register_operand (operands[1], <MODE>mode)) - { - aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 3); - DONE; - } - else if (!TARGET_SIMD || BYTES_BIG_ENDIAN) + aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 3); + else { int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>; machine_mode pair_mode = elt_size == 16 ? 
V2x16QImode : V2x8QImode; @@ -8072,10 +8036,8 @@ operands[1], <MODE>mode, 2 * elt_size))); - DONE; } - else - FAIL; + DONE; }) (define_split @@ -8086,11 +8048,8 @@ { if (register_operand (operands[0], CImode) && register_operand (operands[1], CImode)) - { - aarch64_simd_emit_reg_reg_move (operands, TImode, 3); - DONE; - } - else if (!TARGET_SIMD || BYTES_BIG_ENDIAN) + aarch64_simd_emit_reg_reg_move (operands, TImode, 3); + else { emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0), simplify_gen_subreg (OImode, operands[1], CImode, 0)); @@ -8100,10 +8059,8 @@ gen_lowpart (V16QImode, simplify_gen_subreg (TImode, operands[1], CImode, 32))); - DONE; } - else - FAIL; + DONE; }) (define_split @@ -8114,11 +8071,8 @@ { if (register_operand (operands[0], <MODE>mode) && register_operand (operands[1], <MODE>mode)) - { - aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 4); - DONE; - } - else if (!TARGET_SIMD || BYTES_BIG_ENDIAN) + aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 4); + else { int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>; machine_mode pair_mode = elt_size == 16 ? 
V2x16QImode : V2x8QImode; @@ -8130,10 +8084,8 @@ <MODE>mode, 2 * elt_size), simplify_gen_subreg (pair_mode, operands[1], <MODE>mode, 2 * elt_size)); - DONE; } - else - FAIL; + DONE; }) (define_split @@ -8144,20 +8096,15 @@ { if (register_operand (operands[0], XImode) && register_operand (operands[1], XImode)) - { - aarch64_simd_emit_reg_reg_move (operands, TImode, 4); - DONE; - } - else if (!TARGET_SIMD || BYTES_BIG_ENDIAN) + aarch64_simd_emit_reg_reg_move (operands, TImode, 4); + else { emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0), simplify_gen_subreg (OImode, operands[1], XImode, 0)); emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32), simplify_gen_subreg (OImode, operands[1], XImode, 32)); - DONE; } - else - FAIL; + DONE; }) (define_split diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index ac93d076bd11..29b034a25c0e 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -10650,7 +10650,7 @@ aarch64_classify_address (struct aarch64_address_info *info, unsigned int vec_flags = aarch64_classify_vector_memory_mode (mode); vec_flags &= ~VEC_PARTIAL; - /* On BE, we use load/store pair for all large int mode load/stores. + /* We use load/store pair for all large int mode load/stores. TI/TF/TDmode may also use a load/store pair. */ bool advsimd_struct_p = (vec_flags == (VEC_ADVSIMD | VEC_STRUCT)); bool load_store_pair_p = (type == ADDR_QUERY_LDP_STP @@ -10658,8 +10658,7 @@ aarch64_classify_address (struct aarch64_address_info *info, || mode == TImode || mode == TFmode || mode == TDmode - || ((!TARGET_SIMD || BYTES_BIG_ENDIAN) - && advsimd_struct_p)); + || advsimd_struct_p); /* If we are dealing with ADDR_QUERY_LDP_STP_N that means the incoming mode corresponds to the actual size of the memory being loaded/stored and the mode of the corresponding addressing mode is half of that. 
*/ @@ -10692,14 +10691,6 @@ aarch64_classify_address (struct aarch64_address_info *info, && (code != REG && code != PLUS)) return false; - /* On LE, for AdvSIMD, don't support anything other than POST_INC or - REG addressing. */ - if (advsimd_struct_p - && TARGET_SIMD - && !BYTES_BIG_ENDIAN - && (code != POST_INC && code != REG)) - return false; - gcc_checking_assert (GET_MODE (x) == VOIDmode || SCALAR_INT_MODE_P (GET_MODE (x))); diff --git a/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-8.c b/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-8.c index 2b278caaa4e5..e8871c7f2213 100644 --- a/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-8.c +++ b/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-8.c @@ -17,7 +17,5 @@ g (int64x2x4_t *ptr) *ptr = save; } -/* { dg-final { scan-assembler-times {\tld1\t} 1 } } */ -/* { dg-final { scan-assembler-times {\tst1\t} 1 } } */ -/* { dg-final { scan-assembler-not {\tld[pr]\tq} } } */ -/* { dg-final { scan-assembler-not {\tst[pr]\tq} } } */ +/* { dg-final { scan-assembler {\tld[pr]\tq} } } */ +/* { dg-final { scan-assembler {\tst[pr]\tq} } } */