Sorry, I fixed a couple of typos that prevented the patch from actually working. Here's the updated version. I'll be building on ADDR_QUERY_STR to identify and prevent pre/post-incrementing addresses for stores on Falkor.
Siddhesh 2018-xx-xx Jim Wilson <jim.wil...@linaro.org> Kugan Vivekanandarajah <kugan.vivekanandara...@linaro.org> Siddhesh Poyarekar <siddh...@sourceware.org> gcc/ * gcc/config/aarch64/aarch64-protos.h (aarch64_addr_query_type): New member ADDR_QUERY_STR. * gcc/config/aarch64/aarch64-tuning-flags.def (SLOW_REGOFFSET_QUADWORD_STORE): New. * gcc/config/aarch64/aarch64.c (qdf24xx_tunings): Add SLOW_REGOFFSET_QUADWORD_STORE to tuning flags. (aarch64_classify_address): Avoid register indexing for quad mode stores when SLOW_REGOFFSET_QUADWORD_STORE is set. * gcc/config/aarch64/constraints.md (Uts): New constraint. * gcc/config/aarch64/aarch64.md (movti_aarch64, movtf_aarch64): Use it. * gcc/config/aarch64/aarch64-simd.md (aarch64_simd_mov<mode>): Likewise. gcc/testsuite/ * gcc/testsuite/gcc.target/aarch64/pr82533.c: New test case.
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 159bc6aee7e..15924fc3f58 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -120,6 +120,9 @@ enum aarch64_symbol_type ADDR_QUERY_LDP_STP Query what is valid for a load/store pair. + ADDR_QUERY_STR + Query what is valid for a store. + ADDR_QUERY_ANY Query what is valid for at least one memory constraint, which may allow things that "m" doesn't. For example, the SVE LDR and STR @@ -128,6 +131,7 @@ enum aarch64_symbol_type enum aarch64_addr_query_type { ADDR_QUERY_M, ADDR_QUERY_LDP_STP, + ADDR_QUERY_STR, ADDR_QUERY_ANY }; diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 3d1f6a01cb7..48d92702723 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -131,9 +131,9 @@ (define_insn "*aarch64_simd_mov<VQ:mode>" [(set (match_operand:VQ 0 "nonimmediate_operand" - "=w, Umq, m, w, ?r, ?w, ?r, w") + "=w, Umq, Uts, w, ?r, ?w, ?r, w") (match_operand:VQ 1 "general_operand" - "m, Dz, w, w, w, r, r, Dn"))] + "m, Dz, w, w, w, r, r, Dn"))] "TARGET_SIMD && (register_operand (operands[0], <MODE>mode) || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))" diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def b/gcc/config/aarch64/aarch64-tuning-flags.def index ea9ead234cb..04baf5b6de6 100644 --- a/gcc/config/aarch64/aarch64-tuning-flags.def +++ b/gcc/config/aarch64/aarch64-tuning-flags.def @@ -41,4 +41,8 @@ AARCH64_EXTRA_TUNING_OPTION ("slow_unaligned_ldpw", SLOW_UNALIGNED_LDPW) are not considered cheap. */ AARCH64_EXTRA_TUNING_OPTION ("cheap_shift_extend", CHEAP_SHIFT_EXTEND) +/* Don't use a register offset in a memory address for a quad-word store. 
*/ +AARCH64_EXTRA_TUNING_OPTION ("slow_regoffset_quadword_store", + SLOW_REGOFFSET_QUADWORD_STORE) + #undef AARCH64_EXTRA_TUNING_OPTION diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 0599a79bfeb..664d4a18354 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -894,7 +894,7 @@ static const struct tune_params qdf24xx_tunings = 2, /* min_div_recip_mul_df. */ 0, /* max_case_values. */ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ - (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */ + (AARCH64_EXTRA_TUNE_SLOW_REGOFFSET_QUADWORD_STORE), /* tune_flags. */ &qdf24xx_prefetch_tune }; @@ -5531,6 +5531,16 @@ aarch64_classify_address (struct aarch64_address_info *info, || vec_flags == VEC_ADVSIMD || vec_flags == VEC_SVE_DATA)); + /* Avoid register indexing for 128-bit stores when the + AARCH64_EXTRA_TUNE_SLOW_REGOFFSET_QUADWORD_STORE option is set. */ + if (!optimize_size + && type == ADDR_QUERY_STR + && (aarch64_tune_params.extra_tuning_flags + & AARCH64_EXTRA_TUNE_SLOW_REGOFFSET_QUADWORD_STORE) + && (mode == TImode || mode == TFmode + || aarch64_vector_data_mode_p (mode))) + allow_reg_index_p = false; + /* For SVE, only accept [Rn], [Rn, Rm, LSL #shift] and [Rn, #offset, MUL VL]. 
*/ if ((vec_flags & (VEC_SVE_DATA | VEC_SVE_PRED)) != 0 diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index edb6a758333..348b867ff7f 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -1079,7 +1079,7 @@ (define_insn "*movti_aarch64" [(set (match_operand:TI 0 - "nonimmediate_operand" "=r, w,r,w,r,m,m,w,m") + "nonimmediate_operand" "=r,w,r,w,r,m,m,w,Uts") (match_operand:TI 1 "aarch64_movti_operand" " rn,r,w,w,m,r,Z,m,w"))] "(register_operand (operands[0], TImode) @@ -1226,9 +1226,9 @@ (define_insn "*movtf_aarch64" [(set (match_operand:TF 0 - "nonimmediate_operand" "=w,?&r,w ,?r,w,?w,w,m,?r,m ,m") + "nonimmediate_operand" "=w,?&r,w ,?r,w,?w,w,Uts,?r,m ,m") (match_operand:TF 1 - "general_operand" " w,?r, ?r,w ,Y,Y ,m,w,m ,?r,Y"))] + "general_operand" " w,?r, ?r,w ,Y,Y ,m,w ,m ,?r,Y"))] "TARGET_FLOAT && (register_operand (operands[0], TFmode) || aarch64_reg_or_fp_zero (operands[1], TFmode))" "@ diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md index 6cc4cadfd10..091df3dc06a 100644 --- a/gcc/config/aarch64/constraints.md +++ b/gcc/config/aarch64/constraints.md @@ -243,6 +243,13 @@ (match_test "aarch64_legitimate_address_p (V2DImode, XEXP (op, 0), 1)"))) +(define_memory_constraint "Uts" + "@internal + An address valid for storing a 128-bit AdvSIMD register" + (and (match_code "mem") + (match_test "aarch64_legitimate_address_p (GET_MODE (op), XEXP (op, 0), + 1, ADDR_QUERY_STR)"))) + (define_memory_constraint "Uty" "@internal An address valid for SVE LD1Rs." 
diff --git a/gcc/testsuite/gcc.target/aarch64/pr82533.c b/gcc/testsuite/gcc.target/aarch64/pr82533.c new file mode 100644 index 00000000000..fa28ffac03a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr82533.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-mcpu=falkor -O2 -ftree-vectorize" } */ + +void +copy (int N, double *c, double *a) +{ + for (int i = 0; i < N; ++i) + c[i] = a[i]; +} + +/* { dg-final { scan-assembler-not "str\tq\[0-9\]+, \\\[x\[0-9\]+, x\[0-9\]+\\\]" } } */