> Patch 2 is a bug fix that fixes up the splitters so that they take into account the right register for the right mode. For instance, a register not fit for a TImode value shouldn't be put in one even if the larger mode allows a different register. This is possible for OImode values or indeed HFA-style values being passed around as parameters, and is potentially an issue for folks building hard-float systems with NEON and using some of the large structures.
The large struct mode splitters don't take into account whether a TImode value can be generated from a value that is in an appropriate NEON register for that value. This is possible in cases where you have an EImode, OImode, CImode or TImode value in the appropriate registers, as these could be passed in their corresponding NEON D registers. This was exposed by the tests for v{ld/st/tbl/tbx}2/3/4{lane/}* and friends in the new set of tests that follow at the end of this patch series. This is a problem for folks using the new hard-float ABI and passing such values in registers, so it might not show up that much in practice, but it's certainly worth backporting after it has sat in trunk for a few days. It is certainly not a regression, since this bug has always been there, but it is a fundamental correctness issue in the backend with respect to such splits, so I'd like some more consensus on whether this can be safely backported. Regards, Ramana 2012-07-27 Ramana Radhakrishnan <ramana.radhakrish...@linaro.org> PR target/ * config/arm/arm-protos.h (arm_split_eimoves): Declare. (arm_split_tocx_imoves): Declare. * config/arm/iterators.md (TOCXI): New. * config/arm/neon.md (EI TI OI CI XI mode splitters): Unify and use iterator. Simplify EImode splitter. Move logic to ... * config/arm/arm.c (arm_split_eimoves): ... here. Handle case for EImode values in registers not suitable for splits into TImode values. (arm_split_tocx_imoves): Likewise. 
--- gcc/config/arm/arm-protos.h | 3 + gcc/config/arm/arm.c | 91 +++++++++++++++++++++++++++++++++++++++++++ gcc/config/arm/iterators.md | 3 + gcc/config/arm/neon.md | 84 +++++----------------------------------- 4 files changed, 107 insertions(+), 74 deletions(-) diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index c590ef4..dc93c5d 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -248,6 +248,9 @@ extern int vfp3_const_double_for_fract_bits (rtx); extern void arm_emit_coreregs_64bit_shift (enum rtx_code, rtx, rtx, rtx, rtx, rtx); extern bool arm_validize_comparison (rtx *, rtx *, rtx *); +extern void arm_split_tocx_imoves (rtx *, enum machine_mode); +extern void arm_split_eimoves (rtx *); + #endif /* RTX_CODE */ extern void arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel); diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 1f3f9b3..b281485 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -26410,4 +26410,95 @@ arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2) } +/* EImode values are usually in 3 DImode registers. This could be suitably + split into TImode moves and DImode moves. 
*/ +void +arm_split_eimoves (rtx *operands) +{ + int rdest = REGNO (operands[0]); + int rsrc = REGNO (operands[1]); + int count = 0; + int increment = 0; + rtx dest[3], src[3]; + int i, j; + + if (NEON_REGNO_OK_FOR_QUAD (rdest) && NEON_REGNO_OK_FOR_QUAD (rsrc)) + { + dest[0] = gen_rtx_REG (TImode, rdest); + src[0] = gen_rtx_REG (TImode, rsrc); + count = 2; + increment = 4; + } + else + { + dest[0] = gen_rtx_REG (DImode, rdest); + src[0] = gen_rtx_REG (DImode, rsrc); + dest[1] = gen_rtx_REG (DImode, rdest + 2); + src[1] = gen_rtx_REG (DImode, rsrc + 2); + count = 3; + increment = 2; + } + + dest[count - 1] = gen_rtx_REG (DImode, rdest + 4); + src[count - 1] = gen_rtx_REG (DImode, rsrc + 4); + + neon_disambiguate_copy (operands, dest, src, count); + + for (i = 0, j = 0 ; j < count ; i = i + 2, j++) + emit_move_insn (operands[i], operands[i + 1]); + + return; +} + +/* Split TI, CI, OI and XImode moves into appropriate smaller + forms. */ +void +arm_split_tocx_imoves (rtx *operands, enum machine_mode mode) +{ + int rdest = REGNO (operands[0]); + int rsrc = REGNO (operands[1]); + enum machine_mode split_mode; + int count = 0; + int factor = 0; + int j; + /* We never should need more than 8 DImode registers in the worst case. 
*/ + rtx dest[8], src[8]; + int i; + + if (NEON_REGNO_OK_FOR_QUAD (rdest) && NEON_REGNO_OK_FOR_QUAD (rsrc)) + { + split_mode = TImode; + if (dump_file) + fprintf (dump_file, "split_mode is TImode\n"); + } + else + { + split_mode = DImode; + if (dump_file) + fprintf (dump_file, "split_mode is DImode\n"); + } + + + count = GET_MODE_SIZE (mode) / GET_MODE_SIZE (split_mode); + factor = GET_MODE_SIZE (split_mode) / UNITS_PER_WORD; + + if (dump_file) + fprintf (dump_file, "count %d factor %d\n", count, factor); + + for (i = 0 ; i < count; i++) + { + dest[i] = gen_rtx_REG (split_mode, rdest + i * factor ); + src[i] = gen_rtx_REG (split_mode, rsrc + i * factor); + } + + neon_disambiguate_copy (operands, dest, src, count); + for (j = 0, i = 0 ; j < count ; j++, i = i + 2) + { + emit_move_insn (operands[i], operands[i + 1]); + } + + return; + +} + #include "gt-arm.h" diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index def8d9f..3474d16 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -89,6 +89,9 @@ ;; Opaque structure types wider than TImode. (define_mode_iterator VSTRUCT [EI OI CI XI]) +;; Opaque structure types other than EImode. +(define_mode_iterator TOCXI [TI OI CI XI]) + ;; Opaque structure types used in table lookups (except vtbl1/vtbx1). 
(define_mode_iterator VTAB [TI EI OI]) diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 1ffbb7d..7434625 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -293,85 +293,21 @@ [(set (match_operand:EI 0 "s_register_operand" "") (match_operand:EI 1 "s_register_operand" ""))] "TARGET_NEON && reload_completed" - [(set (match_dup 0) (match_dup 1)) - (set (match_dup 2) (match_dup 3))] + [(const_int 0)] { - int rdest = REGNO (operands[0]); - int rsrc = REGNO (operands[1]); - rtx dest[2], src[2]; - - dest[0] = gen_rtx_REG (TImode, rdest); - src[0] = gen_rtx_REG (TImode, rsrc); - dest[1] = gen_rtx_REG (DImode, rdest + 4); - src[1] = gen_rtx_REG (DImode, rsrc + 4); - - neon_disambiguate_copy (operands, dest, src, 2); + arm_split_eimoves (operands); + DONE; }) -(define_split - [(set (match_operand:OI 0 "s_register_operand" "") - (match_operand:OI 1 "s_register_operand" ""))] +;; Splitter for TI, OI, CI and XI modes. +(define_split ;; TI, OI, CI and XImode move split. 
+ [(set (match_operand:TOCXI 0 "s_register_operand" "") + (match_operand:TOCXI 1 "s_register_operand" ""))] "TARGET_NEON && reload_completed" - [(set (match_dup 0) (match_dup 1)) - (set (match_dup 2) (match_dup 3))] + [(const_int 0)] { - int rdest = REGNO (operands[0]); - int rsrc = REGNO (operands[1]); - rtx dest[2], src[2]; - - dest[0] = gen_rtx_REG (TImode, rdest); - src[0] = gen_rtx_REG (TImode, rsrc); - dest[1] = gen_rtx_REG (TImode, rdest + 4); - src[1] = gen_rtx_REG (TImode, rsrc + 4); - - neon_disambiguate_copy (operands, dest, src, 2); -}) - -(define_split - [(set (match_operand:CI 0 "s_register_operand" "") - (match_operand:CI 1 "s_register_operand" ""))] - "TARGET_NEON && reload_completed" - [(set (match_dup 0) (match_dup 1)) - (set (match_dup 2) (match_dup 3)) - (set (match_dup 4) (match_dup 5))] -{ - int rdest = REGNO (operands[0]); - int rsrc = REGNO (operands[1]); - rtx dest[3], src[3]; - - dest[0] = gen_rtx_REG (TImode, rdest); - src[0] = gen_rtx_REG (TImode, rsrc); - dest[1] = gen_rtx_REG (TImode, rdest + 4); - src[1] = gen_rtx_REG (TImode, rsrc + 4); - dest[2] = gen_rtx_REG (TImode, rdest + 8); - src[2] = gen_rtx_REG (TImode, rsrc + 8); - - neon_disambiguate_copy (operands, dest, src, 3); -}) - -(define_split - [(set (match_operand:XI 0 "s_register_operand" "") - (match_operand:XI 1 "s_register_operand" ""))] - "TARGET_NEON && reload_completed" - [(set (match_dup 0) (match_dup 1)) - (set (match_dup 2) (match_dup 3)) - (set (match_dup 4) (match_dup 5)) - (set (match_dup 6) (match_dup 7))] -{ - int rdest = REGNO (operands[0]); - int rsrc = REGNO (operands[1]); - rtx dest[4], src[4]; - - dest[0] = gen_rtx_REG (TImode, rdest); - src[0] = gen_rtx_REG (TImode, rsrc); - dest[1] = gen_rtx_REG (TImode, rdest + 4); - src[1] = gen_rtx_REG (TImode, rsrc + 4); - dest[2] = gen_rtx_REG (TImode, rdest + 8); - src[2] = gen_rtx_REG (TImode, rsrc + 8); - dest[3] = gen_rtx_REG (TImode, rdest + 12); - src[3] = gen_rtx_REG (TImode, rsrc + 12); - - 
neon_disambiguate_copy (operands, dest, src, 4); + arm_split_tocx_imoves (operands, <MODE>mode); + DONE; }) (define_expand "movmisalign<mode>" -- 1.7.4.1