Hi all, In this PR we get an ICE when the hoist pass ends up creating an (insn 88 0 0 (set (reg:OI 136) (const_int 0 [0])) -1 (nil))
instruction. AArch64 doesn't support such an OImode set. The only OImode set operations that aarch64 supports are load/store-multiple operations on vector registers. want_to_gcse_p should have rejected this move long before process_insert_insn tried to insert it in the stream. But it didn't because can_assign_to_reg_without_clobbers_p is only given the (const_int 0) expression and asked whether there can be a valid SET operation on that. It should also consider the mode that such an operation is requested in, rather than extracting the mode from the operand (VOIDmode for CONST_INTs). Luckily, want_to_gcse_p already has a mode argument that it uses in its costs calculation, so we can just pass it down. This patch extends can_assign_to_reg_without_clobbers_p to take a mode argument and use it when testing the validity of the SET instructions that it creates, so such an OImode move is properly rejected. Bootstrapped and tested on aarch64-none-linux-gnu, arm-none-linux-gnueabihf, x86_64-unknown-linux-gnu. There are no codegen differences on SPEC2006 for aarch64 resulting from this patch. This bug appears in all versions that have aarch64, so it's not a regression, but I think it's a fairly low risk patch. Is this ok for trunk now or when stage 1 reopens? Thanks, Kyrill 2016-02-24 Kyrylo Tkachov <kyrylo.tkac...@arm.com> PR rtl-optimization/69886 * gcse.c (can_assign_to_reg_without_clobbers_p): Accept mode argument. Use it when checking validity of set instructions. (want_to_gcse_p): Pass mode to can_assign_to_reg_without_clobbers_p. (compute_ld_motion_mems): Update can_assign_to_reg_without_clobbers_p callsite. * rtl.h (can_assign_to_reg_without_clobbers_p): Update prototype. * store-motion.c (find_moveable_store): Update can_assign_to_reg_without_clobbers_p callsite. 2016-02-24 Kyrylo Tkachov <kyrylo.tkac...@arm.com> PR rtl-optimization/69886 * gcc.dg/torture/pr69886.c: New test.
diff --git a/gcc/gcse.c b/gcc/gcse.c index 500de7a95ce856d7cd710d3be1efed865d40703c..51277a1cb613a4005ee240c51949083daed8c54c 100644 --- a/gcc/gcse.c +++ b/gcc/gcse.c @@ -810,7 +810,7 @@ want_to_gcse_p (rtx x, machine_mode mode, int *max_distance_ptr) *max_distance_ptr = max_distance; } - return can_assign_to_reg_without_clobbers_p (x); + return can_assign_to_reg_without_clobbers_p (x, mode); } } @@ -818,9 +818,9 @@ want_to_gcse_p (rtx x, machine_mode mode, int *max_distance_ptr) static GTY(()) rtx_insn *test_insn; -/* Return true if we can assign X to a pseudo register such that the - resulting insn does not result in clobbering a hard register as a - side-effect. +/* Return true if we can assign X to a pseudo register of mode MODE + such that the resulting insn does not result in clobbering a hard + register as a side-effect. Additionally, if the target requires it, check that the resulting insn can be copied. If it cannot, this means that X is special and probably @@ -831,14 +831,14 @@ static GTY(()) rtx_insn *test_insn; maybe live hard regs. */ bool -can_assign_to_reg_without_clobbers_p (rtx x) +can_assign_to_reg_without_clobbers_p (rtx x, machine_mode mode) { int num_clobbers = 0; int icode; bool can_assign = false; /* If this is a valid operand, we are OK. If it's VOIDmode, we aren't. */ - if (general_operand (x, GET_MODE (x))) + if (general_operand (x, mode)) return 1; else if (GET_MODE (x) == VOIDmode) return 0; @@ -857,7 +857,7 @@ can_assign_to_reg_without_clobbers_p (rtx x) /* Now make an insn like the one we would make when GCSE'ing and see if valid. */ - PUT_MODE (SET_DEST (PATTERN (test_insn)), GET_MODE (x)); + PUT_MODE (SET_DEST (PATTERN (test_insn)), mode); SET_SRC (PATTERN (test_insn)) = x; icode = recog (PATTERN (test_insn), test_insn, &num_clobbers); @@ -3830,12 +3830,13 @@ compute_ld_motion_mems (void) if (MEM_P (dest) && simple_mem (dest)) { ptr = ldst_entry (dest); - + machine_mode src_mode = GET_MODE (src); if (! MEM_P (src) && GET_CODE (src) != ASM_OPERANDS /* Check for REG manually since want_to_gcse_p returns 0 for all REGs. */ - && can_assign_to_reg_without_clobbers_p (src)) + && can_assign_to_reg_without_clobbers_p (src, + src_mode)) ptr->stores = alloc_INSN_LIST (insn, ptr->stores); else ptr->invalid = 1; diff --git a/gcc/rtl.h b/gcc/rtl.h index 703dffe1d3e479fa787eb3491f68370f3b68048c..a5f20d9ce3bfee6e803ac5aeec79e1955b3e1687 100644 --- a/gcc/rtl.h +++ b/gcc/rtl.h @@ -3565,7 +3565,7 @@ extern void init_lower_subreg (void); /* In gcse.c */ extern bool can_copy_p (machine_mode); -extern bool can_assign_to_reg_without_clobbers_p (rtx); +extern bool can_assign_to_reg_without_clobbers_p (rtx, machine_mode); extern rtx fis_get_condition (rtx_insn *); /* In ira.c */ diff --git a/gcc/store-motion.c b/gcc/store-motion.c index c3b4d463df0de1dc22dbecfb1ef7f67f752dd518..fffdffc3c346dc80693148f0250fe4dd77fe06ea 100644 --- a/gcc/store-motion.c +++ b/gcc/store-motion.c @@ -557,7 +557,8 @@ find_moveable_store (rtx_insn *insn, int *regs_set_before, int *regs_set_after) assumes that we can do this. But sometimes the target machine has oddities like MEM read-modify-write instruction. See for example PR24257. */ - if (!can_assign_to_reg_without_clobbers_p (SET_SRC (set))) + if (!can_assign_to_reg_without_clobbers_p (SET_SRC (set), + GET_MODE (SET_SRC (set)))) return; ptr = st_expr_entry (dest); diff --git a/gcc/testsuite/gcc.dg/torture/pr69886.c b/gcc/testsuite/gcc.dg/torture/pr69886.c new file mode 100644 index 0000000000000000000000000000000000000000..d896d4fec9be43641f36dd694954da03ae29db1f --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr69886.c @@ -0,0 +1,15 @@ +/* PR rtl-optimization/69886. */ +/* { dg-do compile } */ +/* { dg-options "--param=gcse-unrestricted-cost=0" } */ +/* { dg-additional-options "-mavx" { target { i?86-*-* x86_64-*-* } } } */ + +typedef unsigned v32su __attribute__ ((vector_size (32))); + +unsigned +foo (v32su v32su_0, v32su v32su_1, v32su v32su_2, v32su v32su_3, v32su v32su_4) +{ + v32su_3 += v32su_2 *= v32su_2[3]; + if (v32su_4[3]) + v32su_2 &= (v32su){ v32su_1[3], 0xbb72, 64 }; + return v32su_0[2] + v32su_2[4] + v32su_3[1]; +}