Hello! Attached patch fixes the combine deficiency, where combine is not able to recognize combination of:
Trying 6 -> 8: Failed to match this instruction: (set (reg:QI 66) (mem/j:QI (plus:SI (subreg:SI (plus:DI (reg/v:DI 62 [ position ]) (const_int 1 [0x1])) 0) (symbol_ref:SI ("array") [flags 0x40] <var_decl 0x7ffff19ad260 array>)) [0 array S1 A8])) This should be a valid address, but the RTX is not accepted in ix86_decompose_address since we have two displacements here. Combine should simplify this RTX to: (set (reg:QI 68) (mem/j:QI (plus:SI (subreg:SI (reg/v:DI 62 [ position ]) 0) (const:SI (plus:SI (symbol_ref:SI ("array") [flags 0x40] <var_decl 0x7f8d1bc41390 array>) (const_int 1 [0x1])))) [0 array S1 A8])) as is the case with -m32 (but rejected for 32bit targets in ix86_address_subreg_operand for unrelated reason). The solution is, to transform operands of PLUS and MINUS RTX in the form of (subreg:M (op:N A C) 0) to (op:M (subreg:N (A 0)) C), as is done for standalone subreg operation in 32bit case. This transformation generates canonical (plus:M (plus:M (subreg:N (A 0) C1) C2)) RTXes that can be recognized and further optimized by simplify_plus_minus. The effect of the patch on following testcase (-O2 -mx32 -maddres-mode=short): --cut here-- extern char array[40]; char foo (long long position) { return array[position + 1]; } --cut here-- foo: - addq $1, %rdi - movzbl array(%edi), %eax + movzbl array+1(%edi), %eax ret The patch is effective also for -fpic: foo: movl array@GOTPCREL(%rip), %eax - addq $1, %rdi - movzbl (%eax,%edi), %eax + movzbl 1(%eax,%edi), %eax ret Generated code for vanilla 32bit and 64bit targets is unchanged. 2012-09-25 Uros Bizjak <ubiz...@gmail.com> PR rtl-optimization/54457 * combine.c (combine_simplify_rtx) <RTX_COMM_ARITH, RTX_BIN_ARIT>: For PLUS and MINUS, convert plus_minus_operand_p operands in the form of (subreg:M (op:N A C) 0) to (op:M (subreg:N (A 0)) C). * simplify-rtx.c (plus_minus_operand_p): Make global. * rtl.h (plus_minus_operand_p): Declare. testsuite/ChangeLog: 2012-09-25 Uros Bizjak <ubiz...@gmail.com> PR rtl-optimization/54457 * gcc.target/i386/pr54457.c: New test. Patch was bootstrapped and regression tested on x86_64-pc-linux-gnu {,-m32}, also on x32 target by H.J [1]. OK for mainline? [1] http://gcc.gnu.org/ml/gcc-testresults/2012-09/msg02448.html Uros.
Index: combine.c =================================================================== --- combine.c (revision 191681) +++ combine.c (working copy) @@ -5314,6 +5314,26 @@ combine_simplify_rtx (rtx x, enum machine_mode op0 break; case RTX_COMM_ARITH: case RTX_BIN_ARITH: + if ((code == PLUS || code == MINUS) + && GET_MODE_CLASS (mode) == MODE_INT + && HWI_COMPUTABLE_MODE_P (mode)) + { + /* Adjust operands in the form of (subreg:M (op:N A C) 0) + to (op:M (subreg:N (A 0)) C). This transformation + generates canonical (plus:M (plus:M (subreg:N (A 0) C1) C2)) + RTXes that can be recognized and further optimized by + simplify_plus_minus. */ + if (GET_CODE (XEXP (x, 0)) == SUBREG + && plus_minus_operand_p (SUBREG_REG (XEXP (x, 0)))) + SUBST (XEXP (x, 0), + force_to_mode (XEXP (x, 0), mode, + ~(unsigned HOST_WIDE_INT) 0, 0)); + if (GET_CODE (XEXP (x, 1)) == SUBREG + && plus_minus_operand_p (SUBREG_REG (XEXP (x, 1)))) + SUBST (XEXP (x, 1), + force_to_mode (XEXP (x, 1), mode, + ~(unsigned HOST_WIDE_INT) 0, 0)); + } temp = simplify_binary_operation (code, mode, XEXP (x, 0), XEXP (x, 1)); break; case RTX_BITFIELD_OPS: Index: rtl.h =================================================================== --- rtl.h (revision 191681) +++ rtl.h (working copy) @@ -1895,6 +1895,7 @@ extern bool val_signbit_known_set_p (enum machine_ unsigned HOST_WIDE_INT); extern bool val_signbit_known_clear_p (enum machine_mode, unsigned HOST_WIDE_INT); +extern bool plus_minus_operand_p (const_rtx); /* In reginfo.c */ extern enum machine_mode choose_hard_reg_mode (unsigned int, unsigned int, Index: simplify-rtx.c =================================================================== --- simplify-rtx.c (revision 191681) +++ simplify-rtx.c (working copy) @@ -48,7 +48,6 @@ along with GCC; see the file COPYING3. If not see ((((HOST_WIDE_INT) low) < 0) ? ((HOST_WIDE_INT) -1) : ((HOST_WIDE_INT) 0)) static rtx neg_const_int (enum machine_mode, const_rtx); -static bool plus_minus_operand_p (const_rtx); static bool simplify_plus_minus_op_data_cmp (rtx, rtx); static rtx simplify_plus_minus (enum rtx_code, enum machine_mode, rtx, rtx); static rtx simplify_immed_subreg (enum machine_mode, rtx, enum machine_mode, @@ -4186,7 +4185,7 @@ simplify_plus_minus (enum rtx_code code, enum mach } /* Check whether an operand is suitable for calling simplify_plus_minus. */ -static bool +bool plus_minus_operand_p (const_rtx x) { return GET_CODE (x) == PLUS