Il 30/09/2012 11:02, Richard Sandiford ha scritto:
> Uros Bizjak <[email protected]> writes:
>> On Thu, Sep 27, 2012 at 8:20 PM, Jakub Jelinek <[email protected]> wrote:
>>> On Thu, Sep 27, 2012 at 08:04:58PM +0200, Uros Bizjak wrote:
>>>> After some off-line discussion with Richard, attached is v2 of the patch.
>>>>
>>>> 2012-09-27 Uros Bizjak <[email protected]>
>>>>
>>>> PR rtl-optimization/54457
>>>> * simplify-rtx.c (simplify_subreg):
>>>> Simplify (subreg:SI (op:DI ((x:DI) (y:DI)), 0)
>>>> to (op:SI (subreg:SI (x:DI) 0) (subreg:SI (x:DI) 0)).
>>>
>>> Is that a good idea even for WORD_REGISTER_OPERATIONS targets?
>>
>> I have bootstrapped and regtested [1] the patch on
>> alphaev68-pc-linux-gnu, a WORD_REGISTER_OPERATIONS target, and there
>> were no additional failures.
>
> Thanks. Given Jakub's question/concern, I'd really prefer a third
> opinion rather than approving it myself, but... OK if no-one objects
> within 24hrs.
I used to have a patch doing roughly the same thing (for more operations
but not MULT). I never submitted because I didn't have the time to
audit all targets after changing the canonicalization.
Perhaps you can take the best of both worlds.
Paolo
change canonicalization
Index: gcc/Makefile.in
===================================================================
--- gcc/Makefile.in (branch pr39726)
+++ gcc/Makefile.in (working copy)
@@ -2844,7 +2844,7 @@ jump.o : jump.c $(CONFIG_H) $(SYSTEM_H)
simplify-rtx.o : simplify-rtx.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
$(RTL_H) $(REGS_H) hard-reg-set.h $(FLAGS_H) $(REAL_H) insn-config.h \
$(RECOG_H) $(EXPR_H) $(TOPLEV_H) output.h $(FUNCTION_H) $(GGC_H) $(TM_P_H) \
- $(TREE_H) $(TARGET_H)
+ $(TREE_H) $(TARGET_H) $(OPTABS_H)
cgraph.o : cgraph.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) \
langhooks.h $(TOPLEV_H) $(FLAGS_H) $(GGC_H) $(TARGET_H) $(CGRAPH_H) \
gt-cgraph.h output.h intl.h $(BASIC_BLOCK_H) debug.h $(HASHTAB_H) \
Index: gcc/simplify-rtx.c
===================================================================
--- gcc/simplify-rtx.c (branch pr39726)
+++ gcc/simplify-rtx.c (working copy)
@@ -39,6 +39,7 @@ along with GCC; see the file COPYING3.
#include "output.h"
#include "ggc.h"
#include "target.h"
+#include "optabs.h"
/* Simplification and canonicalization of RTL. */
@@ -5191,6 +5192,8 @@ rtx
simplify_subreg (enum machine_mode outermode, rtx op,
enum machine_mode innermode, unsigned int byte)
{
+ int is_lowpart;
+
/* Little bit of sanity checking. */
gcc_assert (innermode != VOIDmode);
gcc_assert (outermode != VOIDmode);
@@ -5300,11 +5303,13 @@ simplify_subreg (enum machine_mode outer
return NULL_RTX;
}
+ is_lowpart = subreg_lowpart_offset (outermode, innermode) == byte;
+
/* Merge implicit and explicit truncations. */
if (GET_CODE (op) == TRUNCATE
&& GET_MODE_SIZE (outermode) < GET_MODE_SIZE (innermode)
- && subreg_lowpart_offset (outermode, innermode) == byte)
+ && is_lowpart)
return simplify_gen_unary (TRUNCATE, outermode, XEXP (op, 0),
GET_MODE (XEXP (op, 0)));
@@ -5343,7 +5348,7 @@ simplify_subreg (enum machine_mode outer
The information is used only by alias analysis that can not
grog partial register anyway. */
- if (subreg_lowpart_offset (outermode, innermode) == byte)
+ if (is_lowpart)
ORIGINAL_REGNO (x) = ORIGINAL_REGNO (op);
return x;
}
@@ -5393,6 +5398,51 @@ simplify_subreg (enum machine_mode outer
return NULL_RTX;
}
+ /* Try to move a subreg inside an arithmetic operation. */
+ if (is_lowpart && ARITHMETIC_P (op)
+ && GET_MODE_CLASS (outermode) == MODE_INT
+ && GET_MODE_CLASS (innermode) == MODE_INT)
+ {
+ enum insn_code ic;
+ enum machine_mode cnt_mode;
+ switch (GET_CODE (op))
+ {
+ case ABS:
+ case NEG:
+ return simplify_gen_unary (GET_CODE (op), outermode,
+ rtl_hooks.gen_lowpart_no_emit
+ (outermode, XEXP (op, 0)),
+ outermode);
+
+ case ASHIFT:
+ /* i386 always uses QImode for the shift count. Get the
+ appropriate mode from the optab. */
+ ic = ashl_optab->handlers[outermode].insn_code;
+ if (ic != CODE_FOR_nothing)
+ cnt_mode = insn_data[ic].operand[2].mode;
+ else
+ cnt_mode = outermode;
+ return simplify_gen_binary (GET_CODE (op), outermode,
+ rtl_hooks.gen_lowpart_no_emit
+ (outermode, XEXP (op, 0)),
+ rtl_hooks.gen_lowpart_no_emit
+ (cnt_mode, XEXP (op, 1)));
+
+ case PLUS:
+ case MINUS:
+ case AND:
+ case IOR:
+ case XOR:
+ return simplify_gen_binary (GET_CODE (op), outermode,
+ rtl_hooks.gen_lowpart_no_emit
+ (outermode, XEXP (op, 0)),
+ rtl_hooks.gen_lowpart_no_emit
+ (outermode, XEXP (op, 1)));
+ default:
+ break;
+ }
+ }
+
/* Optimize SUBREG truncations of zero and sign extended values. */
if ((GET_CODE (op) == ZERO_EXTEND
|| GET_CODE (op) == SIGN_EXTEND)
Index: gcc/combine.c
===================================================================
--- gcc/combine.c (branch pr39726)
+++ gcc/combine.c (working copy)
@@ -11155,47 +11155,6 @@ simplify_comparison (enum rtx_code code,
continue;
}
- /* If this is (and:M1 (subreg:M2 X 0) (const_int C1)) where C1
- fits in both M1 and M2 and the SUBREG is either paradoxical
- or represents the low part, permute the SUBREG and the AND
- and try again. */
- if (GET_CODE (XEXP (op0, 0)) == SUBREG)
- {
- unsigned HOST_WIDE_INT c1;
- tmode = GET_MODE (SUBREG_REG (XEXP (op0, 0)));
- /* Require an integral mode, to avoid creating something like
- (AND:SF ...). */
- if (SCALAR_INT_MODE_P (tmode)
- /* It is unsafe to commute the AND into the SUBREG if the
- SUBREG is paradoxical and WORD_REGISTER_OPERATIONS is
- not defined. As originally written the upper bits
- have a defined value due to the AND operation.
- However, if we commute the AND inside the SUBREG then
- they no longer have defined values and the meaning of
- the code has been changed. */
- && (0
-#ifdef WORD_REGISTER_OPERATIONS
- || (mode_width > GET_MODE_BITSIZE (tmode)
- && mode_width <= BITS_PER_WORD)
-#endif
- || (mode_width <= GET_MODE_BITSIZE (tmode)
- && subreg_lowpart_p (XEXP (op0, 0))))
- && CONST_INT_P (XEXP (op0, 1))
- && mode_width <= HOST_BITS_PER_WIDE_INT
- && GET_MODE_BITSIZE (tmode) <= HOST_BITS_PER_WIDE_INT
- && ((c1 = INTVAL (XEXP (op0, 1))) & ~mask) == 0
- && (c1 & ~GET_MODE_MASK (tmode)) == 0
- && c1 != mask
- && c1 != GET_MODE_MASK (tmode))
- {
- op0 = simplify_gen_binary (AND, tmode,
- SUBREG_REG (XEXP (op0, 0)),
- gen_int_mode (c1, tmode));
- op0 = gen_lowpart (mode, op0);
- continue;
- }
- }
-
/* Convert (ne (and (not X) 1) 0) to (eq (and X 1) 0). */
if (const_op == 0 && equality_comparison_p
&& XEXP (op0, 1) == const1_rtx