While working on an ARM backend patch, I tripped over a case in which a subreg of a vector zero-extension was wrongly being optimised to zero. This comes from the following code in simplify_subreg:
  /* Optimize SUBREG truncations of zero and sign extended values.  */
  if ((GET_CODE (op) == ZERO_EXTEND
       || GET_CODE (op) == SIGN_EXTEND)
      && GET_MODE_PRECISION (outermode) < GET_MODE_PRECISION (innermode))
    {
      unsigned int bitpos = subreg_lsb_1 (outermode, innermode, byte);

      /* If we're requesting the lowpart of a zero or sign extension,
         there are three possibilities.  If the outermode is the same
         as the origmode, we can omit both the extension and the subreg.
         If the outermode is not larger than the origmode, we can apply
         the truncation without the extension.  Finally, if the outermode
         is larger than the origmode, but both are integer modes, we
         can just extend to the appropriate mode.  */
      if (bitpos == 0)
        {
          enum machine_mode origmode = GET_MODE (XEXP (op, 0));
          if (outermode == origmode)
            return XEXP (op, 0);
          if (GET_MODE_PRECISION (outermode) <= GET_MODE_PRECISION (origmode))
            return simplify_gen_subreg (outermode, XEXP (op, 0), origmode,
                                        subreg_lowpart_offset (outermode,
                                                               origmode));
          if (SCALAR_INT_MODE_P (outermode))
            return simplify_gen_unary (GET_CODE (op), outermode,
                                       XEXP (op, 0), origmode);
        }

      /* A SUBREG resulting from a zero extension may fold to zero if
         it extracts higher bits that the ZERO_EXTEND's source bits.  */
      if (GET_CODE (op) == ZERO_EXTEND
          && bitpos >= GET_MODE_PRECISION (GET_MODE (XEXP (op, 0))))
        return CONST0_RTX (outermode);
    }

I think all four of the optimisations above are only valid for integer extensions, i.e. for integer innermodes.  (Beyond the outermode conditions that are already checked, they should be correct regardless of the outermode.)

It looks like the same problem applies to several other optimisations in the same function.  In all cases, it's the check for inner modes that's missing; the outer one is already checked where necessary.

However, for:

  /* Recognize a word extraction from a multi-word subreg.
     */
  if ((GET_CODE (op) == LSHIFTRT
       || GET_CODE (op) == ASHIFTRT)
      && SCALAR_INT_MODE_P (outermode)
      && GET_MODE_PRECISION (outermode) >= BITS_PER_WORD
      && GET_MODE_PRECISION (innermode) >= (2 * GET_MODE_PRECISION (outermode))
      && CONST_INT_P (XEXP (op, 1))
      && (INTVAL (XEXP (op, 1)) & (GET_MODE_PRECISION (outermode) - 1)) == 0
      && INTVAL (XEXP (op, 1)) >= 0
      && INTVAL (XEXP (op, 1)) < GET_MODE_PRECISION (innermode)
      && byte == subreg_lowpart_offset (outermode, innermode))
    {
      int shifted_bytes = INTVAL (XEXP (op, 1)) / BITS_PER_UNIT;
      return simplify_gen_subreg (outermode, XEXP (op, 0), innermode,
                                  (WORDS_BIG_ENDIAN
                                   ? byte - shifted_bytes
                                   : byte + shifted_bytes));
    }

I don't think the outer mode check is necessary; nothing seems to rely on the outer mode being interpreted as an integer.  So in this case, it looks like the right thing is to check innermode instead of outermode, rather than in addition to it.

Tested on arm-linux-gnueabi and x86_64-linux-gnu.  Although I suppose I could self-approve this, I'd appreciate it if someone would double-check.

Richard


gcc/
        * simplify-rtx.c (simplify_subreg): Check that the inner mode is a
        scalar integer before applying integer-only optimisations to inner
        arithmetic.

Index: gcc/simplify-rtx.c
===================================================================
--- gcc/simplify-rtx.c 2011-09-13 13:33:29.423670877 +0100
+++ gcc/simplify-rtx.c 2011-09-13 13:33:51.748629417 +0100
@@ -5611,6 +5611,7 @@ simplify_subreg (enum machine_mode outer
   /* Optimize SUBREG truncations of zero and sign extended values.
      */
   if ((GET_CODE (op) == ZERO_EXTEND
        || GET_CODE (op) == SIGN_EXTEND)
+      && SCALAR_INT_MODE_P (innermode)
       && GET_MODE_PRECISION (outermode) < GET_MODE_PRECISION (innermode))
     {
       unsigned int bitpos = subreg_lsb_1 (outermode, innermode, byte);
@@ -5649,6 +5650,7 @@ simplify_subreg (enum machine_mode outer
   if ((GET_CODE (op) == LSHIFTRT
        || GET_CODE (op) == ASHIFTRT)
       && SCALAR_INT_MODE_P (outermode)
+      && SCALAR_INT_MODE_P (innermode)
       /* Ensure that OUTERMODE is at least twice as wide as the INNERMODE
          to avoid the possibility that an outer LSHIFTRT shifts by more
          than the sign extension's sign_bit_copies and introduces zeros
@@ -5668,6 +5670,7 @@ simplify_subreg (enum machine_mode outer
   if ((GET_CODE (op) == LSHIFTRT
        || GET_CODE (op) == ASHIFTRT)
       && SCALAR_INT_MODE_P (outermode)
+      && SCALAR_INT_MODE_P (innermode)
       && GET_MODE_PRECISION (outermode) < GET_MODE_PRECISION (innermode)
       && CONST_INT_P (XEXP (op, 1))
       && GET_CODE (XEXP (op, 0)) == ZERO_EXTEND
@@ -5682,6 +5685,7 @@ simplify_subreg (enum machine_mode outer
      the outer subreg is effectively a truncation to the original mode.  */
   if (GET_CODE (op) == ASHIFT
       && SCALAR_INT_MODE_P (outermode)
+      && SCALAR_INT_MODE_P (innermode)
       && GET_MODE_PRECISION (outermode) < GET_MODE_PRECISION (innermode)
       && CONST_INT_P (XEXP (op, 1))
       && (GET_CODE (XEXP (op, 0)) == ZERO_EXTEND
@@ -5695,7 +5699,7 @@ simplify_subreg (enum machine_mode outer
   /* Recognize a word extraction from a multi-word subreg.
      */
   if ((GET_CODE (op) == LSHIFTRT
        || GET_CODE (op) == ASHIFTRT)
-      && SCALAR_INT_MODE_P (outermode)
+      && SCALAR_INT_MODE_P (innermode)
       && GET_MODE_PRECISION (outermode) >= BITS_PER_WORD
       && GET_MODE_PRECISION (innermode) >= (2 * GET_MODE_PRECISION (outermode))
       && CONST_INT_P (XEXP (op, 1))
@@ -5717,6 +5721,7 @@ simplify_subreg (enum machine_mode outer
   if ((GET_CODE (op) == LSHIFTRT
        || GET_CODE (op) == ASHIFTRT)
+      && SCALAR_INT_MODE_P (innermode)
       && MEM_P (XEXP (op, 0))
       && CONST_INT_P (XEXP (op, 1))
       && GET_MODE_SIZE (outermode) < GET_MODE_SIZE (GET_MODE (op))