While working on an ARM backend patch, I tripped over a case in which
a subreg of a vector zero-extension was wrongly being optimised to zero.
This comes from the following code in simplify_subreg:

  /* Optimize SUBREG truncations of zero and sign extended values.  */
  if ((GET_CODE (op) == ZERO_EXTEND
       || GET_CODE (op) == SIGN_EXTEND)
      && GET_MODE_PRECISION (outermode) < GET_MODE_PRECISION (innermode))
    {
      unsigned int bitpos = subreg_lsb_1 (outermode, innermode, byte);

      /* If we're requesting the lowpart of a zero or sign extension,
         there are three possibilities.  If the outermode is the same
         as the origmode, we can omit both the extension and the subreg.
         If the outermode is not larger than the origmode, we can apply
         the truncation without the extension.  Finally, if the outermode
         is larger than the origmode, but both are integer modes, we
         can just extend to the appropriate mode.  */
      if (bitpos == 0)
        {
          enum machine_mode origmode = GET_MODE (XEXP (op, 0));
          if (outermode == origmode)
            return XEXP (op, 0);
          if (GET_MODE_PRECISION (outermode) <= GET_MODE_PRECISION (origmode))
            return simplify_gen_subreg (outermode, XEXP (op, 0), origmode,
                                        subreg_lowpart_offset (outermode,
                                                               origmode));
          if (SCALAR_INT_MODE_P (outermode))
            return simplify_gen_unary (GET_CODE (op), outermode,
                                       XEXP (op, 0), origmode);
        }

      /* A SUBREG resulting from a zero extension may fold to zero if
         it extracts higher bits that the ZERO_EXTEND's source bits.  */
      if (GET_CODE (op) == ZERO_EXTEND
          && bitpos >= GET_MODE_PRECISION (GET_MODE (XEXP (op, 0))))
        return CONST0_RTX (outermode);
    }

I think all four of the optimisations above are only valid for scalar
integer extensions, i.e. for scalar integer innermodes.  (Apart from the
outermode checks that are already there, they should be correct regardless
of the outermode.)

It looks like the same problem applies to several other optimisations
in the same function.  In all cases, it's the check for inner modes
that's missing; the outer one is already checked where necessary.
However, for:

  /* Recognize a word extraction from a multi-word subreg.  */
  if ((GET_CODE (op) == LSHIFTRT
       || GET_CODE (op) == ASHIFTRT)
      && SCALAR_INT_MODE_P (outermode)
      && GET_MODE_PRECISION (outermode) >= BITS_PER_WORD
      && GET_MODE_PRECISION (innermode) >= (2 * GET_MODE_PRECISION (outermode))
      && CONST_INT_P (XEXP (op, 1))
      && (INTVAL (XEXP (op, 1)) & (GET_MODE_PRECISION (outermode) - 1)) == 0
      && INTVAL (XEXP (op, 1)) >= 0
      && INTVAL (XEXP (op, 1)) < GET_MODE_PRECISION (innermode)
      && byte == subreg_lowpart_offset (outermode, innermode))
    {
      int shifted_bytes = INTVAL (XEXP (op, 1)) / BITS_PER_UNIT;
      return simplify_gen_subreg (outermode, XEXP (op, 0), innermode,
                                  (WORDS_BIG_ENDIAN
                                   ? byte - shifted_bytes
                                   : byte + shifted_bytes));
    }

I don't think the outer mode check is necessary; nothing seems to rely
on the outer mode being interpreted as an integer.  So in this case,
it looks like the right thing is to check innermode in place of
outermode, rather than checking both.

Tested on arm-linux-gnueabi and x86_64-linux-gnu.  Although I suppose
I could self-approve this, I'd appreciate it if someone would double-check.

Richard


gcc/
        * simplify-rtx.c (simplify_subreg): Check that the inner mode is
        a scalar integer before applying integer-only optimisations to
        inner arithmetic.

Index: gcc/simplify-rtx.c
===================================================================
--- gcc/simplify-rtx.c  2011-09-13 13:33:29.423670877 +0100
+++ gcc/simplify-rtx.c  2011-09-13 13:33:51.748629417 +0100
@@ -5611,6 +5611,7 @@ simplify_subreg (enum machine_mode outer
   /* Optimize SUBREG truncations of zero and sign extended values.  */
   if ((GET_CODE (op) == ZERO_EXTEND
        || GET_CODE (op) == SIGN_EXTEND)
+      && SCALAR_INT_MODE_P (innermode)
       && GET_MODE_PRECISION (outermode) < GET_MODE_PRECISION (innermode))
     {
       unsigned int bitpos = subreg_lsb_1 (outermode, innermode, byte);
@@ -5649,6 +5650,7 @@ simplify_subreg (enum machine_mode outer
   if ((GET_CODE (op) == LSHIFTRT
        || GET_CODE (op) == ASHIFTRT)
       && SCALAR_INT_MODE_P (outermode)
+      && SCALAR_INT_MODE_P (innermode)
       /* Ensure that OUTERMODE is at least twice as wide as the INNERMODE
         to avoid the possibility that an outer LSHIFTRT shifts by more
         than the sign extension's sign_bit_copies and introduces zeros
@@ -5668,6 +5670,7 @@ simplify_subreg (enum machine_mode outer
   if ((GET_CODE (op) == LSHIFTRT
        || GET_CODE (op) == ASHIFTRT)
       && SCALAR_INT_MODE_P (outermode)
+      && SCALAR_INT_MODE_P (innermode)
       && GET_MODE_PRECISION (outermode) < GET_MODE_PRECISION (innermode)
       && CONST_INT_P (XEXP (op, 1))
       && GET_CODE (XEXP (op, 0)) == ZERO_EXTEND
@@ -5682,6 +5685,7 @@ simplify_subreg (enum machine_mode outer
      the outer subreg is effectively a truncation to the original mode.  */
   if (GET_CODE (op) == ASHIFT
       && SCALAR_INT_MODE_P (outermode)
+      && SCALAR_INT_MODE_P (innermode)
       && GET_MODE_PRECISION (outermode) < GET_MODE_PRECISION (innermode)
       && CONST_INT_P (XEXP (op, 1))
       && (GET_CODE (XEXP (op, 0)) == ZERO_EXTEND
@@ -5695,7 +5699,7 @@ simplify_subreg (enum machine_mode outer
   /* Recognize a word extraction from a multi-word subreg.  */
   if ((GET_CODE (op) == LSHIFTRT
        || GET_CODE (op) == ASHIFTRT)
-      && SCALAR_INT_MODE_P (outermode)
+      && SCALAR_INT_MODE_P (innermode)
       && GET_MODE_PRECISION (outermode) >= BITS_PER_WORD
       && GET_MODE_PRECISION (innermode) >= (2 * GET_MODE_PRECISION (outermode))
       && CONST_INT_P (XEXP (op, 1))
@@ -5717,6 +5721,7 @@ simplify_subreg (enum machine_mode outer
 
   if ((GET_CODE (op) == LSHIFTRT
        || GET_CODE (op) == ASHIFTRT)
+      && SCALAR_INT_MODE_P (innermode)
       && MEM_P (XEXP (op, 0))
       && CONST_INT_P (XEXP (op, 1))
       && GET_MODE_SIZE (outermode) < GET_MODE_SIZE (GET_MODE (op))

Reply via email to