On Wed, Mar 30, 2011 at 6:05 PM, Jakub Jelinek <ja...@redhat.com> wrote:
> Hi!
>
> MEM_REFs which can represent type punning on lhs don't force
> non-gimple types to be addressable.  This causes various problems
> in the expander, which wasn't prepared to handle that.
>
> This patch tries to fix what I've found and adds a bunch of
> testcases.  The original report was with just -O2 on some large testcase
> from Eigen; most of the testcases have -fno-tree-sra just because
> I gave up on delta when it stopped reducing at 32KB.
>
> The first problem (the one from Eigen) is _mm_store_pd into
> a std::complex<double> var, which is a single field and thus
> has DCmode TYPE_MODE.  Starting with 4.6 that var is not
> TREE_ADDRESSABLE, so its DECL_RTL is a CONCAT.  For assignments
> to a CONCAT, expand_assignment was expecting either that
> from has COMPLEX_TYPE (and a mode matching the store), or
> that it is a real or imaginary subpart store.  Thus, when
> trying to store a V2DFmode value, it expected it to be a
> real part store (bitpos == 0) and tried to set a DFmode pseudo
> from a V2DFmode rtx.
> Further testing revealed that it is possible to hit many other
> cases with a CONCAT destination: it can be a store of just a few bits,
> or can overlap parts of both real and imaginary, or be partially
> out of bounds.
> The patch handles three cases.  The case from Eigen - bitpos == 0,
> bitsize == GET_MODE_BITSIZE (GET_MODE (to_rtx)), non-COMPLEX_TYPE - is
> handled by setting each half separately.  A store which does not touch
> one of the parts is handled by storing just to the other part,
> adjusting bitpos for the imaginary case (this is the
> bitpos + bitsize <= half_bitsize resp. bitpos >= half_bitsize
> case).  Finally, the patch adds a generic slow path for the very unusual
> cases with partial overlap of both parts.
>
> After testing it with the testcases I wrote, I found a bunch of
> other ICEs though, and reproduced them even without CONCAT
> on the LHS (the testcases below which don't contain any _Complex
> keyword).
>
> Bootstrapped/regtested on x86_64-linux and i686-linux,
> regtested with a cross compiler on these new testcases also
> for powerpc{,64}-linux and s390{,x}-linux.
>
> Ok for trunk and after a while for 4.6?

Looks good to me.  2nd eyes welcome though.

Thanks,
Richard.

> 2011-03-30  Jakub Jelinek  <ja...@redhat.com>
>
>        PR middle-end/48335
>        * expr.c (expand_assignment): Handle all possibilities
>        if TO_RTX is CONCAT.
>        * expmed.c (store_bit_field_1): Avoid trying to create
>        invalid SUBREGs.
>        (store_split_bit_field): If SUBREG_REG (op0) or
>        op0 itself has smaller mode than word, return it
>        for offset 0 and const0_rtx for out of bounds stores.
>        If word is const0_rtx, skip it.
>
>        * gcc.c-torture/compile/pr48335-1.c: New test.
>        * gcc.dg/pr48335-1.c: New test.
>        * gcc.dg/pr48335-2.c: New test.
>        * gcc.dg/pr48335-3.c: New test.
>        * gcc.dg/pr48335-4.c: New test.
>        * gcc.dg/pr48335-5.c: New test.
>        * gcc.dg/pr48335-6.c: New test.
>        * gcc.dg/pr48335-7.c: New test.
>        * gcc.dg/pr48335-8.c: New test.
>        * gcc.target/i386/pr48335-1.c: New test.
>
> --- gcc/expr.c.jj       2011-03-23 17:15:55.000000000 +0100
> +++ gcc/expr.c  2011-03-30 11:38:15.000000000 +0200
> @@ -4278,16 +4278,47 @@ expand_assignment (tree to, tree from, b
>       /* Handle expand_expr of a complex value returning a CONCAT.  */
>       else if (GET_CODE (to_rtx) == CONCAT)
>        {
> -         if (COMPLEX_MODE_P (TYPE_MODE (TREE_TYPE (from))))
> +         unsigned short mode_bitsize = GET_MODE_BITSIZE (GET_MODE (to_rtx));
> +         if (COMPLEX_MODE_P (TYPE_MODE (TREE_TYPE (from)))
> +             && bitpos == 0
> +             && bitsize == mode_bitsize)
> +           result = store_expr (from, to_rtx, false, nontemporal);
> +         else if (bitsize == mode_bitsize / 2
> +                  && (bitpos == 0 || bitpos == GET_MODE_BITSIZE (mode1)))
> +           result = store_expr (from, XEXP (to_rtx, bitpos != 0), false,
> +                                nontemporal);
> +         else if (bitpos + bitsize <= mode_bitsize / 2)
> +           result = store_field (XEXP (to_rtx, 0), bitsize, bitpos,
> +                                 mode1, from, TREE_TYPE (tem),
> +                                 get_alias_set (to), nontemporal);
> +         else if (bitpos >= mode_bitsize / 2)
> +           result = store_field (XEXP (to_rtx, 1), bitsize,
> +                                 bitpos - mode_bitsize / 2, mode1, from,
> +                                 TREE_TYPE (tem), get_alias_set (to),
> +                                 nontemporal);
> +         else if (bitpos == 0 && bitsize == mode_bitsize)
>            {
> -             gcc_assert (bitpos == 0);
> -             result = store_expr (from, to_rtx, false, nontemporal);
> +             rtx from_rtx = expand_normal (from);
> +             from_rtx = simplify_gen_subreg (GET_MODE (to_rtx), from_rtx,
> +                                             TYPE_MODE (TREE_TYPE (from)), 0);
> +             emit_move_insn (XEXP (to_rtx, 0),
> +                             read_complex_part (from_rtx, false));
> +             emit_move_insn (XEXP (to_rtx, 1),
> +                             read_complex_part (from_rtx, true));
> +             result = NULL;
>            }
>          else
>            {
> -             gcc_assert (bitpos == 0 || bitpos == GET_MODE_BITSIZE (mode1));
> -             result = store_expr (from, XEXP (to_rtx, bitpos != 0), false,
> -                                  nontemporal);
> +             rtx temp = assign_stack_temp (GET_MODE (to_rtx),
> +                                           GET_MODE_SIZE (GET_MODE (to_rtx)),
> +                                           0);
> +             write_complex_part (temp, XEXP (to_rtx, 0), false);
> +             write_complex_part (temp, XEXP (to_rtx, 1), true);
> +             result = store_field (temp, bitsize, bitpos, mode1, from,
> +                                   TREE_TYPE (tem), get_alias_set (to),
> +                                   nontemporal);
> +             emit_move_insn (XEXP (to_rtx, 0), read_complex_part (temp, false));
> +             emit_move_insn (XEXP (to_rtx, 1), read_complex_part (temp, true));
>            }
>        }
>       else
> --- gcc/expmed.c.jj     2011-03-23 17:15:55.000000000 +0100
> +++ gcc/expmed.c        2011-03-30 13:37:54.000000000 +0200
> @@ -422,7 +422,10 @@ store_bit_field_1 (rtx str_rtx, unsigned
>             && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
>          : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
>             || (offset * BITS_PER_UNIT % bitsize == 0
> -                && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
> +                && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0)))
> +      && (MEM_P (op0)
> +         || GET_MODE (op0) == fieldmode
> +         || validate_subreg (fieldmode, GET_MODE (op0), op0, byte_offset)))
>     {
>       if (MEM_P (op0))
>        op0 = adjust_address (op0, fieldmode, offset);
> @@ -479,6 +482,7 @@ store_bit_field_1 (rtx str_rtx, unsigned
>       struct expand_operand ops[2];
>       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
>       rtx arg0 = op0;
> +      unsigned HOST_WIDE_INT subreg_off;
>
>       if (GET_CODE (arg0) == SUBREG)
>        {
> @@ -491,15 +495,18 @@ store_bit_field_1 (rtx str_rtx, unsigned
>          arg0 = SUBREG_REG (arg0);
>        }
>
> -      arg0 = gen_rtx_SUBREG (fieldmode, arg0,
> -                            (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
> -                            + (offset * UNITS_PER_WORD));
> -
> -      create_fixed_operand (&ops[0], arg0);
> -      /* Shrink the source operand to FIELDMODE.  */
> -      create_convert_operand_to (&ops[1], value, fieldmode, false);
> -      if (maybe_expand_insn (icode, 2, ops))
> -       return true;
> +      subreg_off = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
> +                  + (offset * UNITS_PER_WORD);
> +      if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
> +       {
> +         arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
> +
> +         create_fixed_operand (&ops[0], arg0);
> +         /* Shrink the source operand to FIELDMODE.  */
> +         create_convert_operand_to (&ops[1], value, fieldmode, false);
> +         if (maybe_expand_insn (icode, 2, ops))
> +           return true;
> +       }
>     }
>
>   /* Handle fields bigger than a word.  */
> @@ -1045,22 +1052,32 @@ store_split_bit_field (rtx op0, unsigned
>       if (GET_CODE (op0) == SUBREG)
>        {
>          int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
> -         word = operand_subword_force (SUBREG_REG (op0), word_offset,
> -                                       GET_MODE (SUBREG_REG (op0)));
> +         enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
> +         if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
> +           word = word_offset ? const0_rtx : op0;
> +         else
> +           word = operand_subword_force (SUBREG_REG (op0), word_offset,
> +                                         GET_MODE (SUBREG_REG (op0)));
>          offset = 0;
>        }
>       else if (REG_P (op0))
>        {
> -         word = operand_subword_force (op0, offset, GET_MODE (op0));
> +         enum machine_mode op0_mode = GET_MODE (op0);
> +         if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
> +           word = offset ? const0_rtx : op0;
> +         else
> +           word = operand_subword_force (op0, offset, GET_MODE (op0));
>          offset = 0;
>        }
>       else
>        word = op0;
>
>       /* OFFSET is in UNITs, and UNIT is in bits.
> -         store_fixed_bit_field wants offset in bytes.  */
> -      store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
> -                            thispos, part);
> +        store_fixed_bit_field wants offset in bytes.  If WORD is const0_rtx,
> +        it is just an out of bounds access.  Ignore it.  */
> +      if (word != const0_rtx)
> +       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
> +                              thispos, part);
>       bitsdone += thissize;
>     }
>  }
> --- gcc/testsuite/gcc.c-torture/compile/pr48335-1.c.jj  2011-03-30 11:32:24.000000000 +0200
> +++ gcc/testsuite/gcc.c-torture/compile/pr48335-1.c     2011-03-30 11:31:47.000000000 +0200
> @@ -0,0 +1,41 @@
> +/* PR middle-end/48335 */
> +
> +struct S { float d; };
> +
> +void bar (struct S);
> +
> +void
> +f0 (int x)
> +{
> +  struct S s = {.d = 0.0f };
> +  ((char *) &s.d)[0] = x;
> +  s.d *= 7.0;
> +  bar (s);
> +}
> +
> +void
> +f1 (int x)
> +{
> +  struct S s = {.d = 0.0f };
> +  ((char *) &s.d)[1] = x;
> +  s.d *= 7.0;
> +  bar (s);
> +}
> +
> +void
> +f2 (int x)
> +{
> +  struct S s = {.d = 0.0f };
> +  ((char *) &s.d)[2] = x;
> +  s.d *= 7.0;
> +  bar (s);
> +}
> +
> +void
> +f3 (int x)
> +{
> +  struct S s = {.d = 0.0f };
> +  ((char *) &s.d)[3] = x;
> +  s.d *= 7.0;
> +  bar (s);
> +}
> --- gcc/testsuite/gcc.dg/pr48335-1.c.jj 2011-03-30 10:57:29.000000000 +0200
> +++ gcc/testsuite/gcc.dg/pr48335-1.c    2011-03-29 18:28:03.000000000 +0200
> @@ -0,0 +1,48 @@
> +/* PR middle-end/48335 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fno-tree-sra" } */
> +
> +typedef long long T __attribute__((may_alias));
> +
> +struct S
> +{
> +  _Complex float d __attribute__((aligned (8)));
> +};
> +
> +void bar (struct S);
> +
> +void
> +f1 (T x)
> +{
> +  struct S s;
> +  *(T *) &s.d = x;
> +  __real__ s.d *= 7.0;
> +  bar (s);
> +}
> +
> +void
> +f2 (int x)
> +{
> +  struct S s = { .d = 0.0f };
> +  *(char *) &s.d = x;
> +  __real__ s.d *= 7.0;
> +  bar (s);
> +}
> +
> +void
> +f3 (int x)
> +{
> +  struct S s = { .d = 0.0f };
> +  ((char *) &s.d)[2] = x;
> +  __real__ s.d *= 7.0;
> +  bar (s);
> +}
> +
> +void
> +f4 (int x, int y)
> +{
> +  struct S s = { .d = 0.0f };
> +  ((char *) &s.d)[y] = x;
> +  __real__ s.d *= 7.0;
> +  bar (s);
> +}
> --- gcc/testsuite/gcc.dg/pr48335-2.c.jj 2011-03-30 10:57:29.000000000 +0200
> +++ gcc/testsuite/gcc.dg/pr48335-2.c    2011-03-29 18:27:53.000000000 +0200
> @@ -0,0 +1,58 @@
> +/* PR middle-end/48335 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fno-tree-sra" } */
> +
> +typedef long long T __attribute__((may_alias, aligned (1)));
> +typedef short U __attribute__((may_alias, aligned (1)));
> +
> +struct S
> +{
> +  _Complex float d __attribute__((aligned (8)));
> +};
> +
> +void bar (struct S);
> +
> +void
> +f1 (T x)
> +{
> +  struct S s;
> +  *(T *) ((char *) &s.d + 1) = x;
> +  __real__ s.d *= 7.0;
> +  bar (s);
> +}
> +
> +void
> +f2 (int x)
> +{
> +  struct S s = { .d = 0.0f };
> +  ((U *)((char *) &s.d + 1))[0] = x;
> +  __real__ s.d *= 7.0;
> +  bar (s);
> +}
> +
> +void
> +f3 (int x)
> +{
> +  struct S s = { .d = 0.0f };
> +  ((U *)((char *) &s.d + 1))[1] = x;
> +  __real__ s.d *= 7.0;
> +  bar (s);
> +}
> +
> +void
> +f4 (int x)
> +{
> +  struct S s = { .d = 0.0f };
> +  ((U *)((char *) &s.d + 1))[2] = x;
> +  __real__ s.d *= 7.0;
> +  bar (s);
> +}
> +
> +void
> +f5 (int x)
> +{
> +  struct S s = { .d = 0.0f };
> +  ((U *)((char *) &s.d + 1))[3] = x;
> +  __real__ s.d *= 7.0;
> +  bar (s);
> +}
> --- gcc/testsuite/gcc.dg/pr48335-3.c.jj 2011-03-30 11:43:38.000000000 +0200
> +++ gcc/testsuite/gcc.dg/pr48335-3.c    2011-03-30 11:42:54.000000000 +0200
> @@ -0,0 +1,48 @@
> +/* PR middle-end/48335 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fno-tree-sra" } */
> +
> +typedef short U __attribute__((may_alias, aligned (1)));
> +
> +struct S
> +{
> +  double d;
> +};
> +
> +void bar (struct S);
> +
> +void
> +f1 (int x)
> +{
> +  struct S s = { .d = 0.0 };
> +  ((U *)((char *) &s.d + 1))[0] = x;
> +  __real__ s.d *= 7.0;
> +  bar (s);
> +}
> +
> +void
> +f2 (int x)
> +{
> +  struct S s = { .d = 0.0 };
> +  ((U *)((char *) &s.d + 1))[1] = x;
> +  __real__ s.d *= 7.0;
> +  bar (s);
> +}
> +
> +void
> +f3 (int x)
> +{
> +  struct S s = { .d = 0.0 };
> +  ((U *)((char *) &s.d + 1))[2] = x;
> +  __real__ s.d *= 7.0;
> +  bar (s);
> +}
> +
> +void
> +f4 (int x)
> +{
> +  struct S s = { .d = 0.0 };
> +  ((U *)((char *) &s.d + 1))[3] = x;
> +  __real__ s.d *= 7.0;
> +  bar (s);
> +}
> --- gcc/testsuite/gcc.dg/pr48335-4.c.jj 2011-03-30 12:59:11.000000000 +0200
> +++ gcc/testsuite/gcc.dg/pr48335-4.c    2011-03-30 12:55:51.000000000 +0200
> @@ -0,0 +1,39 @@
> +/* PR middle-end/48335 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fno-tree-sra" } */
> +
> +typedef short U __attribute__((may_alias, aligned (1)));
> +
> +struct S
> +{
> +  double d;
> +};
> +
> +void bar (struct S);
> +
> +void
> +f1 (int x)
> +{
> +  struct S s = { .d = 0.0 };
> +  ((U *)((char *) &s.d + 1))[-1] = x;
> +  __real__ s.d *= 7.0;
> +  bar (s);
> +}
> +
> +void
> +f2 (int x)
> +{
> +  struct S s = { .d = 0.0 };
> +  ((U *)((char *) &s.d + 1))[-2] = x;
> +  __real__ s.d *= 7.0;
> +  bar (s);
> +}
> +
> +void
> +f3 (int x)
> +{
> +  struct S s = { .d = 0.0 };
> +  ((U *)((char *) &s.d + 1))[5] = x;
> +  __real__ s.d *= 7.0;
> +  bar (s);
> +}
> --- gcc/testsuite/gcc.dg/pr48335-5.c.jj 2011-03-30 13:09:30.000000000 +0200
> +++ gcc/testsuite/gcc.dg/pr48335-5.c    2011-03-30 13:01:10.000000000 +0200
> @@ -0,0 +1,38 @@
> +/* PR middle-end/48335 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fno-tree-sra" } */
> +
> +typedef long long T __attribute__((may_alias));
> +
> +struct S
> +{
> +  _Complex float d __attribute__((aligned (8)));
> +};
> +
> +int
> +f1 (struct S x)
> +{
> +  struct S s = x;
> +  return *(T *) &s.d;
> +}
> +
> +int
> +f2 (struct S x)
> +{
> +  struct S s = x;
> +  return *(char *) &s.d;
> +}
> +
> +int
> +f3 (struct S x)
> +{
> +  struct S s = x;
> +  return ((char *) &s.d)[2];
> +}
> +
> +int
> +f4 (struct S x, int y)
> +{
> +  struct S s = x;
> +  return ((char *) &s.d)[y];
> +}
> --- gcc/testsuite/gcc.dg/pr48335-6.c.jj 2011-03-30 13:09:30.000000000 +0200
> +++ gcc/testsuite/gcc.dg/pr48335-6.c    2011-03-30 13:04:58.000000000 +0200
> @@ -0,0 +1,46 @@
> +/* PR middle-end/48335 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fno-tree-sra" } */
> +
> +typedef long long T __attribute__((may_alias, aligned (1)));
> +typedef short U __attribute__((may_alias, aligned (1)));
> +
> +struct S
> +{
> +  _Complex float d __attribute__((aligned (8)));
> +};
> +
> +T
> +f1 (struct S x)
> +{
> +  struct S s = x;
> +  return *(T *) ((char *) &s.d + 1);
> +}
> +
> +int
> +f2 (struct S x)
> +{
> +  struct S s = x;
> +  return ((U *)((char *) &s.d + 1))[0];
> +}
> +
> +int
> +f3 (struct S x)
> +{
> +  struct S s = x;
> +  return ((U *)((char *) &s.d + 1))[1];
> +}
> +
> +int
> +f4 (struct S x)
> +{
> +  struct S s = x;
> +  return ((U *)((char *) &s.d + 1))[2];
> +}
> +
> +int
> +f5 (struct S x)
> +{
> +  struct S s = x;
> +  return ((U *)((char *) &s.d + 1))[3];
> +}
> --- gcc/testsuite/gcc.dg/pr48335-7.c.jj 2011-03-30 13:09:30.000000000 +0200
> +++ gcc/testsuite/gcc.dg/pr48335-7.c    2011-03-30 13:07:17.000000000 +0200
> @@ -0,0 +1,38 @@
> +/* PR middle-end/48335 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fno-tree-sra" } */
> +
> +typedef short U __attribute__((may_alias, aligned (1)));
> +
> +struct S
> +{
> +  double d;
> +};
> +
> +int
> +f1 (struct S x)
> +{
> +  struct S s = x;
> +  return ((U *)((char *) &s.d + 1))[0];
> +}
> +
> +int
> +f2 (struct S x)
> +{
> +  struct S s = x;
> +  return ((U *)((char *) &s.d + 1))[1];
> +}
> +
> +int
> +f3 (struct S x)
> +{
> +  struct S s = x;
> +  return ((U *)((char *) &s.d + 1))[2];
> +}
> +
> +int
> +f4 (struct S x)
> +{
> +  struct S s = x;
> +  return ((U *)((char *) &s.d + 1))[3];
> +}
> --- gcc/testsuite/gcc.dg/pr48335-8.c.jj 2011-03-30 13:09:30.000000000 +0200
> +++ gcc/testsuite/gcc.dg/pr48335-8.c    2011-03-30 13:08:17.000000000 +0200
> @@ -0,0 +1,31 @@
> +/* PR middle-end/48335 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fno-tree-sra" } */
> +
> +typedef short U __attribute__((may_alias, aligned (1)));
> +
> +struct S
> +{
> +  double d;
> +};
> +
> +int
> +f1 (struct S x)
> +{
> +  struct S s = x;
> +  return ((U *)((char *) &s.d + 1))[-1];
> +}
> +
> +int
> +f2 (struct S x)
> +{
> +  struct S s = x;
> +  return ((U *)((char *) &s.d + 1))[-2];
> +}
> +
> +int
> +f3 (struct S x)
> +{
> +  struct S s = x;
> +  return ((U *)((char *) &s.d + 1))[5];
> +}
> --- gcc/testsuite/gcc.target/i386/pr48335-1.c.jj        2011-03-30 10:57:16.000000000 +0200
> +++ gcc/testsuite/gcc.target/i386/pr48335-1.c   2011-03-29 18:28:12.000000000 +0200
> @@ -0,0 +1,32 @@
> +/* PR middle-end/48335 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fno-tree-sra -msse2" } */
> +
> +#include <emmintrin.h>
> +
> +typedef __float128 T __attribute__((may_alias));
> +
> +struct S
> +{
> +  _Complex double d __attribute__((aligned (16)));
> +};
> +
> +void bar (struct S);
> +
> +void
> +f1 (T x)
> +{
> +  struct S s;
> +  *(T *) &s.d = x;
> +  __real__ s.d *= 7.0;
> +  bar (s);
> +}
> +
> +void
> +f2 (__m128d x)
> +{
> +  struct S s;
> +  _mm_store_pd ((double *) &s.d, x);
> +  __real__ s.d *= 7.0;
> +  bar (s);
> +}
>
>        Jakub
>

Reply via email to