[Bug tree-optimization/50444] [4.6/4.7 Regression] -ftree-sra ignores alignment

rguenth at gcc dot gnu.org Mon, 16 Jan 2012 07:25:43 -0800

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50444


--- Comment #12 from Richard Guenther <rguenth at gcc dot gnu.org> 2012-01-16 
15:25:06 UTC ---
Testcase w/o includes that fails with 4.6 and 4.7:

/* 16-byte vector of long long, additionally marked __may_alias__.  */
typedef long long __m128i __attribute__ ((__vector_size__ (16),
__may_alias__));
/* 16-byte vector of int.  */
typedef int __v4si __attribute__ ((__vector_size__ (16)));
/* 16-byte vector of long long (without __may_alias__).  */
typedef long long __v2di __attribute__ ((__vector_size__ (16)));
/* Declared locally so the testcase needs no includes.  */
typedef unsigned int uint32_t;

/* Aggregate of four uint32_t values.  caster () checks at run time that
   it has the same size as a1xm128i.  */
typedef struct {
    uint32_t v[4];
} a4x32;

/* Add one to the first element of *X and return X itself.  */
a4x32* incr(a4x32* x)
{
  x->v[0] += 1;
  return x;
}

/* Aggregate wrapping a single __m128i vector.  */
typedef struct {
    __m128i m;
} a1xm128i;

/* Return an a1xm128i whose vector is the result of
   __builtin_ia32_pxor128 applied to IN.m and K.m (both viewed as
   __v2di).  */
static inline  a1xm128i ssefunc( a1xm128i in,  a1xm128i k)
{
  a1xm128i ret;
  ret.m = (__m128i)__builtin_ia32_pxor128 ((__v2di)in.m, (__v2di)k.m);
  return ret;
}

/* Copy the bytes of C4X32 into an a1xm128i, combine that with K via
   ssefunc (), copy the result bytes back into C4X32 and return it.
   Aborts if the two aggregate types differ in size.

   The statement forms here are deliberate: this is the function in
   which the problematic SRA transformation is observed.  */
static  a4x32  caster( a4x32 c4x32,  a1xm128i k)
{
  a1xm128i c1x128;
  /* The byte-wise copies below are only meaningful for equal sizes.  */
  if( sizeof(c4x32) != sizeof(c1x128) ) __builtin_abort();
  /* a4x32 -> a1xm128i, byte for byte.  */
  __builtin_memcpy(&c1x128, &c4x32, sizeof(c1x128));
  c1x128 = ssefunc(c1x128, k);
  /* a1xm128i -> a4x32, byte for byte.  */
  __builtin_memcpy(&c4x32, &c1x128, sizeof(c4x32));
  return c4x32;
}

/* State operated on by ctor () and method ().  */
typedef struct  {
    a1xm128i key;        /* Passed as K to caster () by method ().  */
    a4x32 c;             /* Incremented via incr () and fed to caster ().  */
    __SIZE_TYPE__ elem;  /* Index into V; method () pre-decrements it and
                            refills V when it is 0.  */
    a4x32 v;             /* Most recent caster () result.  */
} Engine;

/* Initialize *E: elem to 0, key.m to an all-zero vector and every
   element of c to 0.  E->v is not written here; method () fills it
   before reading because elem starts at 0.  */
void ctor(Engine *e)
{
  e->elem = 0;
  e->key.m = (__m128i)(__v4si){ 0, 0, 0, 0 };
  e->c.v[0] = 0;
  e->c.v[1] = 0;
  e->c.v[2] = 0;
  e->c.v[3] = 0;
}

/* Return the next value from E->v, refilling it first when E->elem is 0.
   A refill increments E->c.v[0], stores caster (E->c, E->key) into E->v
   and sets E->elem to 4.  E->elem is then pre-decremented and used as
   the index into E->v.v.  */
uint32_t method( Engine *e)
{
  if( e->elem == 0 )
    {
      /* incr () returns &e->c, which is dereferenced to pass the
         updated aggregate by value.  */
      e->v = caster(*incr(&e->c), e->key);
      e->elem = 4;
    }
  return e->v.v[--e->elem];
}

/* Initialize two Engine objects identically with ctor () and abort if
   their first method () results differ; otherwise return 0.  */
int main()
{
  Engine e4; ctor(&e4);
  Engine e5; ctor(&e5);
  if(method(&e4)!=method(&e5))
    __builtin_abort ();
  return 0;
}

The problematic SRA is indeed happening during ESRA (early SRA) in caster (),
which looks like this before SRA:

<bb 2>:
  MEM[(char * {ref-all})&c1x128] = MEM[(char * {ref-all})&c4x32];
  in = c1x128;
  k = k;
  D.1785_7 = k.m;
  D.1784_8 = in.m;
  D.1783_9 = __builtin_ia32_pxor128 (D.1784_8, D.1785_7);
  D.1782.m = D.1783_9;
  D.1780 = D.1782;
  c1x128 = D.1780;
  MEM[(char * {ref-all})&c4x32] = MEM[(char * {ref-all})&c1x128];
  D.1760 = c4x32;
  c1x128 ={v} {CLOBBER};
  return D.1760;

and after SRA:

<bb 2>:
  c4x32$m_4 = MEM[(struct  *)&c4x32].m;
  c1x128$m_14 = c4x32$m_4;
  in$m_13 = c1x128$m_14;
  k$m_12 = MEM[(struct  *)&k].m;
  D.1785_7 = k$m_12;
  D.1784_8 = in$m_13;
  D.1783_9 = __builtin_ia32_pxor128 (D.1784_8, D.1785_7);
  SR.6_11 = D.1783_9;
  SR.7_10 = SR.6_11;
  c1x128$m_2 = SR.7_10;
  c4x32$m_15 = c1x128$m_2;
  MEM[(struct  *)&D.1760].m = c4x32$m_15;
  c1x128$m_16 = { 0, 0 };
  return D.1760;

Notice that D.1760 is of type a4x32 and thus has only the alignment of an
integer.  But SRA constructs in place an object of c1x128's type (a1xm128i).
SRA analysis should have seen the alignment-breaking copy

  MEM[(char * {ref-all})&c4x32] = MEM[(char * {ref-all})&c1x128];

which uses a properly aligned type for the store.  Similarly the
prevailing store

  D.1760 = c4x32;

has the alignment of D.1760.

D.1760 already has a bogus type in lacc->type.  We can easily avoid
translating across aggregate copies that would transfer bogusly aligned types
to an access via the following change:

Index: tree-sra.c
===================================================================
--- tree-sra.c  (revision 183205)
+++ tree-sra.c  (working copy)
@@ -2290,7 +2290,9 @@ propagate_subaccesses_across_link (struc

   if (is_gimple_reg_type (racc->type))
     {
-      if (!lacc->first_child && !racc->first_child)
+      if (!lacc->first_child && !racc->first_child
+         && (get_object_alignment (lacc->expr)
+             >= get_object_alignment (racc->expr)))
        {
          tree t = lacc->base;

or make sure to transfer the alignment to a constructed bare(!) MEM_REF
from lacc->expr before overwriting that (assuming it retains the original
form up until here).

[Bug tree-optimization/50444] [4.6/4.7 Regression] -ftree-sra ignores alignment

Reply via email to