https://gcc.gnu.org/bugzilla/show_bug.cgi?id=117709

--- Comment #3 from Richard Biener <rguenth at gcc dot gnu.org> ---
I think it also shows that the OpenMP SIMD handling for the SIMD vars lacks
optimization:

  int D.2004[64]; 
  int D.2003[64];
  int D.2001[64];
...

those were supposed to be vector registers in the end, but we end up
with

  __builtin_memset (&D.2001, 0, 256); // loop distribution
  .MASK_STORE (&D.2004, 32B, { -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, { 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4 });
...
  vect__26.37_78 = .MASK_LOAD (&D.2001, 32B, { -1, -1, -1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0 }, _52(D));

etc.

In particular, a .MASK_LOAD with an UNDEF else value could be turned into a
non-masked load when it cannot trap.  But in general, for loop-masking or
len-masking of OpenMP SIMD loops, we may want to special-case the handling of
those lowering-introduced arrays.

In the GIMPLE I see in .optimized:

  vect__23.34_51 = .MASK_GATHER_LOAD (&MEM <int[11][101]> [(void *)&k + -88B],
{ 0, -15, -30, -45, -60, -75, -90, -105, -120, -135, -150, -165, -180, -195,
-210, -225, -240, -255, -270, -285, -300, -315, -330, -345, -360, -375, -390,
-405, -420, -435, -450, -465, -480, -495, -510, -525, -540, -555, -570, -585,
-600, -615, -630, -645, -660, -675, -690, -705, -720, -735, -750, -765, -780,
-795, -810, -825, -840, -855, -870, -885, -900, -915, -930, -945 }, 4, { 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, { -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
_53(D));

That looks odd: we load from before 'k' here.  The DR says

#(Data Ref: 
#  bb: 7 
#  stmt: _23 = k[0][_22];
#  ref: k[0][_22];
#  base_object: k;
#  Access function 0: {41, +, -15}_4
#  Access function 1: 0

which looks correct to me, but the initial value we choose is odd.  We do

              if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
                vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
                                             slp_node, &gs_info, &dataref_ptr,
                                             &vec_offsets);
              else
                dataref_ptr
                  = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
                                              at_loop, offset, &dummy, gsi,
                                              &ptr_incr, false, bump);

and pass offset == -252

This offset is, I think, initialized from get_negative_load_store_type but
not reset when we divert to VMAT_GATHER_SCATTER.

This is all in need of serious TLC ..

Testing a patch (I'll attach in a moment).

Reply via email to