On Thu, 25 Sep 2014, Jakub Jelinek wrote: > Hi! > > As the testcases show, dr_explicit_realign_optimized (used on PowerPC/SPU > only) misbehaves if the base_address is in between 1 and vector element size > - 1 > modulo vector size. > The problem is that it wants to add a bias to base_addr such that > base_addr & ~vector_size > (base_addr + bias) & ~vector_size > are adjacent vector_size memory slots, but vect_create_data_ref_ptr > takes offset counted in vector elements and in the end multiplies that > by vector element size, so we end up actually with: > (base_addr + ((vector_size / vector_element_size) * vector_element_size) & > ~vector_size > which unfortunately is not enough, e.g. in the testcase > base_addr is 1 moduo vector_size, vector_size 16 and vector_element_size 2, > so we have > base_addr & ~16 > (base_addr + 14) & ~16 > instead of the desired > (base_addr + 15) & ~16 > and 1 & ~16 and (1 + 14) & ~16 is the same address. > > Fixed by passing another offset, measured in bytes (for the negative case > which are the only 3 other cases which pass any offset down we want it to be > as is), bootstrapped/regtested on x86_64-linux and i686-linux and on > {x86_64,i686,powerpc{,64},s390{,x}}-linux on the 4.8 branch. > > Ok for trunk/4.9/4.8?
Ok. Thanks, Richard. > 2014-09-25 Jakub Jelinek <ja...@redhat.com> > > PR tree-optimization/63341 > * tree-vectorizer.h (vect_create_data_ref_ptr, > vect_create_addr_base_for_vector_ref): Add another tree argument > defaulting to NULL_TREE. > * tree-vect-data-refs.c (vect_create_data_ref_ptr): Add byte_offset > argument, pass it down to vect_create_addr_base_for_vector_ref. > (vect_create_addr_base_for_vector_ref): Add byte_offset argument, > add that to base_offset too if non-NULL. > * tree-vect-stmts.c (vectorizable_load): Add byte_offset variable, > for dr_explicit_realign_optimized set it to vector byte size > - 1 instead of setting offset, pass byte_offset down to > vect_create_data_ref_ptr. > > * gcc.dg/vect/pr63341-1.c: New test. > * gcc.dg/vect/pr63341-2.c: New test. > > --- gcc/tree-vectorizer.h.jj 2014-09-01 09:43:56.000000000 +0200 > +++ gcc/tree-vectorizer.h 2014-09-23 15:19:28.302484227 +0200 > @@ -1061,7 +1061,8 @@ extern bool vect_analyze_data_refs (loop > unsigned *); > extern tree vect_create_data_ref_ptr (gimple, tree, struct loop *, tree, > tree *, gimple_stmt_iterator *, > - gimple *, bool, bool *); > + gimple *, bool, bool *, > + tree = NULL_TREE); > extern tree bump_vector_ptr (tree, gimple, gimple_stmt_iterator *, gimple, > tree); > extern tree vect_create_destination_var (tree, tree); > extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT); > @@ -1078,7 +1079,8 @@ extern void vect_transform_grouped_load > extern void vect_record_grouped_load_vectors (gimple, vec<tree> ); > extern tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *); > extern tree vect_create_addr_base_for_vector_ref (gimple, gimple_seq *, > - tree, struct loop *); > + tree, struct loop *, > + tree = NULL_TREE); > > /* In tree-vect-loop.c. */ > /* FORNOW: Used in tree-parloops.c. */ > --- gcc/tree-vect-data-refs.c.jj 2014-09-18 15:48:22.000000000 +0200 > +++ gcc/tree-vect-data-refs.c 2014-09-23 15:11:06.163061112 +0200 > @@ -3860,6 +3860,9 @@ vect_get_new_vect_var (tree type, enum v > is as follows: > if LOOP=i_loop: &in (relative to i_loop) > if LOOP=j_loop: &in+i*2B (relative to j_loop) > + BYTE_OFFSET: Optional, defaulted to NULL. If supplied, it is added to the > + initial address. Unlike OFFSET, which is number of elements to > + be added, BYTE_OFFSET is measured in bytes. > > Output: > 1. Return an SSA_NAME whose value is the address of the memory location of > @@ -3873,7 +3876,8 @@ tree > vect_create_addr_base_for_vector_ref (gimple stmt, > gimple_seq *new_stmt_list, > tree offset, > - struct loop *loop) > + struct loop *loop, > + tree byte_offset) > { > stmt_vec_info stmt_info = vinfo_for_stmt (stmt); > struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); > @@ -3926,6 +3930,12 @@ vect_create_addr_base_for_vector_ref (gi > base_offset = fold_build2 (PLUS_EXPR, sizetype, > base_offset, offset); > } > + if (byte_offset) > + { > + byte_offset = fold_convert (sizetype, byte_offset); > + base_offset = fold_build2 (PLUS_EXPR, sizetype, > + base_offset, byte_offset); > + } > > /* base + base_offset */ > if (loop_vinfo) > @@ -3983,6 +3993,10 @@ vect_create_addr_base_for_vector_ref (gi > 5. BSI: location where the new stmts are to be placed if there is no loop > 6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain > pointing to the initial address. > + 7. BYTE_OFFSET (optional, defaults to NULL): a byte offset to be added > + to the initial address accessed by the data-ref in STMT. This is > + similar to OFFSET, but OFFSET is counted in elements, while BYTE_OFFSET > + in bytes. > > Output: > 1. Declare a new ptr to vector_type, and have it point to the base of the > @@ -3996,6 +4010,8 @@ vect_create_addr_base_for_vector_ref (gi > initial_address = &a[init]; > if OFFSET is supplied: > initial_address = &a[init + OFFSET]; > + if BYTE_OFFSET is supplied: > + initial_address = &a[init] + BYTE_OFFSET; > > Return the initial_address in INITIAL_ADDRESS. > > @@ -4013,7 +4029,7 @@ tree > vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop, > tree offset, tree *initial_address, > gimple_stmt_iterator *gsi, gimple *ptr_incr, > - bool only_init, bool *inv_p) > + bool only_init, bool *inv_p, tree byte_offset) > { > const char *base_name; > stmt_vec_info stmt_info = vinfo_for_stmt (stmt); > @@ -4156,10 +4172,10 @@ vect_create_data_ref_ptr (gimple stmt, t > /* (2) Calculate the initial address of the aggregate-pointer, and set > the aggregate-pointer to point to it before the loop. */ > > - /* Create: (&(base[init_val+offset]) in the loop preheader. */ > + /* Create: (&(base[init_val+offset]+byte_offset) in the loop preheader. */ > > new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list, > - offset, loop); > + offset, loop, byte_offset); > if (new_stmt_list) > { > if (pe) > --- gcc/tree-vect-stmts.c.jj 2014-08-01 09:24:12.000000000 +0200 > +++ gcc/tree-vect-stmts.c 2014-09-23 15:09:31.832553172 +0200 > @@ -5600,6 +5600,7 @@ vectorizable_load (gimple stmt, gimple_s > int i, j, group_size, group_gap; > tree msq = NULL_TREE, lsq; > tree offset = NULL_TREE; > + tree byte_offset = NULL_TREE; > tree realignment_token = NULL_TREE; > gimple phi = NULL; > vec<tree> dr_chain = vNULL; > @@ -6261,7 +6262,8 @@ vectorizable_load (gimple stmt, gimple_s > if (alignment_support_scheme == dr_explicit_realign_optimized) > { > phi = SSA_NAME_DEF_STMT (msq); > - offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1); > + byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype), > + size_one_node); > } > } > else > @@ -6302,7 +6304,8 @@ vectorizable_load (gimple stmt, gimple_s > dataref_ptr > = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop, > offset, &dummy, gsi, &ptr_incr, > - simd_lane_access_p, &inv_p); > + simd_lane_access_p, &inv_p, > + byte_offset); > } > else if (dataref_offset) > dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, > --- gcc/testsuite/gcc.dg/vect/pr63341-1.c.jj 2014-09-23 15:27:24.658025472 > +0200 > +++ gcc/testsuite/gcc.dg/vect/pr63341-1.c 2014-09-23 15:28:16.645757979 > +0200 > @@ -0,0 +1,32 @@ > +/* PR tree-optimization/63341 */ > +/* { dg-do run } */ > + > +#include "tree-vect.h" > + > +typedef union U { unsigned short s; unsigned char c; } > __attribute__((packed)) U; > +struct S { char e __attribute__((aligned (64))); U s[32]; }; > +struct S t = {0, {{1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}, > + {9}, {10}, {11}, {12}, {13}, {14}, {15}, {16}, > + {17}, {18}, {19}, {20}, {21}, {22}, {23}, {24}, > + {25}, {26}, {27}, {28}, {29}, {30}, {31}, {32}}}; > +unsigned short d[32] = { 1 }; > + > +__attribute__((noinline, noclone)) void > +foo () > +{ > + int i; > + for (i = 0; i < 32; i++) > + d[i] = t.s[i].s; > + if (__builtin_memcmp (d, t.s, sizeof d)) > + abort (); > +} > + > +int > +main () > +{ > + check_vect (); > + foo (); > + return 0; > +} > + > +/* { dg-final { cleanup-tree-dump "vect" } } */ > --- gcc/testsuite/gcc.dg/vect/pr63341-2.c.jj 2014-09-23 15:27:27.906013904 > +0200 > +++ gcc/testsuite/gcc.dg/vect/pr63341-2.c 2014-09-23 15:30:29.874081634 > +0200 > @@ -0,0 +1,35 @@ > +/* PR tree-optimization/63341 */ > +/* { dg-do run } */ > + > +#include "tree-vect.h" > + > +typedef union U { unsigned short s; unsigned char c; } > __attribute__((packed)) U; > +struct S { char e __attribute__((aligned (64))); U s[32]; }; > +struct S t = {0, {{0x5010}, {0x5111}, {0x5212}, {0x5313}, {0x5414}, > {0x5515}, {0x5616}, {0x5717}, > + {0x5818}, {0x5919}, {0x5a1a}, {0x5b1b}, {0x5c1c}, {0x5d1d}, > {0x5e1e}, {0x5f1f}, > + {0x6020}, {0x6121}, {0x6222}, {0x6323}, {0x6424}, {0x6525}, > {0x6626}, {0x6727}, > + {0x6828}, {0x6929}, {0x6a2a}, {0x6b2b}, {0x6c2c}, {0x6d2d}, > {0x6e2e}, {0x6f2f}}}; > +unsigned short d[32] = { 1 }; > + > +__attribute__((noinline, noclone)) void > +foo () > +{ > + int i; > + for (i = 0; i < 32; i++) > + d[i] = t.s[i].s + 4; > + for (i = 0; i < 32; i++) > + if (d[i] != t.s[i].s + 4) > + abort (); > + else > + asm volatile ("" : : : "memory"); > +} > + > +int > +main () > +{ > + check_vect (); > + foo (); > + return 0; > +} > + > +/* { dg-final { cleanup-tree-dump "vect" } } */ > > Jakub > > -- Richard Biener <rguent...@suse.de> SUSE / SUSE Labs SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746 GF: Jeff Hawn, Jennifer Guild, Felix Imend"orffer