------- Comment #7 from dorit at gcc dot gnu dot org 2007-07-01 09:59 ------- I'm testing the following patch. It seems to fix the two testcases in this PR on Pentium4; I still need to bootstrap, etc., and check the PowerPC bits.
Index: gcc/targhooks.c =================================================================== *** gcc/targhooks.c (revision 126162) --- gcc/targhooks.c (working copy) *************** tree default_mangle_decl_assembler_name *** 634,637 **** --- 634,653 ---- return id; } + bool + default_builtin_vector_alignment_reachable (tree type, bool is_packed) + { + if (is_packed) + return false; + + /* Assuming that types whose size is > pointer-size are not guaranteed to be + naturally aligned. */ + if (tree_int_cst_compare (TYPE_SIZE (type), bitsize_int (POINTER_SIZE)) > 0) + return false; + + /* Assuming that types whose size is <= pointer-size + are naturally aligned. */ + return true; + } + #include "gt-targhooks.h" Index: gcc/targhooks.h =================================================================== *** gcc/targhooks.h (revision 126162) --- gcc/targhooks.h (working copy) *************** extern tree default_builtin_vectorized_c *** 62,67 **** --- 62,69 ---- extern tree default_builtin_reciprocal (enum built_in_function, bool, bool); + extern bool default_builtin_vector_alignment_reachable (tree, bool); + /* These are here, and not in hooks.[ch], because not all users of hooks.h include tm.h, and thus we don't have CUMULATIVE_ARGS. */ Index: gcc/tree.h =================================================================== *** gcc/tree.h (revision 126162) --- gcc/tree.h (working copy) *************** extern tree get_inner_reference (tree, H *** 4327,4332 **** --- 4327,4338 ---- tree *, enum machine_mode *, int *, int *, bool); + /* Given an expression EXP that may be a COMPONENT_REF or an ARRAY_REF, + look for whether EXP or any nested component-refs within EXP is marked + as PACKED. */ + + extern bool contains_packed_reference (tree exp); + /* Return 1 if T is an expression that get_inner_reference handles. 
*/ extern int handled_component_p (tree); Index: gcc/target.h =================================================================== *** gcc/target.h (revision 126162) --- gcc/target.h (working copy) *************** struct gcc_target *** 413,418 **** --- 413,422 ---- element-by-element products for the odd elements. */ tree (* builtin_mul_widen_even) (tree); tree (* builtin_mul_widen_odd) (tree); + + /* Return true if vector alignment is reachable (by peeling N + interations) for the given type. */ + bool (* vector_alignment_reachable) (tree, bool); } vectorize; /* The initial value of target_flags. */ Index: gcc/testsuite/gcc.dg/vect/vect-align-1.c =================================================================== *** gcc/testsuite/gcc.dg/vect/vect-align-1.c (revision 0) --- gcc/testsuite/gcc.dg/vect/vect-align-1.c (revision 0) *************** *** 0 **** --- 1,50 ---- + /* { dg-require-effective-target vect_int } */ + + #include <stdlib.h> + #include <stdarg.h> + #include "tree-vect.h" + + /* Compile time known misalignment. Cannot use loop peeling to align + the store. 
*/ + + #define N 16 + + struct foo { + char x; + int y[N]; + } __attribute__((packed)); + + int + main1 (struct foo * __restrict__ p) + { + int i; + int x[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; + + for (i = 0; i < N; i++) + { + p->y[i] = x[i]; + } + + /* check results: */ + for (i = 0; i < N; i++) + { + if (p->y[i] != x[i]) + abort (); + } + return 0; + } + + + int main (void) + { + int i; + struct foo *p = malloc (2*sizeof (struct foo)); + check_vect (); + + main1 (p); + return 0; + } + + /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 1 "vect" } } */ + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ Index: gcc/testsuite/gcc.dg/vect/pr25413a.c =================================================================== *** gcc/testsuite/gcc.dg/vect/pr25413a.c (revision 0) --- gcc/testsuite/gcc.dg/vect/pr25413a.c (revision 0) *************** *** 0 **** --- 1,129 ---- + /* { dg-require-effective-target vect_double } */ + + #include <stdarg.h> + #include "tree-vect.h" + + #define N 8 + + typedef unsigned int size_t; + + extern void *malloc (size_t __size) __attribute__ ((__nothrow__)) __attribute__ ((__malloc__)); + + typedef double num_t; + static const num_t num__infty = ((num_t)1.0)/((num_t)0.0); + + struct oct_tt; + typedef struct oct_tt oct_t; + + typedef unsigned int var_t; + typedef enum { + OCT_EMPTY = 0, + OCT_NORMAL = 1, + OCT_CLOSED = 2 + } oct_state; + + struct oct_tt { + var_t n; + + int ref; + + oct_state state; + struct oct_tt* closed; + + num_t* c; + }; + + void* octfapg_mm_malloc (size_t t); + oct_t* octfapg_alloc (var_t n); + oct_t* octfapg_full_copy (oct_t* m); + + struct mmalloc_tt; + typedef struct mmalloc_tt mmalloc_t; + + struct mmalloc_tt + { + int id; + + int nb_alloc; + int nb_realloc; + int nb_free; + + size_t rem; + size_t max; + size_t tot; + + }; + + typedef struct + { + size_t size; + + mmalloc_t* mm; + int id; + + double dummy; 
+ + } mmheader_t; + + void* + octfapg_mm_malloc (size_t t) + { + char* m = (char*)malloc(t+sizeof(mmheader_t)); + return m+sizeof(mmheader_t); + } + + oct_t* octfapg_empty (var_t n); + + oct_t* + octfapg_empty (const var_t n) + { + oct_t* m; + /*octfapg_timing_enter("oct_empty",3);*/ + m = ((oct_t*) octfapg_mm_malloc (sizeof(oct_t))); + m->n = n; + m->ref = 1; + m->state = OCT_EMPTY; + m->closed = (oct_t*)((void *)0); + m->c = (num_t*)((void *)0); + /*octfapg_timing_exit("oct_empty",3);*/ + return m; + } + + oct_t* + octfapg_alloc (const var_t n) + { + size_t nn = (2*(size_t)(n)*((size_t)(n)+1)); + oct_t* m; + m = octfapg_empty(n); + m->c = ((num_t*) octfapg_mm_malloc (sizeof(num_t)*(nn))); + ; + m->state = OCT_NORMAL; + m->closed = (oct_t*)((void *)0); + return m; + } + + oct_t* + octfapg_universe (const var_t n) + { + oct_t* m; + size_t i, nn = (2*(size_t)(n)*((size_t)(n)+1)); + m = octfapg_alloc(n); + for (i=0;i<nn;i++) *(m->c+i) = num__infty; + for (i=0;i<2*n;i++) *(m->c+((size_t)(i)+(((size_t)(i)+1)*((size_t)(i)+1))/2)) = (num_t)(0); + m->state = OCT_CLOSED; + return m; + } + + int main (void) + { + int i; + check_vect (); + + oct_t *p = octfapg_universe(10); + return 0; + } + + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ + /* { dg-final { scan-tree-dump-times "vector alignment may not be reachable" 1 "vect" } } */ + /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 1 "vect" } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ Index: gcc/testsuite/gcc.dg/vect/vect-align-2.c =================================================================== *** gcc/testsuite/gcc.dg/vect/vect-align-2.c (revision 0) --- gcc/testsuite/gcc.dg/vect/vect-align-2.c (revision 0) *************** *** 0 **** --- 1,46 ---- + /* { dg-require-effective-target vect_int } */ + /* { dg-do run } */ + + #include <stdlib.h> + #include <stdarg.h> + #include "tree-vect.h" + + /* Compile time unknown misalignment. 
Cannot use loop peeling to align + the store. */ + + #define N 17 + + struct foo { + char x0; + int y[N][N]; + } __attribute__ ((packed)); + + struct foo f2; + int z[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; + + void fbar(struct foo *fp) + { + int i,j; + for (i=0; i<N; i++) + for (j=0; j<N; j++) + f2.y[i][j] = z[i]; + + for (i=0; i<N; i++) + for (j=0; j<N; j++) + if (f2.y[i][j] != z[i]) + abort (); + } + + int main (void) + { + struct foo *fp = (struct foo *) malloc (2*sizeof (struct foo)); + + fbar(fp); + return 0; + } + + + /* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" } } */ + /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 1 "vect" } } */ + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ Index: gcc/testsuite/gcc.dg/vect/pr31699.c =================================================================== *** gcc/testsuite/gcc.dg/vect/pr31699.c (revision 126162) --- gcc/testsuite/gcc.dg/vect/pr31699.c (working copy) *************** int main() *** 31,35 **** } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_intfloat_cvt } } } */ ! /* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_no_align } } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ --- 31,36 ---- } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_intfloat_cvt } } } */ ! /* { dg-final { scan-tree-dump-times "vector alignment may not be reachable" 1 "vect" } } */ ! 
/* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 1 "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ Index: gcc/testsuite/gcc.dg/vect/pr25413.c =================================================================== *** gcc/testsuite/gcc.dg/vect/pr25413.c (revision 0) --- gcc/testsuite/gcc.dg/vect/pr25413.c (revision 0) *************** *** 0 **** --- 1,37 ---- + /* { dg-require-effective-target vect_double } */ + + #include <stdarg.h> + #include "tree-vect.h" + + #define N 8 + + struct + { + char c; + double d[N]; + } a; + + int main1() + { + int i; + for ( i=0; i<N; ++i ) + a.d[i]=1; + return 0; + } + + int main (void) + { + int i; + check_vect (); + + main1 (); + for (i=0; i<N; i++) + if (a.d[i] != 1) + abort (); + return 0; + } + + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */ + /* { dg-final { scan-tree-dump-times "vector alignment may not be reachable" 1 "vect" } } */ + /* { dg-final { scan-tree-dump-times "not vectorized: unsupported unaligned store" 1 "vect" } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ Index: gcc/expr.c =================================================================== *** gcc/expr.c (revision 126162) --- gcc/expr.c (working copy) *************** get_inner_reference (tree exp, HOST_WIDE *** 5924,5929 **** --- 5924,5966 ---- return exp; } + bool + contains_packed_reference (tree exp) + { + bool packed_p = false; + + while (1) + { + switch (TREE_CODE (exp)) + { + case COMPONENT_REF: + { + tree field = TREE_OPERAND (exp, 1); + packed_p = DECL_PACKED (field) + || TYPE_PACKED (TREE_TYPE (field)) /* CHECKME */ + || TYPE_PACKED (TREE_TYPE (exp)); + if (packed_p) + goto done; + } + break; + + case BIT_FIELD_REF: + case ARRAY_REF: + case ARRAY_RANGE_REF: + case REALPART_EXPR: + case IMAGPART_EXPR: + case VIEW_CONVERT_EXPR: + break; + + default: + goto done; + } + exp = TREE_OPERAND (exp, 0); + } + done: + return packed_p; + } + /* Return a tree of sizetype representing 
the size, in bytes, of the element of EXP, an ARRAY_REF. */ Index: gcc/tree-vect-analyze.c =================================================================== *** gcc/tree-vect-analyze.c (revision 126162) --- gcc/tree-vect-analyze.c (working copy) *************** Software Foundation, 51 Franklin Street, *** 25,30 **** --- 25,31 ---- #include "tm.h" #include "ggc.h" #include "tree.h" + #include "target.h" #include "basic-block.h" #include "diagnostic.h" #include "tree-flow.h" *************** vect_verify_datarefs_alignment (loop_vec *** 1379,1384 **** --- 1380,1449 ---- } + static bool + vector_alignment_reachable_p (struct data_reference *dr) + { + tree stmt = DR_STMT (dr); + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + tree vectype = STMT_VINFO_VECTYPE (stmt_info); + + if (DR_GROUP_FIRST_DR (stmt_info)) + { + /* For interleaved access we peel only if number of iterations in + the prolog loop ({VF - misalignment}), is a multiple of the + number of the interleaved accesses. */ + int elem_size, mis_in_elements; + int nelements = TYPE_VECTOR_SUBPARTS (vectype); + + /* FORNOW: handle only known alignment. */ + if (!known_alignment_for_access_p (dr)) + return false; + + elem_size = UNITS_PER_SIMD_WORD / nelements; + mis_in_elements = DR_MISALIGNMENT (dr) / elem_size; + + if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info)) + return false; + } + + /* If misalignment is known at the compiler time then apply peeling + only if natural alignment is reachable through peeling. */ + if (known_alignment_for_access_p (dr) && !aligned_access_p (dr)) + { + HOST_WIDE_INT elmsize = + int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype))); + if (DR_MISALIGNMENT (dr) % elmsize) + { + if (vect_print_dump_info (REPORT_DETAILS)) + { + fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize); + fprintf (vect_dump, ". misalignment = %d. 
", DR_MISALIGNMENT (dr)); + fprintf (vect_dump, "data size does not divide the misalignment.\n"); + } + return false; + } + } + + if (!known_alignment_for_access_p (dr)) + { + tree type = (TREE_TYPE (DR_REF (dr))); + tree ba = DR_BASE_OBJECT (dr); + bool is_packed = false; + + if (ba) + is_packed = contains_packed_reference (ba); + + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed); + if (targetm.vectorize.vector_alignment_reachable (type, is_packed)) + return true; + else + return false; + } + + return true; + } + /* Function vect_enhance_data_refs_alignment This pass will use loop versioning and loop peeling in order to enhance *************** vect_enhance_data_refs_alignment (loop_v *** 1540,1572 **** if (!DR_IS_READ (dr) && !aligned_access_p (dr)) { ! if (DR_GROUP_FIRST_DR (stmt_info)) ! { ! /* For interleaved access we peel only if number of iterations in ! the prolog loop ({VF - misalignment}), is a multiple of the ! number of the interleaved accesses. */ ! int elem_size, mis_in_elements; ! tree vectype = STMT_VINFO_VECTYPE (stmt_info); ! int nelements = TYPE_VECTOR_SUBPARTS (vectype); ! ! /* FORNOW: handle only known alignment. */ ! if (!known_alignment_for_access_p (dr)) ! { ! do_peeling = false; ! break; ! } ! ! elem_size = UNITS_PER_SIMD_WORD / nelements; ! mis_in_elements = DR_MISALIGNMENT (dr) / elem_size; ! ! if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info)) ! { ! do_peeling = false; ! break; ! } ! } ! dr0 = dr; ! do_peeling = true; break; } } --- 1605,1615 ---- if (!DR_IS_READ (dr) && !aligned_access_p (dr)) { ! do_peeling = vector_alignment_reachable_p (dr); ! if (do_peeling) ! dr0 = dr; ! if (!do_peeling && vect_print_dump_info (REPORT_DETAILS)) ! 
fprintf (vect_dump, "vector alignment may not be reachable"); break; } } Index: gcc/target-def.h =================================================================== *** gcc/target-def.h (revision 126162) --- gcc/target-def.h (working copy) *************** Foundation, 51 Franklin Street, Fifth Fl *** 356,361 **** --- 356,364 ---- default_builtin_vectorized_conversion #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN 0 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD 0 + #define TARGET_VECTOR_ALIGNMENT_REACHABLE \ + default_builtin_vector_alignment_reachable + #define TARGET_VECTORIZE \ { \ *************** Foundation, 51 Franklin Street, Fifth Fl *** 363,369 **** TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION, \ TARGET_VECTORIZE_BUILTIN_CONVERSION, \ TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN, \ ! TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD \ } #define TARGET_DEFAULT_TARGET_FLAGS 0 --- 366,373 ---- TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION, \ TARGET_VECTORIZE_BUILTIN_CONVERSION, \ TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN, \ ! TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD, \ ! 
TARGET_VECTOR_ALIGNMENT_REACHABLE \ } #define TARGET_DEFAULT_TARGET_FLAGS 0 Index: gcc/config/rs6000/rs6000.c =================================================================== *** gcc/config/rs6000/rs6000.c (revision 126162) --- gcc/config/rs6000/rs6000.c (working copy) *************** static tree rs6000_builtin_mul_widen_odd *** 717,722 **** --- 717,723 ---- static tree rs6000_builtin_conversion (enum tree_code, tree); static void def_builtin (int, const char *, tree, int); + static bool rs6000_vector_alignment_reachable (tree, bool); static void rs6000_init_builtins (void); static rtx rs6000_expand_unop_builtin (enum insn_code, tree, rtx); static rtx rs6000_expand_binop_builtin (enum insn_code, tree, rtx); *************** static const char alt_reg_names[][8] = *** 984,989 **** --- 985,993 ---- #undef TARGET_VECTORIZE_BUILTIN_CONVERSION #define TARGET_VECTORIZE_BUILTIN_CONVERSION rs6000_builtin_conversion + #undef TARGET_VECTOR_ALIGNMENT_REACHABLE + #define TARGET_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable + #undef TARGET_INIT_BUILTINS #define TARGET_INIT_BUILTINS rs6000_init_builtins *************** rs6000_builtin_mul_widen_odd (tree type) *** 1806,1811 **** --- 1810,1844 ---- } } + + /* Return true iff, data reference of TYPE can reach vector alignment (16) + after applying N number of iterations. This routine does not determine + how may iterations are required to reach desired alignment. */ + + static bool + rs6000_vector_alignment_reachable (tree type, bool is_packed) + { + if (is_packed) + return false; + + if (TARGET_MACHO) + { + if (TARGET_32BIT) + { + if (rs6000_alignment_flags == MASK_ALIGN_NATURAL) + return true; + + if (rs6000_alignment_flags == MASK_ALIGN_POWER) + return true; + } + return false; + } + + /* Assuming that all other types are naturally aligned. CHECKME! */ + return true; + } + + /* Handle generic options of the form -mfoo=yes/no. NAME is the option name. VALUE is the option value. 
-- http://gcc.gnu.org/bugzilla/show_bug.cgi?id=25413