------- Comment #7 from dorit at gcc dot gnu dot org  2007-07-01 09:59 -------
I'm testing the following patch (it seems to fix the two testcases in this PR on
Pentium4; I still need to bootstrap etc., and check the powerpc bits).

Index: gcc/targhooks.c
===================================================================
*** gcc/targhooks.c     (revision 126162)
--- gcc/targhooks.c     (working copy)
*************** tree default_mangle_decl_assembler_name
*** 634,637 ****
--- 634,653 ----
     return id;
  }

+ bool
+ default_builtin_vector_alignment_reachable (tree type, bool is_packed)
+ {
+   if (is_packed)
+     return false;
+ 
+   /* Assuming that types whose size is > pointer-size are not guaranteed to be
+      naturally aligned.  */
+   if (tree_int_cst_compare (TYPE_SIZE (type), bitsize_int (POINTER_SIZE)) > 0)
+     return false;
+ 
+   /* Assuming that types whose size is <= pointer-size
+      are naturally aligned.  */
+   return true;
+ }
+ 
  #include "gt-targhooks.h"
Index: gcc/targhooks.h
===================================================================
*** gcc/targhooks.h     (revision 126162)
--- gcc/targhooks.h     (working copy)
*************** extern tree default_builtin_vectorized_c
*** 62,67 ****
--- 62,69 ----

  extern tree default_builtin_reciprocal (enum built_in_function, bool, bool);

+ extern bool default_builtin_vector_alignment_reachable (tree, bool);
+
  /* These are here, and not in hooks.[ch], because not all users of
     hooks.h include tm.h, and thus we don't have CUMULATIVE_ARGS.  */

Index: gcc/tree.h
===================================================================
*** gcc/tree.h  (revision 126162)
--- gcc/tree.h  (working copy)
*************** extern tree get_inner_reference (tree, H
*** 4327,4332 ****
--- 4327,4338 ----
                                 tree *, enum machine_mode *, int *, int *,
                                 bool);

+ /* Given an expression EXP that may be a COMPONENT_REF or an ARRAY_REF,
+    look for whether EXP or any nested component-refs within EXP is marked
+    as PACKED.  */
+ 
+ extern bool contains_packed_reference (tree exp);
+
  /* Return 1 if T is an expression that get_inner_reference handles.  */

  extern int handled_component_p (tree);
Index: gcc/target.h
===================================================================
*** gcc/target.h        (revision 126162)
--- gcc/target.h        (working copy)
*************** struct gcc_target
*** 413,418 ****
--- 413,422 ----
         element-by-element products for the odd elements.  */
      tree (* builtin_mul_widen_even) (tree);
      tree (* builtin_mul_widen_odd) (tree);
+ 
+     /* Return true if vector alignment is reachable (by peeling N
+        iterations) for the given type.  */
+     bool (* vector_alignment_reachable) (tree, bool);
    } vectorize;

    /* The initial value of target_flags.  */
Index: gcc/testsuite/gcc.dg/vect/vect-align-1.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/vect-align-1.c    (revision 0)
--- gcc/testsuite/gcc.dg/vect/vect-align-1.c    (revision 0)
***************
*** 0 ****
--- 1,50 ----
+ /* { dg-require-effective-target vect_int } */
+ 
+ #include <stdlib.h>
+ #include <stdarg.h>
+ #include "tree-vect.h"
+
+ /* Compile time known misalignment. Cannot use loop peeling to align
+    the store.  */
+ 
+ #define N 16
+
+ struct foo {
+   char x;
+   int y[N];
+ } __attribute__((packed));
+
+ int
+ main1 (struct foo * __restrict__ p)
+ {
+   int i;
+   int x[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+ 
+   for (i = 0; i < N; i++)
+     {
+       p->y[i] = x[i];
+     }
+ 
+   /* check results:  */
+   for (i = 0; i < N; i++)
+     {
+       if (p->y[i] != x[i])
+       abort ();
+     }
+   return 0;
+ }
+
+ 
+ int main (void)
+ {
+   int i;
+   struct foo *p = malloc (2*sizeof (struct foo));
+   check_vect ();
+   
+   main1 (p);
+   return 0;
+ }
+ 
+ /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 1 "vect" } } */
+ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+ /* { dg-final { cleanup-tree-dump "vect" } } */
Index: gcc/testsuite/gcc.dg/vect/pr25413a.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/pr25413a.c        (revision 0)
--- gcc/testsuite/gcc.dg/vect/pr25413a.c        (revision 0)
***************
*** 0 ****
--- 1,129 ----
+ /* { dg-require-effective-target vect_double } */
+ 
+ #include <stdarg.h>
+ #include "tree-vect.h"
+ 
+ #define N 8
+
+ typedef unsigned int size_t;
+ 
+ extern void *malloc (size_t __size) __attribute__ ((__nothrow__)) __attribute__ ((__malloc__));
+ 
+ typedef double num_t;
+ static const num_t num__infty = ((num_t)1.0)/((num_t)0.0);
+ 
+ struct oct_tt;
+ typedef struct oct_tt oct_t;
+ 
+ typedef unsigned int var_t;
+ typedef enum {
+   OCT_EMPTY = 0,
+   OCT_NORMAL = 1,
+   OCT_CLOSED = 2
+ } oct_state;
+ 
+ struct oct_tt {
+   var_t n;
+ 
+   int ref;
+ 
+   oct_state state;
+   struct oct_tt* closed;
+ 
+   num_t* c;
+ };
+ 
+ void* octfapg_mm_malloc (size_t t);
+ oct_t* octfapg_alloc (var_t n);
+ oct_t* octfapg_full_copy (oct_t* m);
+ 
+ struct mmalloc_tt;
+ typedef struct mmalloc_tt mmalloc_t;
+ 
+ struct mmalloc_tt
+ {
+   int id;
+ 
+   int nb_alloc;
+   int nb_realloc;
+   int nb_free;
+ 
+   size_t rem;
+   size_t max;
+   size_t tot;
+ 
+ };
+ 
+ typedef struct
+ {
+   size_t size;
+ 
+   mmalloc_t* mm;
+   int id;
+ 
+   double dummy;
+ 
+ } mmheader_t;
+
+ void*
+ octfapg_mm_malloc (size_t t)
+ {
+   char* m = (char*)malloc(t+sizeof(mmheader_t));
+   return m+sizeof(mmheader_t);
+ }
+ 
+ oct_t* octfapg_empty (var_t n);
+ 
+ oct_t*
+ octfapg_empty (const var_t n)
+ {
+   oct_t* m;
+   /*octfapg_timing_enter("oct_empty",3);*/
+   m = ((oct_t*) octfapg_mm_malloc (sizeof(oct_t)));
+   m->n = n;
+   m->ref = 1;
+   m->state = OCT_EMPTY;
+   m->closed = (oct_t*)((void *)0);
+   m->c = (num_t*)((void *)0);
+   /*octfapg_timing_exit("oct_empty",3);*/
+   return m;
+ }
+
+ oct_t*
+ octfapg_alloc (const var_t n)
+ {
+   size_t nn = (2*(size_t)(n)*((size_t)(n)+1));
+   oct_t* m;
+   m = octfapg_empty(n);
+   m->c = ((num_t*) octfapg_mm_malloc (sizeof(num_t)*(nn)));
+   ;
+   m->state = OCT_NORMAL;
+   m->closed = (oct_t*)((void *)0);
+   return m;
+ }
+ 
+ oct_t*
+ octfapg_universe (const var_t n)
+ {
+   oct_t* m;
+   size_t i, nn = (2*(size_t)(n)*((size_t)(n)+1));
+   m = octfapg_alloc(n);
+   for (i=0;i<nn;i++) *(m->c+i) = num__infty;
+   for (i=0;i<2*n;i++) *(m->c+((size_t)(i)+(((size_t)(i)+1)*((size_t)(i)+1))/2)) = (num_t)(0);
+   m->state = OCT_CLOSED;
+   return m;
+ }
+ 
+ int main (void)
+ { 
+   int i;
+   check_vect ();
+
+   oct_t *p = octfapg_universe(10);
+   return 0;
+ } 
+ 
+ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+ /* { dg-final { scan-tree-dump-times "vector alignment may not be reachable" 1 "vect" } } */
+ /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 1 "vect" } } */
+ /* { dg-final { cleanup-tree-dump "vect" } } */
Index: gcc/testsuite/gcc.dg/vect/vect-align-2.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/vect-align-2.c    (revision 0)
--- gcc/testsuite/gcc.dg/vect/vect-align-2.c    (revision 0)
***************
*** 0 ****
--- 1,46 ----
+ /* { dg-require-effective-target vect_int } */
+ /* { dg-do run } */
+
+ #include <stdlib.h>
+ #include <stdarg.h>
+ #include "tree-vect.h"
+ 
+ /* Compile time unknown misalignment. Cannot use loop peeling to align
+    the store.  */
+ 
+ #define N 17
+ 
+ struct foo {
+   char x0;
+   int y[N][N];
+ } __attribute__ ((packed));
+ 
+ struct foo f2;
+ int z[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+ 
+ void fbar(struct foo *fp)
+ {
+   int i,j;
+    for (i=0; i<N; i++)
+       for (j=0; j<N; j++)
+         f2.y[i][j] = z[i];
+
+    for (i=0; i<N; i++)
+       for (j=0; j<N; j++)
+       if (f2.y[i][j] != z[i])
+         abort ();
+ }
+
+ int main (void)
+ {
+   struct foo  *fp = (struct foo *) malloc (2*sizeof (struct foo));
+ 
+   fbar(fp);
+   return 0;
+ }
+ 
+ 
+ /* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" } } */
+ /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 1 "vect" } } */
+ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+ /* { dg-final { cleanup-tree-dump "vect" } } */
Index: gcc/testsuite/gcc.dg/vect/pr31699.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/pr31699.c (revision 126162)
--- gcc/testsuite/gcc.dg/vect/pr31699.c (working copy)
*************** int main()
*** 31,35 ****
  }

  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_intfloat_cvt } } } */
! /* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_no_align } } } */
  /* { dg-final { cleanup-tree-dump "vect" } } */
--- 31,36 ----
  }

  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_intfloat_cvt } } } */
! /* { dg-final { scan-tree-dump-times "vector alignment may not be reachable" 1 "vect" } } */
! /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 1 "vect" } } */
  /* { dg-final { cleanup-tree-dump "vect" } } */
Index: gcc/testsuite/gcc.dg/vect/pr25413.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/pr25413.c (revision 0)
--- gcc/testsuite/gcc.dg/vect/pr25413.c (revision 0)
***************
*** 0 ****
--- 1,37 ----
+ /* { dg-require-effective-target vect_double } */
+ 
+ #include <stdarg.h>
+ #include "tree-vect.h"
+ 
+ #define N 8
+ 
+ struct
+ {
+   char c;
+   double d[N];
+ } a;
+ 
+ int main1()
+ {
+   int i;
+   for ( i=0; i<N; ++i )
+     a.d[i]=1;
+   return 0;
+ }
+ 
+ int main (void)
+ { 
+   int i;
+   check_vect ();
+   
+   main1 ();
+   for (i=0; i<N; i++)
+     if (a.d[i] != 1)
+       abort ();
+   return 0;
+ } 
+ 
+ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
+ /* { dg-final { scan-tree-dump-times "vector alignment may not be reachable" 1 "vect" } } */
+ /* { dg-final { scan-tree-dump-times "not vectorized: unsupported unaligned store" 1 "vect" } } */
+ /* { dg-final { cleanup-tree-dump "vect" } } */
Index: gcc/expr.c
===================================================================
*** gcc/expr.c  (revision 126162)
--- gcc/expr.c  (working copy)
*************** get_inner_reference (tree exp, HOST_WIDE
*** 5924,5929 ****
--- 5924,5966 ----
    return exp;
  }

+ bool
+ contains_packed_reference (tree exp)
+ {
+   bool packed_p = false;
+
+   while (1)
+     {
+       switch (TREE_CODE (exp))
+       {
+       case COMPONENT_REF:
+         {
+           tree field = TREE_OPERAND (exp, 1);
+           packed_p = DECL_PACKED (field) 
+                      || TYPE_PACKED (TREE_TYPE (field)) /* CHECKME */
+                      || TYPE_PACKED (TREE_TYPE (exp));
+           if (packed_p)
+             goto done;
+         }
+         break;
+ 
+       case BIT_FIELD_REF:
+       case ARRAY_REF:
+       case ARRAY_RANGE_REF:
+       case REALPART_EXPR:
+       case IMAGPART_EXPR:
+       case VIEW_CONVERT_EXPR:
+         break;
+ 
+       default:
+         goto done;
+       }
+       exp = TREE_OPERAND (exp, 0);
+     }
+  done:
+   return packed_p;
+ }
+ 
  /* Return a tree of sizetype representing the size, in bytes, of the element
     of EXP, an ARRAY_REF.  */

Index: gcc/tree-vect-analyze.c
===================================================================
*** gcc/tree-vect-analyze.c     (revision 126162)
--- gcc/tree-vect-analyze.c     (working copy)
*************** Software Foundation, 51 Franklin Street,
*** 25,30 ****
--- 25,31 ----
  #include "tm.h"
  #include "ggc.h"
  #include "tree.h"
+ #include "target.h"
  #include "basic-block.h"
  #include "diagnostic.h"
  #include "tree-flow.h"
*************** vect_verify_datarefs_alignment (loop_vec
*** 1379,1384 ****
--- 1380,1449 ----
  }


+ static bool
+ vector_alignment_reachable_p (struct data_reference *dr)
+ {
+   tree stmt = DR_STMT (dr);
+   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+ 
+   if (DR_GROUP_FIRST_DR (stmt_info))
+     {
+       /* For interleaved access we peel only if number of iterations in
+        the prolog loop ({VF - misalignment}), is a multiple of the
+        number of the interleaved accesses.  */
+       int elem_size, mis_in_elements;
+       int nelements = TYPE_VECTOR_SUBPARTS (vectype);
+ 
+       /* FORNOW: handle only known alignment.  */
+       if (!known_alignment_for_access_p (dr))
+       return false;
+ 
+       elem_size = UNITS_PER_SIMD_WORD / nelements;
+       mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
+ 
+       if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
+       return false;
+     }
+ 
+   /* If misalignment is known at compile time then apply peeling
+      only if natural alignment is reachable through peeling.  */
+   if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
+     {
+       HOST_WIDE_INT elmsize = 
+               int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
+       if (DR_MISALIGNMENT (dr) % elmsize)
+       {
+         if (vect_print_dump_info (REPORT_DETAILS))
+           {
+             fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
+             fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
+             fprintf (vect_dump, "data size does not divide the misalignment.\n");
+           }
+         return false;
+       }
+     }
+ 
+   if (!known_alignment_for_access_p (dr))
+     {
+       tree type = (TREE_TYPE (DR_REF (dr)));
+       tree ba = DR_BASE_OBJECT (dr);
+       bool is_packed = false;
+ 
+       if (ba)
+       is_packed = contains_packed_reference (ba);
+
+       if (vect_print_dump_info (REPORT_DETAILS))
+       fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
+       if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
+       return true;
+       else
+       return false;
+     }
+ 
+   return true;
+ }
+ 
  /* Function vect_enhance_data_refs_alignment

     This pass will use loop versioning and loop peeling in order to enhance
*************** vect_enhance_data_refs_alignment (loop_v
*** 1540,1572 ****

        if (!DR_IS_READ (dr) && !aligned_access_p (dr))
          {
!         if (DR_GROUP_FIRST_DR (stmt_info))
!           {
!             /* For interleaved access we peel only if number of iterations in
!                the prolog loop ({VF - misalignment}), is a multiple of the
!                number of the interleaved accesses.  */
!             int elem_size, mis_in_elements;
!             tree vectype = STMT_VINFO_VECTYPE (stmt_info);
!             int nelements = TYPE_VECTOR_SUBPARTS (vectype);
! 
!             /* FORNOW: handle only known alignment.  */
!             if (!known_alignment_for_access_p (dr))
!               {
!                 do_peeling = false;
!                 break;
!               }
! 
!             elem_size = UNITS_PER_SIMD_WORD / nelements;
!             mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
! 
!             if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
!               {
!                 do_peeling = false;
!                 break;
!               }
!           }
!         dr0 = dr;
!         do_peeling = true;
          break;
        }
      }
--- 1605,1615 ----

        if (!DR_IS_READ (dr) && !aligned_access_p (dr))
          {
!         do_peeling = vector_alignment_reachable_p (dr);
!         if (do_peeling)
!           dr0 = dr;
!         if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
!             fprintf (vect_dump, "vector alignment may not be reachable");
          break;
        }
      }
Index: gcc/target-def.h
===================================================================
*** gcc/target-def.h    (revision 126162)
--- gcc/target-def.h    (working copy)
*************** Foundation, 51 Franklin Street, Fifth Fl
*** 356,361 ****
--- 356,364 ----
    default_builtin_vectorized_conversion
  #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN 0
  #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD 0
+ #define TARGET_VECTOR_ALIGNMENT_REACHABLE \
+   default_builtin_vector_alignment_reachable
+ 

  #define TARGET_VECTORIZE                                                \
    {                                                                   \
*************** Foundation, 51 Franklin Street, Fifth Fl
*** 363,369 ****
      TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION,                     \
      TARGET_VECTORIZE_BUILTIN_CONVERSION,                              \
      TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN,                            \
!     TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD                            \
    }

  #define TARGET_DEFAULT_TARGET_FLAGS 0
--- 366,373 ----
      TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION,                     \
      TARGET_VECTORIZE_BUILTIN_CONVERSION,                              \
      TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN,                            \
!     TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD,                           \
!     TARGET_VECTOR_ALIGNMENT_REACHABLE                                 \
    }

  #define TARGET_DEFAULT_TARGET_FLAGS 0
Index: gcc/config/rs6000/rs6000.c
===================================================================
*** gcc/config/rs6000/rs6000.c  (revision 126162)
--- gcc/config/rs6000/rs6000.c  (working copy)
*************** static tree rs6000_builtin_mul_widen_odd
*** 717,722 ****
--- 717,723 ----
  static tree rs6000_builtin_conversion (enum tree_code, tree);

  static void def_builtin (int, const char *, tree, int);
+ static bool rs6000_vector_alignment_reachable (tree, bool);
  static void rs6000_init_builtins (void);
  static rtx rs6000_expand_unop_builtin (enum insn_code, tree, rtx);
  static rtx rs6000_expand_binop_builtin (enum insn_code, tree, rtx);
*************** static const char alt_reg_names[][8] =
*** 984,989 ****
--- 985,993 ----
  #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
  #define TARGET_VECTORIZE_BUILTIN_CONVERSION rs6000_builtin_conversion

+ #undef TARGET_VECTOR_ALIGNMENT_REACHABLE
+ #define TARGET_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
+ 
  #undef TARGET_INIT_BUILTINS
  #define TARGET_INIT_BUILTINS rs6000_init_builtins

*************** rs6000_builtin_mul_widen_odd (tree type)
*** 1806,1811 ****
--- 1810,1844 ----
      }
  }

+ 
+ /* Return true iff a data reference of TYPE can reach vector alignment (16)
+    after applying N number of iterations.  This routine does not determine
+    how many iterations are required to reach desired alignment.  */
+
+ static bool
+ rs6000_vector_alignment_reachable (tree type, bool is_packed)
+ {
+   if (is_packed)
+     return false;
+
+   if (TARGET_MACHO)
+     {
+       if (TARGET_32BIT)
+       {
+         if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
+           return true;
+ 
+         if (rs6000_alignment_flags ==  MASK_ALIGN_POWER)
+           return true;
+       }
+       return false;
+     }
+ 
+   /* Assuming that all other types are naturally aligned. CHECKME!  */
+   return true;
+ }
+ 
+
  /* Handle generic options of the form -mfoo=yes/no.
     NAME is the option name.
     VALUE is the option value.


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=25413

Reply via email to