This patch is a revised version of one previously posted by Pat
Wellander a couple of years ago, to align arrays of size >= 16 bytes on
a 16-byte boundary to make better use of Altivec instructions.
http://gcc.gnu.org/ml/gcc-patches/2011-03/msg01345.html
The original patch only increased the alignment of local arrays, which
works because the stack is already aligned at 16 bytes for Altivec. The
comments on the original patch submission were that this should apply to
all arrays and that it should also look for structs containing arrays
that would benefit from being 16-byte-aligned as well. I've made both
those changes here, and enabled the additional alignment only when not
-Os to address concerns about wasting memory.
I noted that the additional alignment of static arrays is partially
redundant with existing generic vector optimizations; I xfailed a couple
of tests that were failing because there was nothing left for those
passes to do any more.
I regression-tested this on powerpc-none-eabi. OK to commit?
-Sandra
2013-05-20 Sandra Loosemore <san...@codesourcery.com>
Pat Wellander <pwell...@codesourcery.com>
gcc/
* config/rs6000/rs6000-protos.h (rs6000_data_alignment): Declare.
* config/rs6000/rs6000.c (align_for_altivec): New.
(rs6000_data_alignment): New.
* config/rs6000/rs6000.h (DATA_ALIGNMENT): Expand to call
rs6000_data_alignment.
gcc/testsuite/
* gcc.target/powerpc/altivec-35.c: New test case.
* gcc.dg/vect/section-anchors-vect-69.c: Xfail for Altivec.
* gcc.dg/vect/aligned-section-anchors-nest-1.c: Likewise.
Index: gcc/config/rs6000/rs6000-protos.h
===================================================================
--- gcc/config/rs6000/rs6000-protos.h (revision 199031)
+++ gcc/config/rs6000/rs6000-protos.h (working copy)
@@ -28,6 +28,7 @@
#ifdef TREE_CODE
extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, int, int, int,
tree, enum machine_mode);
+extern unsigned rs6000_data_alignment (const_tree, unsigned);
#endif /* TREE_CODE */
extern bool easy_altivec_constant (rtx, enum machine_mode);
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c (revision 199031)
+++ gcc/config/rs6000/rs6000.c (working copy)
@@ -5119,6 +5119,74 @@ invalid_e500_subreg (rtx op, enum machin
return false;
}
+/* Return true if TYPE is a candidate for extra alignment on Altivec.
+ This includes arrays with size of at least 16 bytes (128 bits) and
+ structs/unions with such an array at an offset that is a multiple
+ of 16 bytes. */
+static bool
+align_for_altivec (const_tree type)
+{
+ if (TREE_CODE (type) == ARRAY_TYPE
+ && tree_low_cst (TYPE_SIZE (type), 1) >= 128)
+ return true;
+
+ else if (TREE_CODE (type) == RECORD_TYPE
+ || TREE_CODE (type) == UNION_TYPE)
+ {
+ tree field;
+ for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
+ if (TREE_CODE (field) == FIELD_DECL
+ && (tree_low_cst (DECL_FIELD_OFFSET (field), 1) & 0xf) == 0
+ && align_for_altivec (TREE_TYPE (field)))
+ return true;
+ }
+
+ return false;
+}
+
+
+/* Implement DATA_ALIGNMENT. */
+
+unsigned
+rs6000_data_alignment (const_tree type, unsigned align)
+{
+ /* Align SPE vectors and paired floats to 64 bits and other vectors
+ to 128 bits. */
+ if (TREE_CODE (type) == VECTOR_TYPE)
+ {
+ if (TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type)))
+ return 64;
+ else if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type)))
+ return 64;
+ else
+ return 128;
+ }
+
+ /* Align E500 v2 doubles to 64 bits. */
+ if (TARGET_E500_DOUBLE
+ && TREE_CODE (type) == REAL_TYPE
+ && TYPE_MODE (type) == DFmode)
+ return 64;
+
+ /* Align Altivec arrays >= 16 bytes on a 16-byte boundary, but only
+ if not -Os. Also check for structs or unions containing such an array
+ appropriately aligned within the struct, and align them, too.
+ Note that on Altivec the stack is 16-byte aligned, otherwise this
+ would not work for local array variables. */
+ if (TARGET_ALTIVEC && !optimize_size && align_for_altivec (type))
+ return 128;
+
+ /* Make char arrays word-aligned so strcpy will be faster. */
+ if (TREE_CODE (type) == ARRAY_TYPE
+ && TYPE_MODE (TREE_TYPE (type)) == QImode
+ && align < BITS_PER_WORD)
+ return BITS_PER_WORD;
+
+ /* Use the specified alignment. */
+ return align;
+}
+
+
/* AIX increases natural record alignment to doubleword if the first
field is an FP double while the FP fields remain word aligned. */
Index: gcc/config/rs6000/rs6000.h
===================================================================
--- gcc/config/rs6000/rs6000.h (revision 199031)
+++ gcc/config/rs6000/rs6000.h (working copy)
@@ -782,21 +782,10 @@ extern unsigned rs6000_pointer_size;
? BITS_PER_WORD \
: (ALIGN))
-/* Make arrays of chars word-aligned for the same reasons.
- Align vectors to 128 bits. Align SPE vectors and E500 v2 doubles to
- 64 bits. */
+/* Data alignment requirements for SPE and Altivec are complicated enough
+ that we defer to a helper function. */
#define DATA_ALIGNMENT(TYPE, ALIGN) \
- (TREE_CODE (TYPE) == VECTOR_TYPE \
- ? (((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (TYPE))) \
- || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (TYPE)))) \
- ? 64 : 128) \
- : ((TARGET_E500_DOUBLE \
- && TREE_CODE (TYPE) == REAL_TYPE \
- && TYPE_MODE (TYPE) == DFmode) \
- ? 64 \
- : (TREE_CODE (TYPE) == ARRAY_TYPE \
- && TYPE_MODE (TREE_TYPE (TYPE)) == QImode \
- && (ALIGN) < BITS_PER_WORD) ? BITS_PER_WORD : (ALIGN)))
+ (rs6000_data_alignment ((TYPE), (ALIGN)))
/* Nonzero if move instructions will actually fail to work
when given unaligned data. */
Index: gcc/testsuite/gcc.target/powerpc/altivec-35.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/altivec-35.c (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/altivec-35.c (revision 0)
@@ -0,0 +1,64 @@
+/* altivec-35.c */
+/* { dg-do run { target powerpc*-*-* } } */
+/* { dg-require-effective-target powerpc_altivec_ok } */
+/* { dg-options "-O0 -maltivec" } */
+
+#include <stdlib.h>
+
+/* Check Alignment of large Altivec arrays is at least 128 bits. */
+
+struct s
+{
+ int a[5];
+ char b;
+};
+
+union u
+{
+ int a;
+ struct s ss;
+};
+
+long a1[5], a2[6], a3[4];
+char c1[17];
+struct s s1;
+union u u1;
+
+#define CHECK(ADDR) \
+ if (((long) (ADDR)) & 0xf) abort ();
+
+int main(void)
+{
+ long aa1[5], aa2[6], aa3[4];
+ char cc1[17];
+ struct s ss1;
+ union u uu1;
+
+ static long aaa1[5], aaa2[6], aaa3[4];
+ static char ccc1[17];
+ static struct s sss1;
+ static union u uuu1;
+
+ CHECK (a1);
+ CHECK (a2);
+ CHECK (a3);
+ CHECK (c1);
+ CHECK (&s1);
+ CHECK (&u1);
+
+ CHECK (aa1);
+ CHECK (aa2);
+ CHECK (aa3);
+ CHECK (cc1);
+ CHECK (&ss1);
+ CHECK (&uu1);
+
+ CHECK (aaa1);
+ CHECK (aaa2);
+ CHECK (aaa3);
+ CHECK (ccc1);
+ CHECK (&sss1);
+ CHECK (&uuu1);
+
+ return 0;
+}
Index: gcc/testsuite/gcc.dg/vect/section-anchors-vect-69.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/section-anchors-vect-69.c (revision 199031)
+++ gcc/testsuite/gcc.dg/vect/section-anchors-vect-69.c (working copy)
@@ -115,6 +115,7 @@ int main (void)
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */
/* Alignment forced using versioning until the pass that increases alignment
is extended to handle structs. */
-/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 4 "vect" { target {vect_int && vector_alignment_reachable } } } } */
+/* Altivec already aligns these arrays appropriately by default. */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 4 "vect" { target {vect_int && vector_alignment_reachable } xfail { powerpc_altivec_ok } } } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 4 "vect" { target {vect_int && {! vector_alignment_reachable} } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: gcc/testsuite/gcc.dg/vect/aligned-section-anchors-nest-1.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/aligned-section-anchors-nest-1.c (revision 199031)
+++ gcc/testsuite/gcc.dg/vect/aligned-section-anchors-nest-1.c (working copy)
@@ -30,5 +30,7 @@ int *foo(void)
return &c[0][0];
}
-/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 3 "increase_alignment" } } */
+/* Altivec already aligns these arrays appropriately by default, so there
+ is nothing for the increase_alignment pass to do. */
+/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 3 "increase_alignment" { xfail { powerpc_altivec_ok } } } } */
/* { dg-final { cleanup-ipa-dump "increase_alignment" } } */