Hi,

The patch fixes a bug in the AArch64 backend in calculating the starting address of the unnamed incoming arguments on the stack, i.e. the initial value of __va_list->__stack. aarch64_layout_arg incorrectly calculates the size of a named argument on the stack from the number of registers it would need, as if enough registers were available. This is wrong: for instance, when passed in registers an HFA/HVA* argument takes as many SIMD registers as it has fields; when passed on the stack, however, it occupies the size of its storage layout (rounded up to the nearest multiple of 8 bytes).

The bug only affects the va_start builtin: the positioning of outgoing arguments on the stack and the fetching of incoming named arguments from the stack are handled by other routines, such as aarch64_pad_arg_upward, rather than by aarch64_layout_arg.

The patch has passed bootstrapping.

OK for the trunk and 4.9.1 branch once the regtest passes as well?

Thanks,
Yufeng

* HFA: Homogeneous Floating-point Aggregate
  HVA: Homogeneous Short-Vector Aggregate


gcc/

        PR target/61483
        * config/aarch64/aarch64.c (aarch64_layout_arg): Add new local
        variable 'size'; calculate 'size' right in the front; use
        'size' to compute 'nregs' (when 'allocate_ncrn != 0') and
        pcum->aapcs_stack_words.

gcc/testsuite/

        PR target/61483
        * gcc.target/aarch64/aapcs64/type-def.h (struct hfa_fx3_t): New type.
        * gcc.target/aarch64/aapcs64/va_arg-13.c: New test.
        * gcc.target/aarch64/aapcs64/va_arg-14.c: Ditto.
        * gcc.target/aarch64/aapcs64/va_arg-15.c: Ditto.
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index fabd6a9..56a5a5d 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1459,6 +1459,7 @@ aarch64_layout_arg (cumulative_args_t pcum_v, enum 
machine_mode mode,
   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
   int ncrn, nvrn, nregs;
   bool allocate_ncrn, allocate_nvrn;
+  HOST_WIDE_INT size;
 
   /* We need to do this once per argument.  */
   if (pcum->aapcs_arg_processed)
@@ -1466,6 +1467,11 @@ aarch64_layout_arg (cumulative_args_t pcum_v, enum 
machine_mode mode,
 
   pcum->aapcs_arg_processed = true;
 
+  /* Size in bytes, rounded up to the nearest multiple of 8 bytes.  */
+  size
+    = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
+                       UNITS_PER_WORD);
+
   allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
   allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
                                                 mode,
@@ -1516,9 +1522,7 @@ aarch64_layout_arg (cumulative_args_t pcum_v, enum 
machine_mode mode,
     }
 
   ncrn = pcum->aapcs_ncrn;
-  nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
-          + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
-
+  nregs = size / UNITS_PER_WORD;
 
   /* C6 - C9.  though the sign and zero extension semantics are
      handled elsewhere.  This is the case where the argument fits
@@ -1567,13 +1571,12 @@ aarch64_layout_arg (cumulative_args_t pcum_v, enum 
machine_mode mode,
   pcum->aapcs_nextncrn = NUM_ARG_REGS;
 
   /* The argument is passed on stack; record the needed number of words for
-     this argument (we can re-use NREGS) and align the total size if
-     necessary.  */
+     this argument and align the total size if necessary.  */
 on_stack:
-  pcum->aapcs_stack_words = nregs;
+  pcum->aapcs_stack_words = size / UNITS_PER_WORD;
   if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
     pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
-                                              16 / UNITS_PER_WORD) + 1;
+                                              16 / UNITS_PER_WORD);
   return;
 }
 
diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/type-def.h 
b/gcc/testsuite/gcc.target/aarch64/aapcs64/type-def.h
index a95d06a..07e56ff 100644
--- a/gcc/testsuite/gcc.target/aarch64/aapcs64/type-def.h
+++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/type-def.h
@@ -34,6 +34,13 @@ struct hfa_fx2_t
   float b;
 };
 
+struct hfa_fx3_t
+{
+  float a;
+  float b;
+  float c;
+};
+
 struct hfa_dx2_t
 {
   double a;
diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-13.c 
b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-13.c
new file mode 100644
index 0000000..27c4099
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-13.c
@@ -0,0 +1,53 @@
+/* Test AAPCS64 layout and __builtin_va_start.
+
+   Pass named HFA/HVA argument on stack.  */
+
+/* { dg-do run { target aarch64*-*-* } } */
+
+#ifndef IN_FRAMEWORK
+#define AAPCS64_TEST_STDARG
+#define TESTFILE "va_arg-13.c"
+
+struct float_float_t
+{
+  float a;
+  float b;
+} float_float;
+
+union float_int_t
+{
+  float b8;
+  int b5;
+} float_int;
+
+#define HAS_DATA_INIT_FUNC
+void
+init_data ()
+{
+  float_float.a = 1.2f;
+  float_float.b = 2.2f;
+
+  float_int.b8 = 4983.80f;
+}
+
+#include "abitest.h"
+#else
+  ARG (float, 1.0f, S0, 0)
+  ARG (float, 2.0f, S1, 1)
+  ARG (float, 3.0f, S2, 2)
+  ARG (float, 4.0f, S3, 3)
+  ARG (float, 5.0f, S4, 4)
+  ARG (float, 6.0f, S5, 5)
+  ARG (float, 7.0f, S6, 6)
+  ARG (struct float_float_t, float_float, STACK, 7)
+  ARG (int,  9, W0, 8)
+  ARG (int, 10, W1, 9)
+  ARG (int, 11, W2, 10)
+  ARG (int, 12, W3, 11)
+  ARG (int, 13, W4, 12)
+  ARG (int, 14, W5, 13)
+  ARG (int, 15, W6, LAST_NAMED_ARG_ID)
+  DOTS
+  ANON (union float_int_t, float_int, W7, 15)
+  LAST_ANON (long long, 12683143434LL, STACK + 8, 16)
+#endif
diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-14.c 
b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-14.c
new file mode 100644
index 0000000..91080d5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-14.c
@@ -0,0 +1,35 @@
+/* Test AAPCS64 layout and __builtin_va_start.
+
+   Pass named HFA/HVA argument on stack.  */
+
+/* { dg-do run { target aarch64*-*-* } } */
+
+#ifndef IN_FRAMEWORK
+#define AAPCS64_TEST_STDARG
+#define TESTFILE "va_arg-14.c"
+#include "type-def.h"
+
+struct hfa_fx2_t hfa_fx2 = {1.2f, 2.2f};
+struct hfa_fx3_t hfa_fx3 = {3.2f, 4.2f, 5.2f};
+vf4_t float32x4 = {6.2f, 7.2f, 8.2f, 9.2f};
+vf4_t float32x4_2 = {10.2f, 11.2f, 12.2f, 13.2f};
+
+#include "abitest.h"
+#else
+  ARG (float, 1.0f, S0, 0)
+  ARG (float, 2.0f, S1, 1)
+  ARG (float, 3.0f, S2, 2)
+  ARG (float, 4.0f, S3, 3)
+  ARG (float, 5.0f, S4, 4)
+  ARG (float, 6.0f, S5, 5)
+  ARG (float, 7.0f, S6, 6)
+  ARG (struct hfa_fx3_t, hfa_fx3, STACK, 7)
+  /* Previous argument size has been rounded up to the nearest multiple of
+     8 bytes.  */
+  ARG (struct hfa_fx2_t, hfa_fx2, STACK + 16, 8)
+  /* NSAA is rounded up to the nearest natural alignment of float32x4.  */
+  ARG (vf4_t, float32x4, STACK + 32, 9)
+  ARG (vf4_t, float32x4_2, STACK + 48, LAST_NAMED_ARG_ID)
+  DOTS
+  LAST_ANON (double, 123456789.987, STACK + 64, 11)
+#endif
diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-15.c 
b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-15.c
new file mode 100644
index 0000000..2da4e46
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-15.c
@@ -0,0 +1,35 @@
+/* Test AAPCS64 layout and __builtin_va_start.
+
+   Pass named __int128 argument on stack.  */
+
+/* { dg-do run { target aarch64*-*-* } } */
+
+#ifndef IN_FRAMEWORK
+#define AAPCS64_TEST_STDARG
+#define TESTFILE "va_arg-15.c"
+#include "type-def.h"
+
+union int128_t qword;
+
+#define HAS_DATA_INIT_FUNC
+void
+init_data ()
+{
+  /* Init signed quad-word integer.  */
+  qword.l64 = 0xfdb9753102468aceLL;
+  qword.h64 = 0xeca8642013579bdfLL;
+}
+
+#include "abitest.h"
+#else
+  ARG (int, 1, W0, 0)
+  ARG (int, 2, W1, 1)
+  ARG (int, 3, W2, 2)
+  ARG (int, 4, W3, 3)
+  ARG (int, 5, W4, 4)
+  ARG (int, 6, W5, 5)
+  ARG (int, 7, W6, 6)
+  ARG (__int128, qword.i, STACK, LAST_NAMED_ARG_ID)
+  DOTS
+  LAST_ANON (int, 8, STACK + 16, 8)
+#endif

Reply via email to