PR92999 is a case where the VFP calling convention does not allocate
enough FP registers for a homogeneous aggregate containing FP16 values.
I believe this is the complete fix but would appreciate another set of
eyes on this.

Could I get a hand with a regression test run on an armhf environment
while I fix my environment?

gcc/ChangeLog:

PR target/92999
* config/arm/arm.cc (aapcs_vfp_allocate_return_reg): Adjust to handle
aggregates with elements smaller than SFmode.

gcc/testsuite/ChangeLog:

* gcc.target/arm/pr92999.c: New test.


Thanks,
Ramana

Signed-off-by: Ramana Radhakrishnan <ramana....@gmail.com>
---
 gcc/config/arm/arm.cc                  |  6 ++++-
 gcc/testsuite/gcc.target/arm/pr92999.c | 31 ++++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/arm/pr92999.c

diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index 2eb4d51e4a3..03f4057f717 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -6740,7 +6740,11 @@ aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
              count *= 2;
            }
        }
-      shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
+
+      /* Aggregates can contain FP16 or BF16 values which would need to
+        be passed in via FP registers.  */
+      shift = (MAX(GET_MODE_SIZE(ag_mode), GET_MODE_SIZE(SFmode))
+              / GET_MODE_SIZE(SFmode));
       par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
       for (i = 0; i < count; i++)
        {
diff --git a/gcc/testsuite/gcc.target/arm/pr92999.c b/gcc/testsuite/gcc.target/arm/pr92999.c
new file mode 100644
index 00000000000..faa21fdb7d2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/pr92999.c
@@ -0,0 +1,31 @@
+/* { dg-do run } */
+/* { dg-options "-mfp16-format=ieee" } */
+
+//
+// Compile with gcc -mfp16-format=ieee
+// Any optimization level is fine.
+//
+// Correct output should be
+// "y.first = 1, y.second = -99"
+//
+// Buggy output is
+// "y.first = -99, y.second = -99"
+//
+#include <stdlib.h>
+struct phalf {  // aggregate whose two members are both __fp16
+    __fp16 first;
+    __fp16 second;
+};
+
+struct phalf phalf_copy(struct phalf* src) __attribute__((noinline));  // noinline: presumably so the call is not optimized away
+struct phalf phalf_copy(struct phalf* src) {
+    return *src;  // returns the whole struct by value
+}
+
+int main() {
+    struct phalf x = { 1.0, -99.0};
+    struct phalf y = phalf_copy(&x);  // round-trip x through a by-value struct return
+    if (y.first != 1.0 || y.second != -99.0)  // fail if EITHER field is wrong; the buggy result only corrupts y.first
+       abort();
+    return 0;
+}
-- 
2.34.1

Reply via email to