PR92999 is a case where the VFP calling convention does not allocate
enough FP registers for a homogeneous aggregate containing FP16 values.
I believe this is the complete fix but would appreciate another set of
eyes on this.
Could I get a hand with a regression test run on an armhf environment
while I fix my environment?
gcc/ChangeLog:
PR target/92999
* config/arm/arm.cc (aapcs_vfp_allocate_return_reg): Adjust to handle
aggregates with elements smaller than SFmode.
gcc/testsuite/ChangeLog:
* gcc.target/arm/pr92999.c: New test.
Thanks,
Ramana
Signed-off-by: Ramana Radhakrishnan <[email protected]>
---
gcc/config/arm/arm.cc | 6 ++++-
gcc/testsuite/gcc.target/arm/pr92999.c | 31 ++++++++++++++++++++++++++
2 files changed, 36 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.target/arm/pr92999.c
diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index 2eb4d51e4a3..03f4057f717 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -6740,7 +6740,11 @@ aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
count *= 2;
}
}
- shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
+
+ /* Aggregates can contain FP16 or BF16 values, which are narrower than
+ SFmode but still need to be returned in FP registers. */
+ shift = (MAX(GET_MODE_SIZE(ag_mode), GET_MODE_SIZE(SFmode))
+ / GET_MODE_SIZE(SFmode));
par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
for (i = 0; i < count; i++)
{
diff --git a/gcc/testsuite/gcc.target/arm/pr92999.c b/gcc/testsuite/gcc.target/arm/pr92999.c
new file mode 100644
index 00000000000..faa21fdb7d2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/pr92999.c
@@ -0,0 +1,31 @@
+/* { dg-do run } */
+/* { dg-options "-mfp16-format=ieee" } */
+
+//
+// Compile with gcc -mfp16-format=ieee
+// Any optimization level is fine.
+//
+// With a correct compiler the returned copy holds
+// y.first == 1, y.second == -99
+//
+// With the bug the returned copy holds
+// y.first == -99, y.second == -99
+//
+#include <stdlib.h>
+struct phalf { /* Aggregate whose two members are both half-precision (__fp16). */
+ __fp16 first;
+ __fp16 second;
+};
+
+struct phalf phalf_copy(struct phalf* src) __attribute__((noinline));
+struct phalf phalf_copy(struct phalf* src) {
+ return *src; /* Return the aggregate by value; noinline keeps this a real call. */
+}
+
+int main() { /* Round-trip a struct phalf through phalf_copy and verify both fields. */
+ struct phalf x = { 1.0, -99.0};
+ struct phalf y = phalf_copy(&x);
+ if (y.first != 1.0 || y.second != -99.0) /* Fail if either field is wrong. */
+ abort();
+ return 0;
+}
--
2.34.1