[PATCH] D75904: [ARM][CodeGen] Fixing stack alignment of HFA arguments on AArch32 PCS

Lucas Prates via Phabricator via cfe-commits Tue, 10 Mar 2020 04:31:04 -0700

pratlucas created this revision.
Herald added subscribers: llvm-commits, cfe-commits, danielkiss, hiraditya, 
kristof.beyls.
Herald added projects: clang, LLVM.
pratlucas added a parent revision: D75903: [AArch64][CodeGen] Fixing stack 
alignment of HFA arguments on AArch64 PCS.
pratlucas added reviewers: t.p.northover, olista01, rnk, asl.


This patch makes the handling of over-aligned HFA arguments comply with the
AArch32 PCS when those arguments are placed on the stack. AAPCS specifies that
the stacked argument address should be adjusted upwards until correctly
aligned for the argument before copying it to memory.

This patch fixes the alignment of these arguments by making use of the
stack alignment propagated through the `alignstack` IR argument
attribute during the calling convention lowering for ARM targets.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D75904

Files:
  clang/lib/CodeGen/TargetInfo.cpp
  clang/test/CodeGen/arm-aapcs-vfp.c
  llvm/lib/Target/ARM/ARMCallingConv.cpp
  llvm/test/CodeGen/ARM/aapcs-hfa-code.ll

Index: llvm/test/CodeGen/ARM/aapcs-hfa-code.ll
===================================================================
--- llvm/test/CodeGen/ARM/aapcs-hfa-code.ll
+++ llvm/test/CodeGen/ARM/aapcs-hfa-code.ll
@@ -3,6 +3,8 @@
 
 target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
 
+%struct.hfa_align = type { [2 x float] }
+
 define arm_aapcs_vfpcc void @test_1float({ float } %a) {
   call arm_aapcs_vfpcc void @test_1float({ float } { float 1.0 })
   ret void
@@ -104,3 +106,73 @@
 
   ret void
 }
+
+; Over-aligned HFA argument placed in registers - one element per register
+define arm_aapcs_vfpcc float @test_hfa_align_reg(%struct.hfa_align alignstack(8) %h1.coerce) local_unnamed_addr #3 {
+entry:
+; CHECK-LABEL: test_hfa_align_reg:
+; CHECK-DAG: bx lr
+
+; CHECK-M4F-LABEL: test_hfa_align_reg:
+; CHECK-M4F-DAG: bx lr
+
+  %h1.coerce.fca.0.0.extract = extractvalue %struct.hfa_align %h1.coerce, 0, 0
+  ret float %h1.coerce.fca.0.0.extract
+}
+
+; Call with over-aligned HFA argument placed in registers - one element per register
+define arm_aapcs_vfpcc float @test_hfa_align_reg_call() local_unnamed_addr #3 {
+entry:
+; CHECK-LABEL: test_hfa_align_reg_call:
+; CHECK-DAG: vmov.f32	s0, #1.000000e+00
+; CHECK-DAG: vmov.f32	s1, #2.000000e+00
+; CHECK-DAG: bl	test_hfa_align_reg
+
+; CHECK-M4F-LABEL: test_hfa_align_reg_call:
+; CHECK-M4F-DAG: vmov.f32	s0, #1.000000e+00
+; CHECK-M4F-DAG: vmov.f32	s1, #2.000000e+00
+; CHECK-M4F-DAG: bl	test_hfa_align_reg
+
+  %call = call arm_aapcs_vfpcc float @test_hfa_align_reg(%struct.hfa_align alignstack(8) { [2 x float] [float 1.000000e+00, float 2.000000e+00] }) #5
+  ret float %call
+}
+
+; Over-aligned HFA argument placed on the stack - stack round up to alignment
+define arm_aapcs_vfpcc float @test_hfa_align_stack(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, float %f1, %struct.hfa_align alignstack(8) %h1.coerce) local_unnamed_addr #3 {
+entry:
+; CHECK-LABEL: test_hfa_align_stack:
+; CHECK-DAG: vldr	s0, [sp, #8]
+; CHECK-DAG: bx	lr
+
+; CHECK-M4F-LABEL: test_hfa_align_stack:
+; CHECK-M4F-DAG: vldr	s0, [sp, #8]
+; CHECK-M4F-DAG: bx	lr
+
+  %h1.coerce.fca.0.0.extract = extractvalue %struct.hfa_align %h1.coerce, 0, 0
+  ret float %h1.coerce.fca.0.0.extract
+}
+
+; Call with over-aligned HFA argument placed on the stack - stack round up to alignment
+define arm_aapcs_vfpcc float @test_hfa_align_stack_call() local_unnamed_addr #3 {
+entry:
+; CHECK-LABEL: test_hfa_align_stack_call:
+; CHECK-DAG: sub	sp, sp, #16
+; CHECK-DAG: mov	r0, #1073741824
+; CHECK-DAG: mov	r1, #1065353216
+; CHECK-DAG: str	r1, [sp, #8]
+; CHECK-DAG: str	r0, [sp, #12]
+; CHECK-DAG: bl	test_hfa_align_stack
+; CHECK-DAG: add	sp, sp, #16
+
+; CHECK-M4F-LABEL: test_hfa_align_stack_call:
+; CHECK-M4F-DAG: sub	sp, #16
+; CHECK-M4F-DAG: mov.w	r0, #1073741824
+; CHECK-M4F-DAG: mov.w	r1, #1065353216
+; CHECK-M4F-DAG: strd	r1, r0, [sp, #8]
+; CHECK-M4F-DAG: bl	test_hfa_align_stack
+; CHECK-M4F-DAG: add	sp, #16
+
+  %call = call arm_aapcs_vfpcc float @test_hfa_align_stack(double undef, double undef, double undef, double undef, double undef, double undef, double undef, double undef, float undef, %struct.hfa_align alignstack(8) { [2 x float] [float 1.000000e+00, float 2.000000e+00] }) #5
+  ret float %call
+}
+
Index: llvm/lib/Target/ARM/ARMCallingConv.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMCallingConv.cpp
+++ llvm/lib/Target/ARM/ARMCallingConv.cpp
@@ -266,7 +266,10 @@
   // possible. (E.g. an incoming i64 would have starting Align of 8, but we'll
   // be allocating a bunch of i32 slots).
   unsigned RestAlign = std::min(Align, Size);
-
+  if (ArgFlags.getStackAlign()) {
+    const llvm::Align ArgStackAlign(ArgFlags.getStackAlign());
+    Align = std::max(Align, unsigned(ArgStackAlign.value()));
+  }
   for (auto &It : PendingMembers) {
     It.convertToMem(State.AllocateStack(Size, Align));
     State.addLoc(It);
Index: clang/test/CodeGen/arm-aapcs-vfp.c
===================================================================
--- clang/test/CodeGen/arm-aapcs-vfp.c
+++ clang/test/CodeGen/arm-aapcs-vfp.c
@@ -147,3 +147,17 @@
 // is passed ByVal (due to being > 64 bytes), so the backend handles this instead.
 void test_vfp_stack_gpr_split_6(double a, double b, double c, double d, double e, double f, double g, double h, double i, int j, struct_seventeen_ints k) {}
 // CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_6(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, i32 %j, %struct.struct_seventeen_ints* byval(%struct.struct_seventeen_ints) align 4 %k)
+
+// Make sure over-alignment information is propagated to the backend properly
+typedef struct {
+  __attribute__((__aligned__(8))) float v[2];
+} hfa_align;
+// CHECK: define arm_aapcs_vfpcc float @test_hfa_align_arg(%struct.hfa_align alignstack(8) %h1.coerce) #0
+float test_hfa_align_arg(hfa_align h1) {
+  return h1.v[0];
+}
+// CHECK: %call = call arm_aapcs_vfpcc float @test_hfa_align_arg(%struct.hfa_align alignstack(8) %1) #4
+float test_hfa_align_call() {
+  hfa_align h = {1.0, 2.0};
+  return test_hfa_align_arg(h);
+}
Index: clang/lib/CodeGen/TargetInfo.cpp
===================================================================
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -5934,7 +5934,10 @@
       return ABIArgInfo::getDirect(Ty, 0, nullptr, false);
     }
   }
-  return ABIArgInfo::getDirect(nullptr, 0, nullptr, false);
+  bool NeedsStackAlignment = getContext().getTypeAlignInChars(Ty) !=
+                             getContext().getTypeAlignInChars(Base);
+  return ABIArgInfo::getDirect(nullptr, /*Offset=*/0, /*Padding=*/nullptr,
+                               /*CanBeFlattened=*/false, NeedsStackAlignment);
 }
 
 ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic,
@@ -6000,9 +6003,13 @@
     uint64_t Members = 0;
     if (isHomogeneousAggregate(Ty, Base, Members)) {
       assert(Base && Members <= 4 && "unexpected homogeneous aggregate");
+      bool NeedsStackAlignment = getContext().getTypeAlignInChars(Ty) !=
+                                 getContext().getTypeAlignInChars(Base);
       llvm::Type *Ty =
         llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members);
-      return ABIArgInfo::getDirect(Ty, 0, nullptr, false);
+      return ABIArgInfo::getDirect(Ty, /*Offset=*/0, /*Padding=*/nullptr,
+                                   /*CanBeFlattened=*/false,
+                                   NeedsStackAlignment);
     }
   }

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D75904: [ARM][CodeGen] Fixing stack alignment of HFA arguments on AArch32 PCS

Reply via email to