pratlucas created this revision.
Herald added subscribers: llvm-commits, cfe-commits, danielkiss, hiraditya,
kristof.beyls.
Herald added projects: clang, LLVM.
pratlucas added a parent revision: D75903: [AArch64][CodeGen] Fixing stack
alignment of HFA arguments on AArch64 PCS.
pratlucas added reviewers: t.p.northover, olista01, rnk, asl.
Comply with the AArch32 PCS when handling over-aligned HFA arguments
that are placed on the stack. AAPCS specifies that the stacked argument
address should be adjusted upwards until correctly aligned for the
argument before copying it to memory.
This patch fixes the alignment of these arguments by making use of the
stack alignment propagated through the `alignstack` IR argument
attribute during the calling convention lowering for ARM targets.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D75904
Files:
clang/lib/CodeGen/TargetInfo.cpp
clang/test/CodeGen/arm-aapcs-vfp.c
llvm/lib/Target/ARM/ARMCallingConv.cpp
llvm/test/CodeGen/ARM/aapcs-hfa-code.ll
Index: llvm/test/CodeGen/ARM/aapcs-hfa-code.ll
===================================================================
--- llvm/test/CodeGen/ARM/aapcs-hfa-code.ll
+++ llvm/test/CodeGen/ARM/aapcs-hfa-code.ll
@@ -3,6 +3,8 @@
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+%struct.hfa_align = type { [2 x float] }
+
define arm_aapcs_vfpcc void @test_1float({ float } %a) {
call arm_aapcs_vfpcc void @test_1float({ float } { float 1.0 })
ret void
@@ -104,3 +106,73 @@
ret void
}
+
+; Over-aligned HFA argument placed on register - one element per register
+define arm_aapcs_vfpcc float @test_hfa_align_reg(%struct.hfa_align alignstack(8) %h1.coerce) local_unnamed_addr #3 {
+entry:
+; CHECK-LABEL: test_hfa_align_reg:
+; CHECK-DAG: bx lr
+
+; CHECK-M4F-LABEL: test_hfa_align_reg:
+; CHECK-M4F-DAG: bx lr
+
+ %h1.coerce.fca.0.0.extract = extractvalue %struct.hfa_align %h1.coerce, 0, 0
+ ret float %h1.coerce.fca.0.0.extract
+}
+
+; Call with over-aligned HFA argument placed on registers - one element per register
+define arm_aapcs_vfpcc float @test_hfa_align_reg_call() local_unnamed_addr #3 {
+entry:
+; CHECK-LABEL: test_hfa_align_reg_call:
+; CHECK-DAG: vmov.f32 s0, #1.000000e+00
+; CHECK-DAG: vmov.f32 s1, #2.000000e+00
+; CHECK-DAG: bl test_hfa_align_reg
+
+; CHECK-M4F-LABEL: test_hfa_align_reg_call:
+; CHECK-M4F-DAG: vmov.f32 s0, #1.000000e+00
+; CHECK-M4F-DAG: vmov.f32 s1, #2.000000e+00
+; CHECK-M4F-DAG: bl test_hfa_align_reg
+
+ %call = call arm_aapcs_vfpcc float @test_hfa_align_reg(%struct.hfa_align alignstack(8) { [2 x float] [float 1.000000e+00, float 2.000000e+00] }) #5
+ ret float %call
+}
+
+; Over-aligned HFA argument placed on the stack - stack round up to alignment
+define arm_aapcs_vfpcc float @test_hfa_align_stack(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, float %f1, %struct.hfa_align alignstack(8) %h1.coerce) local_unnamed_addr #3 {
+entry:
+; CHECK-LABEL: test_hfa_align_stack:
+; CHECK-DAG: vldr s0, [sp, #8]
+; CHECK-DAG: bx lr
+
+; CHECK-M4F-LABEL: test_hfa_align_stack:
+; CHECK-M4F-DAG: vldr s0, [sp, #8]
+; CHECK-M4F-DAG: bx lr
+
+ %h1.coerce.fca.0.0.extract = extractvalue %struct.hfa_align %h1.coerce, 0, 0
+ ret float %h1.coerce.fca.0.0.extract
+}
+
+; Call with over-aligned HFA argument placed on the stack - stack round up to alignment
+define arm_aapcs_vfpcc float @test_hfa_align_stack_call() local_unnamed_addr #3 {
+entry:
+; CHECK-LABEL: test_hfa_align_stack_call:
+; CHECK-DAG: sub sp, sp, #16
+; CHECK-DAG: mov r0, #1073741824
+; CHECK-DAG: mov r1, #1065353216
+; CHECK-DAG: str r1, [sp, #8]
+; CHECK-DAG: str r0, [sp, #12]
+; CHECK-DAG: bl test_hfa_align_stack
+; CHECK-DAG: add sp, sp, #16
+
+; CHECK-M4F-LABEL: test_hfa_align_stack_call:
+; CHECK-M4F-DAG: sub sp, #16
+; CHECK-M4F-DAG: mov.w r0, #1073741824
+; CHECK-M4F-DAG: mov.w r1, #1065353216
+; CHECK-M4F-DAG: strd r1, r0, [sp, #8]
+; CHECK-M4F-DAG: bl test_hfa_align_stack
+; CHECK-M4F-DAG: add sp, #16
+
+ %call = call arm_aapcs_vfpcc float @test_hfa_align_stack(double undef, double undef, double undef, double undef, double undef, double undef, double undef, double undef, float undef, %struct.hfa_align alignstack(8) { [2 x float] [float 1.000000e+00, float 2.000000e+00] }) #5
+ ret float %call
+}
+
Index: llvm/lib/Target/ARM/ARMCallingConv.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMCallingConv.cpp
+++ llvm/lib/Target/ARM/ARMCallingConv.cpp
@@ -266,7 +266,10 @@
// possible. (E.g. an incoming i64 would have starting Align of 8, but we'll
// be allocating a bunch of i32 slots).
unsigned RestAlign = std::min(Align, Size);
-
+ if (ArgFlags.getStackAlign()) {
+ const llvm::Align ArgStackAlign(ArgFlags.getStackAlign());
+ Align = std::max(Align, unsigned(ArgStackAlign.value()));
+ }
for (auto &It : PendingMembers) {
It.convertToMem(State.AllocateStack(Size, Align));
State.addLoc(It);
Index: clang/test/CodeGen/arm-aapcs-vfp.c
===================================================================
--- clang/test/CodeGen/arm-aapcs-vfp.c
+++ clang/test/CodeGen/arm-aapcs-vfp.c
@@ -147,3 +147,17 @@
// is passed ByVal (due to being > 64 bytes), so the backend handles this instead.
void test_vfp_stack_gpr_split_6(double a, double b, double c, double d, double e, double f, double g, double h, double i, int j, struct_seventeen_ints k) {}
// CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_6(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, i32 %j, %struct.struct_seventeen_ints* byval(%struct.struct_seventeen_ints) align 4 %k)
+
+// Make sure over-alignment information is propagated to the backend properly
+typedef struct {
+ __attribute__((__aligned__(8))) float v[2];
+} hfa_align;
+// CHECK: define arm_aapcs_vfpcc float @test_hfa_align_arg(%struct.hfa_align alignstack(8) %h1.coerce) #0
+float test_hfa_align_arg(hfa_align h1) {
+ return h1.v[0];
+}
+// CHECK: %call = call arm_aapcs_vfpcc float @test_hfa_align_arg(%struct.hfa_align alignstack(8) %1) #4
+float test_hfa_align_call() {
+ hfa_align h = {1.0, 2.0};
+ return test_hfa_align_arg(h);
+}
Index: clang/lib/CodeGen/TargetInfo.cpp
===================================================================
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -5934,7 +5934,10 @@
return ABIArgInfo::getDirect(Ty, 0, nullptr, false);
}
}
- return ABIArgInfo::getDirect(nullptr, 0, nullptr, false);
+ bool NeedsStackAlignment = getContext().getTypeAlignInChars(Ty) !=
+ getContext().getTypeAlignInChars(Base);
+ return ABIArgInfo::getDirect(nullptr, /*Offset=*/0, /*Padding=*/nullptr,
+ /*CanBeFlattened=*/false, NeedsStackAlignment);
}
ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic,
@@ -6000,9 +6003,13 @@
uint64_t Members = 0;
if (isHomogeneousAggregate(Ty, Base, Members)) {
assert(Base && Members <= 4 && "unexpected homogeneous aggregate");
+ bool NeedsStackAlignment = getContext().getTypeAlignInChars(Ty) !=
+ getContext().getTypeAlignInChars(Base);
llvm::Type *Ty =
llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members);
- return ABIArgInfo::getDirect(Ty, 0, nullptr, false);
+ return ABIArgInfo::getDirect(Ty, /*Offset=*/0, /*Padding=*/nullptr,
+ /*CanBeFlattened=*/false,
+ NeedsStackAlignment);
}
}
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits