https://github.com/Lukacma updated https://github.com/llvm/llvm-project/pull/90849
>From 18c489682d38837d7b0abc7b9ecf829a9df4bd0c Mon Sep 17 00:00:00 2001 From: Marian Lukac <marian.lu...@arm.com> Date: Thu, 2 May 2024 11:33:13 +0000 Subject: [PATCH 1/2] [Clang] Added lifetime markers for temp. allocated Aggregate types --- clang/lib/CodeGen/CGCall.cpp | 10 ++++ clang/test/CodeGen/aarch64-byval-temp.c | 46 +++++++++++++++++-- ...-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c | 2 + ...cle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp | 2 + clang/test/CodeGen/nofpclass.c | 22 +++++---- 5 files changed, 70 insertions(+), 12 deletions(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 1b4ca2a8b2fe8..8fdde5326b8ed 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5202,6 +5202,16 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, Val = Builder.CreateFreeze(Val); IRCallArgs[FirstIRArg] = Val; + // Emit lifetime markers for the temporary alloca. + llvm::TypeSize ByvalTempElementSize = + CGM.getDataLayout().getTypeAllocSize(Addr.getElementType()); + llvm::Value *LifetimeSize = + EmitLifetimeStart(ByvalTempElementSize, Addr.getPointer()); + + // Add cleanup code to emit the end lifetime marker after the call. + if (LifetimeSize) // In case we disabled lifetime markers. + CallLifetimeEndAfterCall.emplace_back(Addr, LifetimeSize); + I->copyInto(*this, Addr); } else { // We want to avoid creating an unnecessary temporary+copy here; diff --git a/clang/test/CodeGen/aarch64-byval-temp.c b/clang/test/CodeGen/aarch64-byval-temp.c index e9e2586406e5c..765a298f83ed0 100644 --- a/clang/test/CodeGen/aarch64-byval-temp.c +++ b/clang/test/CodeGen/aarch64-byval-temp.c @@ -1,13 +1,15 @@ -// RUN: %clang_cc1 -emit-llvm -triple arm64-- -o - %s -O0 | FileCheck %s --check-prefix=CHECK-O0 -// RUN: %clang_cc1 -emit-llvm -disable-llvm-optzns -triple arm64-- -o - %s -O3 | FileCheck %s --check-prefix=CHECK-O3 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -emit-llvm -triple arm64-- -fexperimental-max-bitint-width=1024 -o - %s -O0 | FileCheck %s --check-prefix=CHECK-O0 +// RUN: %clang_cc1 -emit-llvm -disable-llvm-optzns -fexperimental-max-bitint-width=1024 -triple arm64-- -o - %s -O3 | FileCheck %s --check-prefix=CHECK-O3 struct large { void* pointers[8]; }; void pass_large(struct large); +void pass_large_BitInt(_BitInt(129)); -// For arm64, we don't use byval to pass structs but instead we create +// For arm64, we don't use byval to pass structs and _BitInt(>128) type, but instead we create // temporary allocas. // // Make sure we generate the appropriate lifetime markers for the temporary @@ -71,3 +73,41 @@ void example(void) { // Mark the end of the lifetime of `l`. // CHECK-O3-NEXT: call void @llvm.lifetime.end.p0(i64 64, ptr %l) // CHECK-O3-NEXT: ret void + +void example_BitInt(void) { + _BitInt(129) l = {0}; + pass_large_BitInt(l); + pass_large_BitInt(l); +} +// CHECK-O0-LABEL: define dso_local void @example_BitInt( +// CHECK-O0-NEXT: entry: +// CHECK-O0-NEXT: [[L:%.*]] = alloca i129, align 16 +// CHECK-O0-NEXT: [[INDIRECT_ARG_TEMP:%.*]] = alloca i129, align 16 +// CHECK-O0-NEXT: [[INDIRECT_ARG_TEMP1:%.*]] = alloca i129, align 16 +// CHECK-O0-NEXT: store i129 0, ptr [[L]], align 16 +// CHECK-O0-NEXT: [[TMP0:%.*]] = load i129, ptr [[L]], align 16 +// CHECK-O0-NEXT: store i129 [[TMP0]], ptr [[INDIRECT_ARG_TEMP]], align 16 +// CHECK-O0-NEXT: call void @pass_large_BitInt(ptr noundef [[INDIRECT_ARG_TEMP]]) +// CHECK-O0-NEXT: [[TMP1:%.*]] = load i129, ptr [[L]], align 16 +// CHECK-O0-NEXT: store i129 [[TMP1]], ptr [[INDIRECT_ARG_TEMP1]], align 16 +// CHECK-O0-NEXT: call void @pass_large_BitInt(ptr noundef [[INDIRECT_ARG_TEMP1]]) +// CHECK-O0-NEXT: ret void +// +// CHECK-O3-LABEL: define dso_local void @example_BitInt( +// CHECK-O3-NEXT: entry: +// CHECK-O3-NEXT: [[L:%.*]] = alloca i129, align 16 +// CHECK-O3-NEXT: [[INDIRECT_ARG_TEMP:%.*]] = alloca i129, align 16 +// CHECK-O3-NEXT: [[INDIRECT_ARG_TEMP1:%.*]] = alloca i129, align 16 +// CHECK-O3-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr [[L]]) +// CHECK-O3-NEXT: store i129 0, ptr [[L]], align 16, !tbaa [[TBAA6:![0-9]+]] +// CHECK-O3-NEXT: [[TMP0:%.*]] = load i129, ptr [[L]], align 16, !tbaa [[TBAA6]] +// CHECK-O3-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr [[INDIRECT_ARG_TEMP]]) +// CHECK-O3-NEXT: store i129 [[TMP0]], ptr [[INDIRECT_ARG_TEMP]], align 16, !tbaa [[TBAA6]] +// CHECK-O3-NEXT: call void @pass_large_BitInt(ptr noundef [[INDIRECT_ARG_TEMP]]) +// CHECK-O3-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr [[INDIRECT_ARG_TEMP]]) +// CHECK-O3-NEXT: [[TMP1:%.*]] = load i129, ptr [[L]], align 16, !tbaa [[TBAA6]] +// CHECK-O3-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr [[INDIRECT_ARG_TEMP1]]) +// CHECK-O3-NEXT: store i129 [[TMP1]], ptr [[INDIRECT_ARG_TEMP1]], align 16, !tbaa [[TBAA6]] +// CHECK-O3-NEXT: call void @pass_large_BitInt(ptr noundef [[INDIRECT_ARG_TEMP1]]) +// CHECK-O3-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr [[INDIRECT_ARG_TEMP1]]) +// CHECK-O3-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr [[L]]) diff --git a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c index a4abe96cc08a6..55e1ed393d848 100644 --- a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c +++ b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c @@ -88,8 +88,10 @@ typedef svint8_t vec2 __attribute__((arm_sve_vector_bits(N))); // CHECK-NEXT: entry: // CHECK-NEXT: [[INDIRECT_ARG_TEMP:%.*]] = alloca <[[#div(VBITS,8)]] x i8>, align 16 // CHECK-NEXT: [[X:%.*]] = tail call <[[#div(VBITS,8)]] x i8> @llvm.vector.extract.v[[#div(VBITS,8)]]i8.nxv16i8(<vscale x 16 x i8> [[X_COERCE:%.*]], i64 0) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 [[SIZE:[0-9]+]], ptr nonnull [[INDIRECT_ARG_TEMP]]) #[[ATTR6:[0-9]+]] // CHECK-NEXT: store <[[#div(VBITS,8)]] x i8> [[X]], ptr [[INDIRECT_ARG_TEMP]], align 16, [[TBAA6]] // CHECK-NEXT: call void @f3(ptr noundef nonnull [[INDIRECT_ARG_TEMP]]) [[ATTR5:#.*]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 [[SIZE]], ptr nonnull [[INDIRECT_ARG_TEMP]]) #[[ATTR6:[0-9]+]] // CHECK-NEXT: ret void // CHECK128-LABEL: declare void @f3(<16 x i8> noundef) diff --git a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp index 05587fd9e7fe2..30ea73b63bce1 100644 --- a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp +++ b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp @@ -73,8 +73,10 @@ typedef svint16_t vec2 __attribute__((arm_sve_vector_bits(N))); // CHECK128-NEXT: ret void // CHECKWIDE-NEXT: [[INDIRECT_ARG_TEMP:%.*]] = alloca <[[#div(VBITS, 16)]] x i16>, align 16 // CHECKWIDE-NEXT: [[X:%.*]] = tail call <[[#div(VBITS, 16)]] x i16> @llvm.vector.extract.v[[#div(VBITS, 16)]]i16.nxv8i16(<vscale x 8 x i16> [[X_COERCE:%.*]], i64 0) +// CHECKWIDE-NEXT: call void @llvm.lifetime.start.p0(i64 [[SIZE:[0-9]+]], ptr nonnull [[INDIRECT_ARG_TEMP]]) #[[ATTR6:[0-9]+]] // CHECKWIDE-NEXT: store <[[#div(VBITS, 16)]] x i16> [[X]], ptr [[INDIRECT_ARG_TEMP]], align 16, [[TBAA6:!tbaa !.*]] // CHECKWIDE-NEXT: call void @_Z1fDv[[#div(VBITS, 16)]]_s(ptr noundef nonnull [[INDIRECT_ARG_TEMP]]) [[ATTR5:#.*]] +// CHECKWIDE-NEXT: call void @llvm.lifetime.end.p0(i64 [[SIZE]], ptr nonnull [[INDIRECT_ARG_TEMP]]) #[[ATTR6:[0-9]+]] // CHECKWIDE-NEXT: ret void void g(vec2 x) { f(x); } // OK #endif diff --git a/clang/test/CodeGen/nofpclass.c b/clang/test/CodeGen/nofpclass.c index dd90d02f7759b..fc4c64f9b921b 100644 --- a/clang/test/CodeGen/nofpclass.c +++ b/clang/test/CodeGen/nofpclass.c @@ -172,7 +172,7 @@ double2 defined_func_v2f64(double2 a, double2 b, double2 c) { // CLFINITEONLY-LABEL: define dso_local nofpclass(nan inf) float @call_extern_func // CLFINITEONLY-SAME: (float noundef nofpclass(nan inf) [[A:%.*]], double noundef nofpclass(nan inf) [[B:%.*]], half noundef nofpclass(nan inf) [[C:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] { // CLFINITEONLY-NEXT: entry: -// CLFINITEONLY-NEXT: [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) float @extern_func(float noundef nofpclass(nan inf) [[A]], double noundef nofpclass(nan inf) [[B]], half noundef nofpclass(nan inf) [[C]]) #[[ATTR10:[0-9]+]] +// CLFINITEONLY-NEXT: [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) float @extern_func(float noundef nofpclass(nan inf) [[A]], double noundef nofpclass(nan inf) [[B]], half noundef nofpclass(nan inf) [[C]]) #[[ATTR11:[0-9]+]] // CLFINITEONLY-NEXT: ret float [[CALL]] // // NONANS: Function Attrs: noinline nounwind optnone @@ -249,7 +249,7 @@ float call_extern_func(float a, double b, _Float16 c) { // CLFINITEONLY-LABEL: define dso_local nofpclass(nan inf) double @call_extern_func_vec // CLFINITEONLY-SAME: (double noundef nofpclass(nan inf) [[A_COERCE:%.*]], <2 x double> noundef nofpclass(nan inf) [[B:%.*]], i32 noundef [[C_COERCE:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] { // CLFINITEONLY-NEXT: entry: -// CLFINITEONLY-NEXT: [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) double @extern_func_vec(double noundef nofpclass(nan inf) [[A_COERCE]], <2 x double> noundef nofpclass(nan inf) [[B]], i32 noundef [[C_COERCE]]) #[[ATTR10]] +// CLFINITEONLY-NEXT: [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) double @extern_func_vec(double noundef nofpclass(nan inf) [[A_COERCE]], <2 x double> noundef nofpclass(nan inf) [[B]], i32 noundef [[C_COERCE]]) #[[ATTR11]] // CLFINITEONLY-NEXT: ret double [[CALL]] // // NONANS: Function Attrs: noinline nounwind optnone @@ -389,7 +389,7 @@ float2 call_extern_func_vec(float2 a, double2 b, half2 c) { // CLFINITEONLY-LABEL: define dso_local nofpclass(nan inf) <2 x float> @defined_complex_func // CLFINITEONLY-SAME: (<2 x float> noundef nofpclass(nan inf) [[A_COERCE:%.*]], double noundef nofpclass(nan inf) [[B_COERCE0:%.*]], double noundef nofpclass(nan inf) [[B_COERCE1:%.*]], <2 x half> noundef nofpclass(nan inf) [[C_COERCE:%.*]]) local_unnamed_addr #[[ATTR6:[0-9]+]] { // CLFINITEONLY-NEXT: entry: -// CLFINITEONLY-NEXT: [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) <2 x float> @extern_complex(<2 x float> noundef nofpclass(nan inf) [[A_COERCE]], double noundef nofpclass(nan inf) [[B_COERCE0]], double noundef nofpclass(nan inf) [[B_COERCE1]], <2 x half> noundef nofpclass(nan inf) [[C_COERCE]]) #[[ATTR10]] +// CLFINITEONLY-NEXT: [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) <2 x float> @extern_complex(<2 x float> noundef nofpclass(nan inf) [[A_COERCE]], double noundef nofpclass(nan inf) [[B_COERCE0]], double noundef nofpclass(nan inf) [[B_COERCE1]], <2 x half> noundef nofpclass(nan inf) [[C_COERCE]]) #[[ATTR11]] // CLFINITEONLY-NEXT: ret <2 x float> [[CALL]] // // NONANS: Function Attrs: noinline nounwind optnone @@ -927,12 +927,14 @@ _Complex _Float16 defined_complex_func_f16_ret(_Complex _Float16 c) { // CLFINITEONLY-NEXT: [[CF16_REAL:%.*]] = load half, ptr [[CF16]], align 8 // CLFINITEONLY-NEXT: [[CF16_IMAGP:%.*]] = getelementptr inbounds i8, ptr [[CF16]], i64 2 // CLFINITEONLY-NEXT: [[CF16_IMAG:%.*]] = load half, ptr [[CF16_IMAGP]], align 2 +// CLFINITEONLY-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[INDIRECT_ARG_TEMP]]) #[[ATTR12:[0-9]+]] // CLFINITEONLY-NEXT: [[INDIRECT_ARG_TEMP_IMAGP:%.*]] = getelementptr inbounds i8, ptr [[INDIRECT_ARG_TEMP]], i64 8 // CLFINITEONLY-NEXT: store double [[CF64_COERCE0]], ptr [[INDIRECT_ARG_TEMP]], align 8 // CLFINITEONLY-NEXT: store double [[CF64_COERCE1]], ptr [[INDIRECT_ARG_TEMP_IMAGP]], align 8 // CLFINITEONLY-NEXT: [[COERCE5_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x half> poison, half [[CF16_REAL]], i64 0 // CLFINITEONLY-NEXT: [[COERCE5_SROA_0_2_VEC_INSERT:%.*]] = insertelement <2 x half> [[COERCE5_SROA_0_0_VEC_INSERT]], half [[CF16_IMAG]], i64 1 -// CLFINITEONLY-NEXT: [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) float (float, ...) @variadic(float noundef nofpclass(nan inf) [[F32]], double noundef nofpclass(nan inf) [[CONV]], double noundef nofpclass(nan inf) [[F64]], half noundef nofpclass(nan inf) [[F16]], double noundef nofpclass(nan inf) [[V2F32_COERCE]], <2 x double> noundef nofpclass(nan inf) [[V2F64]], i32 noundef [[V2F16_COERCE]], <2 x float> noundef nofpclass(nan inf) [[CF32_COERCE]], ptr noundef nonnull byval({ double, double }) align 8 [[INDIRECT_ARG_TEMP]], <2 x half> noundef nofpclass(nan inf) [[COERCE5_SROA_0_2_VEC_INSERT]]) #[[ATTR10]] +// CLFINITEONLY-NEXT: [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) float (float, ...) @variadic(float noundef nofpclass(nan inf) [[F32]], double noundef nofpclass(nan inf) [[CONV]], double noundef nofpclass(nan inf) [[F64]], half noundef nofpclass(nan inf) [[F16]], double noundef nofpclass(nan inf) [[V2F32_COERCE]], <2 x double> noundef nofpclass(nan inf) [[V2F64]], i32 noundef [[V2F16_COERCE]], <2 x float> noundef nofpclass(nan inf) [[CF32_COERCE]], ptr noundef nonnull byval({ double, double }) align 8 [[INDIRECT_ARG_TEMP]], <2 x half> noundef nofpclass(nan inf) [[COERCE5_SROA_0_2_VEC_INSERT]]) #[[ATTR11]] +// CLFINITEONLY-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[INDIRECT_ARG_TEMP]]) #[[ATTR12]] // CLFINITEONLY-NEXT: ret float [[CALL]] // // NONANS: Function Attrs: noinline nounwind optnone @@ -1178,12 +1180,14 @@ float call_variadic(float f32, double f64, _Float16 f16, // CLFINITEONLY-NEXT: [[CF16_REAL:%.*]] = load half, ptr [[CF16]], align 8 // CLFINITEONLY-NEXT: [[CF16_IMAGP:%.*]] = getelementptr inbounds i8, ptr [[CF16]], i64 2 // CLFINITEONLY-NEXT: [[CF16_IMAG:%.*]] = load half, ptr [[CF16_IMAGP]], align 2 +// CLFINITEONLY-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[INDIRECT_ARG_TEMP]]) #[[ATTR12]] // CLFINITEONLY-NEXT: [[INDIRECT_ARG_TEMP_IMAGP:%.*]] = getelementptr inbounds i8, ptr [[INDIRECT_ARG_TEMP]], i64 8 // CLFINITEONLY-NEXT: store double [[CF64_COERCE0]], ptr [[INDIRECT_ARG_TEMP]], align 8 // CLFINITEONLY-NEXT: store double [[CF64_COERCE1]], ptr [[INDIRECT_ARG_TEMP_IMAGP]], align 8 // CLFINITEONLY-NEXT: [[COERCE5_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x half> poison, half [[CF16_REAL]], i64 0 // CLFINITEONLY-NEXT: [[COERCE5_SROA_0_2_VEC_INSERT:%.*]] = insertelement <2 x half> [[COERCE5_SROA_0_0_VEC_INSERT]], half [[CF16_IMAG]], i64 1 -// CLFINITEONLY-NEXT: [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) float (float, ...) [[FPTR]](float noundef nofpclass(nan inf) [[F32]], double noundef nofpclass(nan inf) [[CONV]], double noundef nofpclass(nan inf) [[F64]], half noundef nofpclass(nan inf) [[F16]], double noundef nofpclass(nan inf) [[V2F32_COERCE]], <2 x double> noundef nofpclass(nan inf) [[V2F64]], i32 noundef [[V2F16_COERCE]], <2 x float> noundef nofpclass(nan inf) [[CF32_COERCE]], ptr noundef nonnull byval({ double, double }) align 8 [[INDIRECT_ARG_TEMP]], <2 x half> noundef nofpclass(nan inf) [[COERCE5_SROA_0_2_VEC_INSERT]]) #[[ATTR10]] +// CLFINITEONLY-NEXT: [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) float (float, ...) [[FPTR]](float noundef nofpclass(nan inf) [[F32]], double noundef nofpclass(nan inf) [[CONV]], double noundef nofpclass(nan inf) [[F64]], half noundef nofpclass(nan inf) [[F16]], double noundef nofpclass(nan inf) [[V2F32_COERCE]], <2 x double> noundef nofpclass(nan inf) [[V2F64]], i32 noundef [[V2F16_COERCE]], <2 x float> noundef nofpclass(nan inf) [[CF32_COERCE]], ptr noundef nonnull byval({ double, double }) align 8 [[INDIRECT_ARG_TEMP]], <2 x half> noundef nofpclass(nan inf) [[COERCE5_SROA_0_2_VEC_INSERT]]) #[[ATTR11]] +// CLFINITEONLY-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[INDIRECT_ARG_TEMP]]) #[[ATTR12]] // CLFINITEONLY-NEXT: ret float [[CALL]] // // NONANS: Function Attrs: noinline nounwind optnone @@ -1364,9 +1368,9 @@ extern __m256d extern_m256d(__m256d, ...); // // CLFINITEONLY: Function Attrs: convergent norecurse nounwind // CLFINITEONLY-LABEL: define dso_local nofpclass(nan inf) <4 x double> @call_m256d -// CLFINITEONLY-SAME: (<4 x double> noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR8:[0-9]+]] { +// CLFINITEONLY-SAME: (<4 x double> noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR9:[0-9]+]] { // CLFINITEONLY-NEXT: entry: -// CLFINITEONLY-NEXT: [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) <4 x double> (<4 x double>, ...) @extern_m256d(<4 x double> noundef nofpclass(nan inf) [[X]], <4 x double> noundef nofpclass(nan inf) [[X]]) #[[ATTR10]] +// CLFINITEONLY-NEXT: [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) <4 x double> (<4 x double>, ...) @extern_m256d(<4 x double> noundef nofpclass(nan inf) [[X]], <4 x double> noundef nofpclass(nan inf) [[X]]) #[[ATTR11]] // CLFINITEONLY-NEXT: ret <4 x double> [[CALL]] // // NONANS: Function Attrs: noinline nounwind optnone @@ -1407,9 +1411,9 @@ __m256d call_m256d(__m256d x) { // // CLFINITEONLY: Function Attrs: convergent norecurse nounwind // CLFINITEONLY-LABEL: define dso_local nofpclass(nan inf) <25 x double> @call_matrix -// CLFINITEONLY-SAME: (<25 x double> noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR9:[0-9]+]] { +// CLFINITEONLY-SAME: (<25 x double> noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR10:[0-9]+]] { // CLFINITEONLY-NEXT: entry: -// CLFINITEONLY-NEXT: [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) <25 x double> @extern_matrix(<25 x double> noundef nofpclass(nan inf) [[X]]) #[[ATTR10]] +// CLFINITEONLY-NEXT: [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) <25 x double> @extern_matrix(<25 x double> noundef nofpclass(nan inf) [[X]]) #[[ATTR11]] // CLFINITEONLY-NEXT: ret <25 x double> [[CALL]] // // NONANS: Function Attrs: noinline nounwind optnone >From c9dad42ae9017405e5dbd323596eac4be59e4498 Mon Sep 17 00:00:00 2001 From: Marian Lukac <marian.lu...@arm.com> Date: Wed, 15 May 2024 11:14:38 +0000 Subject: [PATCH 2/2] Restructured the whole case statement --- clang/lib/CodeGen/CGCall.cpp | 69 +++++++++++++----------------------- 1 file changed, 25 insertions(+), 44 deletions(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 8fdde5326b8ed..19d62a74cfbc4 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5192,28 +5192,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, case ABIArgInfo::Indirect: case ABIArgInfo::IndirectAliased: { assert(NumIRArgs == 1); - if (!I->isAggregate()) { - // Make a temporary alloca to pass the argument. - RawAddress Addr = CreateMemTempWithoutCast( - I->Ty, ArgInfo.getIndirectAlign(), "indirect-arg-temp"); - - llvm::Value *Val = getAsNaturalPointerTo(Addr, I->Ty); - if (ArgHasMaybeUndefAttr) - Val = Builder.CreateFreeze(Val); - IRCallArgs[FirstIRArg] = Val; - - // Emit lifetime markers for the temporary alloca. - llvm::TypeSize ByvalTempElementSize = - CGM.getDataLayout().getTypeAllocSize(Addr.getElementType()); - llvm::Value *LifetimeSize = - EmitLifetimeStart(ByvalTempElementSize, Addr.getPointer()); - - // Add cleanup code to emit the end lifetime marker after the call. - if (LifetimeSize) // In case we disabled lifetime markers. - CallLifetimeEndAfterCall.emplace_back(Addr, LifetimeSize); - - I->copyInto(*this, Addr); - } else { + if (I->isAggregate()) { // We want to avoid creating an unnecessary temporary+copy here; // however, we need one in three cases: // 1. If the argument is not byval, and we are required to copy the @@ -5266,28 +5245,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, } } - if (NeedCopy) { - // Create an aligned temporary, and copy to it. - RawAddress AI = CreateMemTempWithoutCast( - I->Ty, ArgInfo.getIndirectAlign(), "byval-temp"); - llvm::Value *Val = getAsNaturalPointerTo(AI, I->Ty); - if (ArgHasMaybeUndefAttr) - Val = Builder.CreateFreeze(Val); - IRCallArgs[FirstIRArg] = Val; - - // Emit lifetime markers for the temporary alloca. - llvm::TypeSize ByvalTempElementSize = - CGM.getDataLayout().getTypeAllocSize(AI.getElementType()); - llvm::Value *LifetimeSize = - EmitLifetimeStart(ByvalTempElementSize, AI.getPointer()); - - // Add cleanup code to emit the end lifetime marker after the call. - if (LifetimeSize) // In case we disabled lifetime markers. - CallLifetimeEndAfterCall.emplace_back(AI, LifetimeSize); - - // Generate the copy. - I->copyInto(*this, AI); - } else { + if (!NeedCopy) { // Skip the extra memcpy call. llvm::Value *V = getAsNaturalPointerTo(Addr, I->Ty); auto *T = llvm::PointerType::get( @@ -5299,8 +5257,31 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (ArgHasMaybeUndefAttr) Val = Builder.CreateFreeze(Val); IRCallArgs[FirstIRArg] = Val; + break; } } + + // For non-aggregate args and aggregate args meeting conditions above + // we need to create an aligned temporary, and copy to it. + RawAddress AI = CreateMemTempWithoutCast( + I->Ty, ArgInfo.getIndirectAlign(), "byval-temp"); + llvm::Value *Val = getAsNaturalPointerTo(AI, I->Ty); + if (ArgHasMaybeUndefAttr) + Val = Builder.CreateFreeze(Val); + IRCallArgs[FirstIRArg] = Val; + + // Emit lifetime markers for the temporary alloca. + llvm::TypeSize ByvalTempElementSize = + CGM.getDataLayout().getTypeAllocSize(AI.getElementType()); + llvm::Value *LifetimeSize = + EmitLifetimeStart(ByvalTempElementSize, AI.getPointer()); + + // Add cleanup code to emit the end lifetime marker after the call. + if (LifetimeSize) // In case we disabled lifetime markers. + CallLifetimeEndAfterCall.emplace_back(AI, LifetimeSize); + + // Generate the copy. + I->copyInto(*this, AI); break; } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits