https://github.com/JonPsson1 created https://github.com/llvm/llvm-project/pull/83446
The casting of FP atomic loads and stores are always done by the front-end, even though the AtomicExpandPass will do it if the target requests it (which is the default). This patch removes this casting in the front-end entirely. @efriedma-quic @arsenm @uweigand >From 58de0db5378adc54d85ee6fbd291716871cd3c6d Mon Sep 17 00:00:00 2001 From: Jonas Paulsson <pauls...@linux.ibm.com> Date: Thu, 29 Feb 2024 14:16:57 +0100 Subject: [PATCH] Don't do casting of atomic FP loads/stores in FE. --- clang/lib/CodeGen/CGAtomic.cpp | 96 ++++++++++++------- .../CodeGen/SystemZ/atomic_fp_load_store.c | 84 ++++++++++++++++ clang/test/CodeGen/atomic.c | 3 +- clang/test/CodeGen/c11atomics-ios.c | 8 +- clang/test/OpenMP/atomic_read_codegen.c | 10 +- clang/test/OpenMP/atomic_write_codegen.c | 13 +-- 6 files changed, 156 insertions(+), 58 deletions(-) create mode 100644 clang/test/CodeGen/SystemZ/atomic_fp_load_store.c diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp index a8d846b4f6a592..fb03d013e8afc7 100644 --- a/clang/lib/CodeGen/CGAtomic.cpp +++ b/clang/lib/CodeGen/CGAtomic.cpp @@ -194,12 +194,14 @@ namespace { RValue convertAtomicTempToRValue(Address addr, AggValueSlot resultSlot, SourceLocation loc, bool AsValue) const; - /// Converts a rvalue to integer value. - llvm::Value *convertRValueToInt(RValue RVal) const; + llvm::Value *getScalarRValValueOrNull(RValue RVal) const; - RValue ConvertIntToValueOrAtomic(llvm::Value *IntVal, - AggValueSlot ResultSlot, - SourceLocation Loc, bool AsValue) const; + /// Converts an rvalue to integer value if needed. + llvm::Value *convertRValueToInt(RValue RVal, bool CastFP = true) const; + + RValue ConvertToValueOrAtomic(llvm::Value *IntVal, AggValueSlot ResultSlot, + SourceLocation Loc, bool AsValue, + bool CastFP = true) const; /// Copy an atomic r-value into atomic-layout memory. void emitCopyIntoMemory(RValue rvalue) const; @@ -261,7 +263,8 @@ namespace { void EmitAtomicLoadLibcall(llvm::Value *AddForLoaded, llvm::AtomicOrdering AO, bool IsVolatile); /// Emits atomic load as LLVM instruction. - llvm::Value *EmitAtomicLoadOp(llvm::AtomicOrdering AO, bool IsVolatile); + llvm::Value *EmitAtomicLoadOp(llvm::AtomicOrdering AO, bool IsVolatile, + bool CastFP = true); /// Emits atomic compare-and-exchange op as a libcall. llvm::Value *EmitAtomicCompareExchangeLibcall( llvm::Value *ExpectedAddr, llvm::Value *DesiredAddr, @@ -1396,12 +1399,13 @@ RValue AtomicInfo::convertAtomicTempToRValue(Address addr, LVal.getBaseInfo(), TBAAAccessInfo())); } -RValue AtomicInfo::ConvertIntToValueOrAtomic(llvm::Value *IntVal, - AggValueSlot ResultSlot, - SourceLocation Loc, - bool AsValue) const { +RValue AtomicInfo::ConvertToValueOrAtomic(llvm::Value *Val, + AggValueSlot ResultSlot, + SourceLocation Loc, bool AsValue, + bool CastFP) const { // Try not to in some easy cases. - assert(IntVal->getType()->isIntegerTy() && "Expected integer value"); + assert((Val->getType()->isIntegerTy() || Val->getType()->isIEEELikeFPTy()) && + "Expected integer or floating point value"); if (getEvaluationKind() == TEK_Scalar && (((!LVal.isBitField() || LVal.getBitFieldInfo().Size == ValueSizeInBits) && @@ -1410,13 +1414,14 @@ RValue AtomicInfo::ConvertIntToValueOrAtomic(llvm::Value *IntVal, auto *ValTy = AsValue ? CGF.ConvertTypeForMem(ValueTy) : getAtomicAddress().getElementType(); - if (ValTy->isIntegerTy()) { - assert(IntVal->getType() == ValTy && "Different integer types."); - return RValue::get(CGF.EmitFromMemory(IntVal, ValueTy)); + if (ValTy->isIntegerTy() || (!CastFP && ValTy->isIEEELikeFPTy())) { + assert((!ValTy->isIntegerTy() || Val->getType() == ValTy) && + "Different integer types."); + return RValue::get(CGF.EmitFromMemory(Val, ValueTy)); } else if (ValTy->isPointerTy()) - return RValue::get(CGF.Builder.CreateIntToPtr(IntVal, ValTy)); - else if (llvm::CastInst::isBitCastable(IntVal->getType(), ValTy)) - return RValue::get(CGF.Builder.CreateBitCast(IntVal, ValTy)); + return RValue::get(CGF.Builder.CreateIntToPtr(Val, ValTy)); + else if (llvm::CastInst::isBitCastable(Val->getType(), ValTy)) + return RValue::get(CGF.Builder.CreateBitCast(Val, ValTy)); } // Create a temporary. This needs to be big enough to hold the @@ -1433,8 +1438,7 @@ RValue AtomicInfo::ConvertIntToValueOrAtomic(llvm::Value *IntVal, // Slam the integer into the temporary. Address CastTemp = castToAtomicIntPointer(Temp); - CGF.Builder.CreateStore(IntVal, CastTemp) - ->setVolatile(TempIsVolatile); + CGF.Builder.CreateStore(Val, CastTemp)->setVolatile(TempIsVolatile); return convertAtomicTempToRValue(Temp, ResultSlot, Loc, AsValue); } @@ -1453,9 +1457,11 @@ void AtomicInfo::EmitAtomicLoadLibcall(llvm::Value *AddForLoaded, } llvm::Value *AtomicInfo::EmitAtomicLoadOp(llvm::AtomicOrdering AO, - bool IsVolatile) { + bool IsVolatile, bool CastFP) { // Okay, we're doing this natively. - Address Addr = getAtomicAddressAsAtomicIntPointer(); + Address Addr = getAtomicAddress(); + if (!(Addr.getElementType()->isIEEELikeFPTy() && !CastFP)) + Addr = castToAtomicIntPointer(Addr); llvm::LoadInst *Load = CGF.Builder.CreateLoad(Addr, "atomic-load"); Load->setAtomic(AO); @@ -1515,7 +1521,7 @@ RValue AtomicInfo::EmitAtomicLoad(AggValueSlot ResultSlot, SourceLocation Loc, } // Okay, we're doing this natively. - auto *Load = EmitAtomicLoadOp(AO, IsVolatile); + auto *Load = EmitAtomicLoadOp(AO, IsVolatile, /*CastFP=*/false); // If we're ignoring an aggregate return, don't do anything. if (getEvaluationKind() == TEK_Aggregate && ResultSlot.isIgnored()) @@ -1523,7 +1529,8 @@ RValue AtomicInfo::EmitAtomicLoad(AggValueSlot ResultSlot, SourceLocation Loc, // Okay, turn that back into the original value or atomic (for non-simple // lvalues) type. - return ConvertIntToValueOrAtomic(Load, ResultSlot, Loc, AsValue); + return ConvertToValueOrAtomic(Load, ResultSlot, Loc, AsValue, + /*CastFP=*/false); } /// Emit a load from an l-value of atomic type. Note that the r-value @@ -1586,12 +1593,18 @@ Address AtomicInfo::materializeRValue(RValue rvalue) const { return TempLV.getAddress(CGF); } -llvm::Value *AtomicInfo::convertRValueToInt(RValue RVal) const { +llvm::Value *AtomicInfo::getScalarRValValueOrNull(RValue RVal) const { + if (RVal.isScalar() && (!hasPadding() || !LVal.isSimple())) + return RVal.getScalarVal(); + return nullptr; +} + +llvm::Value *AtomicInfo::convertRValueToInt(RValue RVal, bool CastFP) const { // If we've got a scalar value of the right size, try to avoid going - // through memory. - if (RVal.isScalar() && (!hasPadding() || !LVal.isSimple())) { - llvm::Value *Value = RVal.getScalarVal(); - if (isa<llvm::IntegerType>(Value->getType())) + // through memory. Floats get casted if needed by AtomicExpandPass. + if (llvm::Value *Value = getScalarRValValueOrNull(RVal)) { + if (isa<llvm::IntegerType>(Value->getType()) || + (!CastFP && Value->getType()->isIEEELikeFPTy())) return CGF.EmitToMemory(Value, ValueTy); else { llvm::IntegerType *InputIntTy = llvm::IntegerType::get( @@ -1677,8 +1690,8 @@ std::pair<RValue, llvm::Value *> AtomicInfo::EmitAtomicCompareExchange( auto Res = EmitAtomicCompareExchangeOp(ExpectedVal, DesiredVal, Success, Failure, IsWeak); return std::make_pair( - ConvertIntToValueOrAtomic(Res.first, AggValueSlot::ignored(), - SourceLocation(), /*AsValue=*/false), + ConvertToValueOrAtomic(Res.first, AggValueSlot::ignored(), + SourceLocation(), /*AsValue=*/false), Res.second); } @@ -1787,8 +1800,8 @@ void AtomicInfo::EmitAtomicUpdateOp( requiresMemSetZero(getAtomicAddress().getElementType())) { CGF.Builder.CreateStore(PHI, NewAtomicIntAddr); } - auto OldRVal = ConvertIntToValueOrAtomic(PHI, AggValueSlot::ignored(), - SourceLocation(), /*AsValue=*/false); + auto OldRVal = ConvertToValueOrAtomic(PHI, AggValueSlot::ignored(), + SourceLocation(), /*AsValue=*/false); EmitAtomicUpdateValue(CGF, *this, OldRVal, UpdateOp, NewAtomicAddr); auto *DesiredVal = CGF.Builder.CreateLoad(NewAtomicIntAddr); // Try to write new value using cmpxchg operation. @@ -1953,13 +1966,22 @@ void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue dest, } // Okay, we're doing this natively. - llvm::Value *intValue = atomics.convertRValueToInt(rvalue); + llvm::Value *ValToStore = + atomics.convertRValueToInt(rvalue, /*CastFP=*/false); // Do the atomic store. - Address addr = atomics.castToAtomicIntPointer(atomics.getAtomicAddress()); - intValue = Builder.CreateIntCast( - intValue, addr.getElementType(), /*isSigned=*/false); - llvm::StoreInst *store = Builder.CreateStore(intValue, addr); + Address Addr = atomics.getAtomicAddress(); + bool ShouldCastToInt = true; + if (llvm::Value *Value = atomics.getScalarRValValueOrNull(rvalue)) + if (isa<llvm::IntegerType>(Value->getType()) || + Value->getType()->isIEEELikeFPTy()) + ShouldCastToInt = false; + if (ShouldCastToInt) { + Addr = atomics.castToAtomicIntPointer(Addr); + ValToStore = Builder.CreateIntCast(ValToStore, Addr.getElementType(), + /*isSigned=*/false); + } + llvm::StoreInst *store = Builder.CreateStore(ValToStore, Addr); if (AO == llvm::AtomicOrdering::Acquire) AO = llvm::AtomicOrdering::Monotonic; diff --git a/clang/test/CodeGen/SystemZ/atomic_fp_load_store.c b/clang/test/CodeGen/SystemZ/atomic_fp_load_store.c new file mode 100644 index 00000000000000..3533e460a31b60 --- /dev/null +++ b/clang/test/CodeGen/SystemZ/atomic_fp_load_store.c @@ -0,0 +1,84 @@ +// RUN: %clang_cc1 -triple s390x-linux-gnu -O1 -emit-llvm %s -o - | FileCheck %s +// +// Test that floating point atomic stores and loads do not get casted to/from +// integer. + +#include <stdatomic.h> + +_Atomic float Af; +_Atomic double Ad; +_Atomic long double Ald; + +//// Atomic stores of floating point values. +void fun0(float Arg) { +// CHECK-LABEL: @fun0 +// CHECK: store atomic float %Arg, ptr @Af seq_cst, align 4 + Af = Arg; +} + +void fun1(double Arg) { +// CHECK-LABEL: @fun1 +// CHECK: store atomic double %Arg, ptr @Ad seq_cst, align 8 + Ad = Arg; +} + +void fun2(long double Arg) { +// CHECK-LABEL: @fun2 +// CHECK: store atomic fp128 %Arg, ptr @Ald seq_cst, align 16 + Ald = Arg; +} + +void fun3(_Atomic float *Dst, float Arg) { +// CHECK-LABEL: @fun +// CHECK: store atomic float %Arg, ptr %Dst seq_cst, align 4 + *Dst = Arg; +} + +void fun4(_Atomic double *Dst, double Arg) { +// CHECK-LABEL: @fun4 +// CHECK: store atomic double %Arg, ptr %Dst seq_cst, align 8 + *Dst = Arg; +} + +void fun5(_Atomic long double *Dst, long double Arg) { +// CHECK-LABEL: @fun5 +// CHECK: store atomic fp128 %Arg, ptr %Dst seq_cst, align 16 + *Dst = Arg; +} + +//// Atomic loads of floating point values. +float fun6() { +// CHECK-LABEL: @fun6 +// CHECK: %atomic-load = load atomic float, ptr @Af seq_cst, align 4 + return Af; +} + +float fun7() { +// CHECK-LABEL: @fun7 +// CHECK: %atomic-load = load atomic double, ptr @Ad seq_cst, align 8 + return Ad; +} + +float fun8() { +// CHECK-LABEL: @fun8 +// CHECK: %atomic-load = load atomic fp128, ptr @Ald seq_cst, align 16 + return Ald; +} + +float fun9(_Atomic float *Src) { +// CHECK-LABEL: @fun9 +// CHECK: %atomic-load = load atomic float, ptr %Src seq_cst, align 4 + return *Src; +} + +double fun10(_Atomic double *Src) { +// CHECK-LABEL: @fun10 +// CHECK: %atomic-load = load atomic double, ptr %Src seq_cst, align 8 + return *Src; +} + +long double fun11(_Atomic long double *Src) { +// CHECK-LABEL: @fun11 +// CHECK: %atomic-load = load atomic fp128, ptr %Src seq_cst, align 16 + return *Src; +} diff --git a/clang/test/CodeGen/atomic.c b/clang/test/CodeGen/atomic.c index 9143bedab90616..af5c056bbfe6e8 100644 --- a/clang/test/CodeGen/atomic.c +++ b/clang/test/CodeGen/atomic.c @@ -145,6 +145,5 @@ void force_global_uses(void) { (void)glob_int; // CHECK: load atomic i32, ptr @[[GLOB_INT]] seq_cst (void)glob_flt; - // CHECK: %[[LOCAL_FLT:.+]] = load atomic i32, ptr @[[GLOB_FLT]] seq_cst - // CHECK-NEXT: bitcast i32 %[[LOCAL_FLT]] to float + // CHECK: load atomic float, ptr @[[GLOB_FLT]] seq_cst } diff --git a/clang/test/CodeGen/c11atomics-ios.c b/clang/test/CodeGen/c11atomics-ios.c index bcb6519ab0dc81..811820b67fbdbf 100644 --- a/clang/test/CodeGen/c11atomics-ios.c +++ b/clang/test/CodeGen/c11atomics-ios.c @@ -19,15 +19,13 @@ void testFloat(_Atomic(float) *fp) { _Atomic(float) x = 2.0f; // CHECK-NEXT: [[T0:%.*]] = load ptr, ptr [[FP]] -// CHECK-NEXT: [[T2:%.*]] = load atomic i32, ptr [[T0]] seq_cst, align 4 -// CHECK-NEXT: [[T3:%.*]] = bitcast i32 [[T2]] to float -// CHECK-NEXT: store float [[T3]], ptr [[F]] +// CHECK-NEXT: [[T2:%.*]] = load atomic float, ptr [[T0]] seq_cst, align 4 +// CHECK-NEXT: store float [[T2]], ptr [[F]] float f = *fp; // CHECK-NEXT: [[T0:%.*]] = load float, ptr [[F]], align 4 // CHECK-NEXT: [[T1:%.*]] = load ptr, ptr [[FP]], align 4 -// CHECK-NEXT: [[T2:%.*]] = bitcast float [[T0]] to i32 -// CHECK-NEXT: store atomic i32 [[T2]], ptr [[T1]] seq_cst, align 4 +// CHECK-NEXT: store atomic float [[T0]], ptr [[T1]] seq_cst, align 4 *fp = f; // CHECK-NEXT: ret void diff --git a/clang/test/OpenMP/atomic_read_codegen.c b/clang/test/OpenMP/atomic_read_codegen.c index b60e1686d4dab0..0a68c8e2c35a56 100644 --- a/clang/test/OpenMP/atomic_read_codegen.c +++ b/clang/test/OpenMP/atomic_read_codegen.c @@ -128,13 +128,11 @@ int main(void) { // CHECK: store i64 #pragma omp atomic read ullv = ullx; -// CHECK: load atomic i32, ptr {{.*}} monotonic, align 4 -// CHECK: bitcast i32 {{.*}} to float +// CHECK: load atomic float, ptr {{.*}} monotonic, align 4 // CHECK: store float #pragma omp atomic read fv = fx; -// CHECK: load atomic i64, ptr {{.*}} monotonic, align 8 -// CHECK: bitcast i64 {{.*}} to double +// CHECK: load atomic double, ptr {{.*}} monotonic, align 8 // CHECK: store double #pragma omp atomic read dv = dx; @@ -194,11 +192,11 @@ int main(void) { // CHECK: store i64 #pragma omp atomic read lv = cix; -// CHECK: load atomic i32, ptr {{.*}} monotonic, align 4 +// CHECK: load atomic float, ptr {{.*}} monotonic, align 4 // CHECK: store i64 #pragma omp atomic read ulv = fx; -// CHECK: load atomic i64, ptr {{.*}} monotonic, align 8 +// CHECK: load atomic double, ptr {{.*}} monotonic, align 8 // CHECK: store i64 #pragma omp atomic read llv = dx; diff --git a/clang/test/OpenMP/atomic_write_codegen.c b/clang/test/OpenMP/atomic_write_codegen.c index 24dfbf9c0e8fc7..afe8737d30b065 100644 --- a/clang/test/OpenMP/atomic_write_codegen.c +++ b/clang/test/OpenMP/atomic_write_codegen.c @@ -131,13 +131,11 @@ int main(void) { #pragma omp atomic write ullx = ullv; // CHECK: load float, ptr -// CHECK: bitcast float {{.*}} to i32 -// CHECK: store atomic i32 {{.*}}, ptr {{.*}} monotonic, align 4 +// CHECK: store atomic float {{.*}}, ptr {{.*}} monotonic, align 4 #pragma omp atomic write fx = fv; // CHECK: load double, ptr -// CHECK: bitcast double {{.*}} to i64 -// CHECK: store atomic i64 {{.*}}, ptr {{.*}} monotonic, align 8 +// CHECK: store atomic double {{.*}}, ptr {{.*}} monotonic, align 8 #pragma omp atomic write dx = dv; // CHECK: [[LD:%.+]] = load x86_fp80, ptr @@ -215,11 +213,11 @@ int main(void) { #pragma omp atomic write cix = lv; // CHECK: load i64, ptr -// CHECK: store atomic i32 %{{.+}}, ptr {{.*}} monotonic, align 4 +// CHECK: store atomic float %{{.+}}, ptr {{.*}} monotonic, align 4 #pragma omp atomic write fx = ulv; // CHECK: load i64, ptr -// CHECK: store atomic i64 %{{.+}}, ptr {{.*}} monotonic, align 8 +// CHECK: store atomic double %{{.+}}, ptr {{.*}} monotonic, align 8 #pragma omp atomic write dx = llv; // CHECK: load i64, ptr @@ -491,8 +489,7 @@ int main(void) { float2x.x = ulv; // CHECK: call i32 @llvm.read_register.i32( // CHECK: sitofp i32 %{{.+}} to double -// CHECK: bitcast double %{{.+}} to i64 -// CHECK: store atomic i64 %{{.+}}, ptr @{{.+}} seq_cst, align 8 +// CHECK: store atomic double %{{.+}}, ptr @{{.+}} seq_cst, align 8 // CHECK: call{{.*}} @__kmpc_flush( #pragma omp atomic write seq_cst dv = rix; _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits