Author: Pravin Jagtap Date: 2023-11-08T15:09:10+05:30 New Revision: 32a3f2afe6ea7ffb02a6a188b123ded6f4c89f6c
URL: https://github.com/llvm/llvm-project/commit/32a3f2afe6ea7ffb02a6a188b123ded6f4c89f6c DIFF: https://github.com/llvm/llvm-project/commit/32a3f2afe6ea7ffb02a6a188b123ded6f4c89f6c.diff LOG: [AMDGPU] const-fold imm operands of amdgcn_update_dpp intrinsic (#71139) Operands of `__builtin_amdgcn_update_dpp` need to evaluate to constant to match the intrinsic requirements. Fixes: SWDEV-426822, SWDEV-431138 --------- Authored-by: Pravin Jagtap <pravin.jag...@amd.com> Added: clang/test/CodeGenHIP/dpp-const-fold.hip Modified: clang/lib/CodeGen/CGBuiltin.cpp clang/lib/CodeGen/CodeGenFunction.h Removed: ################################################################################ diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 5ab81cc605819c3..e7e498e8a933131 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -5708,18 +5708,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::FunctionType *FTy = F->getFunctionType(); for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) { - Value *ArgValue; - // If this is a normal argument, just emit it as a scalar. - if ((ICEArguments & (1 << i)) == 0) { - ArgValue = EmitScalarExpr(E->getArg(i)); - } else { - // If this is required to be a constant, constant fold it so that we - // know that the generated intrinsic gets a ConstantInt. - ArgValue = llvm::ConstantInt::get( - getLLVMContext(), - *E->getArg(i)->getIntegerConstantExpr(getContext())); - } - + Value *ArgValue = EmitScalarOrConstFoldImmArg(ICEArguments, i, E); // If the intrinsic arg type is different from the builtin arg type // we need to do a bit cast. 
llvm::Type *PTy = FTy->getParamType(i); @@ -8599,15 +8588,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, } } - if ((ICEArguments & (1 << i)) == 0) { - Ops.push_back(EmitScalarExpr(E->getArg(i))); - } else { - // If this is required to be a constant, constant fold it so that we know - // that the generated intrinsic gets a ConstantInt. - Ops.push_back(llvm::ConstantInt::get( - getLLVMContext(), - *E->getArg(i)->getIntegerConstantExpr(getContext()))); - } + Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E)); } switch (BuiltinID) { @@ -11094,15 +11075,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, continue; } } - if ((ICEArguments & (1 << i)) == 0) { - Ops.push_back(EmitScalarExpr(E->getArg(i))); - } else { - // If this is required to be a constant, constant fold it so that we know - // that the generated intrinsic gets a ConstantInt. - Ops.push_back(llvm::ConstantInt::get( - getLLVMContext(), - *E->getArg(i)->getIntegerConstantExpr(getContext()))); - } + Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E)); } auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap); @@ -13814,16 +13787,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, assert(Error == ASTContext::GE_None && "Should not codegen an error"); for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { - // If this is a normal argument, just emit it as a scalar. - if ((ICEArguments & (1 << i)) == 0) { - Ops.push_back(EmitScalarExpr(E->getArg(i))); - continue; - } - - // If this is required to be a constant, constant fold it so that we know - // that the generated intrinsic gets a ConstantInt. 
- Ops.push_back(llvm::ConstantInt::get( - getLLVMContext(), *E->getArg(i)->getIntegerConstantExpr(getContext()))); + Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E)); } // These exist so that the builtin that takes an immediate can be bounds @@ -17588,6 +17552,23 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope, SSID = getLLVMContext().getOrInsertSyncScopeID(scp); } +llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments, + unsigned Idx, + const CallExpr *E) { + llvm::Value *Arg = nullptr; + if ((ICEArguments & (1 << Idx)) == 0) { + Arg = EmitScalarExpr(E->getArg(Idx)); + } else { + // If this is required to be a constant, constant fold it so that we + // know that the generated intrinsic gets a ConstantInt. + std::optional<llvm::APSInt> Result = + E->getArg(Idx)->getIntegerConstantExpr(getContext()); + assert(Result && "Expected argument to be a constant"); + Arg = llvm::ConstantInt::get(getLLVMContext(), *Result); + } + return Arg; +} + Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent; @@ -17638,8 +17619,15 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_mov_dpp: case AMDGPU::BI__builtin_amdgcn_update_dpp: { llvm::SmallVector<llvm::Value *, 6> Args; - for (unsigned I = 0; I != E->getNumArgs(); ++I) - Args.push_back(EmitScalarExpr(E->getArg(I))); + // Find out if any arguments are required to be integer constant + // expressions. 
+ unsigned ICEArguments = 0; + ASTContext::GetBuiltinTypeError Error; + getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); + assert(Error == ASTContext::GE_None && "Should not codegen an error"); + for (unsigned I = 0; I != E->getNumArgs(); ++I) { + Args.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, I, E)); + } assert(Args.size() == 5 || Args.size() == 6); if (Args.size() == 5) Args.insert(Args.begin(), llvm::PoisonValue::get(Args[0]->getType())); @@ -20615,17 +20603,7 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID, Ops.push_back(AggValue); continue; } - - // If this is a normal argument, just emit it as a scalar. - if ((ICEArguments & (1 << i)) == 0) { - Ops.push_back(EmitScalarExpr(E->getArg(i))); - continue; - } - - // If this is required to be a constant, constant fold it so that we know - // that the generated intrinsic gets a ConstantInt. - Ops.push_back(llvm::ConstantInt::get( - getLLVMContext(), *E->getArg(i)->getIntegerConstantExpr(getContext()))); + Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E)); } Intrinsic::ID ID = Intrinsic::not_intrinsic; diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 42f94c9b540191e..dc6773eb83f5ece 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4327,6 +4327,8 @@ class CodeGenFunction : public CodeGenTypeCache { llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E); + llvm::Value *EmitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx, + const CallExpr *E); llvm::Value *EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, diff --git a/clang/test/CodeGenHIP/dpp-const-fold.hip 
b/clang/test/CodeGenHIP/dpp-const-fold.hip new file mode 100644 index 000000000000000..1d1d135fb06239a --- /dev/null +++ b/clang/test/CodeGenHIP/dpp-const-fold.hip @@ -0,0 +1,48 @@ +// REQUIRES: amdgpu-registered-target + +// RUN: %clang --offload-arch=gfx906 -S -o - -emit-llvm --cuda-device-only \ +// RUN: %s | FileCheck %s + +constexpr static int OpCtrl() +{ + return 15 + 1; +} + +constexpr static int RowMask() +{ + return 3 + 1; +} + +constexpr static int BankMask() +{ + return 2 + 1; +} + +constexpr static bool BountCtrl() +{ + return true & false; +} + +// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 16, i32 0, i32 0, i1 false) +__attribute__((global)) void test_update_dpp_const_fold_imm_operand_2(int* out, int a, int b) +{ + *out = __builtin_amdgcn_update_dpp(a, b, OpCtrl(), 0, 0, false); +} + +// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 4, i32 0, i1 false) +__attribute__((global)) void test_update_dpp_const_fold_imm_operand_3(int* out, int a, int b) +{ + *out = __builtin_amdgcn_update_dpp(a, b, 0, RowMask(), 0, false); +} + +// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 0, i32 3, i1 false) +__attribute__((global)) void test_update_dpp_const_fold_imm_operand_4(int* out, int a, int b) +{ + *out = __builtin_amdgcn_update_dpp(a, b, 0, 0, BankMask(), false); +} + +// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 0, i32 0, i1 false) +__attribute__((global)) void test_update_dpp_const_fold_imm_operand_5(int* out, int a, int b) +{ + *out = __builtin_amdgcn_update_dpp(a, b, 0, 0, 0, BountCtrl()); +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits