Author: Farzon Lotfi Date: 2025-03-27T11:47:14-04:00 New Revision: 59d06071e9b509e874f24c465b13eb77cffa1d42
URL: https://github.com/llvm/llvm-project/commit/59d06071e9b509e874f24c465b13eb77cffa1d42 DIFF: https://github.com/llvm/llvm-project/commit/59d06071e9b509e874f24c465b13eb77cffa1d42.diff LOG: [NFC][HLSL] Move emitter out of AMDGPU.cpp (#133251) - Move all HLSL code out of AMDGPU.cpp to CGHLSLBuiltins.cpp - Fixes accidental reorganization of HLSL code into AMDGPU caused by (https://github.com/llvm/llvm-project/pull/132252, https://github.com/llvm/llvm-project/commit/7f920e2e5f70b) Added: clang/lib/CodeGen/CGHLSLBuiltins.cpp Modified: .github/new-prs-labeler.yml clang/lib/CodeGen/CMakeLists.txt clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp Removed: ################################################################################ diff --git a/.github/new-prs-labeler.yml b/.github/new-prs-labeler.yml index c375fa5dc7516..b93cdff8af345 100644 --- a/.github/new-prs-labeler.yml +++ b/.github/new-prs-labeler.yml @@ -637,6 +637,7 @@ hlsl: - clang/lib/Sema/HLSLExternalSemaSource.cpp - clang/lib/Sema/SemaHLSL.cpp - clang/lib/CodeGen/CGHLSLRuntime.* + - clang/lib/CodeGen/CGHLSLBuiltins.cpp - llvm/include/llvm/Frontend/HLSL/** - llvm/lib/Frontend/HLSL/** diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp new file mode 100644 index 0000000000000..5709594a34826 --- /dev/null +++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp @@ -0,0 +1,740 @@ +//===------- CGHLSLBuiltins.cpp - Emit LLVM Code for HLSL builtins --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains code to emit HLSL Builtin calls as LLVM code. +// +//===----------------------------------------------------------------------===// + +#include "CGBuiltin.h" +#include "CGHLSLRuntime.h" + +using namespace clang; +using namespace CodeGen; +using namespace llvm; + +static Value *handleAsDoubleBuiltin(CodeGenFunction &CGF, const CallExpr *E) { + assert((E->getArg(0)->getType()->hasUnsignedIntegerRepresentation() && + E->getArg(1)->getType()->hasUnsignedIntegerRepresentation()) && + "asdouble operands types mismatch"); + Value *OpLowBits = CGF.EmitScalarExpr(E->getArg(0)); + Value *OpHighBits = CGF.EmitScalarExpr(E->getArg(1)); + + llvm::Type *ResultType = CGF.DoubleTy; + int N = 1; + if (auto *VTy = E->getArg(0)->getType()->getAs<clang::VectorType>()) { + N = VTy->getNumElements(); + ResultType = llvm::FixedVectorType::get(CGF.DoubleTy, N); + } + + if (CGF.CGM.getTarget().getTriple().isDXIL()) + return CGF.Builder.CreateIntrinsic( + /*ReturnType=*/ResultType, Intrinsic::dx_asdouble, + {OpLowBits, OpHighBits}, nullptr, "hlsl.asdouble"); + + if (!E->getArg(0)->getType()->isVectorType()) { + OpLowBits = CGF.Builder.CreateVectorSplat(1, OpLowBits); + OpHighBits = CGF.Builder.CreateVectorSplat(1, OpHighBits); + } + + llvm::SmallVector<int> Mask; + for (int i = 0; i < N; i++) { + Mask.push_back(i); + Mask.push_back(i + N); + } + + Value *BitVec = CGF.Builder.CreateShuffleVector(OpLowBits, OpHighBits, Mask); + + return CGF.Builder.CreateBitCast(BitVec, ResultType); +} + +static Value *handleHlslClip(const CallExpr *E, CodeGenFunction *CGF) { + Value *Op0 = CGF->EmitScalarExpr(E->getArg(0)); + + Constant *FZeroConst = ConstantFP::getZero(CGF->FloatTy); + Value *CMP; + Value *LastInstr; + + if (const auto *VecTy = E->getArg(0)->getType()->getAs<clang::VectorType>()) { + FZeroConst = ConstantVector::getSplat( + ElementCount::getFixed(VecTy->getNumElements()), FZeroConst); + auto *FCompInst = CGF->Builder.CreateFCmpOLT(Op0, FZeroConst); + CMP = CGF->Builder.CreateIntrinsic( + CGF->Builder.getInt1Ty(), CGF->CGM.getHLSLRuntime().getAnyIntrinsic(), + {FCompInst}); + } else + CMP = CGF->Builder.CreateFCmpOLT(Op0, FZeroConst); + + if (CGF->CGM.getTarget().getTriple().isDXIL()) + LastInstr = + CGF->Builder.CreateIntrinsic(CGF->VoidTy, Intrinsic::dx_discard, {CMP}); + else if (CGF->CGM.getTarget().getTriple().isSPIRV()) { + BasicBlock *LT0 = CGF->createBasicBlock("lt0", CGF->CurFn); + BasicBlock *End = CGF->createBasicBlock("end", CGF->CurFn); + + CGF->Builder.CreateCondBr(CMP, LT0, End); + + CGF->Builder.SetInsertPoint(LT0); + + CGF->Builder.CreateIntrinsic(CGF->VoidTy, Intrinsic::spv_discard, {}); + + LastInstr = CGF->Builder.CreateBr(End); + CGF->Builder.SetInsertPoint(End); + } else { + llvm_unreachable("Backend Codegen not supported."); + } + + return LastInstr; +} + +static Value *handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF) { + Value *Op0 = CGF->EmitScalarExpr(E->getArg(0)); + const auto *OutArg1 = dyn_cast<HLSLOutArgExpr>(E->getArg(1)); + const auto *OutArg2 = dyn_cast<HLSLOutArgExpr>(E->getArg(2)); + + CallArgList Args; + LValue Op1TmpLValue = + CGF->EmitHLSLOutArgExpr(OutArg1, Args, OutArg1->getType()); + LValue Op2TmpLValue = + CGF->EmitHLSLOutArgExpr(OutArg2, Args, OutArg2->getType()); + + if (CGF->getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()) + Args.reverseWritebacks(); + + Value *LowBits = nullptr; + Value *HighBits = nullptr; + + if (CGF->CGM.getTarget().getTriple().isDXIL()) { + + llvm::Type *RetElementTy = CGF->Int32Ty; + if (auto *Op0VecTy = E->getArg(0)->getType()->getAs<clang::VectorType>()) + RetElementTy = llvm::VectorType::get( + CGF->Int32Ty, ElementCount::getFixed(Op0VecTy->getNumElements())); + auto *RetTy = llvm::StructType::get(RetElementTy, RetElementTy); + + CallInst *CI = CGF->Builder.CreateIntrinsic( + RetTy, Intrinsic::dx_splitdouble, {Op0}, nullptr, "hlsl.splitdouble"); + + LowBits = CGF->Builder.CreateExtractValue(CI, 0); + HighBits = CGF->Builder.CreateExtractValue(CI, 1); + + } else { + // For Non DXIL targets we generate the instructions. + + if (!Op0->getType()->isVectorTy()) { + FixedVectorType *DestTy = FixedVectorType::get(CGF->Int32Ty, 2); + Value *Bitcast = CGF->Builder.CreateBitCast(Op0, DestTy); + + LowBits = CGF->Builder.CreateExtractElement(Bitcast, (uint64_t)0); + HighBits = CGF->Builder.CreateExtractElement(Bitcast, 1); + } else { + int NumElements = 1; + if (const auto *VecTy = + E->getArg(0)->getType()->getAs<clang::VectorType>()) + NumElements = VecTy->getNumElements(); + + FixedVectorType *Uint32VecTy = + FixedVectorType::get(CGF->Int32Ty, NumElements * 2); + Value *Uint32Vec = CGF->Builder.CreateBitCast(Op0, Uint32VecTy); + if (NumElements == 1) { + LowBits = CGF->Builder.CreateExtractElement(Uint32Vec, (uint64_t)0); + HighBits = CGF->Builder.CreateExtractElement(Uint32Vec, 1); + } else { + SmallVector<int> EvenMask, OddMask; + for (int I = 0, E = NumElements; I != E; ++I) { + EvenMask.push_back(I * 2); + OddMask.push_back(I * 2 + 1); + } + LowBits = CGF->Builder.CreateShuffleVector(Uint32Vec, EvenMask); + HighBits = CGF->Builder.CreateShuffleVector(Uint32Vec, OddMask); + } + } + } + CGF->Builder.CreateStore(LowBits, Op1TmpLValue.getAddress()); + auto *LastInst = + CGF->Builder.CreateStore(HighBits, Op2TmpLValue.getAddress()); + CGF->EmitWritebacks(Args); + return LastInst; +} + +// Return dot product intrinsic that corresponds to the QT scalar type +static Intrinsic::ID getDotProductIntrinsic(CGHLSLRuntime &RT, QualType QT) { + if (QT->isFloatingType()) + return RT.getFDotIntrinsic(); + if (QT->isSignedIntegerType()) + return RT.getSDotIntrinsic(); + assert(QT->isUnsignedIntegerType()); + return RT.getUDotIntrinsic(); +} + +static Intrinsic::ID getFirstBitHighIntrinsic(CGHLSLRuntime &RT, QualType QT) { + if (QT->hasSignedIntegerRepresentation()) { + return RT.getFirstBitSHighIntrinsic(); + } + + assert(QT->hasUnsignedIntegerRepresentation()); + return RT.getFirstBitUHighIntrinsic(); +} + +// Return wave active sum that corresponds to the QT scalar type +static Intrinsic::ID getWaveActiveSumIntrinsic(llvm::Triple::ArchType Arch, + CGHLSLRuntime &RT, QualType QT) { + switch (Arch) { + case llvm::Triple::spirv: + return Intrinsic::spv_wave_reduce_sum; + case llvm::Triple::dxil: { + if (QT->isUnsignedIntegerType()) + return Intrinsic::dx_wave_reduce_usum; + return Intrinsic::dx_wave_reduce_sum; + } + default: + llvm_unreachable("Intrinsic WaveActiveSum" + " not supported by target architecture"); + } +} + +// Return wave active sum that corresponds to the QT scalar type +static Intrinsic::ID getWaveActiveMaxIntrinsic(llvm::Triple::ArchType Arch, + CGHLSLRuntime &RT, QualType QT) { + switch (Arch) { + case llvm::Triple::spirv: + if (QT->isUnsignedIntegerType()) + return Intrinsic::spv_wave_reduce_umax; + return Intrinsic::spv_wave_reduce_max; + case llvm::Triple::dxil: { + if (QT->isUnsignedIntegerType()) + return Intrinsic::dx_wave_reduce_umax; + return Intrinsic::dx_wave_reduce_max; + } + default: + llvm_unreachable("Intrinsic WaveActiveMax" + " not supported by target architecture"); + } +} + +Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, + const CallExpr *E, + ReturnValueSlot ReturnValue) { + if (!getLangOpts().HLSL) + return nullptr; + + switch (BuiltinID) { + case Builtin::BI__builtin_hlsl_adduint64: { + Value *OpA = EmitScalarExpr(E->getArg(0)); + Value *OpB = EmitScalarExpr(E->getArg(1)); + QualType Arg0Ty = E->getArg(0)->getType(); + uint64_t NumElements = Arg0Ty->castAs<VectorType>()->getNumElements(); + assert(Arg0Ty == E->getArg(1)->getType() && + "AddUint64 operand types must match"); + assert(Arg0Ty->hasIntegerRepresentation() && + "AddUint64 operands must have an integer representation"); + assert((NumElements == 2 || NumElements == 4) && + "AddUint64 operands must have 2 or 4 elements"); + + llvm::Value *LowA; + llvm::Value *HighA; + llvm::Value *LowB; + llvm::Value *HighB; + + // Obtain low and high words of inputs A and B + if (NumElements == 2) { + LowA = Builder.CreateExtractElement(OpA, (uint64_t)0, "LowA"); + HighA = Builder.CreateExtractElement(OpA, (uint64_t)1, "HighA"); + LowB = Builder.CreateExtractElement(OpB, (uint64_t)0, "LowB"); + HighB = Builder.CreateExtractElement(OpB, (uint64_t)1, "HighB"); + } else { + LowA = Builder.CreateShuffleVector(OpA, {0, 2}, "LowA"); + HighA = Builder.CreateShuffleVector(OpA, {1, 3}, "HighA"); + LowB = Builder.CreateShuffleVector(OpB, {0, 2}, "LowB"); + HighB = Builder.CreateShuffleVector(OpB, {1, 3}, "HighB"); + } + + // Use an uadd_with_overflow to compute the sum of low words and obtain a + // carry value + llvm::Value *Carry; + llvm::Value *LowSum = EmitOverflowIntrinsic( + *this, Intrinsic::uadd_with_overflow, LowA, LowB, Carry); + llvm::Value *ZExtCarry = + Builder.CreateZExt(Carry, HighA->getType(), "CarryZExt"); + + // Sum the high words and the carry + llvm::Value *HighSum = Builder.CreateAdd(HighA, HighB, "HighSum"); + llvm::Value *HighSumPlusCarry = + Builder.CreateAdd(HighSum, ZExtCarry, "HighSumPlusCarry"); + + if (NumElements == 4) { + return Builder.CreateShuffleVector(LowSum, HighSumPlusCarry, {0, 2, 1, 3}, + "hlsl.AddUint64"); + } + + llvm::Value *Result = PoisonValue::get(OpA->getType()); + Result = Builder.CreateInsertElement(Result, LowSum, (uint64_t)0, + "hlsl.AddUint64.upto0"); + Result = Builder.CreateInsertElement(Result, HighSumPlusCarry, (uint64_t)1, + "hlsl.AddUint64"); + return Result; + } + case Builtin::BI__builtin_hlsl_resource_getpointer: { + Value *HandleOp = EmitScalarExpr(E->getArg(0)); + Value *IndexOp = EmitScalarExpr(E->getArg(1)); + + // TODO: Map to an hlsl_device address space. + llvm::Type *RetTy = llvm::PointerType::getUnqual(getLLVMContext()); + + return Builder.CreateIntrinsic( + RetTy, CGM.getHLSLRuntime().getCreateResourceGetPointerIntrinsic(), + ArrayRef<Value *>{HandleOp, IndexOp}); + } + case Builtin::BI__builtin_hlsl_all: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + return Builder.CreateIntrinsic( + /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()), + CGM.getHLSLRuntime().getAllIntrinsic(), ArrayRef<Value *>{Op0}, nullptr, + "hlsl.all"); + } + case Builtin::BI__builtin_hlsl_and: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + return Builder.CreateAnd(Op0, Op1, "hlsl.and"); + } + case Builtin::BI__builtin_hlsl_or: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + return Builder.CreateOr(Op0, Op1, "hlsl.or"); + } + case Builtin::BI__builtin_hlsl_any: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + return Builder.CreateIntrinsic( + /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()), + CGM.getHLSLRuntime().getAnyIntrinsic(), ArrayRef<Value *>{Op0}, nullptr, + "hlsl.any"); + } + case Builtin::BI__builtin_hlsl_asdouble: + return handleAsDoubleBuiltin(*this, E); + case Builtin::BI__builtin_hlsl_elementwise_clamp: { + Value *OpX = EmitScalarExpr(E->getArg(0)); + Value *OpMin = EmitScalarExpr(E->getArg(1)); + Value *OpMax = EmitScalarExpr(E->getArg(2)); + + QualType Ty = E->getArg(0)->getType(); + if (auto *VecTy = Ty->getAs<VectorType>()) + Ty = VecTy->getElementType(); + + Intrinsic::ID Intr; + if (Ty->isFloatingType()) { + Intr = CGM.getHLSLRuntime().getNClampIntrinsic(); + } else if (Ty->isUnsignedIntegerType()) { + Intr = CGM.getHLSLRuntime().getUClampIntrinsic(); + } else { + assert(Ty->isSignedIntegerType()); + Intr = CGM.getHLSLRuntime().getSClampIntrinsic(); + } + return Builder.CreateIntrinsic( + /*ReturnType=*/OpX->getType(), Intr, + ArrayRef<Value *>{OpX, OpMin, OpMax}, nullptr, "hlsl.clamp"); + } + case Builtin::BI__builtin_hlsl_cross: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + assert(E->getArg(0)->getType()->hasFloatingRepresentation() && + E->getArg(1)->getType()->hasFloatingRepresentation() && + "cross operands must have a float representation"); + // make sure each vector has exactly 3 elements + assert( + E->getArg(0)->getType()->castAs<VectorType>()->getNumElements() == 3 && + E->getArg(1)->getType()->castAs<VectorType>()->getNumElements() == 3 && + "input vectors must have 3 elements each"); + return Builder.CreateIntrinsic( + /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getCrossIntrinsic(), + ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.cross"); + } + case Builtin::BI__builtin_hlsl_dot: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + llvm::Type *T0 = Op0->getType(); + llvm::Type *T1 = Op1->getType(); + + // If the arguments are scalars, just emit a multiply + if (!T0->isVectorTy() && !T1->isVectorTy()) { + if (T0->isFloatingPointTy()) + return Builder.CreateFMul(Op0, Op1, "hlsl.dot"); + + if (T0->isIntegerTy()) + return Builder.CreateMul(Op0, Op1, "hlsl.dot"); + + llvm_unreachable( + "Scalar dot product is only supported on ints and floats."); + } + // For vectors, validate types and emit the appropriate intrinsic + + // A VectorSplat should have happened + assert(T0->isVectorTy() && T1->isVectorTy() && + "Dot product of vector and scalar is not supported."); + + auto *VecTy0 = E->getArg(0)->getType()->castAs<VectorType>(); + [[maybe_unused]] auto *VecTy1 = + E->getArg(1)->getType()->castAs<VectorType>(); + + assert(VecTy0->getElementType() == VecTy1->getElementType() && + "Dot product of vectors need the same element types."); + + assert(VecTy0->getNumElements() == VecTy1->getNumElements() && + "Dot product requires vectors to be of the same size."); + + return Builder.CreateIntrinsic( + /*ReturnType=*/T0->getScalarType(), + getDotProductIntrinsic(CGM.getHLSLRuntime(), VecTy0->getElementType()), + ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.dot"); + } + case Builtin::BI__builtin_hlsl_dot4add_i8packed: { + Value *A = EmitScalarExpr(E->getArg(0)); + Value *B = EmitScalarExpr(E->getArg(1)); + Value *C = EmitScalarExpr(E->getArg(2)); + + Intrinsic::ID ID = CGM.getHLSLRuntime().getDot4AddI8PackedIntrinsic(); + return Builder.CreateIntrinsic( + /*ReturnType=*/C->getType(), ID, ArrayRef<Value *>{A, B, C}, nullptr, + "hlsl.dot4add.i8packed"); + } + case Builtin::BI__builtin_hlsl_dot4add_u8packed: { + Value *A = EmitScalarExpr(E->getArg(0)); + Value *B = EmitScalarExpr(E->getArg(1)); + Value *C = EmitScalarExpr(E->getArg(2)); + + Intrinsic::ID ID = CGM.getHLSLRuntime().getDot4AddU8PackedIntrinsic(); + return Builder.CreateIntrinsic( + /*ReturnType=*/C->getType(), ID, ArrayRef<Value *>{A, B, C}, nullptr, + "hlsl.dot4add.u8packed"); + } + case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: { + Value *X = EmitScalarExpr(E->getArg(0)); + + return Builder.CreateIntrinsic( + /*ReturnType=*/ConvertType(E->getType()), + getFirstBitHighIntrinsic(CGM.getHLSLRuntime(), E->getArg(0)->getType()), + ArrayRef<Value *>{X}, nullptr, "hlsl.firstbithigh"); + } + case Builtin::BI__builtin_hlsl_elementwise_firstbitlow: { + Value *X = EmitScalarExpr(E->getArg(0)); + + return Builder.CreateIntrinsic( + /*ReturnType=*/ConvertType(E->getType()), + CGM.getHLSLRuntime().getFirstBitLowIntrinsic(), ArrayRef<Value *>{X}, + nullptr, "hlsl.firstbitlow"); + } + case Builtin::BI__builtin_hlsl_lerp: { + Value *X = EmitScalarExpr(E->getArg(0)); + Value *Y = EmitScalarExpr(E->getArg(1)); + Value *S = EmitScalarExpr(E->getArg(2)); + if (!E->getArg(0)->getType()->hasFloatingRepresentation()) + llvm_unreachable("lerp operand must have a float representation"); + return Builder.CreateIntrinsic( + /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getLerpIntrinsic(), + ArrayRef<Value *>{X, Y, S}, nullptr, "hlsl.lerp"); + } + case Builtin::BI__builtin_hlsl_normalize: { + Value *X = EmitScalarExpr(E->getArg(0)); + + assert(E->getArg(0)->getType()->hasFloatingRepresentation() && + "normalize operand must have a float representation"); + + return Builder.CreateIntrinsic( + /*ReturnType=*/X->getType(), + CGM.getHLSLRuntime().getNormalizeIntrinsic(), ArrayRef<Value *>{X}, + nullptr, "hlsl.normalize"); + } + case Builtin::BI__builtin_hlsl_elementwise_degrees: { + Value *X = EmitScalarExpr(E->getArg(0)); + + assert(E->getArg(0)->getType()->hasFloatingRepresentation() && + "degree operand must have a float representation"); + + return Builder.CreateIntrinsic( + /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getDegreesIntrinsic(), + ArrayRef<Value *>{X}, nullptr, "hlsl.degrees"); + } + case Builtin::BI__builtin_hlsl_elementwise_frac: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + if (!E->getArg(0)->getType()->hasFloatingRepresentation()) + llvm_unreachable("frac operand must have a float representation"); + return Builder.CreateIntrinsic( + /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getFracIntrinsic(), + ArrayRef<Value *>{Op0}, nullptr, "hlsl.frac"); + } + case Builtin::BI__builtin_hlsl_elementwise_isinf: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + llvm::Type *Xty = Op0->getType(); + llvm::Type *retType = llvm::Type::getInt1Ty(this->getLLVMContext()); + if (Xty->isVectorTy()) { + auto *XVecTy = E->getArg(0)->getType()->castAs<VectorType>(); + retType = llvm::VectorType::get( + retType, ElementCount::getFixed(XVecTy->getNumElements())); + } + if (!E->getArg(0)->getType()->hasFloatingRepresentation()) + llvm_unreachable("isinf operand must have a float representation"); + return Builder.CreateIntrinsic(retType, Intrinsic::dx_isinf, + ArrayRef<Value *>{Op0}, nullptr, "dx.isinf"); + } + case Builtin::BI__builtin_hlsl_mad: { + Value *M = EmitScalarExpr(E->getArg(0)); + Value *A = EmitScalarExpr(E->getArg(1)); + Value *B = EmitScalarExpr(E->getArg(2)); + if (E->getArg(0)->getType()->hasFloatingRepresentation()) + return Builder.CreateIntrinsic( + /*ReturnType*/ M->getType(), Intrinsic::fmuladd, + ArrayRef<Value *>{M, A, B}, nullptr, "hlsl.fmad"); + + if (E->getArg(0)->getType()->hasSignedIntegerRepresentation()) { + if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil) + return Builder.CreateIntrinsic( + /*ReturnType*/ M->getType(), Intrinsic::dx_imad, + ArrayRef<Value *>{M, A, B}, nullptr, "dx.imad"); + + Value *Mul = Builder.CreateNSWMul(M, A); + return Builder.CreateNSWAdd(Mul, B); + } + assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation()); + if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil) + return Builder.CreateIntrinsic( + /*ReturnType=*/M->getType(), Intrinsic::dx_umad, + ArrayRef<Value *>{M, A, B}, nullptr, "dx.umad"); + + Value *Mul = Builder.CreateNUWMul(M, A); + return Builder.CreateNUWAdd(Mul, B); + } + case Builtin::BI__builtin_hlsl_elementwise_rcp: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + if (!E->getArg(0)->getType()->hasFloatingRepresentation()) + llvm_unreachable("rcp operand must have a float representation"); + llvm::Type *Ty = Op0->getType(); + llvm::Type *EltTy = Ty->getScalarType(); + Constant *One = Ty->isVectorTy() + ? ConstantVector::getSplat( + ElementCount::getFixed( + cast<FixedVectorType>(Ty)->getNumElements()), + ConstantFP::get(EltTy, 1.0)) + : ConstantFP::get(EltTy, 1.0); + return Builder.CreateFDiv(One, Op0, "hlsl.rcp"); + } + case Builtin::BI__builtin_hlsl_elementwise_rsqrt: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + if (!E->getArg(0)->getType()->hasFloatingRepresentation()) + llvm_unreachable("rsqrt operand must have a float representation"); + return Builder.CreateIntrinsic( + /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getRsqrtIntrinsic(), + ArrayRef<Value *>{Op0}, nullptr, "hlsl.rsqrt"); + } + case Builtin::BI__builtin_hlsl_elementwise_saturate: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + assert(E->getArg(0)->getType()->hasFloatingRepresentation() && + "saturate operand must have a float representation"); + return Builder.CreateIntrinsic( + /*ReturnType=*/Op0->getType(), + CGM.getHLSLRuntime().getSaturateIntrinsic(), ArrayRef<Value *>{Op0}, + nullptr, "hlsl.saturate"); + } + case Builtin::BI__builtin_hlsl_select: { + Value *OpCond = EmitScalarExpr(E->getArg(0)); + RValue RValTrue = EmitAnyExpr(E->getArg(1)); + Value *OpTrue = + RValTrue.isScalar() + ? RValTrue.getScalarVal() + : RValTrue.getAggregatePointer(E->getArg(1)->getType(), *this); + RValue RValFalse = EmitAnyExpr(E->getArg(2)); + Value *OpFalse = + RValFalse.isScalar() + ? RValFalse.getScalarVal() + : RValFalse.getAggregatePointer(E->getArg(2)->getType(), *this); + if (auto *VTy = E->getType()->getAs<VectorType>()) { + if (!OpTrue->getType()->isVectorTy()) + OpTrue = + Builder.CreateVectorSplat(VTy->getNumElements(), OpTrue, "splat"); + if (!OpFalse->getType()->isVectorTy()) + OpFalse = + Builder.CreateVectorSplat(VTy->getNumElements(), OpFalse, "splat"); + } + + Value *SelectVal = + Builder.CreateSelect(OpCond, OpTrue, OpFalse, "hlsl.select"); + if (!RValTrue.isScalar()) + Builder.CreateStore(SelectVal, ReturnValue.getAddress(), + ReturnValue.isVolatile()); + + return SelectVal; + } + case Builtin::BI__builtin_hlsl_step: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + assert(E->getArg(0)->getType()->hasFloatingRepresentation() && + E->getArg(1)->getType()->hasFloatingRepresentation() && + "step operands must have a float representation"); + return Builder.CreateIntrinsic( + /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getStepIntrinsic(), + ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.step"); + } + case Builtin::BI__builtin_hlsl_wave_active_all_true: { + Value *Op = EmitScalarExpr(E->getArg(0)); + assert(Op->getType()->isIntegerTy(1) && + "Intrinsic WaveActiveAllTrue operand must be a bool"); + + Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAllTrueIntrinsic(); + return EmitRuntimeCall( + Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID), {Op}); + } + case Builtin::BI__builtin_hlsl_wave_active_any_true: { + Value *Op = EmitScalarExpr(E->getArg(0)); + assert(Op->getType()->isIntegerTy(1) && + "Intrinsic WaveActiveAnyTrue operand must be a bool"); + + Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAnyTrueIntrinsic(); + return EmitRuntimeCall( + Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID), {Op}); + } + case Builtin::BI__builtin_hlsl_wave_active_count_bits: { + Value *OpExpr = EmitScalarExpr(E->getArg(0)); + Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveCountBitsIntrinsic(); + return EmitRuntimeCall( + Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID), + ArrayRef{OpExpr}); + } + case Builtin::BI__builtin_hlsl_wave_active_sum: { + // Due to the use of variadic arguments, explicitly retreive argument + Value *OpExpr = EmitScalarExpr(E->getArg(0)); + llvm::FunctionType *FT = llvm::FunctionType::get( + OpExpr->getType(), ArrayRef{OpExpr->getType()}, false); + Intrinsic::ID IID = getWaveActiveSumIntrinsic( + getTarget().getTriple().getArch(), CGM.getHLSLRuntime(), + E->getArg(0)->getType()); + + // Get overloaded name + std::string Name = + Intrinsic::getName(IID, ArrayRef{OpExpr->getType()}, &CGM.getModule()); + return EmitRuntimeCall(CGM.CreateRuntimeFunction(FT, Name, {}, + /*Local=*/false, + /*AssumeConvergent=*/true), + ArrayRef{OpExpr}, "hlsl.wave.active.sum"); + } + case Builtin::BI__builtin_hlsl_wave_active_max: { + // Due to the use of variadic arguments, explicitly retreive argument + Value *OpExpr = EmitScalarExpr(E->getArg(0)); + llvm::FunctionType *FT = llvm::FunctionType::get( + OpExpr->getType(), ArrayRef{OpExpr->getType()}, false); + Intrinsic::ID IID = getWaveActiveMaxIntrinsic( + getTarget().getTriple().getArch(), CGM.getHLSLRuntime(), + E->getArg(0)->getType()); + + // Get overloaded name + std::string Name = + Intrinsic::getName(IID, ArrayRef{OpExpr->getType()}, &CGM.getModule()); + return EmitRuntimeCall(CGM.CreateRuntimeFunction(FT, Name, {}, + /*Local=*/false, + /*AssumeConvergent=*/true), + ArrayRef{OpExpr}, "hlsl.wave.active.max"); + } + case Builtin::BI__builtin_hlsl_wave_get_lane_index: { + // We don't define a SPIR-V intrinsic, instead it is a SPIR-V built-in + // defined in SPIRVBuiltins.td. So instead we manually get the matching name + // for the DirectX intrinsic and the demangled builtin name + switch (CGM.getTarget().getTriple().getArch()) { + case llvm::Triple::dxil: + return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration( + &CGM.getModule(), Intrinsic::dx_wave_getlaneindex)); + case llvm::Triple::spirv: + return EmitRuntimeCall(CGM.CreateRuntimeFunction( + llvm::FunctionType::get(IntTy, {}, false), + "__hlsl_wave_get_lane_index", {}, false, true)); + default: + llvm_unreachable( + "Intrinsic WaveGetLaneIndex not supported by target architecture"); + } + } + case Builtin::BI__builtin_hlsl_wave_is_first_lane: { + Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveIsFirstLaneIntrinsic(); + return EmitRuntimeCall( + Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID)); + } + case Builtin::BI__builtin_hlsl_wave_read_lane_at: { + // Due to the use of variadic arguments we must explicitly retreive them and + // create our function type. + Value *OpExpr = EmitScalarExpr(E->getArg(0)); + Value *OpIndex = EmitScalarExpr(E->getArg(1)); + llvm::FunctionType *FT = llvm::FunctionType::get( + OpExpr->getType(), ArrayRef{OpExpr->getType(), OpIndex->getType()}, + false); + + // Get overloaded name + std::string Name = + Intrinsic::getName(CGM.getHLSLRuntime().getWaveReadLaneAtIntrinsic(), + ArrayRef{OpExpr->getType()}, &CGM.getModule()); + return EmitRuntimeCall(CGM.CreateRuntimeFunction(FT, Name, {}, + /*Local=*/false, + /*AssumeConvergent=*/true), + ArrayRef{OpExpr, OpIndex}, "hlsl.wave.readlane"); + } + case Builtin::BI__builtin_hlsl_elementwise_sign: { + auto *Arg0 = E->getArg(0); + Value *Op0 = EmitScalarExpr(Arg0); + llvm::Type *Xty = Op0->getType(); + llvm::Type *retType = llvm::Type::getInt32Ty(this->getLLVMContext()); + if (Xty->isVectorTy()) { + auto *XVecTy = Arg0->getType()->castAs<VectorType>(); + retType = llvm::VectorType::get( + retType, ElementCount::getFixed(XVecTy->getNumElements())); + } + assert((Arg0->getType()->hasFloatingRepresentation() || + Arg0->getType()->hasIntegerRepresentation()) && + "sign operand must have a float or int representation"); + + if (Arg0->getType()->hasUnsignedIntegerRepresentation()) { + Value *Cmp = Builder.CreateICmpEQ(Op0, ConstantInt::get(Xty, 0)); + return Builder.CreateSelect(Cmp, ConstantInt::get(retType, 0), + ConstantInt::get(retType, 1), "hlsl.sign"); + } + + return Builder.CreateIntrinsic( + retType, CGM.getHLSLRuntime().getSignIntrinsic(), + ArrayRef<Value *>{Op0}, nullptr, "hlsl.sign"); + } + case Builtin::BI__builtin_hlsl_elementwise_radians: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + assert(E->getArg(0)->getType()->hasFloatingRepresentation() && + "radians operand must have a float representation"); + return Builder.CreateIntrinsic( + /*ReturnType=*/Op0->getType(), + CGM.getHLSLRuntime().getRadiansIntrinsic(), ArrayRef<Value *>{Op0}, + nullptr, "hlsl.radians"); + } + case Builtin::BI__builtin_hlsl_buffer_update_counter: { + Value *ResHandle = EmitScalarExpr(E->getArg(0)); + Value *Offset = EmitScalarExpr(E->getArg(1)); + Value *OffsetI8 = Builder.CreateIntCast(Offset, Int8Ty, true); + return Builder.CreateIntrinsic( + /*ReturnType=*/Offset->getType(), + CGM.getHLSLRuntime().getBufferUpdateCounterIntrinsic(), + ArrayRef<Value *>{ResHandle, OffsetI8}, nullptr); + } + case Builtin::BI__builtin_hlsl_elementwise_splitdouble: { + + assert((E->getArg(0)->getType()->hasFloatingRepresentation() && + E->getArg(1)->getType()->hasUnsignedIntegerRepresentation() && + E->getArg(2)->getType()->hasUnsignedIntegerRepresentation()) && + "asuint operands types mismatch"); + return handleHlslSplitdouble(E, this); + } + case Builtin::BI__builtin_hlsl_elementwise_clip: + assert(E->getArg(0)->getType()->hasFloatingRepresentation() && + "clip operands types mismatch"); + return handleHlslClip(E, this); + case Builtin::BI__builtin_hlsl_group_memory_barrier_with_group_sync: { + Intrinsic::ID ID = + CGM.getHLSLRuntime().getGroupMemoryBarrierWithGroupSyncIntrinsic(); + return EmitRuntimeCall( + Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID)); + } + } + return nullptr; +} diff --git a/clang/lib/CodeGen/CMakeLists.txt b/clang/lib/CodeGen/CMakeLists.txt index 94a908197d795..ebe2fbd7db295 100644 --- a/clang/lib/CodeGen/CMakeLists.txt +++ b/clang/lib/CodeGen/CMakeLists.txt @@ -81,6 +81,7 @@ add_clang_library(clangCodeGen CGExprScalar.cpp CGGPUBuiltin.cpp CGHLSLRuntime.cpp + CGHLSLBuiltins.cpp CGLoopInfo.cpp CGNonTrivialStruct.cpp CGObjC.cpp diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp index 72f9e5a8174d2..e312e59aaae4b 100644 --- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp @@ -11,7 +11,6 @@ //===----------------------------------------------------------------------===// #include "CGBuiltin.h" -#include "CGHLSLRuntime.h" #include "clang/Basic/TargetBuiltins.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/IntrinsicsAMDGPU.h" @@ -185,150 +184,6 @@ static Value *emitRangedBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, return Call; } -static Value *handleAsDoubleBuiltin(CodeGenFunction &CGF, const CallExpr *E) { - assert((E->getArg(0)->getType()->hasUnsignedIntegerRepresentation() && - E->getArg(1)->getType()->hasUnsignedIntegerRepresentation()) && - "asdouble operands types mismatch"); - Value *OpLowBits = CGF.EmitScalarExpr(E->getArg(0)); - Value *OpHighBits = CGF.EmitScalarExpr(E->getArg(1)); - - llvm::Type *ResultType = CGF.DoubleTy; - int N = 1; - if (auto *VTy = E->getArg(0)->getType()->getAs<clang::VectorType>()) { - N = VTy->getNumElements(); - ResultType = llvm::FixedVectorType::get(CGF.DoubleTy, N); - } - - if (CGF.CGM.getTarget().getTriple().isDXIL()) - return CGF.Builder.CreateIntrinsic( - /*ReturnType=*/ResultType, Intrinsic::dx_asdouble, - {OpLowBits, OpHighBits}, nullptr, "hlsl.asdouble"); - - if (!E->getArg(0)->getType()->isVectorType()) { - OpLowBits = CGF.Builder.CreateVectorSplat(1, OpLowBits); - OpHighBits = CGF.Builder.CreateVectorSplat(1, OpHighBits); - } - - llvm::SmallVector<int> Mask; - for (int i = 0; i < N; i++) { - Mask.push_back(i); - Mask.push_back(i + N); - } - - Value *BitVec = CGF.Builder.CreateShuffleVector(OpLowBits, OpHighBits, Mask); - - return CGF.Builder.CreateBitCast(BitVec, ResultType); -} - -static Value *handleHlslClip(const CallExpr *E, CodeGenFunction *CGF) { - Value *Op0 = CGF->EmitScalarExpr(E->getArg(0)); - - Constant *FZeroConst = ConstantFP::getZero(CGF->FloatTy); - Value *CMP; - Value *LastInstr; - - if (const auto *VecTy = E->getArg(0)->getType()->getAs<clang::VectorType>()) { - FZeroConst = ConstantVector::getSplat( - ElementCount::getFixed(VecTy->getNumElements()), FZeroConst); - auto *FCompInst = CGF->Builder.CreateFCmpOLT(Op0, FZeroConst); - CMP = CGF->Builder.CreateIntrinsic( - CGF->Builder.getInt1Ty(), CGF->CGM.getHLSLRuntime().getAnyIntrinsic(), - {FCompInst}); - } else - CMP = CGF->Builder.CreateFCmpOLT(Op0, FZeroConst); - - if (CGF->CGM.getTarget().getTriple().isDXIL()) - LastInstr = CGF->Builder.CreateIntrinsic( - CGF->VoidTy, Intrinsic::dx_discard, {CMP}); - else if (CGF->CGM.getTarget().getTriple().isSPIRV()) { - BasicBlock *LT0 = CGF->createBasicBlock("lt0", CGF->CurFn); - BasicBlock *End = CGF->createBasicBlock("end", CGF->CurFn); - - CGF->Builder.CreateCondBr(CMP, LT0, End); - - CGF->Builder.SetInsertPoint(LT0); - - CGF->Builder.CreateIntrinsic(CGF->VoidTy, Intrinsic::spv_discard, {}); - - LastInstr = CGF->Builder.CreateBr(End); - CGF->Builder.SetInsertPoint(End); - } else { - llvm_unreachable("Backend Codegen not supported."); - } - - return LastInstr; -} - -static Value *handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF) { - Value *Op0 = CGF->EmitScalarExpr(E->getArg(0)); - const auto *OutArg1 = dyn_cast<HLSLOutArgExpr>(E->getArg(1)); - const auto *OutArg2 = dyn_cast<HLSLOutArgExpr>(E->getArg(2)); - - CallArgList Args; - LValue Op1TmpLValue = - CGF->EmitHLSLOutArgExpr(OutArg1, Args, OutArg1->getType()); - LValue Op2TmpLValue = - CGF->EmitHLSLOutArgExpr(OutArg2, Args, OutArg2->getType()); - - if (CGF->getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()) - Args.reverseWritebacks(); - - Value *LowBits = nullptr; - Value *HighBits = nullptr; - - if (CGF->CGM.getTarget().getTriple().isDXIL()) { - - llvm::Type *RetElementTy = CGF->Int32Ty; - if (auto *Op0VecTy = E->getArg(0)->getType()->getAs<clang::VectorType>()) - RetElementTy = llvm::VectorType::get( - CGF->Int32Ty, ElementCount::getFixed(Op0VecTy->getNumElements())); - auto *RetTy = llvm::StructType::get(RetElementTy, RetElementTy); - - CallInst *CI = CGF->Builder.CreateIntrinsic( - RetTy, Intrinsic::dx_splitdouble, {Op0}, nullptr, "hlsl.splitdouble"); - - LowBits = CGF->Builder.CreateExtractValue(CI, 0); - HighBits = CGF->Builder.CreateExtractValue(CI, 1); - - } else { - // For Non DXIL targets we generate the instructions. - - if (!Op0->getType()->isVectorTy()) { - FixedVectorType *DestTy = FixedVectorType::get(CGF->Int32Ty, 2); - Value *Bitcast = CGF->Builder.CreateBitCast(Op0, DestTy); - - LowBits = CGF->Builder.CreateExtractElement(Bitcast, (uint64_t)0); - HighBits = CGF->Builder.CreateExtractElement(Bitcast, 1); - } else { - int NumElements = 1; - if (const auto *VecTy = - E->getArg(0)->getType()->getAs<clang::VectorType>()) - NumElements = VecTy->getNumElements(); - - FixedVectorType *Uint32VecTy = - FixedVectorType::get(CGF->Int32Ty, NumElements * 2); - Value *Uint32Vec = CGF->Builder.CreateBitCast(Op0, Uint32VecTy); - if (NumElements == 1) { - LowBits = CGF->Builder.CreateExtractElement(Uint32Vec, (uint64_t)0); - HighBits = CGF->Builder.CreateExtractElement(Uint32Vec, 1); - } else { - SmallVector<int> EvenMask, OddMask; - for (int I = 0, E = NumElements; I != E; ++I) { - EvenMask.push_back(I * 2); - OddMask.push_back(I * 2 + 1); - } - LowBits = CGF->Builder.CreateShuffleVector(Uint32Vec, EvenMask); - HighBits = CGF->Builder.CreateShuffleVector(Uint32Vec, OddMask); - } - } - } - CGF->Builder.CreateStore(LowBits, Op1TmpLValue.getAddress()); - auto *LastInst = - CGF->Builder.CreateStore(HighBits, Op2TmpLValue.getAddress()); - CGF->EmitWritebacks(Args); - return LastInst; -} - // For processing memory ordering and memory scope arguments of various // amdgcn builtins. // \p Order takes a C++11 comptabile memory-ordering specifier and converts @@ -409,585 +264,6 @@ llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments, return Arg; } -// Return dot product intrinsic that corresponds to the QT scalar type -static Intrinsic::ID getDotProductIntrinsic(CGHLSLRuntime &RT, QualType QT) { - if (QT->isFloatingType()) - return RT.getFDotIntrinsic(); - if (QT->isSignedIntegerType()) - return RT.getSDotIntrinsic(); - assert(QT->isUnsignedIntegerType()); - return RT.getUDotIntrinsic(); -} - -static Intrinsic::ID getFirstBitHighIntrinsic(CGHLSLRuntime &RT, QualType QT) { - if (QT->hasSignedIntegerRepresentation()) { - return RT.getFirstBitSHighIntrinsic(); - } - - assert(QT->hasUnsignedIntegerRepresentation()); - return RT.getFirstBitUHighIntrinsic(); -} - -// Return wave active sum that corresponds to the QT scalar type -static Intrinsic::ID getWaveActiveSumIntrinsic(llvm::Triple::ArchType Arch, - CGHLSLRuntime &RT, QualType QT) { - switch (Arch) { - case llvm::Triple::spirv: - return Intrinsic::spv_wave_reduce_sum; - case llvm::Triple::dxil: { - if (QT->isUnsignedIntegerType()) - return Intrinsic::dx_wave_reduce_usum; - return Intrinsic::dx_wave_reduce_sum; - } - default: - llvm_unreachable("Intrinsic WaveActiveSum" - " not supported by target architecture"); - } -} - -// Return wave active sum that corresponds to the QT scalar type -static Intrinsic::ID getWaveActiveMaxIntrinsic(llvm::Triple::ArchType Arch, - CGHLSLRuntime &RT, QualType QT) { - switch (Arch) { - case llvm::Triple::spirv: - if (QT->isUnsignedIntegerType()) - return Intrinsic::spv_wave_reduce_umax; - return Intrinsic::spv_wave_reduce_max; - case llvm::Triple::dxil: { - if (QT->isUnsignedIntegerType()) - return Intrinsic::dx_wave_reduce_umax; - return Intrinsic::dx_wave_reduce_max; - } - default: - llvm_unreachable("Intrinsic WaveActiveMax" - " not supported by target architecture"); - } -} - -Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, - const CallExpr *E, - ReturnValueSlot ReturnValue) { - if (!getLangOpts().HLSL) - return nullptr; - - switch (BuiltinID) { - case Builtin::BI__builtin_hlsl_adduint64: { - Value *OpA = EmitScalarExpr(E->getArg(0)); - Value *OpB = EmitScalarExpr(E->getArg(1)); - QualType Arg0Ty = E->getArg(0)->getType(); - uint64_t NumElements = Arg0Ty->castAs<VectorType>()->getNumElements(); - assert(Arg0Ty == E->getArg(1)->getType() && - "AddUint64 operand types must match"); - assert(Arg0Ty->hasIntegerRepresentation() && - "AddUint64 operands must have an integer representation"); - assert((NumElements == 2 || NumElements == 4) && - "AddUint64 operands must have 2 or 4 elements"); - - llvm::Value *LowA; - llvm::Value *HighA; - llvm::Value *LowB; - llvm::Value *HighB; - - // Obtain low and high words of inputs A and B - if (NumElements == 2) { - LowA = Builder.CreateExtractElement(OpA, (uint64_t)0, "LowA"); - HighA = Builder.CreateExtractElement(OpA, (uint64_t)1, "HighA"); - LowB = Builder.CreateExtractElement(OpB, (uint64_t)0, "LowB"); - HighB = Builder.CreateExtractElement(OpB, (uint64_t)1, "HighB"); - } else { - LowA = Builder.CreateShuffleVector(OpA, {0, 2}, "LowA"); - HighA = Builder.CreateShuffleVector(OpA, {1, 3}, "HighA"); - LowB = Builder.CreateShuffleVector(OpB, {0, 2}, "LowB"); - HighB = Builder.CreateShuffleVector(OpB, {1, 3}, "HighB"); - } - - // Use an uadd_with_overflow to compute the sum of low words and obtain a - // carry value - llvm::Value *Carry; - llvm::Value *LowSum = EmitOverflowIntrinsic( - *this, Intrinsic::uadd_with_overflow, LowA, LowB, Carry); - llvm::Value *ZExtCarry = - Builder.CreateZExt(Carry, HighA->getType(), "CarryZExt"); - - // Sum the high words and the carry - llvm::Value *HighSum = Builder.CreateAdd(HighA, HighB, "HighSum"); - llvm::Value *HighSumPlusCarry = - Builder.CreateAdd(HighSum, ZExtCarry, "HighSumPlusCarry"); - - if (NumElements == 4) { - return Builder.CreateShuffleVector(LowSum, HighSumPlusCarry, - {0, 2, 1, 3}, - "hlsl.AddUint64"); - } - - llvm::Value *Result = PoisonValue::get(OpA->getType()); - Result = Builder.CreateInsertElement(Result, LowSum, (uint64_t)0, - "hlsl.AddUint64.upto0"); - Result = Builder.CreateInsertElement(Result, HighSumPlusCarry, (uint64_t)1, - "hlsl.AddUint64"); - return Result; - } - case Builtin::BI__builtin_hlsl_resource_getpointer: { - Value *HandleOp = EmitScalarExpr(E->getArg(0)); - Value *IndexOp = EmitScalarExpr(E->getArg(1)); - - // TODO: Map to an hlsl_device address space. - llvm::Type *RetTy = llvm::PointerType::getUnqual(getLLVMContext()); - - return Builder.CreateIntrinsic( - RetTy, CGM.getHLSLRuntime().getCreateResourceGetPointerIntrinsic(), - ArrayRef<Value *>{HandleOp, IndexOp}); - } - case Builtin::BI__builtin_hlsl_all: { - Value *Op0 = EmitScalarExpr(E->getArg(0)); - return Builder.CreateIntrinsic( - /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()), - CGM.getHLSLRuntime().getAllIntrinsic(), ArrayRef<Value *>{Op0}, nullptr, - "hlsl.all"); - } - case Builtin::BI__builtin_hlsl_and: { - Value *Op0 = EmitScalarExpr(E->getArg(0)); - Value *Op1 = EmitScalarExpr(E->getArg(1)); - return Builder.CreateAnd(Op0, Op1, "hlsl.and"); - } - case Builtin::BI__builtin_hlsl_or: { - Value *Op0 = EmitScalarExpr(E->getArg(0)); - Value *Op1 = EmitScalarExpr(E->getArg(1)); - return Builder.CreateOr(Op0, Op1, "hlsl.or"); - } - case Builtin::BI__builtin_hlsl_any: { - Value *Op0 = EmitScalarExpr(E->getArg(0)); - return Builder.CreateIntrinsic( - /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()), - CGM.getHLSLRuntime().getAnyIntrinsic(), ArrayRef<Value *>{Op0}, nullptr, - "hlsl.any"); - } - case Builtin::BI__builtin_hlsl_asdouble: - return handleAsDoubleBuiltin(*this, E); - case Builtin::BI__builtin_hlsl_elementwise_clamp: { - Value *OpX = EmitScalarExpr(E->getArg(0)); - Value *OpMin = EmitScalarExpr(E->getArg(1)); - Value *OpMax = EmitScalarExpr(E->getArg(2)); - - QualType Ty = E->getArg(0)->getType(); - if (auto *VecTy = Ty->getAs<VectorType>()) - Ty = VecTy->getElementType(); - - Intrinsic::ID Intr; - if (Ty->isFloatingType()) { - Intr = CGM.getHLSLRuntime().getNClampIntrinsic(); - } else if (Ty->isUnsignedIntegerType()) { - Intr = CGM.getHLSLRuntime().getUClampIntrinsic(); - } else { - assert(Ty->isSignedIntegerType()); - Intr = CGM.getHLSLRuntime().getSClampIntrinsic(); - } - return Builder.CreateIntrinsic( - /*ReturnType=*/OpX->getType(), Intr, - ArrayRef<Value *>{OpX, OpMin, OpMax}, nullptr, "hlsl.clamp"); - } - case Builtin::BI__builtin_hlsl_cross: { - Value *Op0 = EmitScalarExpr(E->getArg(0)); - Value *Op1 = EmitScalarExpr(E->getArg(1)); - assert(E->getArg(0)->getType()->hasFloatingRepresentation() && - E->getArg(1)->getType()->hasFloatingRepresentation() && - "cross operands must have a float representation"); - // make sure each vector has exactly 3 elements - assert( - E->getArg(0)->getType()->castAs<VectorType>()->getNumElements() == 3 && - E->getArg(1)->getType()->castAs<VectorType>()->getNumElements() == 3 && - "input vectors must have 3 elements each"); - return Builder.CreateIntrinsic( - /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getCrossIntrinsic(), - ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.cross"); - } - case Builtin::BI__builtin_hlsl_dot: { - Value *Op0 = EmitScalarExpr(E->getArg(0)); - Value *Op1 = EmitScalarExpr(E->getArg(1)); - llvm::Type *T0 = Op0->getType(); - llvm::Type *T1 = Op1->getType(); - - // If the arguments are scalars, just emit a multiply - if (!T0->isVectorTy() && !T1->isVectorTy()) { - if (T0->isFloatingPointTy()) - return Builder.CreateFMul(Op0, Op1, "hlsl.dot"); - - if (T0->isIntegerTy()) - return Builder.CreateMul(Op0, Op1, "hlsl.dot"); - - llvm_unreachable( - "Scalar dot product is only supported on ints and floats."); - } - // For vectors, validate types and emit the appropriate intrinsic - - // A VectorSplat should have happened - assert(T0->isVectorTy() && T1->isVectorTy() && - "Dot product of vector and scalar is not supported."); - - auto *VecTy0 = E->getArg(0)->getType()->castAs<VectorType>(); - [[maybe_unused]] auto *VecTy1 = - E->getArg(1)->getType()->castAs<VectorType>(); - - assert(VecTy0->getElementType() == VecTy1->getElementType() && - "Dot product of vectors need the same element types."); - - assert(VecTy0->getNumElements() == VecTy1->getNumElements() && - "Dot product requires vectors to be of the same size."); - - return Builder.CreateIntrinsic( - /*ReturnType=*/T0->getScalarType(), - getDotProductIntrinsic(CGM.getHLSLRuntime(), VecTy0->getElementType()), - ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.dot"); - } - case Builtin::BI__builtin_hlsl_dot4add_i8packed: { - Value *A = EmitScalarExpr(E->getArg(0)); - Value *B = EmitScalarExpr(E->getArg(1)); - Value *C = EmitScalarExpr(E->getArg(2)); - - Intrinsic::ID ID = CGM.getHLSLRuntime().getDot4AddI8PackedIntrinsic(); - return Builder.CreateIntrinsic( - /*ReturnType=*/C->getType(), ID, ArrayRef<Value *>{A, B, C}, nullptr, - "hlsl.dot4add.i8packed"); - } - case Builtin::BI__builtin_hlsl_dot4add_u8packed: { - Value *A = EmitScalarExpr(E->getArg(0)); - Value *B = EmitScalarExpr(E->getArg(1)); - Value *C = EmitScalarExpr(E->getArg(2)); - - Intrinsic::ID ID = CGM.getHLSLRuntime().getDot4AddU8PackedIntrinsic(); - return Builder.CreateIntrinsic( - /*ReturnType=*/C->getType(), ID, ArrayRef<Value *>{A, B, C}, nullptr, - "hlsl.dot4add.u8packed"); - } - case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: { - Value *X = EmitScalarExpr(E->getArg(0)); - - return Builder.CreateIntrinsic( - /*ReturnType=*/ConvertType(E->getType()), - getFirstBitHighIntrinsic(CGM.getHLSLRuntime(), E->getArg(0)->getType()), - ArrayRef<Value *>{X}, nullptr, "hlsl.firstbithigh"); - } - case Builtin::BI__builtin_hlsl_elementwise_firstbitlow: { - Value *X = EmitScalarExpr(E->getArg(0)); - - return Builder.CreateIntrinsic( - /*ReturnType=*/ConvertType(E->getType()), - CGM.getHLSLRuntime().getFirstBitLowIntrinsic(), ArrayRef<Value *>{X}, - nullptr, "hlsl.firstbitlow"); - } - case Builtin::BI__builtin_hlsl_lerp: { - Value *X = EmitScalarExpr(E->getArg(0)); - Value *Y = EmitScalarExpr(E->getArg(1)); - Value *S = EmitScalarExpr(E->getArg(2)); - if (!E->getArg(0)->getType()->hasFloatingRepresentation()) - llvm_unreachable("lerp operand must have a float representation"); - return Builder.CreateIntrinsic( - /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getLerpIntrinsic(), - ArrayRef<Value *>{X, Y, S}, nullptr, "hlsl.lerp"); - } - case Builtin::BI__builtin_hlsl_normalize: { - Value *X = EmitScalarExpr(E->getArg(0)); - - assert(E->getArg(0)->getType()->hasFloatingRepresentation() && - "normalize operand must have a float representation"); - - return Builder.CreateIntrinsic( - /*ReturnType=*/X->getType(), - CGM.getHLSLRuntime().getNormalizeIntrinsic(), ArrayRef<Value *>{X}, - nullptr, "hlsl.normalize"); - } - case Builtin::BI__builtin_hlsl_elementwise_degrees: { - Value *X = EmitScalarExpr(E->getArg(0)); - - assert(E->getArg(0)->getType()->hasFloatingRepresentation() && - "degree operand must have a float representation"); - - return Builder.CreateIntrinsic( - /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getDegreesIntrinsic(), - ArrayRef<Value *>{X}, nullptr, "hlsl.degrees"); - } - case Builtin::BI__builtin_hlsl_elementwise_frac: { - Value *Op0 = EmitScalarExpr(E->getArg(0)); - if (!E->getArg(0)->getType()->hasFloatingRepresentation()) - llvm_unreachable("frac operand must have a float representation"); - return Builder.CreateIntrinsic( - /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getFracIntrinsic(), - ArrayRef<Value *>{Op0}, nullptr, "hlsl.frac"); -} -case Builtin::BI__builtin_hlsl_elementwise_isinf: { - Value *Op0 = EmitScalarExpr(E->getArg(0)); - llvm::Type *Xty = Op0->getType(); - llvm::Type *retType = llvm::Type::getInt1Ty(this->getLLVMContext()); - if (Xty->isVectorTy()) { - auto *XVecTy = E->getArg(0)->getType()->castAs<VectorType>(); - retType = llvm::VectorType::get( - retType, ElementCount::getFixed(XVecTy->getNumElements())); - } - if (!E->getArg(0)->getType()->hasFloatingRepresentation()) - llvm_unreachable("isinf operand must have a float representation"); - return Builder.CreateIntrinsic(retType, Intrinsic::dx_isinf, - ArrayRef<Value *>{Op0}, nullptr, "dx.isinf"); - } - case Builtin::BI__builtin_hlsl_mad: { - Value *M = EmitScalarExpr(E->getArg(0)); - Value *A = EmitScalarExpr(E->getArg(1)); - Value *B = EmitScalarExpr(E->getArg(2)); - if (E->getArg(0)->getType()->hasFloatingRepresentation()) - return Builder.CreateIntrinsic( - /*ReturnType*/ M->getType(), Intrinsic::fmuladd, - ArrayRef<Value *>{M, A, B}, nullptr, "hlsl.fmad"); - - if (E->getArg(0)->getType()->hasSignedIntegerRepresentation()) { - if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil) - return Builder.CreateIntrinsic( - /*ReturnType*/ M->getType(), Intrinsic::dx_imad, - ArrayRef<Value *>{M, A, B}, nullptr, "dx.imad"); - - Value *Mul = Builder.CreateNSWMul(M, A); - return Builder.CreateNSWAdd(Mul, B); - } - assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation()); - if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil) - return Builder.CreateIntrinsic( - /*ReturnType=*/M->getType(), Intrinsic::dx_umad, - ArrayRef<Value *>{M, A, B}, nullptr, "dx.umad"); - - Value *Mul = Builder.CreateNUWMul(M, A); - return Builder.CreateNUWAdd(Mul, B); - } - case Builtin::BI__builtin_hlsl_elementwise_rcp: { - Value *Op0 = EmitScalarExpr(E->getArg(0)); - if (!E->getArg(0)->getType()->hasFloatingRepresentation()) - llvm_unreachable("rcp operand must have a float representation"); - llvm::Type *Ty = Op0->getType(); - llvm::Type *EltTy = Ty->getScalarType(); - Constant *One = Ty->isVectorTy() - ? ConstantVector::getSplat( - ElementCount::getFixed( - cast<FixedVectorType>(Ty)->getNumElements()), - ConstantFP::get(EltTy, 1.0)) - : ConstantFP::get(EltTy, 1.0); - return Builder.CreateFDiv(One, Op0, "hlsl.rcp"); - } - case Builtin::BI__builtin_hlsl_elementwise_rsqrt: { - Value *Op0 = EmitScalarExpr(E->getArg(0)); - if (!E->getArg(0)->getType()->hasFloatingRepresentation()) - llvm_unreachable("rsqrt operand must have a float representation"); - return Builder.CreateIntrinsic( - /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getRsqrtIntrinsic(), - ArrayRef<Value *>{Op0}, nullptr, "hlsl.rsqrt"); - } - case Builtin::BI__builtin_hlsl_elementwise_saturate: { - Value *Op0 = EmitScalarExpr(E->getArg(0)); - assert(E->getArg(0)->getType()->hasFloatingRepresentation() && - "saturate operand must have a float representation"); - return Builder.CreateIntrinsic( - /*ReturnType=*/Op0->getType(), - CGM.getHLSLRuntime().getSaturateIntrinsic(), ArrayRef<Value *>{Op0}, - nullptr, "hlsl.saturate"); - } - case Builtin::BI__builtin_hlsl_select: { - Value *OpCond = EmitScalarExpr(E->getArg(0)); - RValue RValTrue = EmitAnyExpr(E->getArg(1)); - Value *OpTrue = - RValTrue.isScalar() - ? RValTrue.getScalarVal() - : RValTrue.getAggregatePointer(E->getArg(1)->getType(), *this); - RValue RValFalse = EmitAnyExpr(E->getArg(2)); - Value *OpFalse = - RValFalse.isScalar() - ? RValFalse.getScalarVal() - : RValFalse.getAggregatePointer(E->getArg(2)->getType(), *this); - if (auto *VTy = E->getType()->getAs<VectorType>()) { - if (!OpTrue->getType()->isVectorTy()) - OpTrue = - Builder.CreateVectorSplat(VTy->getNumElements(), OpTrue, "splat"); - if (!OpFalse->getType()->isVectorTy()) - OpFalse = - Builder.CreateVectorSplat(VTy->getNumElements(), OpFalse, "splat"); - } - - Value *SelectVal = - Builder.CreateSelect(OpCond, OpTrue, OpFalse, "hlsl.select"); - if (!RValTrue.isScalar()) - Builder.CreateStore(SelectVal, ReturnValue.getAddress(), - ReturnValue.isVolatile()); - - return SelectVal; - } - case Builtin::BI__builtin_hlsl_step: { - Value *Op0 = EmitScalarExpr(E->getArg(0)); - Value *Op1 = EmitScalarExpr(E->getArg(1)); - assert(E->getArg(0)->getType()->hasFloatingRepresentation() && - E->getArg(1)->getType()->hasFloatingRepresentation() && - "step operands must have a float representation"); - return Builder.CreateIntrinsic( - /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getStepIntrinsic(), - ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.step"); - } - case Builtin::BI__builtin_hlsl_wave_active_all_true: { - Value *Op = EmitScalarExpr(E->getArg(0)); - assert(Op->getType()->isIntegerTy(1) && - "Intrinsic WaveActiveAllTrue operand must be a bool"); - - Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAllTrueIntrinsic(); - return EmitRuntimeCall( - Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID), {Op}); - } - case Builtin::BI__builtin_hlsl_wave_active_any_true: { - Value *Op = EmitScalarExpr(E->getArg(0)); - assert(Op->getType()->isIntegerTy(1) && - "Intrinsic WaveActiveAnyTrue operand must be a bool"); - - Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAnyTrueIntrinsic(); - return EmitRuntimeCall( - Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID), {Op}); - } - case Builtin::BI__builtin_hlsl_wave_active_count_bits: { - Value *OpExpr = EmitScalarExpr(E->getArg(0)); - Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveCountBitsIntrinsic(); - return EmitRuntimeCall( - Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID), - ArrayRef{OpExpr}); - } - case Builtin::BI__builtin_hlsl_wave_active_sum: { - // Due to the use of variadic arguments, explicitly retreive argument - Value *OpExpr = EmitScalarExpr(E->getArg(0)); - llvm::FunctionType *FT = llvm::FunctionType::get( - OpExpr->getType(), ArrayRef{OpExpr->getType()}, false); - Intrinsic::ID IID = getWaveActiveSumIntrinsic( - getTarget().getTriple().getArch(), CGM.getHLSLRuntime(), - E->getArg(0)->getType()); - - // Get overloaded name - std::string Name = - Intrinsic::getName(IID, ArrayRef{OpExpr->getType()}, &CGM.getModule()); - return EmitRuntimeCall(CGM.CreateRuntimeFunction(FT, Name, {}, - /*Local=*/false, - /*AssumeConvergent=*/true), - ArrayRef{OpExpr}, "hlsl.wave.active.sum"); - } - case Builtin::BI__builtin_hlsl_wave_active_max: { - // Due to the use of variadic arguments, explicitly retreive argument - Value *OpExpr = EmitScalarExpr(E->getArg(0)); - llvm::FunctionType *FT = llvm::FunctionType::get( - OpExpr->getType(), ArrayRef{OpExpr->getType()}, false); - Intrinsic::ID IID = getWaveActiveMaxIntrinsic( - getTarget().getTriple().getArch(), CGM.getHLSLRuntime(), - E->getArg(0)->getType()); - - // Get overloaded name - std::string Name = - Intrinsic::getName(IID, ArrayRef{OpExpr->getType()}, &CGM.getModule()); - return EmitRuntimeCall(CGM.CreateRuntimeFunction(FT, Name, {}, - /*Local=*/false, - /*AssumeConvergent=*/true), - ArrayRef{OpExpr}, "hlsl.wave.active.max"); - } - case Builtin::BI__builtin_hlsl_wave_get_lane_index: { - // We don't define a SPIR-V intrinsic, instead it is a SPIR-V built-in - // defined in SPIRVBuiltins.td. So instead we manually get the matching name - // for the DirectX intrinsic and the demangled builtin name - switch (CGM.getTarget().getTriple().getArch()) { - case llvm::Triple::dxil: - return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration( - &CGM.getModule(), Intrinsic::dx_wave_getlaneindex)); - case llvm::Triple::spirv: - return EmitRuntimeCall(CGM.CreateRuntimeFunction( - llvm::FunctionType::get(IntTy, {}, false), - "__hlsl_wave_get_lane_index", {}, false, true)); - default: - llvm_unreachable( - "Intrinsic WaveGetLaneIndex not supported by target architecture"); - } - } - case Builtin::BI__builtin_hlsl_wave_is_first_lane: { - Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveIsFirstLaneIntrinsic(); - return EmitRuntimeCall( - Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID)); - } - case Builtin::BI__builtin_hlsl_wave_read_lane_at: { - // Due to the use of variadic arguments we must explicitly retreive them and - // create our function type. - Value *OpExpr = EmitScalarExpr(E->getArg(0)); - Value *OpIndex = EmitScalarExpr(E->getArg(1)); - llvm::FunctionType *FT = llvm::FunctionType::get( - OpExpr->getType(), ArrayRef{OpExpr->getType(), OpIndex->getType()}, - false); - - // Get overloaded name - std::string Name = - Intrinsic::getName(CGM.getHLSLRuntime().getWaveReadLaneAtIntrinsic(), - ArrayRef{OpExpr->getType()}, &CGM.getModule()); - return EmitRuntimeCall(CGM.CreateRuntimeFunction(FT, Name, {}, - /*Local=*/false, - /*AssumeConvergent=*/true), - ArrayRef{OpExpr, OpIndex}, "hlsl.wave.readlane"); - } - case Builtin::BI__builtin_hlsl_elementwise_sign: { - auto *Arg0 = E->getArg(0); - Value *Op0 = EmitScalarExpr(Arg0); - llvm::Type *Xty = Op0->getType(); - llvm::Type *retType = llvm::Type::getInt32Ty(this->getLLVMContext()); - if (Xty->isVectorTy()) { - auto *XVecTy = Arg0->getType()->castAs<VectorType>(); - retType = llvm::VectorType::get( - retType, ElementCount::getFixed(XVecTy->getNumElements())); - } - assert((Arg0->getType()->hasFloatingRepresentation() || - Arg0->getType()->hasIntegerRepresentation()) && - "sign operand must have a float or int representation"); - - if (Arg0->getType()->hasUnsignedIntegerRepresentation()) { - Value *Cmp = Builder.CreateICmpEQ(Op0, ConstantInt::get(Xty, 0)); - return Builder.CreateSelect(Cmp, ConstantInt::get(retType, 0), - ConstantInt::get(retType, 1), "hlsl.sign"); - } - - return Builder.CreateIntrinsic( - retType, CGM.getHLSLRuntime().getSignIntrinsic(), - ArrayRef<Value *>{Op0}, nullptr, "hlsl.sign"); - } - case Builtin::BI__builtin_hlsl_elementwise_radians: { - Value *Op0 = EmitScalarExpr(E->getArg(0)); - assert(E->getArg(0)->getType()->hasFloatingRepresentation() && - "radians operand must have a float representation"); - return Builder.CreateIntrinsic( - /*ReturnType=*/Op0->getType(), - CGM.getHLSLRuntime().getRadiansIntrinsic(), ArrayRef<Value *>{Op0}, - nullptr, "hlsl.radians"); - } - case Builtin::BI__builtin_hlsl_buffer_update_counter: { - Value *ResHandle = EmitScalarExpr(E->getArg(0)); - Value *Offset = EmitScalarExpr(E->getArg(1)); - Value *OffsetI8 = Builder.CreateIntCast(Offset, Int8Ty, true); - return Builder.CreateIntrinsic( - /*ReturnType=*/Offset->getType(), - CGM.getHLSLRuntime().getBufferUpdateCounterIntrinsic(), - ArrayRef<Value *>{ResHandle, OffsetI8}, nullptr); - } - case Builtin::BI__builtin_hlsl_elementwise_splitdouble: { - - assert((E->getArg(0)->getType()->hasFloatingRepresentation() && - E->getArg(1)->getType()->hasUnsignedIntegerRepresentation() && - E->getArg(2)->getType()->hasUnsignedIntegerRepresentation()) && - "asuint operands types mismatch"); - return handleHlslSplitdouble(E, this); - } - case Builtin::BI__builtin_hlsl_elementwise_clip: - assert(E->getArg(0)->getType()->hasFloatingRepresentation() && - "clip operands types mismatch"); - return handleHlslClip(E, this); - case Builtin::BI__builtin_hlsl_group_memory_barrier_with_group_sync: { - Intrinsic::ID ID = - CGM.getHLSLRuntime().getGroupMemoryBarrierWithGroupSyncIntrinsic(); - return EmitRuntimeCall( - Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID)); - } - } - return nullptr; -} - void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst, const CallExpr *E) { constexpr const char *Tag = "amdgpu-as"; _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits