https://github.com/bharadwajy updated https://github.com/llvm/llvm-project/pull/104619
>From e9fd01be8d43b2eb7ccd4894022ecec4c596d957 Mon Sep 17 00:00:00 2001 From: Bharadwaj Yadavalli <bharadwaj.yadava...@microsoft.com> Date: Thu, 1 Aug 2024 02:46:05 +0000 Subject: [PATCH 1/3] Implement support to compile HLSL intrinsic "saturate" to DXIL Add SPIRV Codegen support to transform saturate(x) to clamp(x, 0.0, 1.0) Add tests for DXIL and SPIRV CodeGen. --- clang/include/clang/Basic/Builtins.td | 6 + clang/lib/CodeGen/CGBuiltin.cpp | 9 + clang/lib/CodeGen/CGHLSLRuntime.h | 1 + clang/lib/Headers/hlsl/hlsl_intrinsics.h | 41 ++- clang/lib/Sema/SemaHLSL.cpp | 3 +- clang/test/CodeGenHLSL/builtins/saturate.hlsl | 54 ++++ .../SemaHLSL/BuiltIns/saturate-errors.hlsl | 31 ++ llvm/include/llvm/IR/IntrinsicsDirectX.td | 1 + llvm/include/llvm/IR/IntrinsicsSPIRV.td | 3 +- llvm/lib/Target/DirectX/DXIL.td | 10 + .../Target/DirectX/DXILIntrinsicExpansion.cpp | 2 + .../Target/SPIRV/SPIRVInstructionSelector.cpp | 80 ++++- llvm/test/CodeGen/DirectX/saturate.ll | 276 ++++++++++++++++++ llvm/test/CodeGen/DirectX/saturate_errors.ll | 14 + .../CodeGen/SPIRV/hlsl-intrinsics/saturate.ll | 83 ++++++ 15 files changed, 597 insertions(+), 17 deletions(-) create mode 100644 clang/test/CodeGenHLSL/builtins/saturate.hlsl create mode 100644 clang/test/SemaHLSL/BuiltIns/saturate-errors.hlsl create mode 100644 llvm/test/CodeGen/DirectX/saturate.ll create mode 100644 llvm/test/CodeGen/DirectX/saturate_errors.ll create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/saturate.ll diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 036366cdadf4aa..ac33672a32b336 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -4745,6 +4745,12 @@ def HLSLRSqrt : LangBuiltin<"HLSL_LANG"> { let Prototype = "void(...)"; } +def HLSLSaturate : LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_elementwise_saturate"]; + let Attributes = [NoThrow, Const]; + let Prototype = "void(...)"; +} + // Builtins for XRay. 
def XRayCustomEvent : Builtin { let Spellings = ["__xray_customevent"]; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index f424ddaa175400..f4353f595efbac 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18667,6 +18667,15 @@ case Builtin::BI__builtin_hlsl_elementwise_isinf: { /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getRsqrtIntrinsic(), ArrayRef<Value *>{Op0}, nullptr, "hlsl.rsqrt"); } + case Builtin::BI__builtin_hlsl_elementwise_saturate: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + if (!E->getArg(0)->getType()->hasFloatingRepresentation()) + llvm_unreachable("saturate operand must have a float representation"); + return Builder.CreateIntrinsic( + /*ReturnType=*/Op0->getType(), + CGM.getHLSLRuntime().getSaturateIntrinsic(), ArrayRef<Value *>{Op0}, + nullptr, "hlsl.saturate"); + } case Builtin::BI__builtin_hlsl_wave_get_lane_index: { return EmitRuntimeCall(CGM.CreateRuntimeFunction( llvm::FunctionType::get(IntTy, {}, false), "__hlsl_wave_get_lane_index", diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h index cd604bea2e763d..b1455b5779acf9 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -79,6 +79,7 @@ class CGHLSLRuntime { GENERATE_HLSL_INTRINSIC_FUNCTION(Lerp, lerp) GENERATE_HLSL_INTRINSIC_FUNCTION(Normalize, normalize) GENERATE_HLSL_INTRINSIC_FUNCTION(Rsqrt, rsqrt) + GENERATE_HLSL_INTRINSIC_FUNCTION(Saturate, saturate) GENERATE_HLSL_INTRINSIC_FUNCTION(ThreadId, thread_id) //===----------------------------------------------------------------------===// diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h index 678cdc77f8a71b..6d38b668fe770e 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h @@ -916,7 +916,7 @@ float4 lerp(float4, float4, float4); /// \brief Returns the length of the specified floating-point 
vector. /// \param x [in] The vector of floats, or a scalar float. /// -/// Length is based on the following formula: sqrt(x[0]^2 + x[1]^2 + �). +/// Length is based on the following formula: sqrt(x[0]^2 + x[1]^2 + ...). _HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_hlsl_length) @@ -1564,6 +1564,45 @@ float3 round(float3); _HLSL_BUILTIN_ALIAS(__builtin_elementwise_roundeven) float4 round(float4); +//===----------------------------------------------------------------------===// +// saturate builtins +//===----------------------------------------------------------------------===// + +/// \fn T saturate(T Val) +/// \brief Returns input value, \a Val, clamped within the range of 0.0f +/// to 1.0f. \param Val The input value. + +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate) +half saturate(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate) +half2 saturate(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate) +half3 saturate(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate) +half4 saturate(half4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate) +float saturate(float); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate) +float2 saturate(float2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate) +float3 saturate(float3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate) +float4 saturate(float4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate) +double saturate(double); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate) +double2 saturate(double2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate) +double3 saturate(double3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate) +double4 saturate(double4); + 
//===----------------------------------------------------------------------===// // sin builtins //===----------------------------------------------------------------------===// diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index e3e926465e799e..df01549cc2eeb6 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -356,7 +356,7 @@ static bool isLegalTypeForHLSLSV_DispatchThreadID(QualType T) { return true; } -void SemaHLSL::handleSV_DispatchThreadIDAttr(Decl *D, const ParsedAttr &AL) { +void SemaHLSL::handleSV_DispatchThreadIDAttr(Decl *D, const ParsedAttr &AL) { auto *VD = cast<ValueDecl>(D); if (!isLegalTypeForHLSLSV_DispatchThreadID(VD->getType())) { Diag(AL.getLoc(), diag::err_hlsl_attr_invalid_type) @@ -1045,6 +1045,7 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { return true; break; } + case Builtin::BI__builtin_hlsl_elementwise_saturate: case Builtin::BI__builtin_hlsl_elementwise_rcp: { if (CheckAllArgsHaveFloatRepresentation(&SemaRef, TheCall)) return true; diff --git a/clang/test/CodeGenHLSL/builtins/saturate.hlsl b/clang/test/CodeGenHLSL/builtins/saturate.hlsl new file mode 100644 index 00000000000000..970d7b7371b1eb --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/saturate.hlsl @@ -0,0 +1,54 @@ +// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ +// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ +// RUN: --check-prefixes=CHECK,NATIVE_HALF +// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ +// RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ +// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF + +// NATIVE_HALF: define noundef half @ +// NATIVE_HALF: call half @llvm.dx.saturate.f16( +// NO_HALF: define noundef float @"?test_saturate_half +// NO_HALF: call float @llvm.dx.saturate.f32( +half test_saturate_half(half 
p0) { return saturate(p0); } +// NATIVE_HALF: define noundef <2 x half> @ +// NATIVE_HALF: call <2 x half> @llvm.dx.saturate.v2f16 +// NO_HALF: define noundef <2 x float> @"?test_saturate_half2 +// NO_HALF: call <2 x float> @llvm.dx.saturate.v2f32( +half2 test_saturate_half2(half2 p0) { return saturate(p0); } +// NATIVE_HALF: define noundef <3 x half> @ +// NATIVE_HALF: call <3 x half> @llvm.dx.saturate.v3f16 +// NO_HALF: define noundef <3 x float> @"?test_saturate_half3 +// NO_HALF: call <3 x float> @llvm.dx.saturate.v3f32( +half3 test_saturate_half3(half3 p0) { return saturate(p0); } +// NATIVE_HALF: define noundef <4 x half> @ +// NATIVE_HALF: call <4 x half> @llvm.dx.saturate.v4f16 +// NO_HALF: define noundef <4 x float> @"?test_saturate_half4 +// NO_HALF: call <4 x float> @llvm.dx.saturate.v4f32( +half4 test_saturate_half4(half4 p0) { return saturate(p0); } + +// CHECK: define noundef float @"?test_saturate_float +// CHECK: call float @llvm.dx.saturate.f32( +float test_saturate_float(float p0) { return saturate(p0); } +// CHECK: define noundef <2 x float> @"?test_saturate_float2 +// CHECK: call <2 x float> @llvm.dx.saturate.v2f32 +float2 test_saturate_float2(float2 p0) { return saturate(p0); } +// CHECK: define noundef <3 x float> @"?test_saturate_float3 +// CHECK: call <3 x float> @llvm.dx.saturate.v3f32 +float3 test_saturate_float3(float3 p0) { return saturate(p0); } +// CHECK: define noundef <4 x float> @"?test_saturate_float4 +// CHECK: call <4 x float> @llvm.dx.saturate.v4f32 +float4 test_saturate_float4(float4 p0) { return saturate(p0); } + +// CHECK: define noundef double @ +// CHECK: call double @llvm.dx.saturate.f64( +double test_saturate_double(double p0) { return saturate(p0); } +// CHECK: define noundef <2 x double> @ +// CHECK: call <2 x double> @llvm.dx.saturate.v2f64 +double2 test_saturate_double2(double2 p0) { return saturate(p0); } +// CHECK: define noundef <3 x double> @ +// CHECK: call <3 x double> @llvm.dx.saturate.v3f64 +double3 
test_saturate_double3(double3 p0) { return saturate(p0); } +// CHECK: define noundef <4 x double> @ +// CHECK: call <4 x double> @llvm.dx.saturate.v4f64 +double4 test_saturate_double4(double4 p0) { return saturate(p0); } diff --git a/clang/test/SemaHLSL/BuiltIns/saturate-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/saturate-errors.hlsl new file mode 100644 index 00000000000000..721b28f86f950f --- /dev/null +++ b/clang/test/SemaHLSL/BuiltIns/saturate-errors.hlsl @@ -0,0 +1,31 @@ +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -verify-ignore-unexpected -Werror + +float2 test_no_arg() { + return saturate(); + // expected-error@-1 {{no matching function for call to 'saturate'}} +} + +float2 test_too_many_arg(float2 p0) { + return saturate(p0, p0, p0, p0); + // expected-error@-1 {{no matching function for call to 'saturate'}} +} + +float2 test_saturate_vector_size_mismatch(float3 p0) { + return saturate(p0); + // expected-error@-1 {{implicit conversion truncates vector: 'float3' (aka 'vector<float, 3>') to 'vector<float, 2>'}} +} + +float2 test_saturate_float2_int_splat(int p0) { + return saturate(p0); + // expected-error@-1 {{call to 'saturate' is ambiguous}} +} + +float2 test_saturate_int_vect_to_float_vec_promotion(int2 p0) { + return saturate(p0); + // expected-error@-1 {{call to 'saturate' is ambiguous}} +} + +float test_saturate_bool_type_promotion(bool p0) { + return saturate(p0); + // expected-error@-1 {{call to 'saturate' is ambiguous}} +} diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index c9102aa3dd972b..a0807a01ea5ab2 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -34,6 +34,7 @@ def int_dx_all : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty]>; def int_dx_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty]>; def int_dx_clamp : 
DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>; def int_dx_uclamp : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>; +def int_dx_saturate : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_dx_dot2 : Intrinsic<[LLVMVectorElementType<0>], diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index 1b5e463822749e..4e130ad0c907d9 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -61,9 +61,10 @@ let TargetPrefix = "spv" in { def int_spv_all : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty]>; def int_spv_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty]>; def int_spv_frac : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]>; - def int_spv_lerp : Intrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>,LLVMMatchType<0>], + def int_spv_lerp : Intrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>,LLVMMatchType<0>], [IntrNoMem, IntrWillReturn] >; def int_spv_length : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], [llvm_anyfloat_ty]>; def int_spv_normalize : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]>; def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]>; + def int_spv_saturate : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; } diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td index 67015cff78a79a..ac378db2c9b499 100644 --- a/llvm/lib/Target/DirectX/DXIL.td +++ b/llvm/lib/Target/DirectX/DXIL.td @@ -325,6 +325,16 @@ def Abs : DXILOp<6, unary> { let attributes = [Attributes<DXIL1_0, [ReadNone]>]; } +def Saturate : DXILOp<7, unary> { + let Doc = "Clamps a single or double precision floating point value to [0.0f...1.0f]."; + let LLVMIntrinsic = int_dx_saturate; + let arguments = [overloadTy]; + let result = overloadTy; + let overloads = 
[Overloads<DXIL1_0, [halfTy, floatTy, doubleTy]>]; + let stages = [Stages<DXIL1_0, [all_stages]>]; + let attributes = [Attributes<DXIL1_0, [ReadNone]>]; +} + def IsInf : DXILOp<9, isSpecialFloat> { let Doc = "Determines if the specified value is infinite."; let LLVMIntrinsic = int_dx_isinf; diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp index 2c481d15be5bde..9b467dff3e126f 100644 --- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp +++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -46,6 +47,7 @@ static bool isIntrinsicExpansion(Function &F) { case Intrinsic::dx_normalize: case Intrinsic::dx_sdot: case Intrinsic::dx_udot: + case Intrinsic::dx_saturate: return true; } return false; diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 7cb19279518989..ecb3cee4e781af 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -247,6 +247,9 @@ class SPIRVInstructionSelector : public InstructionSelector { bool selectNormalize(Register ResVReg, const SPIRVType *ResType, MachineInstr &I) const; + bool selectSaturate(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I) const; + bool selectSpvThreadId(Register ResVReg, const SPIRVType *ResType, MachineInstr &I) const; @@ -259,6 +262,7 @@ class SPIRVInstructionSelector : public InstructionSelector { Register buildZerosValF(const SPIRVType *ResType, MachineInstr &I) const; Register buildOnesVal(bool AllOnes, const SPIRVType *ResType, MachineInstr &I) const; + Register buildOnesValF(const SPIRVType *ResType, MachineInstr &I) const; bool 
wrapIntoSpecConstantOp(MachineInstr &I, SmallVector<Register> &CompositeArgs) const; @@ -1285,6 +1289,34 @@ static unsigned getBoolCmpOpcode(unsigned PredNum) { } } +static APFloat getZeroFP(const Type *LLVMFloatTy) { + if (!LLVMFloatTy) + return APFloat::getZero(APFloat::IEEEsingle()); + switch (LLVMFloatTy->getScalarType()->getTypeID()) { + case Type::HalfTyID: + return APFloat::getZero(APFloat::IEEEhalf()); + default: + case Type::FloatTyID: + return APFloat::getZero(APFloat::IEEEsingle()); + case Type::DoubleTyID: + return APFloat::getZero(APFloat::IEEEdouble()); + } +} + +static APFloat getOneFP(const Type *LLVMFloatTy) { + if (!LLVMFloatTy) + return APFloat::getOne(APFloat::IEEEsingle()); + switch (LLVMFloatTy->getScalarType()->getTypeID()) { + case Type::HalfTyID: + return APFloat::getOne(APFloat::IEEEhalf()); + default: + case Type::FloatTyID: + return APFloat::getOne(APFloat::IEEEsingle()); + case Type::DoubleTyID: + return APFloat::getOne(APFloat::IEEEdouble()); + } +} + bool SPIRVInstructionSelector::selectAnyOrAll(Register ResVReg, const SPIRVType *ResType, MachineInstr &I, @@ -1446,6 +1478,28 @@ bool SPIRVInstructionSelector::selectRsqrt(Register ResVReg, .constrainAllUses(TII, TRI, RBI); } +/// Transform saturate(x) to clamp(x, 0.0f, 1.0f) as SPIRV +/// does not have a saturate builtin. 
+bool SPIRVInstructionSelector::selectSaturate(Register ResVReg, + const SPIRVType *ResType, + MachineInstr &I) const { + assert(I.getNumOperands() == 3); + assert(I.getOperand(2).isReg()); + MachineBasicBlock &BB = *I.getParent(); + Register VZero = buildZerosValF(ResType, I); + Register VOne = buildOnesValF(ResType, I); + + return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpExtInst)) + .addDef(ResVReg) + .addUse(GR.getSPIRVTypeID(ResType)) + .addImm(static_cast<uint32_t>(SPIRV::InstructionSet::GLSL_std_450)) + .addImm(GL::FClamp) + .addUse(I.getOperand(2).getReg()) + .addUse(VZero) + .addUse(VOne) + .constrainAllUses(TII, TRI, RBI); +} + bool SPIRVInstructionSelector::selectBitreverse(Register ResVReg, const SPIRVType *ResType, MachineInstr &I) const { @@ -1724,20 +1778,6 @@ Register SPIRVInstructionSelector::buildZerosVal(const SPIRVType *ResType, return GR.getOrCreateConstInt(0, I, ResType, TII, ZeroAsNull); } -static APFloat getZeroFP(const Type *LLVMFloatTy) { - if (!LLVMFloatTy) - return APFloat::getZero(APFloat::IEEEsingle()); - switch (LLVMFloatTy->getScalarType()->getTypeID()) { - case Type::HalfTyID: - return APFloat::getZero(APFloat::IEEEhalf()); - default: - case Type::FloatTyID: - return APFloat::getZero(APFloat::IEEEsingle()); - case Type::DoubleTyID: - return APFloat::getZero(APFloat::IEEEdouble()); - } -} - Register SPIRVInstructionSelector::buildZerosValF(const SPIRVType *ResType, MachineInstr &I) const { // OpenCL uses nulls for Zero. In HLSL we don't use null constants. @@ -1748,6 +1788,16 @@ Register SPIRVInstructionSelector::buildZerosValF(const SPIRVType *ResType, return GR.getOrCreateConstFP(VZero, I, ResType, TII, ZeroAsNull); } +Register SPIRVInstructionSelector::buildOnesValF(const SPIRVType *ResType, + MachineInstr &I) const { + // OpenCL uses nulls for Zero. In HLSL we don't use null constants. 
+ bool ZeroAsNull = STI.isOpenCLEnv(); + APFloat VOne = getOneFP(GR.getTypeForSPIRVType(ResType)); + if (ResType->getOpcode() == SPIRV::OpTypeVector) + return GR.getOrCreateConstVector(VOne, I, ResType, TII, ZeroAsNull); + return GR.getOrCreateConstFP(VOne, I, ResType, TII, ZeroAsNull); +} + Register SPIRVInstructionSelector::buildOnesVal(bool AllOnes, const SPIRVType *ResType, MachineInstr &I) const { @@ -2181,6 +2231,8 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg, Size = 0; BuildMI(BB, I, I.getDebugLoc(), TII.get(Op)).addUse(PtrReg).addImm(Size); } break; + case Intrinsic::spv_saturate: + return selectSaturate(ResVReg, ResType, I); default: { std::string DiagMsg; raw_string_ostream OS(DiagMsg); diff --git a/llvm/test/CodeGen/DirectX/saturate.ll b/llvm/test/CodeGen/DirectX/saturate.ll new file mode 100644 index 00000000000000..0c96249ccea435 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/saturate.ll @@ -0,0 +1,276 @@ +; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; Make sure the intrinsic dx.saturate is lowered to the appropriate DXIL op for half/float/double data types. 
+ +target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" +target triple = "dxilv1.6-unknown-shadermodel6.6-library" + +; CHECK-LABEL: test_saturate_half +define noundef half @test_saturate_half(half noundef %p0) #0 { +entry: + %p0.addr = alloca half, align 2 + store half %p0, ptr %p0.addr, align 2, !tbaa !4 + %0 = load half, ptr %p0.addr, align 2, !tbaa !4 + ; CHECK: %1 = call half @dx.op.unary.f16(i32 7, half %0) + %hlsl.saturate = call half @llvm.dx.saturate.f16(half %0) + ; CHECK: ret half %1 + ret half %hlsl.saturate +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn +declare half @llvm.dx.saturate.f16(half) #1 + +; CHECK-LABEL: test_saturate_half2 +define noundef <2 x half> @test_saturate_half2(<2 x half> noundef %p0) #0 { +entry: + %p0.addr = alloca <2 x half>, align 4 + store <2 x half> %p0, ptr %p0.addr, align 4, !tbaa !8 + %0 = load <2 x half>, ptr %p0.addr, align 4, !tbaa !8 + ; CHECK: %1 = extractelement <2 x half> %0, i64 0 + ; CHECK-NEXT: %2 = call half @dx.op.unary.f16(i32 7, half %1) + ; CHECK-NEXT: %3 = insertelement <2 x half> %0, half %2, i64 0 + ; CHECK-NEXT: %4 = extractelement <2 x half> %0, i64 1 + ; CHECK-NEXT: %5 = call half @dx.op.unary.f16(i32 7, half %4) + ; CHECK-NEXT: %6 = insertelement <2 x half> %0, half %5, i64 1 + %hlsl.saturate = call <2 x half> @llvm.dx.saturate.v2f16(<2 x half> %0) + ; CHECK: ret <2 x half> %6 + ret <2 x half> %hlsl.saturate +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn +declare <2 x half> @llvm.dx.saturate.v2f16(<2 x half>) #1 + +; CHECK-LABEL: test_saturate_half3 +define noundef <3 x half> @test_saturate_half3(<3 x half> noundef %p0) #0 { +entry: + %p0.addr = alloca <3 x half>, align 8 + store <3 x half> %p0, ptr %p0.addr, align 8, !tbaa !8 + %0 = load <3 x half>, ptr %p0.addr, align 8, !tbaa !8 + ; CHECK: %1 = extractelement <3 x half> %0, i64 0 + ; CHECK-NEXT: %2 = call half @dx.op.unary.f16(i32 7, half %1) + ; CHECK-NEXT: 
%3 = insertelement <3 x half> %0, half %2, i64 0 + ; CHECK-NEXT: %4 = extractelement <3 x half> %0, i64 1 + ; CHECK-NEXT: %5 = call half @dx.op.unary.f16(i32 7, half %4) + ; CHECK-NEXT: %6 = insertelement <3 x half> %0, half %5, i64 1 + ; CHECK-NEXT: %7 = extractelement <3 x half> %0, i64 2 + ; CHECK-NEXT: %8 = call half @dx.op.unary.f16(i32 7, half %7) + ; CHECK-NEXT: %9 = insertelement <3 x half> %0, half %8, i64 2 + %hlsl.saturate = call <3 x half> @llvm.dx.saturate.v3f16(<3 x half> %0) + ; CHECK: ret <3 x half> %9 + ret <3 x half> %hlsl.saturate +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn +declare <3 x half> @llvm.dx.saturate.v3f16(<3 x half>) #1 + +; CHECK-LABEL: test_saturate_half4 +define noundef <4 x half> @test_saturate_half4(<4 x half> noundef %p0) #0 { +entry: + %p0.addr = alloca <4 x half>, align 8 + store <4 x half> %p0, ptr %p0.addr, align 8, !tbaa !8 + %0 = load <4 x half>, ptr %p0.addr, align 8, !tbaa !8 + ; CHECK: %1 = extractelement <4 x half> %0, i64 0 + ; CHECK-NEXT: %2 = call half @dx.op.unary.f16(i32 7, half %1) + ; CHECK-NEXT: %3 = insertelement <4 x half> %0, half %2, i64 0 + ; CHECK-NEXT: %4 = extractelement <4 x half> %0, i64 1 + ; CHECK-NEXT: %5 = call half @dx.op.unary.f16(i32 7, half %4) + ; CHECK-NEXT: %6 = insertelement <4 x half> %0, half %5, i64 1 + ; CHECK-NEXT: %7 = extractelement <4 x half> %0, i64 2 + ; CHECK-NEXT: %8 = call half @dx.op.unary.f16(i32 7, half %7) + ; CHECK-NEXT: %9 = insertelement <4 x half> %0, half %8, i64 2 + ; CHECK-NEXT: %10 = extractelement <4 x half> %0, i64 3 + ; CHECK-NEXT: %11 = call half @dx.op.unary.f16(i32 7, half %10) + ; CHECK-NEXT: %12 = insertelement <4 x half> %0, half %11, i64 3 + %hlsl.saturate = call <4 x half> @llvm.dx.saturate.v4f16(<4 x half> %0) + ; CHECK: ret <4 x half> %12 + ret <4 x half> %hlsl.saturate +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn +declare <4 x half> @llvm.dx.saturate.v4f16(<4 x half>) #1 + +; CHECK-LABEL: 
test_saturate_float +define noundef float @test_saturate_float(float noundef %p0) #0 { +entry: + %p0.addr = alloca float, align 4 + store float %p0, ptr %p0.addr, align 4, !tbaa !9 + %0 = load float, ptr %p0.addr, align 4, !tbaa !9 + ; CHECK: %1 = call float @dx.op.unary.f32(i32 7, float %0) + %hlsl.saturate = call float @llvm.dx.saturate.f32(float %0) + ; CHECK: ret float %1 + ret float %hlsl.saturate +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn +declare float @llvm.dx.saturate.f32(float) #1 + +; CHECK-LABEL: test_saturate_float2 +define noundef <2 x float> @test_saturate_float2(<2 x float> noundef %p0) #0 { +entry: + %p0.addr = alloca <2 x float>, align 8 + store <2 x float> %p0, ptr %p0.addr, align 8, !tbaa !8 + %0 = load <2 x float>, ptr %p0.addr, align 8, !tbaa !8 + ; CHECK: %1 = extractelement <2 x float> %0, i64 0 + ; CHECK-NEXT: %2 = call float @dx.op.unary.f32(i32 7, float %1) + ; CHECK-NEXT: %3 = insertelement <2 x float> %0, float %2, i64 0 + ; CHECK-NEXT: %4 = extractelement <2 x float> %0, i64 1 + ; CHECK-NEXT: %5 = call float @dx.op.unary.f32(i32 7, float %4) + ; CHECK-NEXT: %6 = insertelement <2 x float> %0, float %5, i64 1 + %hlsl.saturate = call <2 x float> @llvm.dx.saturate.v2f32(<2 x float> %0) + ; CHECK: ret <2 x float> %6 + ret <2 x float> %hlsl.saturate +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn +declare <2 x float> @llvm.dx.saturate.v2f32(<2 x float>) #1 + +; CHECK-LABEL: test_saturate_float3 +define noundef <3 x float> @test_saturate_float3(<3 x float> noundef %p0) #0 { +entry: + %p0.addr = alloca <3 x float>, align 16 + store <3 x float> %p0, ptr %p0.addr, align 16, !tbaa !8 + %0 = load <3 x float>, ptr %p0.addr, align 16, !tbaa !8 + ; CHECK: %1 = extractelement <3 x float> %0, i64 0 + ; CHECK-NEXT: %2 = call float @dx.op.unary.f32(i32 7, float %1) + ; CHECK-NEXT: %3 = insertelement <3 x float> %0, float %2, i64 0 + ; CHECK-NEXT: %4 = extractelement <3 x float> %0, i64 1 + ; CHECK-NEXT: %5 = 
call float @dx.op.unary.f32(i32 7, float %4) + ; CHECK-NEXT: %6 = insertelement <3 x float> %0, float %5, i64 1 + ; CHECK-NEXT: %7 = extractelement <3 x float> %0, i64 2 + ; CHECK-NEXT: %8 = call float @dx.op.unary.f32(i32 7, float %7) + ; CHECK-NEXT: %9 = insertelement <3 x float> %0, float %8, i64 2 + %hlsl.saturate = call <3 x float> @llvm.dx.saturate.v3f32(<3 x float> %0) + ; CHECK: ret <3 x float> %9 + ret <3 x float> %hlsl.saturate +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn +declare <3 x float> @llvm.dx.saturate.v3f32(<3 x float>) #1 + +; CHECK-LABEL: test_saturate_float4 +define noundef <4 x float> @test_saturate_float4(<4 x float> noundef %p0) #0 { +entry: + %p0.addr = alloca <4 x float>, align 16 + store <4 x float> %p0, ptr %p0.addr, align 16, !tbaa !8 + %0 = load <4 x float>, ptr %p0.addr, align 16, !tbaa !8 + ; CHECK: %1 = extractelement <4 x float> %0, i64 0 + ; CHECK-NEXT: %2 = call float @dx.op.unary.f32(i32 7, float %1) + ; CHECK-NEXT: %3 = insertelement <4 x float> %0, float %2, i64 0 + ; CHECK-NEXT: %4 = extractelement <4 x float> %0, i64 1 + ; CHECK-NEXT: %5 = call float @dx.op.unary.f32(i32 7, float %4) + ; CHECK-NEXT: %6 = insertelement <4 x float> %0, float %5, i64 1 + ; CHECK-NEXT: %7 = extractelement <4 x float> %0, i64 2 + ; CHECK-NEXT: %8 = call float @dx.op.unary.f32(i32 7, float %7) + ; CHECK-NEXT: %9 = insertelement <4 x float> %0, float %8, i64 2 + ; CHECK-NEXT: %10 = extractelement <4 x float> %0, i64 3 + ; CHECK-NEXT: %11 = call float @dx.op.unary.f32(i32 7, float %10) + ; CHECK-NEXT: %12 = insertelement <4 x float> %0, float %11, i64 3 + %hlsl.saturate = call <4 x float> @llvm.dx.saturate.v4f32(<4 x float> %0) + ; CHECK: ret <4 x float> %12 + ret <4 x float> %hlsl.saturate +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn +declare <4 x float> @llvm.dx.saturate.v4f32(<4 x float>) #1 + +; CHECK-LABEL: test_saturate_double +define noundef double @test_saturate_double(double noundef %p0) #0 { 
+entry: + %p0.addr = alloca double, align 8 + store double %p0, ptr %p0.addr, align 8, !tbaa !11 + %0 = load double, ptr %p0.addr, align 8, !tbaa !11 + ; CHECK: %1 = call double @dx.op.unary.f64(i32 7, double %0) + %hlsl.saturate = call double @llvm.dx.saturate.f64(double %0) + ; CHECK: ret double %1 + ret double %hlsl.saturate +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn +declare double @llvm.dx.saturate.f64(double) #1 + +; CHECK-LABEL: test_saturate_double2 +define noundef <2 x double> @test_saturate_double2(<2 x double> noundef %p0) #0 { +entry: + %p0.addr = alloca <2 x double>, align 16 + store <2 x double> %p0, ptr %p0.addr, align 16, !tbaa !8 + %0 = load <2 x double>, ptr %p0.addr, align 16, !tbaa !8 + ; CHECK: %1 = extractelement <2 x double> %0, i64 0 + ; CHECK-NEXT: %2 = call double @dx.op.unary.f64(i32 7, double %1) + ; CHECK-NEXT: %3 = insertelement <2 x double> %0, double %2, i64 0 + ; CHECK-NEXT: %4 = extractelement <2 x double> %0, i64 1 + ; CHECK-NEXT: %5 = call double @dx.op.unary.f64(i32 7, double %4) + ; CHECK-NEXT: %6 = insertelement <2 x double> %0, double %5, i64 1 + %hlsl.saturate = call <2 x double> @llvm.dx.saturate.v2f64(<2 x double> %0) + ; CHECK: ret <2 x double> %6 + ret <2 x double> %hlsl.saturate +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn +declare <2 x double> @llvm.dx.saturate.v2f64(<2 x double>) #1 + +; CHECK-LABEL: test_saturate_double3 +define noundef <3 x double> @test_saturate_double3(<3 x double> noundef %p0) #0 { +entry: + %p0.addr = alloca <3 x double>, align 32 + store <3 x double> %p0, ptr %p0.addr, align 32, !tbaa !8 + %0 = load <3 x double>, ptr %p0.addr, align 32, !tbaa !8 + ; CHECK: %1 = extractelement <3 x double> %0, i64 0 + ; CHECK-NEXT: %2 = call double @dx.op.unary.f64(i32 7, double %1) + ; CHECK-NEXT: %3 = insertelement <3 x double> %0, double %2, i64 0 + ; CHECK-NEXT: %4 = extractelement <3 x double> %0, i64 1 + ; CHECK-NEXT: %5 = call double @dx.op.unary.f64(i32 
7, double %4) + ; CHECK-NEXT: %6 = insertelement <3 x double> %0, double %5, i64 1 + ; CHECK-NEXT: %7 = extractelement <3 x double> %0, i64 2 + ; CHECK-NEXT: %8 = call double @dx.op.unary.f64(i32 7, double %7) + ; CHECK-NEXT: %9 = insertelement <3 x double> %0, double %8, i64 2 + %hlsl.saturate = call <3 x double> @llvm.dx.saturate.v3f64(<3 x double> %0) + ; CHECK: ret <3 x double> %9 + ret <3 x double> %hlsl.saturate +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn +declare <3 x double> @llvm.dx.saturate.v3f64(<3 x double>) #1 + +; CHECK-LABEL: test_saturate_double4 +define noundef <4 x double> @test_saturate_double4(<4 x double> noundef %p0) #0 { +entry: + %p0.addr = alloca <4 x double>, align 32 + store <4 x double> %p0, ptr %p0.addr, align 32, !tbaa !8 + %0 = load <4 x double>, ptr %p0.addr, align 32, !tbaa !8 + ; CHECK: %1 = extractelement <4 x double> %0, i64 0 + ; CHECK-NEXT: %2 = call double @dx.op.unary.f64(i32 7, double %1) + ; CHECK-NEXT: %3 = insertelement <4 x double> %0, double %2, i64 0 + ; CHECK-NEXT: %4 = extractelement <4 x double> %0, i64 1 + ; CHECK-NEXT: %5 = call double @dx.op.unary.f64(i32 7, double %4) + ; CHECK-NEXT: %6 = insertelement <4 x double> %0, double %5, i64 1 + ; CHECK-NEXT: %7 = extractelement <4 x double> %0, i64 2 + ; CHECK-NEXT: %8 = call double @dx.op.unary.f64(i32 7, double %7) + ; CHECK-NEXT: %9 = insertelement <4 x double> %0, double %8, i64 2 + ; CHECK-NEXT: %10 = extractelement <4 x double> %0, i64 3 + ; CHECK-NEXT: %11 = call double @dx.op.unary.f64(i32 7, double %10) + ; CHECK-NEXT: %12 = insertelement <4 x double> %0, double %11, i64 3 + %hlsl.saturate = call <4 x double> @llvm.dx.saturate.v4f64(<4 x double> %0) + ; CHECK: ret <4 x double> %12 + ret <4 x double> %hlsl.saturate +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn +declare <4 x double> @llvm.dx.saturate.v4f64(<4 x double>) #1 + +attributes #0 = { convergent nounwind "frame-pointer"="all" "no-trapping-math"="true" 
"stack-protector-buffer-size"="8" } +attributes #1 = { nocallback nofree nosync nounwind willreturn } + +!llvm.module.flags = !{!0, !1} +!dx.valver = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"frame-pointer", i32 2} +!2 = !{i32 1, i32 7} +!4 = !{!5, !5, i64 0} +!5 = !{!"half", !6, i64 0} +!6 = !{!"omnipotent char", !7, i64 0} +!7 = !{!"Simple C++ TBAA"} +!8 = !{!6, !6, i64 0} +!9 = !{!10, !10, i64 0} +!10 = !{!"float", !6, i64 0} +!11 = !{!12, !12, i64 0} +!12 = !{!"double", !6, i64 0} diff --git a/llvm/test/CodeGen/DirectX/saturate_errors.ll b/llvm/test/CodeGen/DirectX/saturate_errors.ll new file mode 100644 index 00000000000000..940843f5e58475 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/saturate_errors.ll @@ -0,0 +1,14 @@ +; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s +; Make sure lowering the intrinsic dx.saturate is rejected for overload types other than half/float/double. + +target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" +target triple = "dxilv1.6-unknown-shadermodel6.6-library" + +; DXIL operation saturate does not support i32 overload +; CHECK: invalid intrinsic signature + +define noundef i32 @test_saturate_i32(i32 noundef %p0) #0 { +entry: + %hlsl.saturate = call i32 @llvm.dx.saturate.i32(i32 %p0) + ret i32 %hlsl.saturate +} diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/saturate.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/saturate.ll new file mode 100644 index 00000000000000..0b05b615c4ad17 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/saturate.ll @@ -0,0 +1,83 @@ +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; Make sure SPIRV operation function calls for saturate are lowered correctly. 
+ +; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450" +; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16 +; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4 +; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32 +; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4 +; CHECK-DAG: %[[#float_64:]] = OpTypeFloat 64 +; CHECK-DAG: %[[#vec4_float_64:]] = OpTypeVector %[[#float_64]] 4 +; CHECK-DAG: %[[#zero_float_16:]] = OpConstant %[[#float_16]] 0 +; CHECK-DAG: %[[#vec4_zero_float_16:]] = OpConstantComposite %[[#vec4_float_16]] %[[#zero_float_16]] %[[#zero_float_16]] %[[#zero_float_16]] +; CHECK-DAG: %[[#one_float_16:]] = OpConstant %[[#float_16]] 15360 +; CHECK-DAG: %[[#vec4_one_float_16:]] = OpConstantComposite %[[#vec4_float_16]] %[[#one_float_16]] %[[#one_float_16]] %[[#one_float_16]] +; CHECK-DAG: %[[#zero_float_32:]] = OpConstant %[[#float_32]] 0 +; CHECK-DAG: %[[#vec4_zero_float_32:]] = OpConstantComposite %[[#vec4_float_32]] %[[#zero_float_32]] %[[#zero_float_32]] %[[#zero_float_32]] +; CHECK-DAG: %[[#one_float_32:]] = OpConstant %[[#float_32]] 1 +; CHECK-DAG: %[[#vec4_one_float_32:]] = OpConstantComposite %[[#vec4_float_32]] %[[#one_float_32]] %[[#one_float_32]] %[[#one_float_32]] + +; CHECK-DAG: %[[#zero_float_64:]] = OpConstant %[[#float_64]] 0 +; CHECK-DAG: %[[#vec4_zero_float_64:]] = OpConstantComposite %[[#vec4_float_64]] %[[#zero_float_64]] %[[#zero_float_64]] %[[#zero_float_64]] +; CHECK-DAG: %[[#one_float_64:]] = OpConstant %[[#float_64]] 1 +; CHECK-DAG: %[[#vec4_one_float_64:]] = OpConstantComposite %[[#vec4_float_64]] %[[#one_float_64]] %[[#one_float_64]] %[[#one_float_64]] + +define noundef half @saturate_half(half noundef %a) { +entry: + ; CHECK: %[[#]] = OpFunction %[[#float_16]] None %[[#]] + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#float_16]] + ; CHECK: %[[#]] = OpExtInst %[[#float_16]] %[[#op_ext_glsl]] FClamp %[[#arg0]] %[[#zero_float_16]] %[[#one_float_16]] + %hlsl.saturate = call half 
@llvm.spv.saturate.f16(half %a) + ret half %hlsl.saturate +} + +define noundef float @saturate_float(float noundef %a) { +entry: + ; CHECK: %[[#]] = OpFunction %[[#float_32]] None %[[#]] + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#float_32]] + ; CHECK: %[[#]] = OpExtInst %[[#float_32]] %[[#op_ext_glsl]] FClamp %[[#arg0]] %[[#zero_float_32]] %[[#one_float_32]] + %hlsl.saturate = call float @llvm.spv.saturate.f32(float %a) + ret float %hlsl.saturate +} + +define noundef double @saturate_double(double noundef %a) { +entry: + ; CHECK: %[[#]] = OpFunction %[[#float_64]] None %[[#]] + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#float_64]] + ; CHECK: %[[#]] = OpExtInst %[[#float_64]] %[[#op_ext_glsl]] FClamp %[[#arg0]] %[[#zero_float_64]] %[[#one_float_64]] + %hlsl.saturate = call double @llvm.spv.saturate.f64(double %a) + ret double %hlsl.saturate +} + +define noundef <4 x half> @saturate_half4(<4 x half> noundef %a) { +entry: + ; CHECK: %[[#]] = OpFunction %[[#vec4_float_16]] None %[[#]] + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_16]] + ; CHECK: %[[#]] = OpExtInst %[[#vec4_float_16]] %[[#op_ext_glsl]] FClamp %[[#arg0]] %[[#vec4_zero_float_16]] %[[#vec4_one_float_16]] + %hlsl.saturate = call <4 x half> @llvm.spv.saturate.v4f16(<4 x half> %a) + ret <4 x half> %hlsl.saturate +} + +define noundef <4 x float> @saturate_float4(<4 x float> noundef %a) { +entry: + ; CHECK: %[[#]] = OpFunction %[[#vec4_float_32]] None %[[#]] + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_32]] + ; CHECK: %[[#]] = OpExtInst %[[#vec4_float_32]] %[[#op_ext_glsl]] FClamp %[[#arg0]] %[[#vec4_zero_float_32]] %[[#vec4_one_float_32]] + %hlsl.saturate = call <4 x float> @llvm.spv.saturate.v4f32(<4 x float> %a) + ret <4 x float> %hlsl.saturate +} + +define noundef <4 x double> @saturate_double4(<4 x double> noundef %a) { +entry: + ; CHECK: %[[#]] = OpFunction %[[#vec4_float_64]] None %[[#]] + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_64]] + ; CHECK: 
%[[#]] = OpExtInst %[[#vec4_float_64]] %[[#op_ext_glsl]] FClamp %[[#arg0]] %[[#vec4_zero_float_64]] %[[#vec4_one_float_64]] + %hlsl.saturate = call <4 x double> @llvm.spv.saturate.v4f64(<4 x double> %a) + ret <4 x double> %hlsl.saturate +} + +declare <4 x half> @llvm.spv.saturate.v4f16(<4 x half>) +declare <4 x float> @llvm.spv.saturate.v4f32(<4 x float>) +declare <4 x double> @llvm.spv.saturate.v4f64(<4 x double>) >From 80824822c9852d57a137cea85082c1b464f40ec9 Mon Sep 17 00:00:00 2001 From: Bharadwaj Yadavalli <bharadwaj.yadava...@microsoft.com> Date: Fri, 16 Aug 2024 17:44:53 -0400 Subject: [PATCH 2/3] Add test to verify generation of llvm.spv.saturate.* instructions Change type check to assert. --- clang/lib/CodeGen/CGBuiltin.cpp | 4 +- clang/test/CodeGenHLSL/builtins/saturate.hlsl | 41 +++++++++++++++++++ 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index f4353f595efbac..495fb3e1e5b697 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18669,8 +18669,8 @@ case Builtin::BI__builtin_hlsl_elementwise_isinf: { } case Builtin::BI__builtin_hlsl_elementwise_saturate: { Value *Op0 = EmitScalarExpr(E->getArg(0)); - if (!E->getArg(0)->getType()->hasFloatingRepresentation()) - llvm_unreachable("saturate operand must have a float representation"); + assert(E->getArg(0)->getType()->hasFloatingRepresentation() && + "saturate operand must have a float representation"); return Builder.CreateIntrinsic( /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getSaturateIntrinsic(), ArrayRef<Value *>{Op0}, diff --git a/clang/test/CodeGenHLSL/builtins/saturate.hlsl b/clang/test/CodeGenHLSL/builtins/saturate.hlsl index 970d7b7371b1eb..65a3cd74621cc0 100644 --- a/clang/test/CodeGenHLSL/builtins/saturate.hlsl +++ b/clang/test/CodeGenHLSL/builtins/saturate.hlsl @@ -6,49 +6,90 @@ // RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ // RUN: -o - | 
FileCheck %s --check-prefixes=CHECK,NO_HALF +// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ +// RUN: spirv-unknown-vulkan-library %s -fnative-half-type \ +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ +// RUN: --check-prefixes=SPIRV,SPIRV_HALF +// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ +// RUN: spirv-unknown-vulkan-library %s \ +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ +// RUN: --check-prefixes=SPIRV,SPIRV_NO_HALF + // NATIVE_HALF: define noundef half @ // NATIVE_HALF: call half @llvm.dx.saturate.f16( // NO_HALF: define noundef float @"?test_saturate_half // NO_HALF: call float @llvm.dx.saturate.f32( +// SPIRV_HALF: define spir_func noundef half @_Z18test_saturate_halfDh(half +// SPIRV_HALF: call half @llvm.spv.saturate.f16(half +// SPIRV_NO_HALF: define spir_func noundef float @_Z18test_saturate_halfDh(float +// SPIRV_NO_HALF: call float @llvm.spv.saturate.f32(float half test_saturate_half(half p0) { return saturate(p0); } // NATIVE_HALF: define noundef <2 x half> @ // NATIVE_HALF: call <2 x half> @llvm.dx.saturate.v2f16 // NO_HALF: define noundef <2 x float> @"?test_saturate_half2 // NO_HALF: call <2 x float> @llvm.dx.saturate.v2f32( +// SPIRV_HALF: define spir_func noundef <2 x half> @_Z19test_saturate_half2Dv2_Dh( +// SPIRV_HALF: call <2 x half> @llvm.spv.saturate.v2f16(<2 x half> +// SPIRV_NO_HALF: define spir_func noundef <2 x float> @_Z19test_saturate_half2Dv2_Dh(<2 x float> +// SPIRV_NO_HALF: call <2 x float> @llvm.spv.saturate.v2f32(<2 x float> half2 test_saturate_half2(half2 p0) { return saturate(p0); } // NATIVE_HALF: define noundef <3 x half> @ // NATIVE_HALF: call <3 x half> @llvm.dx.saturate.v3f16 // NO_HALF: define noundef <3 x float> @"?test_saturate_half3 // NO_HALF: call <3 x float> @llvm.dx.saturate.v3f32( +// SPIRV_HALF: define spir_func noundef <3 x half> @_Z19test_saturate_half3Dv3_Dh( +// SPIRV_HALF: call <3 x half> @llvm.spv.saturate.v3f16(<3 
x half> +// SPIRV_NO_HALF: define spir_func noundef <3 x float> @_Z19test_saturate_half3Dv3_Dh(<3 x float> +// SPIRV_NO_HALF: call <3 x float> @llvm.spv.saturate.v3f32(<3 x float> half3 test_saturate_half3(half3 p0) { return saturate(p0); } // NATIVE_HALF: define noundef <4 x half> @ // NATIVE_HALF: call <4 x half> @llvm.dx.saturate.v4f16 // NO_HALF: define noundef <4 x float> @"?test_saturate_half4 // NO_HALF: call <4 x float> @llvm.dx.saturate.v4f32( +// SPIRV_HALF: define spir_func noundef <4 x half> @_Z19test_saturate_half4Dv4_Dh( +// SPIRV_HALF: call <4 x half> @llvm.spv.saturate.v4f16(<4 x half> +// SPIRV_NO_HALF: define spir_func noundef <4 x float> @_Z19test_saturate_half4Dv4_Dh(<4 x float> +// SPIRV_NO_HALF: call <4 x float> @llvm.spv.saturate.v4f32(<4 x float> half4 test_saturate_half4(half4 p0) { return saturate(p0); } // CHECK: define noundef float @"?test_saturate_float // CHECK: call float @llvm.dx.saturate.f32( +// SPIRV: define spir_func noundef float @_Z19test_saturate_floatf(float +// SPIRV: call float @llvm.spv.saturate.f32(float float test_saturate_float(float p0) { return saturate(p0); } // CHECK: define noundef <2 x float> @"?test_saturate_float2 // CHECK: call <2 x float> @llvm.dx.saturate.v2f32 +// SPIRV: define spir_func noundef <2 x float> @_Z20test_saturate_float2Dv2_f(<2 x float> +// SPIRV: call <2 x float> @llvm.spv.saturate.v2f32(<2 x float> float2 test_saturate_float2(float2 p0) { return saturate(p0); } // CHECK: define noundef <3 x float> @"?test_saturate_float3 // CHECK: call <3 x float> @llvm.dx.saturate.v3f32 +// SPIRV: define spir_func noundef <3 x float> @_Z20test_saturate_float3Dv3_f(<3 x float> +// SPIRV: call <3 x float> @llvm.spv.saturate.v3f32(<3 x float> float3 test_saturate_float3(float3 p0) { return saturate(p0); } // CHECK: define noundef <4 x float> @"?test_saturate_float4 // CHECK: call <4 x float> @llvm.dx.saturate.v4f32 +// SPIRV: define spir_func noundef <4 x float> @_Z20test_saturate_float4Dv4_f(<4 x float> +// 
SPIRV: call <4 x float> @llvm.spv.saturate.v4f32(<4 x float> float4 test_saturate_float4(float4 p0) { return saturate(p0); } // CHECK: define noundef double @ // CHECK: call double @llvm.dx.saturate.f64( +// SPIRV: define spir_func noundef double @_Z20test_saturate_doubled(double +// SPIRV: call double @llvm.spv.saturate.f64(double double test_saturate_double(double p0) { return saturate(p0); } // CHECK: define noundef <2 x double> @ // CHECK: call <2 x double> @llvm.dx.saturate.v2f64 +// SPIRV: define spir_func noundef <2 x double> @_Z21test_saturate_double2Dv2_d(<2 x double> +// SPIRV: call <2 x double> @llvm.spv.saturate.v2f64(<2 x double> double2 test_saturate_double2(double2 p0) { return saturate(p0); } // CHECK: define noundef <3 x double> @ // CHECK: call <3 x double> @llvm.dx.saturate.v3f64 +// SPIRV: define spir_func noundef <3 x double> @_Z21test_saturate_double3Dv3_d(<3 x double> +// SPIRV: call <3 x double> @llvm.spv.saturate.v3f64(<3 x double> double3 test_saturate_double3(double3 p0) { return saturate(p0); } // CHECK: define noundef <4 x double> @ // CHECK: call <4 x double> @llvm.dx.saturate.v4f64 +// SPIRV: define spir_func noundef <4 x double> @_Z21test_saturate_double4Dv4_d(<4 x double> +// SPIRV: call <4 x double> @llvm.spv.saturate.v4f64(<4 x double> double4 test_saturate_double4(double4 p0) { return saturate(p0); } >From 6fce77ab1180354195e4a74a8c9252567dc9d039 Mon Sep 17 00:00:00 2001 From: Bharadwaj Yadavalli <bharadwaj.yadava...@microsoft.com> Date: Mon, 19 Aug 2024 11:41:50 -0400 Subject: [PATCH 3/3] Drop vector operation expansion of saturate as it is expected to be handled in a scalarization pass planned to be implemented in the future. Remove saturate tests with vector operands. 
--- .../Target/DirectX/DXILIntrinsicExpansion.cpp | 1 - llvm/test/CodeGen/DirectX/saturate.ll | 207 ------------------ 2 files changed, 208 deletions(-) diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp index 9b467dff3e126f..f18adbccac95ee 100644 --- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp +++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp @@ -47,7 +47,6 @@ static bool isIntrinsicExpansion(Function &F) { case Intrinsic::dx_normalize: case Intrinsic::dx_sdot: case Intrinsic::dx_udot: - case Intrinsic::dx_saturate: return true; } return false; diff --git a/llvm/test/CodeGen/DirectX/saturate.ll b/llvm/test/CodeGen/DirectX/saturate.ll index 0c96249ccea435..bc1945836f6a4b 100644 --- a/llvm/test/CodeGen/DirectX/saturate.ll +++ b/llvm/test/CodeGen/DirectX/saturate.ll @@ -19,75 +19,6 @@ entry: ; Function Attrs: nocallback nofree nosync nounwind willreturn declare half @llvm.dx.saturate.f16(half) #1 -; CHECK-LABEL: test_saturate_half2 -define noundef <2 x half> @test_saturate_half2(<2 x half> noundef %p0) #0 { -entry: - %p0.addr = alloca <2 x half>, align 4 - store <2 x half> %p0, ptr %p0.addr, align 4, !tbaa !8 - %0 = load <2 x half>, ptr %p0.addr, align 4, !tbaa !8 - ; CHECK: %1 = extractelement <2 x half> %0, i64 0 - ; CHECK-NEXT: %2 = call half @dx.op.unary.f16(i32 7, half %1) - ; CHECK-NEXT: %3 = insertelement <2 x half> %0, half %2, i64 0 - ; CHECK-NEXT: %4 = extractelement <2 x half> %0, i64 1 - ; CHECK-NEXT: %5 = call half @dx.op.unary.f16(i32 7, half %4) - ; CHECK-NEXT: %6 = insertelement <2 x half> %0, half %5, i64 1 - %hlsl.saturate = call <2 x half> @llvm.dx.saturate.v2f16(<2 x half> %0) - ; CHECK: ret <2 x half> %6 - ret <2 x half> %hlsl.saturate -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn -declare <2 x half> @llvm.dx.saturate.v2f16(<2 x half>) #1 - -; CHECK-LABEL: test_saturate_half3 -define noundef <3 x half> @test_saturate_half3(<3 x half> noundef 
%p0) #0 { -entry: - %p0.addr = alloca <3 x half>, align 8 - store <3 x half> %p0, ptr %p0.addr, align 8, !tbaa !8 - %0 = load <3 x half>, ptr %p0.addr, align 8, !tbaa !8 - ; CHECK: %1 = extractelement <3 x half> %0, i64 0 - ; CHECK-NEXT: %2 = call half @dx.op.unary.f16(i32 7, half %1) - ; CHECK-NEXT: %3 = insertelement <3 x half> %0, half %2, i64 0 - ; CHECK-NEXT: %4 = extractelement <3 x half> %0, i64 1 - ; CHECK-NEXT: %5 = call half @dx.op.unary.f16(i32 7, half %4) - ; CHECK-NEXT: %6 = insertelement <3 x half> %0, half %5, i64 1 - ; CHECK-NEXT: %7 = extractelement <3 x half> %0, i64 2 - ; CHECK-NEXT: %8 = call half @dx.op.unary.f16(i32 7, half %7) - ; CHECK-NEXT: %9 = insertelement <3 x half> %0, half %8, i64 2 - %hlsl.saturate = call <3 x half> @llvm.dx.saturate.v3f16(<3 x half> %0) - ; CHECK: ret <3 x half> %9 - ret <3 x half> %hlsl.saturate -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn -declare <3 x half> @llvm.dx.saturate.v3f16(<3 x half>) #1 - -; CHECK-LABEL: test_saturate_half4 -define noundef <4 x half> @test_saturate_half4(<4 x half> noundef %p0) #0 { -entry: - %p0.addr = alloca <4 x half>, align 8 - store <4 x half> %p0, ptr %p0.addr, align 8, !tbaa !8 - %0 = load <4 x half>, ptr %p0.addr, align 8, !tbaa !8 - ; CHECK: %1 = extractelement <4 x half> %0, i64 0 - ; CHECK-NEXT: %2 = call half @dx.op.unary.f16(i32 7, half %1) - ; CHECK-NEXT: %3 = insertelement <4 x half> %0, half %2, i64 0 - ; CHECK-NEXT: %4 = extractelement <4 x half> %0, i64 1 - ; CHECK-NEXT: %5 = call half @dx.op.unary.f16(i32 7, half %4) - ; CHECK-NEXT: %6 = insertelement <4 x half> %0, half %5, i64 1 - ; CHECK-NEXT: %7 = extractelement <4 x half> %0, i64 2 - ; CHECK-NEXT: %8 = call half @dx.op.unary.f16(i32 7, half %7) - ; CHECK-NEXT: %9 = insertelement <4 x half> %0, half %8, i64 2 - ; CHECK-NEXT: %10 = extractelement <4 x half> %0, i64 3 - ; CHECK-NEXT: %11 = call half @dx.op.unary.f16(i32 7, half %10) - ; CHECK-NEXT: %12 = insertelement <4 x half> %0, half %11, 
i64 3 - %hlsl.saturate = call <4 x half> @llvm.dx.saturate.v4f16(<4 x half> %0) - ; CHECK: ret <4 x half> %12 - ret <4 x half> %hlsl.saturate -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn -declare <4 x half> @llvm.dx.saturate.v4f16(<4 x half>) #1 - ; CHECK-LABEL: test_saturate_float define noundef float @test_saturate_float(float noundef %p0) #0 { entry: @@ -103,75 +34,6 @@ entry: ; Function Attrs: nocallback nofree nosync nounwind willreturn declare float @llvm.dx.saturate.f32(float) #1 -; CHECK-LABEL: test_saturate_float2 -define noundef <2 x float> @test_saturate_float2(<2 x float> noundef %p0) #0 { -entry: - %p0.addr = alloca <2 x float>, align 8 - store <2 x float> %p0, ptr %p0.addr, align 8, !tbaa !8 - %0 = load <2 x float>, ptr %p0.addr, align 8, !tbaa !8 - ; CHECK: %1 = extractelement <2 x float> %0, i64 0 - ; CHECK-NEXT: %2 = call float @dx.op.unary.f32(i32 7, float %1) - ; CHECK-NEXT: %3 = insertelement <2 x float> %0, float %2, i64 0 - ; CHECK-NEXT: %4 = extractelement <2 x float> %0, i64 1 - ; CHECK-NEXT: %5 = call float @dx.op.unary.f32(i32 7, float %4) - ; CHECK-NEXT: %6 = insertelement <2 x float> %0, float %5, i64 1 - %hlsl.saturate = call <2 x float> @llvm.dx.saturate.v2f32(<2 x float> %0) - ; CHECK: ret <2 x float> %6 - ret <2 x float> %hlsl.saturate -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn -declare <2 x float> @llvm.dx.saturate.v2f32(<2 x float>) #1 - -; CHECK-LABEL: test_saturate_float3 -define noundef <3 x float> @test_saturate_float3(<3 x float> noundef %p0) #0 { -entry: - %p0.addr = alloca <3 x float>, align 16 - store <3 x float> %p0, ptr %p0.addr, align 16, !tbaa !8 - %0 = load <3 x float>, ptr %p0.addr, align 16, !tbaa !8 - ; CHECK: %1 = extractelement <3 x float> %0, i64 0 - ; CHECK-NEXT: %2 = call float @dx.op.unary.f32(i32 7, float %1) - ; CHECK-NEXT: %3 = insertelement <3 x float> %0, float %2, i64 0 - ; CHECK-NEXT: %4 = extractelement <3 x float> %0, i64 1 - ; CHECK-NEXT: %5 = call 
float @dx.op.unary.f32(i32 7, float %4) - ; CHECK-NEXT: %6 = insertelement <3 x float> %0, float %5, i64 1 - ; CHECK-NEXT: %7 = extractelement <3 x float> %0, i64 2 - ; CHECK-NEXT: %8 = call float @dx.op.unary.f32(i32 7, float %7) - ; CHECK-NEXT: %9 = insertelement <3 x float> %0, float %8, i64 2 - %hlsl.saturate = call <3 x float> @llvm.dx.saturate.v3f32(<3 x float> %0) - ; CHECK: ret <3 x float> %9 - ret <3 x float> %hlsl.saturate -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn -declare <3 x float> @llvm.dx.saturate.v3f32(<3 x float>) #1 - -; CHECK-LABEL: test_saturate_float4 -define noundef <4 x float> @test_saturate_float4(<4 x float> noundef %p0) #0 { -entry: - %p0.addr = alloca <4 x float>, align 16 - store <4 x float> %p0, ptr %p0.addr, align 16, !tbaa !8 - %0 = load <4 x float>, ptr %p0.addr, align 16, !tbaa !8 - ; CHECK: %1 = extractelement <4 x float> %0, i64 0 - ; CHECK-NEXT: %2 = call float @dx.op.unary.f32(i32 7, float %1) - ; CHECK-NEXT: %3 = insertelement <4 x float> %0, float %2, i64 0 - ; CHECK-NEXT: %4 = extractelement <4 x float> %0, i64 1 - ; CHECK-NEXT: %5 = call float @dx.op.unary.f32(i32 7, float %4) - ; CHECK-NEXT: %6 = insertelement <4 x float> %0, float %5, i64 1 - ; CHECK-NEXT: %7 = extractelement <4 x float> %0, i64 2 - ; CHECK-NEXT: %8 = call float @dx.op.unary.f32(i32 7, float %7) - ; CHECK-NEXT: %9 = insertelement <4 x float> %0, float %8, i64 2 - ; CHECK-NEXT: %10 = extractelement <4 x float> %0, i64 3 - ; CHECK-NEXT: %11 = call float @dx.op.unary.f32(i32 7, float %10) - ; CHECK-NEXT: %12 = insertelement <4 x float> %0, float %11, i64 3 - %hlsl.saturate = call <4 x float> @llvm.dx.saturate.v4f32(<4 x float> %0) - ; CHECK: ret <4 x float> %12 - ret <4 x float> %hlsl.saturate -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn -declare <4 x float> @llvm.dx.saturate.v4f32(<4 x float>) #1 - ; CHECK-LABEL: test_saturate_double define noundef double @test_saturate_double(double noundef %p0) #0 { entry: 
@@ -187,75 +49,6 @@ entry: ; Function Attrs: nocallback nofree nosync nounwind willreturn declare double @llvm.dx.saturate.f64(double) #1 -; CHECK-LABEL: test_saturate_double2 -define noundef <2 x double> @test_saturate_double2(<2 x double> noundef %p0) #0 { -entry: - %p0.addr = alloca <2 x double>, align 16 - store <2 x double> %p0, ptr %p0.addr, align 16, !tbaa !8 - %0 = load <2 x double>, ptr %p0.addr, align 16, !tbaa !8 - ; CHECK: %1 = extractelement <2 x double> %0, i64 0 - ; CHECK-NEXT: %2 = call double @dx.op.unary.f64(i32 7, double %1) - ; CHECK-NEXT: %3 = insertelement <2 x double> %0, double %2, i64 0 - ; CHECK-NEXT: %4 = extractelement <2 x double> %0, i64 1 - ; CHECK-NEXT: %5 = call double @dx.op.unary.f64(i32 7, double %4) - ; CHECK-NEXT: %6 = insertelement <2 x double> %0, double %5, i64 1 - %hlsl.saturate = call <2 x double> @llvm.dx.saturate.v2f64(<2 x double> %0) - ; CHECK: ret <2 x double> %6 - ret <2 x double> %hlsl.saturate -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn -declare <2 x double> @llvm.dx.saturate.v2f64(<2 x double>) #1 - -; CHECK-LABEL: test_saturate_double3 -define noundef <3 x double> @test_saturate_double3(<3 x double> noundef %p0) #0 { -entry: - %p0.addr = alloca <3 x double>, align 32 - store <3 x double> %p0, ptr %p0.addr, align 32, !tbaa !8 - %0 = load <3 x double>, ptr %p0.addr, align 32, !tbaa !8 - ; CHECK: %1 = extractelement <3 x double> %0, i64 0 - ; CHECK-NEXT: %2 = call double @dx.op.unary.f64(i32 7, double %1) - ; CHECK-NEXT: %3 = insertelement <3 x double> %0, double %2, i64 0 - ; CHECK-NEXT: %4 = extractelement <3 x double> %0, i64 1 - ; CHECK-NEXT: %5 = call double @dx.op.unary.f64(i32 7, double %4) - ; CHECK-NEXT: %6 = insertelement <3 x double> %0, double %5, i64 1 - ; CHECK-NEXT: %7 = extractelement <3 x double> %0, i64 2 - ; CHECK-NEXT: %8 = call double @dx.op.unary.f64(i32 7, double %7) - ; CHECK-NEXT: %9 = insertelement <3 x double> %0, double %8, i64 2 - %hlsl.saturate = call <3 x 
double> @llvm.dx.saturate.v3f64(<3 x double> %0) - ; CHECK: ret <3 x double> %9 - ret <3 x double> %hlsl.saturate -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn -declare <3 x double> @llvm.dx.saturate.v3f64(<3 x double>) #1 - -; CHECK-LABEL: test_saturate_double4 -define noundef <4 x double> @test_saturate_double4(<4 x double> noundef %p0) #0 { -entry: - %p0.addr = alloca <4 x double>, align 32 - store <4 x double> %p0, ptr %p0.addr, align 32, !tbaa !8 - %0 = load <4 x double>, ptr %p0.addr, align 32, !tbaa !8 - ; CHECK: %1 = extractelement <4 x double> %0, i64 0 - ; CHECK-NEXT: %2 = call double @dx.op.unary.f64(i32 7, double %1) - ; CHECK-NEXT: %3 = insertelement <4 x double> %0, double %2, i64 0 - ; CHECK-NEXT: %4 = extractelement <4 x double> %0, i64 1 - ; CHECK-NEXT: %5 = call double @dx.op.unary.f64(i32 7, double %4) - ; CHECK-NEXT: %6 = insertelement <4 x double> %0, double %5, i64 1 - ; CHECK-NEXT: %7 = extractelement <4 x double> %0, i64 2 - ; CHECK-NEXT: %8 = call double @dx.op.unary.f64(i32 7, double %7) - ; CHECK-NEXT: %9 = insertelement <4 x double> %0, double %8, i64 2 - ; CHECK-NEXT: %10 = extractelement <4 x double> %0, i64 3 - ; CHECK-NEXT: %11 = call double @dx.op.unary.f64(i32 7, double %10) - ; CHECK-NEXT: %12 = insertelement <4 x double> %0, double %11, i64 3 - %hlsl.saturate = call <4 x double> @llvm.dx.saturate.v4f64(<4 x double> %0) - ; CHECK: ret <4 x double> %12 - ret <4 x double> %hlsl.saturate -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn -declare <4 x double> @llvm.dx.saturate.v4f64(<4 x double>) #1 - attributes #0 = { convergent nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } attributes #1 = { nocallback nofree nosync nounwind willreturn } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits