llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-hlsl Author: Ashley Coleman (V-FEXrt) <details> <summary>Changes</summary> Closes https://github.com/llvm/llvm-project/issues/99116 - [x] Implement firstbitlow clang builtin - [x] Link firstbitlow clang builtin with hlsl_intrinsics.h - [x] Add sema checks for firstbitlow to SemaHLSL::CheckBuiltinFunctionCall in SemaHLSL.cpp - [x] Add codegen for firstbitlow to EmitHLSLBuiltinExpr in CGBuiltin.cpp - [x] Add codegen tests to clang/test/CodeGenHLSL/builtins/firstbitlow.hlsl - [x] Add sema tests to clang/test/SemaHLSL/BuiltIns/firstbitlow-errors.hlsl - [x] Create the int_dx_firstbitlow intrinsic in IntrinsicsDirectX.td - [x] Create the DXILOpMapping of int_dx_firstbitlow to 32 in DXIL.td - [x] Create the firstbitlow.ll and firstbitlow_error.ll tests in llvm/test/CodeGen/DirectX/ - [x] Create the int_spv_firstbitlow intrinsic in IntrinsicsSPIRV.td - [x] In SPIRVInstructionSelector.cpp create the firstbitlow lowering and map it to int_spv_firstbitlow in SPIRVInstructionSelector::selectIntrinsic. 
- [x] Create SPIR-V backend test case in llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll --- Patch is 33.49 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/116858.diff 16 Files Affected: - (modified) clang/include/clang/Basic/Builtins.td (+6) - (modified) clang/lib/CodeGen/CGBuiltin.cpp (+8-1) - (modified) clang/lib/CodeGen/CGHLSLRuntime.h (+1) - (modified) clang/lib/Headers/hlsl/hlsl_intrinsics.h (+72) - (modified) clang/lib/Sema/SemaHLSL.cpp (+2-1) - (added) clang/test/CodeGenHLSL/builtins/firstbitlow.hlsl (+153) - (modified) clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl (+2-4) - (added) clang/test/SemaHLSL/BuiltIns/firstbitlow-errors.hlsl (+26) - (modified) llvm/include/llvm/IR/IntrinsicsDirectX.td (+1) - (modified) llvm/include/llvm/IR/IntrinsicsSPIRV.td (+1) - (modified) llvm/lib/Target/DirectX/DXIL.td (+12) - (modified) llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp (+1) - (modified) llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp (+174) - (added) llvm/test/CodeGen/DirectX/firstbitlow.ll (+47) - (added) llvm/test/CodeGen/DirectX/firstbitlow_error.ll (+10) - (added) llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbitlow.ll (+104) ``````````diff diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index e866605ac05c09..afe0a311f236d8 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -4810,6 +4810,12 @@ def HLSLFirstBitHigh : LangBuiltin<"HLSL_LANG"> { let Prototype = "void(...)"; } +def HLSLFirstBitLow : LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_elementwise_firstbitlow"]; + let Attributes = [NoThrow, Const]; + let Prototype = "void(...)"; +} + def HLSLFrac : LangBuiltin<"HLSL_LANG"> { let Spellings = ["__builtin_hlsl_elementwise_frac"]; let Attributes = [NoThrow, Const]; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 9e0c0bff0125c0..a65b96684f18eb 100644 
--- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18956,7 +18956,6 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, "hlsl.dot4add.u8packed"); } case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: { - Value *X = EmitScalarExpr(E->getArg(0)); return Builder.CreateIntrinsic( @@ -18964,6 +18963,14 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, getFirstBitHighIntrinsic(CGM.getHLSLRuntime(), E->getArg(0)->getType()), ArrayRef<Value *>{X}, nullptr, "hlsl.firstbithigh"); } + case Builtin::BI__builtin_hlsl_elementwise_firstbitlow: { + Value *X = EmitScalarExpr(E->getArg(0)); + + return Builder.CreateIntrinsic( + /*ReturnType=*/ConvertType(E->getType()), + CGM.getHLSLRuntime().getFirstBitLowIntrinsic(), ArrayRef<Value *>{X}, + nullptr, "hlsl.firstbitlow"); + } case Builtin::BI__builtin_hlsl_lerp: { Value *X = EmitScalarExpr(E->getArg(0)); Value *Y = EmitScalarExpr(E->getArg(1)); diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h index 381a5959ec098e..c9eb1b08ff6ba6 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -96,6 +96,7 @@ class CGHLSLRuntime { GENERATE_HLSL_INTRINSIC_FUNCTION(WaveReadLaneAt, wave_readlane) GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitUHigh, firstbituhigh) GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitSHigh, firstbitshigh) + GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitLow, firstbitlow) GENERATE_HLSL_INTRINSIC_FUNCTION(NClamp, nclamp) GENERATE_HLSL_INTRINSIC_FUNCTION(SClamp, sclamp) GENERATE_HLSL_INTRINSIC_FUNCTION(UClamp, uclamp) diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h index 2ee3827d720495..610ddcea203d90 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h @@ -1086,6 +1086,78 @@ uint3 firstbithigh(uint64_t3); _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) uint4 firstbithigh(uint64_t4); 
+//===----------------------------------------------------------------------===// +// firstbitlow builtins +//===----------------------------------------------------------------------===// + +/// \fn T firstbitlow(T Val) +/// \brief Returns the location of the first set bit starting from the lowest +/// order bit and working upward, per component. +/// \param Val the input value. + +#ifdef __HLSL_ENABLE_16_BIT +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint firstbitlow(int16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint2 firstbitlow(int16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint3 firstbitlow(int16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint4 firstbitlow(int16_t4); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint firstbitlow(uint16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint2 firstbitlow(uint16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint3 firstbitlow(uint16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint4 firstbitlow(uint16_t4); +#endif + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint firstbitlow(int); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint2 firstbitlow(int2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint3 firstbitlow(int3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint4 firstbitlow(int4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint firstbitlow(uint); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint2 firstbitlow(uint2); 
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint3 firstbitlow(uint3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint4 firstbitlow(uint4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint firstbitlow(int64_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint2 firstbitlow(int64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint3 firstbitlow(int64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint4 firstbitlow(int64_t4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint firstbitlow(uint64_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint2 firstbitlow(uint64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint3 firstbitlow(uint64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow) +uint4 firstbitlow(uint64_t4); + //===----------------------------------------------------------------------===// // floor builtins //===----------------------------------------------------------------------===// diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 65b0d9cd65637f..c90ba9ae9e0f84 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -1947,7 +1947,8 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { return true; break; } - case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: { + case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: + case Builtin::BI__builtin_hlsl_elementwise_firstbitlow: { if (SemaRef.PrepareBuiltinElementwiseMathOneArgCall(TheCall)) return true; diff --git a/clang/test/CodeGenHLSL/builtins/firstbitlow.hlsl b/clang/test/CodeGenHLSL/builtins/firstbitlow.hlsl new file mode 100644 index 00000000000000..5d490fabc5bc8d --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/firstbitlow.hlsl @@ -0,0 +1,153 @@ +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: 
dxil-pc-shadermodel6.3-library %s -fnative-half-type \ +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s -DTARGET=dx +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \ +// RUN: -emit-llvm -disable-llvm-passes \ +// RUN: -o - | FileCheck %s -DTARGET=spv + +#ifdef __HLSL_ENABLE_16_BIT +// CHECK-LABEL: test_firstbitlow_ushort +// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i16 +uint test_firstbitlow_ushort(uint16_t p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_ushort2 +// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i16 +uint2 test_firstbitlow_ushort2(uint16_t2 p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_ushort3 +// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i16 +uint3 test_firstbitlow_ushort3(uint16_t3 p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_ushort4 +// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i16 +uint4 test_firstbitlow_ushort4(uint16_t4 p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_short +// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i16 +uint test_firstbitlow_short(int16_t p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_short2 +// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i16 +uint2 test_firstbitlow_short2(int16_t2 p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_short3 +// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i16 +uint3 test_firstbitlow_short3(int16_t3 p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_short4 +// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i16 +uint4 test_firstbitlow_short4(int16_t4 p0) { + return firstbitlow(p0); +} +#endif // __HLSL_ENABLE_16_BIT + +// CHECK-LABEL: test_firstbitlow_uint +// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i32 +uint test_firstbitlow_uint(uint p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: 
test_firstbitlow_uint2 +// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i32 +uint2 test_firstbitlow_uint2(uint2 p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_uint3 +// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i32 +uint3 test_firstbitlow_uint3(uint3 p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_uint4 +// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i32 +uint4 test_firstbitlow_uint4(uint4 p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_ulong +// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i64 +uint test_firstbitlow_ulong(uint64_t p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_ulong2 +// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i64 +uint2 test_firstbitlow_ulong2(uint64_t2 p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_ulong3 +// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i64 +uint3 test_firstbitlow_ulong3(uint64_t3 p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_ulong4 +// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i64 +uint4 test_firstbitlow_ulong4(uint64_t4 p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_int +// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i32 +uint test_firstbitlow_int(int p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_int2 +// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i32 +uint2 test_firstbitlow_int2(int2 p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_int3 +// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i32 +uint3 test_firstbitlow_int3(int3 p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_int4 +// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i32 +uint4 test_firstbitlow_int4(int4 p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_long +// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i64 +uint 
test_firstbitlow_long(int64_t p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_long2 +// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i64 +uint2 test_firstbitlow_long2(int64_t2 p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_long3 +// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i64 +uint3 test_firstbitlow_long3(int64_t3 p0) { + return firstbitlow(p0); +} + +// CHECK-LABEL: test_firstbitlow_long4 +// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i64 +uint4 test_firstbitlow_long4(int64_t4 p0) { + return firstbitlow(p0); +} diff --git a/clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl index 1912ab3ae806b3..b4024418dbba4f 100644 --- a/clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl @@ -17,12 +17,10 @@ double test_int_builtin(double p0) { double2 test_int_builtin_2(double2 p0) { return __builtin_hlsl_elementwise_firstbithigh(p0); - // expected-error@-1 {{1st argument must be a vector of integers - // (was 'double2' (aka 'vector<double, 2>'))}} + // expected-error@-1 {{1st argument must be a vector of integers (was 'double2' (aka 'vector<double, 2>'))}} } float test_int_builtin_3(float p0) { return __builtin_hlsl_elementwise_firstbithigh(p0); - // expected-error@-1 {{1st argument must be a vector of integers - // (was 'float')}} + // expected-error@-1 {{1st argument must be a vector of integers (was 'double')}} } diff --git a/clang/test/SemaHLSL/BuiltIns/firstbitlow-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/firstbitlow-errors.hlsl new file mode 100644 index 00000000000000..95c25e9e2fb60d --- /dev/null +++ b/clang/test/SemaHLSL/BuiltIns/firstbitlow-errors.hlsl @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -verify-ignore-unexpected + +int test_too_few_arg() { 
+ return firstbitlow(); + // expected-error@-1 {{no matching function for call to 'firstbitlow'}} +} + +int test_too_many_arg(int p0) { + return firstbitlow(p0, p0); + // expected-error@-1 {{no matching function for call to 'firstbitlow'}} +} + +double test_int_builtin(double p0) { + return firstbitlow(p0); + // expected-error@-1 {{call to 'firstbitlow' is ambiguous}} +} + +double2 test_int_builtin_2(double2 p0) { + return __builtin_hlsl_elementwise_firstbitlow(p0); + // expected-error@-1 {{1st argument must be a vector of integers (was 'double2' (aka 'vector<double, 2>'))}} +} + +float test_int_builtin_3(float p0) { + return __builtin_hlsl_elementwise_firstbitlow(p0); + // expected-error@-1 {{1st argument must be a vector of integers (was 'double')}} +} diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index 6093664c908dc5..6148dcaff64470 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -103,4 +103,5 @@ def int_dx_splitdouble : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0> def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; def int_dx_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>; def int_dx_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>; +def int_dx_firstbitlow : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>; } diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index f29eb7ee22b2d2..63e99524511143 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -106,6 +106,7 @@ let TargetPrefix = "spv" in { [IntrNoMem]>; def int_spv_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], 
[IntrNoMem]>; def int_spv_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>; + def int_spv_firstbitlow : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>; // Read a value from the image buffer. It does not translate directly to a // single OpImageRead because the result type is not necessarily a 4 element diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td index 078f0591a67515..efdd91c9c27f62 100644 --- a/llvm/lib/Target/DirectX/DXIL.td +++ b/llvm/lib/Target/DirectX/DXIL.td @@ -564,6 +564,18 @@ def CountBits : DXILOp<31, unaryBits> { let attributes = [Attributes<DXIL1_0, [ReadNone]>]; } +def FirstbitLo : DXILOp<32, unaryBits> { + let Doc = "Returns the location of the first set bit starting from " + "the lowest order bit and working upward."; + let LLVMIntrinsic = int_dx_firstbitlow; + let arguments = [OverloadTy]; + let result = Int32Ty; + let overloads = + [Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>]; + let stages = [Stages<DXIL1_0, [all_stages]>]; + let attributes = [Attributes<DXIL1_0, [ReadNone]>]; +} + def FirstbitHi : DXILOp<33, unaryBits> { let Doc = "Returns the location of the first set bit starting from " "the highest order bit and working downward."; diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp index b0436a39423405..c37e0dbb82e5a0 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp +++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp @@ -34,6 +34,7 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable( case Intrinsic::dx_splitdouble: case Intrinsic::dx_firstbituhigh: case Intrinsic::dx_firstbitshigh: + case Intrinsic::dx_firstbitlow: return true; default: return false; diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 
8a8835e0269200..c033f04305d0f0 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -106,6 +106,18 @@ class SPIRVInstructionSelector : public InstructionSelector { bool selectFirstBitHigh64(Register ResVReg, const SPIRVType *ResType, MachineInstr &I, bool IsSigned) const; + bool selectFirstBitLow(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I) const; + + bool selectFirstBitLow16(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I) const; + + bool selectFirstBitLow32(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I, Register SrcReg) const; + + bool selectFirstBitLow64(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I) const; + bool selectGlobalValue(Register ResVReg, MachineInstr &I, const MachineInstr *Init = nullptr) const; @@ -2786,6 +2798,8 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg, return selectFirstBitHigh(ResVReg, ResType, I, /*IsSigned=*/false); case Intrinsic::spv_firstbitshigh: // There is no CL equivalent of FindSMsb return selectFirstBitHigh(ResVReg, ResType, I, /*IsSigned=*/true); + case Intrinsic::spv_firstbitlow: // There is no CL equivalent of FindILsb + return selectFirstBitLow(ResVReg, ResType, I); case Intrinsic::spv_group_memory_barrier_with_group_sync: { bool Result = true; auto MemSemConstant = @@ -3158,6 +3172,166 @@ bool SPIRVInstructionSelector::selectFirstBitHigh(Register ResVReg, } } +bool SPIRVInstructionSelector::selectFirstBitLow16(Register ResVReg, + const SPIRVType *ResType, + MachineInstr &I) const { + // OpUConvert treats the operand bits as an unsigned i16 and zero extends it + // to an unsigned i32. As this leaves all the least significant bits unchanged + // the first set bit from the LSB side doesn't change. 
+ Register ExtReg = MRI->createVirtualRegister(GR.getRegClass(ResType)); + bool Result = selectNAryOpWithSrcs( + ExtReg, ResType, I, {I.getOperand(2).getReg()}, SPIRV::OpUConvert); + return Result && selectFirstBitLow32(ResVReg, ResType, I, ExtReg); +} + +bool SPIRVInstructionSelector::selectFirstBitLow32(Register ResVReg, + const SPIRVType *ResType, + MachineInstr &I, + Register SrcReg) const { + return BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpExtInst)) + .addDef(ResVReg) + .addUse(GR.getSPIRVTypeID(ResType)) + .addImm(static_cast<uint32_t>(SPIRV::InstructionSet::GLSL_std_450)) + .addImm(GL::FindILsb) + .addUse(SrcReg) + .constrainAllUses(TII, TRI, RBI); +} + +bool SPIRVInstructionSelector::selectFirstBitLow64(Register ResVReg, + const SPIRVType *ResType, + MachineInstr &I) const { + Register OpReg = I.getOperand(2).getReg(); + + // 1. Split int64 into 2 pieces using a bitcast + unsigned ComponentCount = GR.getScalarOrVectorComponentCount(ResType); + SPIRVType *BaseType = GR.retrieveScalarOrVectorIntType(ResType); ... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/116858 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits