https://github.com/Icohedron updated https://github.com/llvm/llvm-project/pull/166419
>From a8fc9962edf7cb032ef45fd189ea9f40d91f362d Mon Sep 17 00:00:00 2001 From: Deric Cheung <[email protected]> Date: Mon, 3 Nov 2025 19:00:15 -0800 Subject: [PATCH 1/8] Invert firstbithigh --- .../lib/Headers/hlsl/hlsl_alias_intrinsics.h | 72 ---------------- .../lib/Headers/hlsl/hlsl_intrinsic_helpers.h | 9 ++ clang/lib/Headers/hlsl/hlsl_intrinsics.h | 61 ++++++++++++++ .../CodeGenHLSL/builtins/firstbithigh.hlsl | 84 ++++++++++++------- 4 files changed, 124 insertions(+), 102 deletions(-) diff --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h index 4c5861c2c5f9d..c0914914a1262 100644 --- a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h @@ -1073,78 +1073,6 @@ float3 f16tof32(uint3); _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f16tof32) float4 f16tof32(uint4); -//===----------------------------------------------------------------------===// -// firstbithigh builtins -//===----------------------------------------------------------------------===// - -/// \fn T firstbithigh(T Val) -/// \brief Returns the location of the first set bit starting from the highest -/// order bit and working downward, per component. -/// \param Val the input value. 
- -#ifdef __HLSL_ENABLE_16_BIT -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint firstbithigh(int16_t); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint2 firstbithigh(int16_t2); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint3 firstbithigh(int16_t3); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint4 firstbithigh(int16_t4); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint firstbithigh(uint16_t); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint2 firstbithigh(uint16_t2); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint3 firstbithigh(uint16_t3); -_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint4 firstbithigh(uint16_t4); -#endif - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint firstbithigh(int); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint2 firstbithigh(int2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint3 firstbithigh(int3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint4 firstbithigh(int4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint firstbithigh(uint); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint2 firstbithigh(uint2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint3 firstbithigh(uint3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint4 firstbithigh(uint4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint firstbithigh(int64_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint2 firstbithigh(int64_t2); 
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint3 firstbithigh(int64_t3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint4 firstbithigh(int64_t4); - -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint firstbithigh(uint64_t); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint2 firstbithigh(uint64_t2); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint3 firstbithigh(uint64_t3); -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) -uint4 firstbithigh(uint64_t4); - //===----------------------------------------------------------------------===// // firstbitlow builtins //===----------------------------------------------------------------------===// diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h index c877234479ad1..8560c75016b4f 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h @@ -148,6 +148,15 @@ template <typename T> constexpr T ldexp_impl(T X, T Exp) { return exp2(Exp) * X; } +template <typename T, int Bitwidth> constexpr uint firstbithigh_impl(T X) { + return (Bitwidth - 1) - __builtin_hlsl_elementwise_firstbithigh(X); +} + +template <typename T, int N, int Bitwidth> +constexpr vector<uint, N> firstbithigh_impl(vector<T, N> X) { + return (Bitwidth - 1) - __builtin_hlsl_elementwise_firstbithigh(X); +} + } // namespace __detail } // namespace hlsl diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h index 5ba5bfb9abde0..192c3a2c974d9 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h @@ -261,6 +261,67 @@ faceforward(__detail::HLSL_FIXED_VECTOR<float, L> N, return __detail::faceforward_impl(N, I, Ng); } +//===----------------------------------------------------------------------===// +// firstbithigh builtins 
+//===----------------------------------------------------------------------===// + +/// \fn T firstbithigh(T Val) +/// \brief Returns the location of the first set bit starting from the highest +/// order bit and working downward, per component. +/// \param Val the input value. + +#ifdef __HLSL_ENABLE_16_BIT + +template <typename T> +_HLSL_AVAILABILITY(shadermodel, 6.2) +const inline __detail::enable_if_t<__detail::is_same<int16_t, T>::value || + __detail::is_same<uint16_t, T>::value, + uint> firstbithigh(T X) { + return __detail::firstbithigh_impl<T, 16>(X); +} + +template <typename T, int N> +_HLSL_AVAILABILITY(shadermodel, 6.2) +const + inline __detail::enable_if_t<__detail::is_same<int16_t, T>::value || + __detail::is_same<uint16_t, T>::value, + vector<uint, N>> firstbithigh(vector<T, N> X) { + return __detail::firstbithigh_impl<T, N, 16>(X); +} + +#endif + +template <typename T> +const inline __detail::enable_if_t< + __detail::is_same<int, T>::value || __detail::is_same<uint, T>::value, uint> +firstbithigh(T X) { + return __detail::firstbithigh_impl<T, 32>(X); +} + +template <typename T, int N> +const inline __detail::enable_if_t<__detail::is_same<int, T>::value || + __detail::is_same<uint, T>::value, + vector<uint, N>> +firstbithigh(vector<T, N> X) { + return __detail::firstbithigh_impl<T, N, 32>(X); +} + +template <typename T> +const inline __detail::enable_if_t<__detail::is_same<int64_t, T>::value || + __detail::is_same<uint64_t, T>::value, + uint> +firstbithigh(T X) { + return __detail::firstbithigh_impl<T, 64>(X); +} + +template <typename T, int N> +const inline __detail::enable_if_t<__detail::is_same<int64_t, T>::value || + __detail::is_same<uint64_t, T>::value, + vector<uint, N>> +firstbithigh(vector<T, N> X) { + return __detail::firstbithigh_impl<T, N, 64>(X); +} + //===----------------------------------------------------------------------===// // fmod builtins //===----------------------------------------------------------------------===// diff --git 
a/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl b/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl index 368d652a6f779..c8fa942fa81ff 100644 --- a/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl +++ b/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl @@ -1,160 +1,184 @@ // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ -// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type -fnative-int16-type \ -// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s -DTARGET=dx +// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ +// RUN: -fnative-int16-type -emit-llvm -o - | FileCheck %s -DTARGET=dx // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ -// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type -fnative-int16-type \ -// RUN: -emit-llvm -disable-llvm-passes \ -// RUN: -o - | FileCheck %s -DTARGET=spv +// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \ +// RUN: -fnative-int16-type -emit-llvm -o - | FileCheck %s -DTARGET=spv #ifdef __HLSL_ENABLE_16_BIT // CHECK-LABEL: test_firstbithigh_ushort -// CHECK: call i32 @llvm.[[TARGET]].firstbituhigh.i16 +// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbituhigh.i16 +// CHECK: sub i32 15, [[FBH]] uint test_firstbithigh_ushort(uint16_t p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_ushort2 -// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i16 +// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i16 +// CHECK: sub <2 x i32> splat (i32 15), [[FBH]] uint2 test_firstbithigh_ushort2(uint16_t2 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_ushort3 -// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i16 +// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i16 +// CHECK: sub <3 x i32> splat (i32 15), [[FBH]] uint3 test_firstbithigh_ushort3(uint16_t3 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_ushort4 -// CHECK: call <4 x i32> 
@llvm.[[TARGET]].firstbituhigh.v4i16 +// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i16 +// CHECK: sub <4 x i32> splat (i32 15), [[FBH]] uint4 test_firstbithigh_ushort4(uint16_t4 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_short -// CHECK: call i32 @llvm.[[TARGET]].firstbitshigh.i16 +// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbitshigh.i16 +// CHECK: sub i32 15, [[FBH]] uint test_firstbithigh_short(int16_t p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_short2 -// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i16 +// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i16 +// CHECK: sub <2 x i32> splat (i32 15), [[FBH]] uint2 test_firstbithigh_short2(int16_t2 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_short3 -// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i16 +// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i16 +// CHECK: sub <3 x i32> splat (i32 15), [[FBH]] uint3 test_firstbithigh_short3(int16_t3 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_short4 -// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i16 +// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i16 +// CHECK: sub <4 x i32> splat (i32 15), [[FBH]] uint4 test_firstbithigh_short4(int16_t4 p0) { return firstbithigh(p0); } #endif // __HLSL_ENABLE_16_BIT // CHECK-LABEL: test_firstbithigh_uint -// CHECK: call i32 @llvm.[[TARGET]].firstbituhigh.i32 +// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbituhigh.i32 +// CHECK: sub i32 31, [[FBH]] uint test_firstbithigh_uint(uint p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_uint2 -// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i32 +// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i32 +// CHECK: sub <2 x i32> splat (i32 31), [[FBH]] uint2 test_firstbithigh_uint2(uint2 p0) { return firstbithigh(p0); 
} // CHECK-LABEL: test_firstbithigh_uint3 -// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i32 +// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i32 +// CHECK: sub <3 x i32> splat (i32 31), [[FBH]] uint3 test_firstbithigh_uint3(uint3 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_uint4 -// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i32 +// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i32 +// CHECK: sub <4 x i32> splat (i32 31), [[FBH]] uint4 test_firstbithigh_uint4(uint4 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_ulong -// CHECK: call i32 @llvm.[[TARGET]].firstbituhigh.i64 +// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbituhigh.i64 +// CHECK: sub i32 63, [[FBH]] uint test_firstbithigh_ulong(uint64_t p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_ulong2 -// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i64 +// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i64 +// CHECK: sub <2 x i32> splat (i32 63), [[FBH]] uint2 test_firstbithigh_ulong2(uint64_t2 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_ulong3 -// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i64 +// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i64 +// CHECK: sub <3 x i32> splat (i32 63), [[FBH]] uint3 test_firstbithigh_ulong3(uint64_t3 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_ulong4 -// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i64 +// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i64 +// CHECK: sub <4 x i32> splat (i32 63), [[FBH]] uint4 test_firstbithigh_ulong4(uint64_t4 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_int -// CHECK: call i32 @llvm.[[TARGET]].firstbitshigh.i32 +// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbitshigh.i32 +// CHECK: sub i32 31, [[FBH]] uint test_firstbithigh_int(int 
p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_int2 -// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i32 +// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i32 +// CHECK: sub <2 x i32> splat (i32 31), [[FBH]] uint2 test_firstbithigh_int2(int2 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_int3 -// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i32 +// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i32 +// CHECK: sub <3 x i32> splat (i32 31), [[FBH]] uint3 test_firstbithigh_int3(int3 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_int4 -// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i32 +// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i32 +// CHECK: sub <4 x i32> splat (i32 31), [[FBH]] uint4 test_firstbithigh_int4(int4 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_long -// CHECK: call i32 @llvm.[[TARGET]].firstbitshigh.i64 +// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbitshigh.i64 +// CHECK: sub i32 63, [[FBH]] uint test_firstbithigh_long(int64_t p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_long2 -// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i64 +// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i64 +// CHECK: sub <2 x i32> splat (i32 63), [[FBH]] uint2 test_firstbithigh_long2(int64_t2 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_long3 -// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i64 +// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i64 +// CHECK: sub <3 x i32> splat (i32 63), [[FBH]] uint3 test_firstbithigh_long3(int64_t3 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_long4 -// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i64 +// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i64 +// CHECK: sub <4 x i32> splat (i32 
63), [[FBH]] uint4 test_firstbithigh_long4(int64_t4 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_upcast // CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i32(<4 x i32> %{{.*}}) -// CHECK: [[CONV:%.*]] = zext <4 x i32> [[FBH]] to <4 x i64> +// CHECK: [[SUB:%.*]] = sub <4 x i32> splat (i32 31), [[FBH]] +// CHECK: [[CONV:%.*]] = zext <4 x i32> [[SUB]] to <4 x i64> // CHECK: ret <4 x i64> [[CONV]] uint64_t4 test_firstbithigh_upcast(uint4 p0) { return firstbithigh(p0); >From ee253210af4261efa62fa7c1f6a3be5297ca1326 Mon Sep 17 00:00:00 2001 From: Deric Cheung <[email protected]> Date: Tue, 4 Nov 2025 12:21:53 -0800 Subject: [PATCH 2/8] Fix firstbithigh sema errors test --- clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl index f99e606fc6562..1f70186c78ad9 100644 --- a/clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl @@ -12,7 +12,7 @@ int test_too_many_arg(int p0) { double test_int_builtin(double p0) { return firstbithigh(p0); - // expected-error@-1 {{call to 'firstbithigh' is ambiguous}} + // expected-error@-1 {{no matching function for call to 'firstbithigh'}} } double2 test_int_builtin_2(double2 p0) { >From be827344e2fd122a4c9d65d9ff41358e8644da52 Mon Sep 17 00:00:00 2001 From: Deric Cheung <[email protected]> Date: Tue, 4 Nov 2025 15:20:15 -0800 Subject: [PATCH 3/8] Do not invert firstbithigh under SPIR-V or if the result is -1 --- .../lib/Headers/hlsl/hlsl_intrinsic_helpers.h | 22 +- .../CodeGenHLSL/builtins/firstbithigh.hlsl | 229 +++++++++++++----- 2 files changed, 193 insertions(+), 58 deletions(-) diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h index 8560c75016b4f..65d2095e9c5c2 100644 --- 
a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h @@ -148,13 +148,27 @@ template <typename T> constexpr T ldexp_impl(T X, T Exp) { return exp2(Exp) * X; } -template <typename T, int Bitwidth> constexpr uint firstbithigh_impl(T X) { - return (Bitwidth - 1) - __builtin_hlsl_elementwise_firstbithigh(X); +template <typename T, int BitWidth> constexpr uint firstbithigh_impl(T X) { + uint FBH = __builtin_hlsl_elementwise_firstbithigh(X); +#if defined(__DIRECTX__) + // The firstbithigh DXIL ops count bits from the wrong side, so we need to + // invert it for DirectX. + uint Inversion = (BitWidth - 1) - FBH; + FBH = select(FBH == -1, FBH, Inversion); +#endif + return FBH; } -template <typename T, int N, int Bitwidth> +template <typename T, int N, int BitWidth> constexpr vector<uint, N> firstbithigh_impl(vector<T, N> X) { - return (Bitwidth - 1) - __builtin_hlsl_elementwise_firstbithigh(X); + vector<uint, N> FBH = __builtin_hlsl_elementwise_firstbithigh(X); +#if defined(__DIRECTX__) + // The firstbithigh DXIL ops count bits from the wrong side, so we need to + // invert it for DirectX. 
+ vector<uint, N> Inversion = (BitWidth - 1) - FBH; + FBH = select(FBH == -1, FBH, Inversion); +#endif + return FBH; } } // namespace __detail diff --git a/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl b/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl index c8fa942fa81ff..69e1ec8572d33 100644 --- a/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl +++ b/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl @@ -1,185 +1,306 @@ // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ -// RUN: -fnative-int16-type -emit-llvm -o - | FileCheck %s -DTARGET=dx +// RUN: -fnative-int16-type -emit-llvm -o - | FileCheck %s -DTARGET=dx --check-prefixes=CHECK,DXCHECK // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \ -// RUN: -fnative-int16-type -emit-llvm -o - | FileCheck %s -DTARGET=spv +// RUN: -fnative-int16-type -emit-llvm -o - | FileCheck %s -DTARGET=spv --check-prefixes=CHECK,SPVCHECK #ifdef __HLSL_ENABLE_16_BIT // CHECK-LABEL: test_firstbithigh_ushort -// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbituhigh.i16 -// CHECK: sub i32 15, [[FBH]] +// CHECK: call i32 @llvm.[[TARGET]].firstbituhigh.i16 +// DXCHECK: sub i32 15, {{.*}} +// SPVCHECK-NOT: sub i32 15, {{.*}} +// DXCHECK: icmp eq i32 {{.*}}, -1 +// SPVCHECK-NOT: icmp eq i32 {{.*}}, -1 +// DXCHECK: select i1 {{.*}}, i32 {{.*}}, i32 {{.*}} +// SPVCHECK-NOT: select i1 {{.*}}, i32 {{.*}}, i32 {{.*}} uint test_firstbithigh_ushort(uint16_t p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_ushort2 -// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i16 -// CHECK: sub <2 x i32> splat (i32 15), [[FBH]] +// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i16 +// DXCHECK: sub <2 x i32> splat (i32 15), {{.*}} +// SPVCHECK-NOT: sub <2 x i32> splat (i32 15), {{.*}} +// DXCHECK: icmp eq <2 x i32> {{.*}}, splat (i32 -1) +// SPVCHECK-NOT: 
icmp eq <2 x i32> {{.*}}, splat (i32 -1) +// DXCHECK: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}} +// SPVCHECK-NOT: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}} uint2 test_firstbithigh_ushort2(uint16_t2 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_ushort3 -// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i16 -// CHECK: sub <3 x i32> splat (i32 15), [[FBH]] +// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i16 +// DXCHECK: sub <3 x i32> splat (i32 15), {{.*}} +// SPVCHECK-NOT: sub <3 x i32> splat (i32 15), {{.*}} +// DXCHECK: icmp eq <3 x i32> {{.*}}, splat (i32 -1) +// SPVCHECK-NOT: icmp eq <3 x i32> {{.*}}, splat (i32 -1) +// DXCHECK: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}} +// SPVCHECK-NOT: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}} uint3 test_firstbithigh_ushort3(uint16_t3 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_ushort4 -// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i16 -// CHECK: sub <4 x i32> splat (i32 15), [[FBH]] +// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i16 +// DXCHECK: sub <4 x i32> splat (i32 15), {{.*}} +// SPVCHECK-NOT: sub <4 x i32> splat (i32 15), {{.*}} +// DXCHECK: icmp eq <4 x i32> {{.*}}, splat (i32 -1) +// SPVCHECK-NOT: icmp eq <4 x i32> {{.*}}, splat (i32 -1) +// DXCHECK: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} +// SPVCHECK-NOT: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} uint4 test_firstbithigh_ushort4(uint16_t4 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_short -// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbitshigh.i16 -// CHECK: sub i32 15, [[FBH]] +// CHECK: call i32 @llvm.[[TARGET]].firstbitshigh.i16 +// DXCHECK: sub i32 15, {{.*}} +// SPVCHECK-NOT: sub i32 15, {{.*}} +// DXCHECK: icmp eq i32 {{.*}}, -1 +// SPVCHECK-NOT: icmp eq i32 {{.*}}, -1 +// DXCHECK: select i1 {{.*}}, i32 {{.*}}, i32 {{.*}} +// SPVCHECK-NOT: select i1 
{{.*}}, i32 {{.*}}, i32 {{.*}} uint test_firstbithigh_short(int16_t p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_short2 -// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i16 -// CHECK: sub <2 x i32> splat (i32 15), [[FBH]] +// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i16 +// DXCHECK: sub <2 x i32> splat (i32 15), {{.*}} +// SPVCHECK-NOT: sub <2 x i32> splat (i32 15), {{.*}} +// DXCHECK: icmp eq <2 x i32> {{.*}}, splat (i32 -1) +// SPVCHECK-NOT: icmp eq <2 x i32> {{.*}}, splat (i32 -1) +// DXCHECK: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}} +// SPVCHECK-NOT: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}} uint2 test_firstbithigh_short2(int16_t2 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_short3 -// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i16 -// CHECK: sub <3 x i32> splat (i32 15), [[FBH]] +// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i16 +// DXCHECK: sub <3 x i32> splat (i32 15), {{.*}} +// SPVCHECK-NOT: sub <3 x i32> splat (i32 15), {{.*}} +// DXCHECK: icmp eq <3 x i32> {{.*}}, splat (i32 -1) +// SPVCHECK-NOT: icmp eq <3 x i32> {{.*}}, splat (i32 -1) +// DXCHECK: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}} +// SPVCHECK-NOT: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}} uint3 test_firstbithigh_short3(int16_t3 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_short4 -// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i16 -// CHECK: sub <4 x i32> splat (i32 15), [[FBH]] +// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i16 +// DXCHECK: sub <4 x i32> splat (i32 15), {{.*}} +// SPVCHECK-NOT: sub <4 x i32> splat (i32 15), {{.*}} +// DXCHECK: icmp eq <4 x i32> {{.*}}, splat (i32 -1) +// SPVCHECK-NOT: icmp eq <4 x i32> {{.*}}, splat (i32 -1) +// DXCHECK: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} +// SPVCHECK-NOT: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> 
{{.*}} uint4 test_firstbithigh_short4(int16_t4 p0) { return firstbithigh(p0); } #endif // __HLSL_ENABLE_16_BIT // CHECK-LABEL: test_firstbithigh_uint -// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbituhigh.i32 -// CHECK: sub i32 31, [[FBH]] +// CHECK: call i32 @llvm.[[TARGET]].firstbituhigh.i32 +// DXCHECK: sub i32 31, {{.*}} +// SPVCHECK-NOT: sub i32 31, {{.*}} +// DXCHECK: icmp eq i32 {{.*}}, -1 +// SPVCHECK-NOT: icmp eq i32 {{.*}}, -1 +// DXCHECK: select i1 {{.*}}, i32 {{.*}}, i32 {{.*}} +// SPVCHECK-NOT: select i1 {{.*}}, i32 {{.*}}, i32 {{.*}} uint test_firstbithigh_uint(uint p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_uint2 -// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i32 -// CHECK: sub <2 x i32> splat (i32 31), [[FBH]] +// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i32 +// DXCHECK: sub <2 x i32> splat (i32 31), {{.*}} +// SPVCHECK-NOT: sub <2 x i32> splat (i32 31), {{.*}} +// DXCHECK: icmp eq <2 x i32> {{.*}}, splat (i32 -1) +// SPVCHECK-NOT: icmp eq <2 x i32> {{.*}}, splat (i32 -1) +// DXCHECK: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}} +// SPVCHECK-NOT: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}} uint2 test_firstbithigh_uint2(uint2 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_uint3 -// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i32 -// CHECK: sub <3 x i32> splat (i32 31), [[FBH]] +// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i32 +// DXCHECK: sub <3 x i32> splat (i32 31), {{.*}} +// SPVCHECK-NOT: sub <3 x i32> splat (i32 31), {{.*}} +// DXCHECK: icmp eq <3 x i32> {{.*}}, splat (i32 -1) +// SPVCHECK-NOT: icmp eq <3 x i32> {{.*}}, splat (i32 -1) +// DXCHECK: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}} +// SPVCHECK-NOT: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}} uint3 test_firstbithigh_uint3(uint3 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_uint4 -// CHECK: [[FBH:%.*]] 
= call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i32 -// CHECK: sub <4 x i32> splat (i32 31), [[FBH]] +// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i32 +// DXCHECK: sub <4 x i32> splat (i32 31), {{.*}} +// SPVCHECK-NOT: sub <4 x i32> splat (i32 31), {{.*}} +// DXCHECK: icmp eq <4 x i32> {{.*}}, splat (i32 -1) +// SPVCHECK-NOT: icmp eq <4 x i32> {{.*}}, splat (i32 -1) +// DXCHECK: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} +// SPVCHECK-NOT: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} uint4 test_firstbithigh_uint4(uint4 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_ulong -// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbituhigh.i64 -// CHECK: sub i32 63, [[FBH]] +// CHECK: call i32 @llvm.[[TARGET]].firstbituhigh.i64 +// DXCHECK: sub i32 63, {{.*}} +// SPVCHECK-NOT: sub i32 63, {{.*}} +// DXCHECK: icmp eq i32 {{.*}}, -1 +// SPVCHECK-NOT: icmp eq i32 {{.*}}, -1 +// DXCHECK: select i1 {{.*}}, i32 {{.*}}, i32 {{.*}} +// SPVCHECK-NOT: select i1 {{.*}}, i32 {{.*}}, i32 {{.*}} uint test_firstbithigh_ulong(uint64_t p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_ulong2 -// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i64 -// CHECK: sub <2 x i32> splat (i32 63), [[FBH]] +// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i64 +// DXCHECK: sub <2 x i32> splat (i32 63), {{.*}} +// SPVCHECK-NOT: sub <2 x i32> splat (i32 63), {{.*}} +// DXCHECK: icmp eq <2 x i32> {{.*}}, splat (i32 -1) +// SPVCHECK-NOT: icmp eq <2 x i32> {{.*}}, splat (i32 -1) +// DXCHECK: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}} +// SPVCHECK-NOT: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}} uint2 test_firstbithigh_ulong2(uint64_t2 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_ulong3 -// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i64 -// CHECK: sub <3 x i32> splat (i32 63), [[FBH]] +// CHECK: call <3 x i32> 
@llvm.[[TARGET]].firstbituhigh.v3i64 +// DXCHECK: sub <3 x i32> splat (i32 63), {{.*}} +// SPVCHECK-NOT: sub <3 x i32> splat (i32 63), {{.*}} +// DXCHECK: icmp eq <3 x i32> {{.*}}, splat (i32 -1) +// SPVCHECK-NOT: icmp eq <3 x i32> {{.*}}, splat (i32 -1) +// DXCHECK: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}} +// SPVCHECK-NOT: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}} uint3 test_firstbithigh_ulong3(uint64_t3 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_ulong4 -// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i64 -// CHECK: sub <4 x i32> splat (i32 63), [[FBH]] +// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i64 +// DXCHECK: sub <4 x i32> splat (i32 63), {{.*}} +// SPVCHECK-NOT: sub <4 x i32> splat (i32 63), {{.*}} +// DXCHECK: icmp eq <4 x i32> {{.*}}, splat (i32 -1) +// SPVCHECK-NOT: icmp eq <4 x i32> {{.*}}, splat (i32 -1) +// DXCHECK: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} +// SPVCHECK-NOT: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} uint4 test_firstbithigh_ulong4(uint64_t4 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_int -// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbitshigh.i32 -// CHECK: sub i32 31, [[FBH]] +// CHECK: call i32 @llvm.[[TARGET]].firstbitshigh.i32 +// DXCHECK: sub i32 31, {{.*}} +// SPVCHECK-NOT: sub i32 31, {{.*}} +// DXCHECK: icmp eq i32 {{.*}}, -1 +// SPVCHECK-NOT: icmp eq i32 {{.*}}, -1 +// DXCHECK: select i1 {{.*}}, i32 {{.*}}, i32 {{.*}} +// SPVCHECK-NOT: select i1 {{.*}}, i32 {{.*}}, i32 {{.*}} uint test_firstbithigh_int(int p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_int2 -// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i32 -// CHECK: sub <2 x i32> splat (i32 31), [[FBH]] +// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i32 +// DXCHECK: sub <2 x i32> splat (i32 31), {{.*}} +// SPVCHECK-NOT: sub <2 x i32> splat (i32 31), {{.*}} +// DXCHECK: icmp eq <2 x 
i32> {{.*}}, splat (i32 -1) +// SPVCHECK-NOT: icmp eq <2 x i32> {{.*}}, splat (i32 -1) +// DXCHECK: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}} +// SPVCHECK-NOT: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}} uint2 test_firstbithigh_int2(int2 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_int3 -// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i32 -// CHECK: sub <3 x i32> splat (i32 31), [[FBH]] +// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i32 +// DXCHECK: sub <3 x i32> splat (i32 31), {{.*}} +// SPVCHECK-NOT: sub <3 x i32> splat (i32 31), {{.*}} +// DXCHECK: icmp eq <3 x i32> {{.*}}, splat (i32 -1) +// SPVCHECK-NOT: icmp eq <3 x i32> {{.*}}, splat (i32 -1) +// DXCHECK: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}} +// SPVCHECK-NOT: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}} uint3 test_firstbithigh_int3(int3 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_int4 -// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i32 -// CHECK: sub <4 x i32> splat (i32 31), [[FBH]] +// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i32 +// DXCHECK: sub <4 x i32> splat (i32 31), {{.*}} +// SPVCHECK-NOT: sub <4 x i32> splat (i32 31), {{.*}} +// DXCHECK: icmp eq <4 x i32> {{.*}}, splat (i32 -1) +// SPVCHECK-NOT: icmp eq <4 x i32> {{.*}}, splat (i32 -1) +// DXCHECK: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} +// SPVCHECK-NOT: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} uint4 test_firstbithigh_int4(int4 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_long -// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbitshigh.i64 -// CHECK: sub i32 63, [[FBH]] +// CHECK: call i32 @llvm.[[TARGET]].firstbitshigh.i64 +// DXCHECK: sub i32 63, {{.*}} +// SPVCHECK-NOT: sub i32 63, {{.*}} +// DXCHECK: icmp eq i32 {{.*}}, -1 +// SPVCHECK-NOT: icmp eq i32 {{.*}}, -1 +// DXCHECK: select i1 {{.*}}, i32 {{.*}}, i32 {{.*}} +// 
SPVCHECK-NOT: select i1 {{.*}}, i32 {{.*}}, i32 {{.*}} uint test_firstbithigh_long(int64_t p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_long2 -// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i64 -// CHECK: sub <2 x i32> splat (i32 63), [[FBH]] +// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i64 +// DXCHECK: sub <2 x i32> splat (i32 63), {{.*}} +// SPVCHECK-NOT: sub <2 x i32> splat (i32 63), {{.*}} +// DXCHECK: icmp eq <2 x i32> {{.*}}, splat (i32 -1) +// SPVCHECK-NOT: icmp eq <2 x i32> {{.*}}, splat (i32 -1) +// DXCHECK: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}} +// SPVCHECK-NOT: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}} uint2 test_firstbithigh_long2(int64_t2 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_long3 -// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i64 -// CHECK: sub <3 x i32> splat (i32 63), [[FBH]] +// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i64 +// DXCHECK: sub <3 x i32> splat (i32 63), {{.*}} +// SPVCHECK-NOT: sub <3 x i32> splat (i32 63), {{.*}} +// DXCHECK: icmp eq <3 x i32> {{.*}}, splat (i32 -1) +// SPVCHECK-NOT: icmp eq <3 x i32> {{.*}}, splat (i32 -1) +// DXCHECK: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}} +// SPVCHECK-NOT: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}} uint3 test_firstbithigh_long3(int64_t3 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_long4 -// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i64 -// CHECK: sub <4 x i32> splat (i32 63), [[FBH]] +// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i64 +// DXCHECK: sub <4 x i32> splat (i32 63), {{.*}} +// SPVCHECK-NOT: sub <4 x i32> splat (i32 63), {{.*}} +// DXCHECK: icmp eq <4 x i32> {{.*}}, splat (i32 -1) +// SPVCHECK-NOT: icmp eq <4 x i32> {{.*}}, splat (i32 -1) +// DXCHECK: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} +// SPVCHECK-NOT: select i1 {{.*}}, <4 x i32> 
{{.*}}, <4 x i32> {{.*}} uint4 test_firstbithigh_long4(int64_t4 p0) { return firstbithigh(p0); } // CHECK-LABEL: test_firstbithigh_upcast -// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i32(<4 x i32> %{{.*}}) -// CHECK: [[SUB:%.*]] = sub <4 x i32> splat (i32 31), [[FBH]] -// CHECK: [[CONV:%.*]] = zext <4 x i32> [[SUB]] to <4 x i64> -// CHECK: ret <4 x i64> [[CONV]] +// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i32(<4 x i32> %{{.*}}) +// DXCHECK: sub <4 x i32> splat (i32 31), {{.*}} +// SPVCHECK-NOT: sub <4 x i32> splat (i32 31), {{.*}} +// CHECK: zext <4 x i32> {{.*}} to <4 x i64> +// CHECK: ret <4 x i64> {{.*}} uint64_t4 test_firstbithigh_upcast(uint4 p0) { return firstbithigh(p0); } >From 43110da7efe6d0c6a690e407e60f30aa9971ae3e Mon Sep 17 00:00:00 2001 From: Deric Cheung <[email protected]> Date: Tue, 4 Nov 2025 15:29:31 -0800 Subject: [PATCH 4/8] Wrap RUN lines to 80 cols --- clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl b/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl index 69e1ec8572d33..6e1e28c066656 100644 --- a/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl +++ b/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl @@ -1,9 +1,11 @@ // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ -// RUN: -fnative-int16-type -emit-llvm -o - | FileCheck %s -DTARGET=dx --check-prefixes=CHECK,DXCHECK +// RUN: -fnative-int16-type -emit-llvm -o - | FileCheck %s -DTARGET=dx \ +// RUN: --check-prefixes=CHECK,DXCHECK // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \ -// RUN: -fnative-int16-type -emit-llvm -o - | FileCheck %s -DTARGET=spv --check-prefixes=CHECK,SPVCHECK +// RUN: -fnative-int16-type -emit-llvm -o - | FileCheck %s -DTARGET=spv \ +// RUN: 
--check-prefixes=CHECK,SPVCHECK #ifdef __HLSL_ENABLE_16_BIT // CHECK-LABEL: test_firstbithigh_ushort >From fa70428bd3738462140e36a9caf2a06cd1ea8609 Mon Sep 17 00:00:00 2001 From: Deric Cheung <[email protected]> Date: Tue, 4 Nov 2025 16:17:43 -0800 Subject: [PATCH 5/8] Fix type of cond in select CHECKs in test --- .../CodeGenHLSL/builtins/firstbithigh.hlsl | 72 +++++++++---------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl b/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl index 6e1e28c066656..ddb82e5a156a8 100644 --- a/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl +++ b/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl @@ -26,8 +26,8 @@ uint test_firstbithigh_ushort(uint16_t p0) { // SPVCHECK-NOT: sub <2 x i32> splat (i32 15), {{.*}} // DXCHECK: icmp eq <2 x i32> {{.*}}, splat (i32 -1) // SPVCHECK-NOT: icmp eq <2 x i32> {{.*}}, splat (i32 -1) -// DXCHECK: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}} -// SPVCHECK-NOT: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}} +// DXCHECK: select <2 x i1> {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}} +// SPVCHECK-NOT: select <2 x i1> {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}} uint2 test_firstbithigh_ushort2(uint16_t2 p0) { return firstbithigh(p0); } @@ -38,8 +38,8 @@ uint2 test_firstbithigh_ushort2(uint16_t2 p0) { // SPVCHECK-NOT: sub <3 x i32> splat (i32 15), {{.*}} // DXCHECK: icmp eq <3 x i32> {{.*}}, splat (i32 -1) // SPVCHECK-NOT: icmp eq <3 x i32> {{.*}}, splat (i32 -1) -// DXCHECK: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}} -// SPVCHECK-NOT: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}} +// DXCHECK: select <3 x i1> {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}} +// SPVCHECK-NOT: select <3 x i1> {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}} uint3 test_firstbithigh_ushort3(uint16_t3 p0) { return firstbithigh(p0); } @@ -50,8 +50,8 @@ uint3 test_firstbithigh_ushort3(uint16_t3 p0) { // SPVCHECK-NOT: sub <4 x i32> splat (i32 15), 
{{.*}} // DXCHECK: icmp eq <4 x i32> {{.*}}, splat (i32 -1) // SPVCHECK-NOT: icmp eq <4 x i32> {{.*}}, splat (i32 -1) -// DXCHECK: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} -// SPVCHECK-NOT: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} +// DXCHECK: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} +// SPVCHECK-NOT: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} uint4 test_firstbithigh_ushort4(uint16_t4 p0) { return firstbithigh(p0); } @@ -74,8 +74,8 @@ uint test_firstbithigh_short(int16_t p0) { // SPVCHECK-NOT: sub <2 x i32> splat (i32 15), {{.*}} // DXCHECK: icmp eq <2 x i32> {{.*}}, splat (i32 -1) // SPVCHECK-NOT: icmp eq <2 x i32> {{.*}}, splat (i32 -1) -// DXCHECK: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}} -// SPVCHECK-NOT: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}} +// DXCHECK: select <2 x i1> {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}} +// SPVCHECK-NOT: select <2 x i1> {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}} uint2 test_firstbithigh_short2(int16_t2 p0) { return firstbithigh(p0); } @@ -86,8 +86,8 @@ uint2 test_firstbithigh_short2(int16_t2 p0) { // SPVCHECK-NOT: sub <3 x i32> splat (i32 15), {{.*}} // DXCHECK: icmp eq <3 x i32> {{.*}}, splat (i32 -1) // SPVCHECK-NOT: icmp eq <3 x i32> {{.*}}, splat (i32 -1) -// DXCHECK: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}} -// SPVCHECK-NOT: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}} +// DXCHECK: select <3 x i1> {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}} +// SPVCHECK-NOT: select <3 x i1> {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}} uint3 test_firstbithigh_short3(int16_t3 p0) { return firstbithigh(p0); } @@ -98,8 +98,8 @@ uint3 test_firstbithigh_short3(int16_t3 p0) { // SPVCHECK-NOT: sub <4 x i32> splat (i32 15), {{.*}} // DXCHECK: icmp eq <4 x i32> {{.*}}, splat (i32 -1) // SPVCHECK-NOT: icmp eq <4 x i32> {{.*}}, splat (i32 -1) -// DXCHECK: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} -// SPVCHECK-NOT: select i1 {{.*}}, <4 x i32> 
{{.*}}, <4 x i32> {{.*}} +// DXCHECK: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} +// SPVCHECK-NOT: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} uint4 test_firstbithigh_short4(int16_t4 p0) { return firstbithigh(p0); } @@ -123,8 +123,8 @@ uint test_firstbithigh_uint(uint p0) { // SPVCHECK-NOT: sub <2 x i32> splat (i32 31), {{.*}} // DXCHECK: icmp eq <2 x i32> {{.*}}, splat (i32 -1) // SPVCHECK-NOT: icmp eq <2 x i32> {{.*}}, splat (i32 -1) -// DXCHECK: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}} -// SPVCHECK-NOT: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}} +// DXCHECK: select <2 x i1> {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}} +// SPVCHECK-NOT: select <2 x i1> {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}} uint2 test_firstbithigh_uint2(uint2 p0) { return firstbithigh(p0); } @@ -135,8 +135,8 @@ uint2 test_firstbithigh_uint2(uint2 p0) { // SPVCHECK-NOT: sub <3 x i32> splat (i32 31), {{.*}} // DXCHECK: icmp eq <3 x i32> {{.*}}, splat (i32 -1) // SPVCHECK-NOT: icmp eq <3 x i32> {{.*}}, splat (i32 -1) -// DXCHECK: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}} -// SPVCHECK-NOT: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}} +// DXCHECK: select <3 x i1> {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}} +// SPVCHECK-NOT: select <3 x i1> {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}} uint3 test_firstbithigh_uint3(uint3 p0) { return firstbithigh(p0); } @@ -147,8 +147,8 @@ uint3 test_firstbithigh_uint3(uint3 p0) { // SPVCHECK-NOT: sub <4 x i32> splat (i32 31), {{.*}} // DXCHECK: icmp eq <4 x i32> {{.*}}, splat (i32 -1) // SPVCHECK-NOT: icmp eq <4 x i32> {{.*}}, splat (i32 -1) -// DXCHECK: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} -// SPVCHECK-NOT: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} +// DXCHECK: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} +// SPVCHECK-NOT: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} uint4 test_firstbithigh_uint4(uint4 p0) { return firstbithigh(p0); } @@ -171,8 
+171,8 @@ uint test_firstbithigh_ulong(uint64_t p0) { // SPVCHECK-NOT: sub <2 x i32> splat (i32 63), {{.*}} // DXCHECK: icmp eq <2 x i32> {{.*}}, splat (i32 -1) // SPVCHECK-NOT: icmp eq <2 x i32> {{.*}}, splat (i32 -1) -// DXCHECK: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}} -// SPVCHECK-NOT: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}} +// DXCHECK: select <2 x i1> {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}} +// SPVCHECK-NOT: select <2 x i1> {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}} uint2 test_firstbithigh_ulong2(uint64_t2 p0) { return firstbithigh(p0); } @@ -183,8 +183,8 @@ uint2 test_firstbithigh_ulong2(uint64_t2 p0) { // SPVCHECK-NOT: sub <3 x i32> splat (i32 63), {{.*}} // DXCHECK: icmp eq <3 x i32> {{.*}}, splat (i32 -1) // SPVCHECK-NOT: icmp eq <3 x i32> {{.*}}, splat (i32 -1) -// DXCHECK: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}} -// SPVCHECK-NOT: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}} +// DXCHECK: select <3 x i1> {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}} +// SPVCHECK-NOT: select <3 x i1> {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}} uint3 test_firstbithigh_ulong3(uint64_t3 p0) { return firstbithigh(p0); } @@ -195,8 +195,8 @@ uint3 test_firstbithigh_ulong3(uint64_t3 p0) { // SPVCHECK-NOT: sub <4 x i32> splat (i32 63), {{.*}} // DXCHECK: icmp eq <4 x i32> {{.*}}, splat (i32 -1) // SPVCHECK-NOT: icmp eq <4 x i32> {{.*}}, splat (i32 -1) -// DXCHECK: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} -// SPVCHECK-NOT: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} +// DXCHECK: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} +// SPVCHECK-NOT: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} uint4 test_firstbithigh_ulong4(uint64_t4 p0) { return firstbithigh(p0); } @@ -219,8 +219,8 @@ uint test_firstbithigh_int(int p0) { // SPVCHECK-NOT: sub <2 x i32> splat (i32 31), {{.*}} // DXCHECK: icmp eq <2 x i32> {{.*}}, splat (i32 -1) // SPVCHECK-NOT: icmp eq <2 x i32> {{.*}}, splat (i32 -1) -// DXCHECK: 
select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}} -// SPVCHECK-NOT: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}} +// DXCHECK: select <2 x i1> {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}} +// SPVCHECK-NOT: select <2 x i1> {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}} uint2 test_firstbithigh_int2(int2 p0) { return firstbithigh(p0); } @@ -231,8 +231,8 @@ uint2 test_firstbithigh_int2(int2 p0) { // SPVCHECK-NOT: sub <3 x i32> splat (i32 31), {{.*}} // DXCHECK: icmp eq <3 x i32> {{.*}}, splat (i32 -1) // SPVCHECK-NOT: icmp eq <3 x i32> {{.*}}, splat (i32 -1) -// DXCHECK: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}} -// SPVCHECK-NOT: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}} +// DXCHECK: select <3 x i1> {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}} +// SPVCHECK-NOT: select <3 x i1> {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}} uint3 test_firstbithigh_int3(int3 p0) { return firstbithigh(p0); } @@ -243,8 +243,8 @@ uint3 test_firstbithigh_int3(int3 p0) { // SPVCHECK-NOT: sub <4 x i32> splat (i32 31), {{.*}} // DXCHECK: icmp eq <4 x i32> {{.*}}, splat (i32 -1) // SPVCHECK-NOT: icmp eq <4 x i32> {{.*}}, splat (i32 -1) -// DXCHECK: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} -// SPVCHECK-NOT: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} +// DXCHECK: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} +// SPVCHECK-NOT: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} uint4 test_firstbithigh_int4(int4 p0) { return firstbithigh(p0); } @@ -267,8 +267,8 @@ uint test_firstbithigh_long(int64_t p0) { // SPVCHECK-NOT: sub <2 x i32> splat (i32 63), {{.*}} // DXCHECK: icmp eq <2 x i32> {{.*}}, splat (i32 -1) // SPVCHECK-NOT: icmp eq <2 x i32> {{.*}}, splat (i32 -1) -// DXCHECK: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}} -// SPVCHECK-NOT: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}} +// DXCHECK: select <2 x i1> {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}} +// SPVCHECK-NOT: select <2 x i1> {{.*}}, <2 x i32> {{.*}}, <2 x 
i32> {{.*}} uint2 test_firstbithigh_long2(int64_t2 p0) { return firstbithigh(p0); } @@ -279,8 +279,8 @@ uint2 test_firstbithigh_long2(int64_t2 p0) { // SPVCHECK-NOT: sub <3 x i32> splat (i32 63), {{.*}} // DXCHECK: icmp eq <3 x i32> {{.*}}, splat (i32 -1) // SPVCHECK-NOT: icmp eq <3 x i32> {{.*}}, splat (i32 -1) -// DXCHECK: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}} -// SPVCHECK-NOT: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}} +// DXCHECK: select <3 x i1> {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}} +// SPVCHECK-NOT: select <3 x i1> {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}} uint3 test_firstbithigh_long3(int64_t3 p0) { return firstbithigh(p0); } @@ -291,8 +291,8 @@ uint3 test_firstbithigh_long3(int64_t3 p0) { // SPVCHECK-NOT: sub <4 x i32> splat (i32 63), {{.*}} // DXCHECK: icmp eq <4 x i32> {{.*}}, splat (i32 -1) // SPVCHECK-NOT: icmp eq <4 x i32> {{.*}}, splat (i32 -1) -// DXCHECK: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} -// SPVCHECK-NOT: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} +// DXCHECK: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} +// SPVCHECK-NOT: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} uint4 test_firstbithigh_long4(int64_t4 p0) { return firstbithigh(p0); } >From 41971ee62164516b29922d680dfaed45accc6bda Mon Sep 17 00:00:00 2001 From: Deric Cheung <[email protected]> Date: Tue, 4 Nov 2025 16:22:11 -0800 Subject: [PATCH 6/8] Add missing check for icmp and select for test_firstbithigh_upcast --- clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl b/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl index ddb82e5a156a8..461897cd5b377 100644 --- a/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl +++ b/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl @@ -301,6 +301,10 @@ uint4 test_firstbithigh_long4(int64_t4 p0) { // CHECK: call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i32(<4 x i32> 
%{{.*}}) // DXCHECK: sub <4 x i32> splat (i32 31), {{.*}} // SPVCHECK-NOT: sub <4 x i32> splat (i32 31), {{.*}} +// DXCHECK: icmp eq <4 x i32> {{.*}}, splat (i32 -1) +// SPVCHECK-NOT: icmp eq <4 x i32> {{.*}}, splat (i32 -1) +// DXCHECK: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} +// SPVCHECK-NOT: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}} // CHECK: zext <4 x i32> {{.*}} to <4 x i64> // CHECK: ret <4 x i64> {{.*}} uint64_t4 test_firstbithigh_upcast(uint4 p0) { >From c6c2fa22e90d8026950c5eda1341cc0a2a5c422a Mon Sep 17 00:00:00 2001 From: Deric Cheung <[email protected]> Date: Wed, 5 Nov 2025 10:44:08 -0800 Subject: [PATCH 7/8] Combine firstbithigh_impl to remove code duplication --- .../lib/Headers/hlsl/hlsl_intrinsic_helpers.h | 18 +++--------------- clang/lib/Headers/hlsl/hlsl_intrinsics.h | 12 ++++++------ 2 files changed, 9 insertions(+), 21 deletions(-) diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h index 65d2095e9c5c2..a4e66f84a9937 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h @@ -148,24 +148,12 @@ template <typename T> constexpr T ldexp_impl(T X, T Exp) { return exp2(Exp) * X; } -template <typename T, int BitWidth> constexpr uint firstbithigh_impl(T X) { - uint FBH = __builtin_hlsl_elementwise_firstbithigh(X); +template <typename K, typename T, int BitWidth> constexpr uint firstbithigh_impl(T X) { + K FBH = __builtin_hlsl_elementwise_firstbithigh(X); #if defined(__DIRECTX__) // The firstbithigh DXIL ops count bits from the wrong side, so we need to // invert it for DirectX. 
- uint Inversion = (BitWidth - 1) - FBH; - FBH = select(FBH == -1, FBH, Inversion); -#endif - return FBH; -} - -template <typename T, int N, int BitWidth> -constexpr vector<uint, N> firstbithigh_impl(vector<T, N> X) { - vector<uint, N> FBH = __builtin_hlsl_elementwise_firstbithigh(X); -#if defined(__DIRECTX__) - // The firstbithigh DXIL ops count bits from the wrong side, so we need to - // invert it for DirectX. - vector<uint, N> Inversion = (BitWidth - 1) - FBH; + K Inversion = (BitWidth - 1) - FBH; FBH = select(FBH == -1, FBH, Inversion); #endif return FBH; diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h index 192c3a2c974d9..33ed14328ee8a 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h @@ -277,7 +277,7 @@ _HLSL_AVAILABILITY(shadermodel, 6.2) const inline __detail::enable_if_t<__detail::is_same<int16_t, T>::value || __detail::is_same<uint16_t, T>::value, uint> firstbithigh(T X) { - return __detail::firstbithigh_impl<T, 16>(X); + return __detail::firstbithigh_impl<uint, T, 16>(X); } template <typename T, int N> @@ -286,7 +286,7 @@ const inline __detail::enable_if_t<__detail::is_same<int16_t, T>::value || __detail::is_same<uint16_t, T>::value, vector<uint, N>> firstbithigh(vector<T, N> X) { - return __detail::firstbithigh_impl<T, N, 16>(X); + return __detail::firstbithigh_impl<vector<uint, N>, vector<T, N>, 16>(X); } #endif @@ -295,7 +295,7 @@ template <typename T> const inline __detail::enable_if_t< __detail::is_same<int, T>::value || __detail::is_same<uint, T>::value, uint> firstbithigh(T X) { - return __detail::firstbithigh_impl<T, 32>(X); + return __detail::firstbithigh_impl<uint, T, 32>(X); } template <typename T, int N> @@ -303,7 +303,7 @@ const inline __detail::enable_if_t<__detail::is_same<int, T>::value || __detail::is_same<uint, T>::value, vector<uint, N>> firstbithigh(vector<T, N> X) { - return __detail::firstbithigh_impl<T, N, 32>(X); + return 
__detail::firstbithigh_impl<vector<uint, N>, vector<T, N>, 32>(X); } template <typename T> @@ -311,7 +311,7 @@ const inline __detail::enable_if_t<__detail::is_same<int64_t, T>::value || __detail::is_same<uint64_t, T>::value, uint> firstbithigh(T X) { - return __detail::firstbithigh_impl<T, 64>(X); + return __detail::firstbithigh_impl<uint, T, 64>(X); } template <typename T, int N> @@ -319,7 +319,7 @@ const inline __detail::enable_if_t<__detail::is_same<int64_t, T>::value || __detail::is_same<uint64_t, T>::value, vector<uint, N>> firstbithigh(vector<T, N> X) { - return __detail::firstbithigh_impl<T, N, 64>(X); + return __detail::firstbithigh_impl<vector<uint, N>, vector<T, N>, 64>(X); } //===----------------------------------------------------------------------===// >From 362a09a0566d9cfe4e5376a1b3768396a5289f14 Mon Sep 17 00:00:00 2001 From: Deric Cheung <[email protected]> Date: Wed, 5 Nov 2025 10:47:42 -0800 Subject: [PATCH 8/8] Apply clang-format --- clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h index a4e66f84a9937..0a4efe6f8f6de 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h @@ -148,7 +148,8 @@ template <typename T> constexpr T ldexp_impl(T X, T Exp) { return exp2(Exp) * X; } -template <typename K, typename T, int BitWidth> constexpr uint firstbithigh_impl(T X) { +template <typename K, typename T, int BitWidth> +constexpr K firstbithigh_impl(T X) { K FBH = __builtin_hlsl_elementwise_firstbithigh(X); #if defined(__DIRECTX__) // The firstbithigh DXIL ops count bits from the wrong side, so we need to _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
