[clang] [HLSL] [DirectX] Invert the result of `firstbithigh` (PR #166419)

Deric C. via cfe-commits Wed, 05 Nov 2025 10:48:10 -0800

https://github.com/Icohedron updated 
https://github.com/llvm/llvm-project/pull/166419


>From a8fc9962edf7cb032ef45fd189ea9f40d91f362d Mon Sep 17 00:00:00 2001
From: Deric Cheung <[email protected]>
Date: Mon, 3 Nov 2025 19:00:15 -0800
Subject: [PATCH 1/8] Invert firstbithigh

---
 .../lib/Headers/hlsl/hlsl_alias_intrinsics.h  | 72 ----------------
 .../lib/Headers/hlsl/hlsl_intrinsic_helpers.h |  9 ++
 clang/lib/Headers/hlsl/hlsl_intrinsics.h      | 61 ++++++++++++++
 .../CodeGenHLSL/builtins/firstbithigh.hlsl    | 84 ++++++++++++-------
 4 files changed, 124 insertions(+), 102 deletions(-)

diff --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
index 4c5861c2c5f9d..c0914914a1262 100644
--- a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
@@ -1073,78 +1073,6 @@ float3 f16tof32(uint3);
 _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f16tof32)
 float4 f16tof32(uint4);
 
-//===----------------------------------------------------------------------===//
-// firstbithigh builtins
-//===----------------------------------------------------------------------===//
-
-/// \fn T firstbithigh(T Val)
-/// \brief Returns the location of the first set bit starting from the highest
-/// order bit and working downward, per component.
-/// \param Val the input value.
-
-#ifdef __HLSL_ENABLE_16_BIT
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint firstbithigh(int16_t);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint2 firstbithigh(int16_t2);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint3 firstbithigh(int16_t3);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint4 firstbithigh(int16_t4);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint firstbithigh(uint16_t);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint2 firstbithigh(uint16_t2);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint3 firstbithigh(uint16_t3);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint4 firstbithigh(uint16_t4);
-#endif
-
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint firstbithigh(int);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint2 firstbithigh(int2);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint3 firstbithigh(int3);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint4 firstbithigh(int4);
-
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint firstbithigh(uint);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint2 firstbithigh(uint2);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint3 firstbithigh(uint3);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint4 firstbithigh(uint4);
-
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint firstbithigh(int64_t);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint2 firstbithigh(int64_t2);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint3 firstbithigh(int64_t3);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint4 firstbithigh(int64_t4);
-
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint firstbithigh(uint64_t);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint2 firstbithigh(uint64_t2);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint3 firstbithigh(uint64_t3);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint4 firstbithigh(uint64_t4);
-
 //===----------------------------------------------------------------------===//
 // firstbitlow builtins
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h
index c877234479ad1..8560c75016b4f 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h
@@ -148,6 +148,15 @@ template <typename T> constexpr T ldexp_impl(T X, T Exp) {
   return exp2(Exp) * X;
 }
 
+template <typename T, int Bitwidth> constexpr uint firstbithigh_impl(T X) {
+  return (Bitwidth - 1) - __builtin_hlsl_elementwise_firstbithigh(X);
+}
+
+template <typename T, int N, int Bitwidth>
+constexpr vector<uint, N> firstbithigh_impl(vector<T, N> X) {
+  return (Bitwidth - 1) - __builtin_hlsl_elementwise_firstbithigh(X);
+}
+
 } // namespace __detail
 } // namespace hlsl
 
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index 5ba5bfb9abde0..192c3a2c974d9 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -261,6 +261,67 @@ faceforward(__detail::HLSL_FIXED_VECTOR<float, L> N,
   return __detail::faceforward_impl(N, I, Ng);
 }
 
+//===----------------------------------------------------------------------===//
+// firstbithigh builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn T firstbithigh(T Val)
+/// \brief Returns the location of the first set bit starting from the highest
+/// order bit and working downward, per component.
+/// \param Val the input value.
+
+#ifdef __HLSL_ENABLE_16_BIT
+
+template <typename T>
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+const inline __detail::enable_if_t<__detail::is_same<int16_t, T>::value ||
+                                       __detail::is_same<uint16_t, T>::value,
+                                   uint> firstbithigh(T X) {
+  return __detail::firstbithigh_impl<T, 16>(X);
+}
+
+template <typename T, int N>
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+const
+    inline __detail::enable_if_t<__detail::is_same<int16_t, T>::value ||
+                                     __detail::is_same<uint16_t, T>::value,
+                                 vector<uint, N>> firstbithigh(vector<T, N> X) {
+  return __detail::firstbithigh_impl<T, N, 16>(X);
+}
+
+#endif
+
+template <typename T>
+const inline __detail::enable_if_t<
+    __detail::is_same<int, T>::value || __detail::is_same<uint, T>::value, uint>
+firstbithigh(T X) {
+  return __detail::firstbithigh_impl<T, 32>(X);
+}
+
+template <typename T, int N>
+const inline __detail::enable_if_t<__detail::is_same<int, T>::value ||
+                                       __detail::is_same<uint, T>::value,
+                                   vector<uint, N>>
+firstbithigh(vector<T, N> X) {
+  return __detail::firstbithigh_impl<T, N, 32>(X);
+}
+
+template <typename T>
+const inline __detail::enable_if_t<__detail::is_same<int64_t, T>::value ||
+                                       __detail::is_same<uint64_t, T>::value,
+                                   uint>
+firstbithigh(T X) {
+  return __detail::firstbithigh_impl<T, 64>(X);
+}
+
+template <typename T, int N>
+const inline __detail::enable_if_t<__detail::is_same<int64_t, T>::value ||
+                                       __detail::is_same<uint64_t, T>::value,
+                                   vector<uint, N>>
+firstbithigh(vector<T, N> X) {
+  return __detail::firstbithigh_impl<T, N, 64>(X);
+}
+
 //===----------------------------------------------------------------------===//
 // fmod builtins
 //===----------------------------------------------------------------------===//
diff --git a/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl b/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl
index 368d652a6f779..c8fa942fa81ff 100644
--- a/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl
@@ -1,160 +1,184 @@
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type -fnative-int16-type \
-// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s -DTARGET=dx
+// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN:   -fnative-int16-type -emit-llvm -o - | FileCheck %s -DTARGET=dx
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN:   spirv-unknown-vulkan-compute %s -fnative-half-type -fnative-int16-type \
-// RUN: -emit-llvm -disable-llvm-passes \
-// RUN:   -o - | FileCheck %s -DTARGET=spv
+// RUN:   spirv-unknown-vulkan-compute %s -fnative-half-type \
+// RUN:   -fnative-int16-type -emit-llvm -o - | FileCheck %s -DTARGET=spv
 
 #ifdef __HLSL_ENABLE_16_BIT
 // CHECK-LABEL: test_firstbithigh_ushort
-// CHECK: call i32 @llvm.[[TARGET]].firstbituhigh.i16
+// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbituhigh.i16
+// CHECK: sub i32 15, [[FBH]]
 uint test_firstbithigh_ushort(uint16_t p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_ushort2
-// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i16
+// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i16
+// CHECK: sub <2 x i32> splat (i32 15), [[FBH]]
 uint2 test_firstbithigh_ushort2(uint16_t2 p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_ushort3
-// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i16
+// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i16
+// CHECK: sub <3 x i32> splat (i32 15), [[FBH]]
 uint3 test_firstbithigh_ushort3(uint16_t3 p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_ushort4
-// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i16
+// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i16
+// CHECK: sub <4 x i32> splat (i32 15), [[FBH]]
 uint4 test_firstbithigh_ushort4(uint16_t4 p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_short
-// CHECK: call i32 @llvm.[[TARGET]].firstbitshigh.i16
+// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbitshigh.i16
+// CHECK: sub i32 15, [[FBH]]
 uint test_firstbithigh_short(int16_t p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_short2
-// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i16
+// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i16
+// CHECK: sub <2 x i32> splat (i32 15), [[FBH]]
 uint2 test_firstbithigh_short2(int16_t2 p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_short3
-// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i16
+// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i16
+// CHECK: sub <3 x i32> splat (i32 15), [[FBH]]
 uint3 test_firstbithigh_short3(int16_t3 p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_short4
-// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i16
+// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i16
+// CHECK: sub <4 x i32> splat (i32 15), [[FBH]]
 uint4 test_firstbithigh_short4(int16_t4 p0) {
   return firstbithigh(p0);
 }
 #endif // __HLSL_ENABLE_16_BIT
 
 // CHECK-LABEL: test_firstbithigh_uint
-// CHECK: call i32 @llvm.[[TARGET]].firstbituhigh.i32
+// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbituhigh.i32
+// CHECK: sub i32 31, [[FBH]]
 uint test_firstbithigh_uint(uint p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_uint2
-// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i32
+// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i32
+// CHECK: sub <2 x i32> splat (i32 31), [[FBH]]
 uint2 test_firstbithigh_uint2(uint2 p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_uint3
-// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i32
+// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i32
+// CHECK: sub <3 x i32> splat (i32 31), [[FBH]]
 uint3 test_firstbithigh_uint3(uint3 p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_uint4
-// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i32
+// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i32
+// CHECK: sub <4 x i32> splat (i32 31), [[FBH]]
 uint4 test_firstbithigh_uint4(uint4 p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_ulong
-// CHECK: call i32 @llvm.[[TARGET]].firstbituhigh.i64
+// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbituhigh.i64
+// CHECK: sub i32 63, [[FBH]]
 uint test_firstbithigh_ulong(uint64_t p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_ulong2
-// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i64
+// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i64
+// CHECK: sub <2 x i32> splat (i32 63), [[FBH]]
 uint2 test_firstbithigh_ulong2(uint64_t2 p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_ulong3
-// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i64
+// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i64
+// CHECK: sub <3 x i32> splat (i32 63), [[FBH]]
 uint3 test_firstbithigh_ulong3(uint64_t3 p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_ulong4
-// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i64
+// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i64
+// CHECK: sub <4 x i32> splat (i32 63), [[FBH]]
 uint4 test_firstbithigh_ulong4(uint64_t4 p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_int
-// CHECK: call i32 @llvm.[[TARGET]].firstbitshigh.i32
+// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbitshigh.i32
+// CHECK: sub i32 31, [[FBH]]
 uint test_firstbithigh_int(int p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_int2
-// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i32
+// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i32
+// CHECK: sub <2 x i32> splat (i32 31), [[FBH]]
 uint2 test_firstbithigh_int2(int2 p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_int3
-// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i32
+// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i32
+// CHECK: sub <3 x i32> splat (i32 31), [[FBH]]
 uint3 test_firstbithigh_int3(int3 p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_int4
-// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i32
+// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i32
+// CHECK: sub <4 x i32> splat (i32 31), [[FBH]]
 uint4 test_firstbithigh_int4(int4 p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_long
-// CHECK: call i32 @llvm.[[TARGET]].firstbitshigh.i64
+// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbitshigh.i64
+// CHECK: sub i32 63, [[FBH]]
 uint test_firstbithigh_long(int64_t p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_long2
-// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i64
+// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i64
+// CHECK: sub <2 x i32> splat (i32 63), [[FBH]]
 uint2 test_firstbithigh_long2(int64_t2 p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_long3
-// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i64
+// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i64
+// CHECK: sub <3 x i32> splat (i32 63), [[FBH]]
 uint3 test_firstbithigh_long3(int64_t3 p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_long4
-// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i64
+// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i64
+// CHECK: sub <4 x i32> splat (i32 63), [[FBH]]
 uint4 test_firstbithigh_long4(int64_t4 p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_upcast
 // CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i32(<4 x i32> %{{.*}})
-// CHECK: [[CONV:%.*]] = zext <4 x i32> [[FBH]] to <4 x i64>
+// CHECK: [[SUB:%.*]] = sub <4 x i32> splat (i32 31), [[FBH]]
+// CHECK: [[CONV:%.*]] = zext <4 x i32> [[SUB]] to <4 x i64>
 // CHECK: ret <4 x i64> [[CONV]]
 uint64_t4 test_firstbithigh_upcast(uint4 p0) {
   return firstbithigh(p0);

>From ee253210af4261efa62fa7c1f6a3be5297ca1326 Mon Sep 17 00:00:00 2001
From: Deric Cheung <[email protected]>
Date: Tue, 4 Nov 2025 12:21:53 -0800
Subject: [PATCH 2/8] Fix firstbithigh sema errors test

---
 clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl
index f99e606fc6562..1f70186c78ad9 100644
--- a/clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl
@@ -12,7 +12,7 @@ int test_too_many_arg(int p0) {
 
 double test_int_builtin(double p0) {
   return firstbithigh(p0);
-  // expected-error@-1 {{call to 'firstbithigh' is ambiguous}}
+  // expected-error@-1 {{no matching function for call to 'firstbithigh'}}
 }
 
 double2 test_int_builtin_2(double2 p0) {

>From be827344e2fd122a4c9d65d9ff41358e8644da52 Mon Sep 17 00:00:00 2001
From: Deric Cheung <[email protected]>
Date: Tue, 4 Nov 2025 15:20:15 -0800
Subject: [PATCH 3/8] Do not invert firstbithigh under SPIR-V or if the result
 is -1

---
 .../lib/Headers/hlsl/hlsl_intrinsic_helpers.h |  22 +-
 .../CodeGenHLSL/builtins/firstbithigh.hlsl    | 229 +++++++++++++-----
 2 files changed, 193 insertions(+), 58 deletions(-)

diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h
index 8560c75016b4f..65d2095e9c5c2 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h
@@ -148,13 +148,27 @@ template <typename T> constexpr T ldexp_impl(T X, T Exp) {
   return exp2(Exp) * X;
 }
 
-template <typename T, int Bitwidth> constexpr uint firstbithigh_impl(T X) {
-  return (Bitwidth - 1) - __builtin_hlsl_elementwise_firstbithigh(X);
+template <typename T, int BitWidth> constexpr uint firstbithigh_impl(T X) {
+  uint FBH = __builtin_hlsl_elementwise_firstbithigh(X);
+#if defined(__DIRECTX__)
+  // The firstbithigh DXIL ops count bit positions from the MSB rather than
+  // the LSB, so invert the result for DirectX, leaving a -1 result unchanged.
+  uint Inversion = (BitWidth - 1) - FBH;
+  FBH = select(FBH == -1, FBH, Inversion);
+#endif
+  return FBH;
 }
 
-template <typename T, int N, int Bitwidth>
+template <typename T, int N, int BitWidth>
 constexpr vector<uint, N> firstbithigh_impl(vector<T, N> X) {
-  return (Bitwidth - 1) - __builtin_hlsl_elementwise_firstbithigh(X);
+  vector<uint, N> FBH = __builtin_hlsl_elementwise_firstbithigh(X);
+#if defined(__DIRECTX__)
+  // The firstbithigh DXIL ops count bit positions from the MSB rather than
+  // the LSB, so invert the result for DirectX, leaving a -1 result unchanged.
+  vector<uint, N> Inversion = (BitWidth - 1) - FBH;
+  FBH = select(FBH == -1, FBH, Inversion);
+#endif
+  return FBH;
 }
 
 } // namespace __detail
diff --git a/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl b/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl
index c8fa942fa81ff..69e1ec8572d33 100644
--- a/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl
@@ -1,185 +1,306 @@
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
-// RUN:   -fnative-int16-type -emit-llvm -o - | FileCheck %s -DTARGET=dx
+// RUN:   -fnative-int16-type -emit-llvm -o - | FileCheck %s -DTARGET=dx --check-prefixes=CHECK,DXCHECK
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   spirv-unknown-vulkan-compute %s -fnative-half-type \
-// RUN:   -fnative-int16-type -emit-llvm -o - | FileCheck %s -DTARGET=spv
+// RUN:   -fnative-int16-type -emit-llvm -o - | FileCheck %s -DTARGET=spv --check-prefixes=CHECK,SPVCHECK
 
 #ifdef __HLSL_ENABLE_16_BIT
 // CHECK-LABEL: test_firstbithigh_ushort
-// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbituhigh.i16
-// CHECK: sub i32 15, [[FBH]]
+// CHECK: call i32 @llvm.[[TARGET]].firstbituhigh.i16
+// DXCHECK: sub i32 15, {{.*}}
+// SPVCHECK-NOT: sub i32 15, {{.*}}
+// DXCHECK: icmp eq i32 {{.*}}, -1
+// SPVCHECK-NOT: icmp eq i32 {{.*}}, -1
+// DXCHECK: select i1 {{.*}}, i32 {{.*}}, i32 {{.*}}
+// SPVCHECK-NOT: select i1 {{.*}}, i32 {{.*}}, i32 {{.*}}
 uint test_firstbithigh_ushort(uint16_t p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_ushort2
-// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i16
-// CHECK: sub <2 x i32> splat (i32 15), [[FBH]]
+// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i16
+// DXCHECK: sub <2 x i32> splat (i32 15), {{.*}}
+// SPVCHECK-NOT: sub <2 x i32> splat (i32 15), {{.*}}
+// DXCHECK: icmp eq <2 x i32> {{.*}}, splat (i32 -1)
+// SPVCHECK-NOT: icmp eq <2 x i32> {{.*}}, splat (i32 -1)
+// DXCHECK: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}}
+// SPVCHECK-NOT: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}}
 uint2 test_firstbithigh_ushort2(uint16_t2 p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_ushort3
-// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i16
-// CHECK: sub <3 x i32> splat (i32 15), [[FBH]]
+// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i16
+// DXCHECK: sub <3 x i32> splat (i32 15), {{.*}}
+// SPVCHECK-NOT: sub <3 x i32> splat (i32 15), {{.*}}
+// DXCHECK: icmp eq <3 x i32> {{.*}}, splat (i32 -1)
+// SPVCHECK-NOT: icmp eq <3 x i32> {{.*}}, splat (i32 -1)
+// DXCHECK: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}}
+// SPVCHECK-NOT: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}}
 uint3 test_firstbithigh_ushort3(uint16_t3 p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_ushort4
-// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i16
-// CHECK: sub <4 x i32> splat (i32 15), [[FBH]]
+// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i16
+// DXCHECK: sub <4 x i32> splat (i32 15), {{.*}}
+// SPVCHECK-NOT: sub <4 x i32> splat (i32 15), {{.*}}
+// DXCHECK: icmp eq <4 x i32> {{.*}}, splat (i32 -1)
+// SPVCHECK-NOT: icmp eq <4 x i32> {{.*}}, splat (i32 -1)
+// DXCHECK: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
+// SPVCHECK-NOT: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
 uint4 test_firstbithigh_ushort4(uint16_t4 p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_short
-// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbitshigh.i16
-// CHECK: sub i32 15, [[FBH]]
+// CHECK: call i32 @llvm.[[TARGET]].firstbitshigh.i16
+// DXCHECK: sub i32 15, {{.*}}
+// SPVCHECK-NOT: sub i32 15, {{.*}}
+// DXCHECK: icmp eq i32 {{.*}}, -1
+// SPVCHECK-NOT: icmp eq i32 {{.*}}, -1
+// DXCHECK: select i1 {{.*}}, i32 {{.*}}, i32 {{.*}}
+// SPVCHECK-NOT: select i1 {{.*}}, i32 {{.*}}, i32 {{.*}}
 uint test_firstbithigh_short(int16_t p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_short2
-// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i16
-// CHECK: sub <2 x i32> splat (i32 15), [[FBH]]
+// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i16
+// DXCHECK: sub <2 x i32> splat (i32 15), {{.*}}
+// SPVCHECK-NOT: sub <2 x i32> splat (i32 15), {{.*}}
+// DXCHECK: icmp eq <2 x i32> {{.*}}, splat (i32 -1)
+// SPVCHECK-NOT: icmp eq <2 x i32> {{.*}}, splat (i32 -1)
+// DXCHECK: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}}
+// SPVCHECK-NOT: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}}
 uint2 test_firstbithigh_short2(int16_t2 p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_short3
-// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i16
-// CHECK: sub <3 x i32> splat (i32 15), [[FBH]]
+// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i16
+// DXCHECK: sub <3 x i32> splat (i32 15), {{.*}}
+// SPVCHECK-NOT: sub <3 x i32> splat (i32 15), {{.*}}
+// DXCHECK: icmp eq <3 x i32> {{.*}}, splat (i32 -1)
+// SPVCHECK-NOT: icmp eq <3 x i32> {{.*}}, splat (i32 -1)
+// DXCHECK: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}}
+// SPVCHECK-NOT: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}}
 uint3 test_firstbithigh_short3(int16_t3 p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_short4
-// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i16
-// CHECK: sub <4 x i32> splat (i32 15), [[FBH]]
+// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i16
+// DXCHECK: sub <4 x i32> splat (i32 15), {{.*}}
+// SPVCHECK-NOT: sub <4 x i32> splat (i32 15), {{.*}}
+// DXCHECK: icmp eq <4 x i32> {{.*}}, splat (i32 -1)
+// SPVCHECK-NOT: icmp eq <4 x i32> {{.*}}, splat (i32 -1)
+// DXCHECK: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
+// SPVCHECK-NOT: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
 uint4 test_firstbithigh_short4(int16_t4 p0) {
   return firstbithigh(p0);
 }
 #endif // __HLSL_ENABLE_16_BIT
 
 // CHECK-LABEL: test_firstbithigh_uint
-// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbituhigh.i32
-// CHECK: sub i32 31, [[FBH]]
+// CHECK: call i32 @llvm.[[TARGET]].firstbituhigh.i32
+// DXCHECK: sub i32 31, {{.*}}
+// SPVCHECK-NOT: sub i32 31, {{.*}}
+// DXCHECK: icmp eq i32 {{.*}}, -1
+// SPVCHECK-NOT: icmp eq i32 {{.*}}, -1
+// DXCHECK: select i1 {{.*}}, i32 {{.*}}, i32 {{.*}}
+// SPVCHECK-NOT: select i1 {{.*}}, i32 {{.*}}, i32 {{.*}}
 uint test_firstbithigh_uint(uint p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_uint2
-// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i32
-// CHECK: sub <2 x i32> splat (i32 31), [[FBH]]
+// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i32
+// DXCHECK: sub <2 x i32> splat (i32 31), {{.*}}
+// SPVCHECK-NOT: sub <2 x i32> splat (i32 31), {{.*}}
+// DXCHECK: icmp eq <2 x i32> {{.*}}, splat (i32 -1)
+// SPVCHECK-NOT: icmp eq <2 x i32> {{.*}}, splat (i32 -1)
+// DXCHECK: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}}
+// SPVCHECK-NOT: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}}
 uint2 test_firstbithigh_uint2(uint2 p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_uint3
-// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i32
-// CHECK: sub <3 x i32> splat (i32 31), [[FBH]]
+// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i32
+// DXCHECK: sub <3 x i32> splat (i32 31), {{.*}}
+// SPVCHECK-NOT: sub <3 x i32> splat (i32 31), {{.*}}
+// DXCHECK: icmp eq <3 x i32> {{.*}}, splat (i32 -1)
+// SPVCHECK-NOT: icmp eq <3 x i32> {{.*}}, splat (i32 -1)
+// DXCHECK: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}}
+// SPVCHECK-NOT: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}}
 uint3 test_firstbithigh_uint3(uint3 p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_uint4
-// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i32
-// CHECK: sub <4 x i32> splat (i32 31), [[FBH]]
+// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i32
+// DXCHECK: sub <4 x i32> splat (i32 31), {{.*}}
+// SPVCHECK-NOT: sub <4 x i32> splat (i32 31), {{.*}}
+// DXCHECK: icmp eq <4 x i32> {{.*}}, splat (i32 -1)
+// SPVCHECK-NOT: icmp eq <4 x i32> {{.*}}, splat (i32 -1)
+// DXCHECK: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
+// SPVCHECK-NOT: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
 uint4 test_firstbithigh_uint4(uint4 p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_ulong
-// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbituhigh.i64
-// CHECK: sub i32 63, [[FBH]]
+// CHECK: call i32 @llvm.[[TARGET]].firstbituhigh.i64
+// DXCHECK: sub i32 63, {{.*}}
+// SPVCHECK-NOT: sub i32 63, {{.*}}
+// DXCHECK: icmp eq i32 {{.*}}, -1
+// SPVCHECK-NOT: icmp eq i32 {{.*}}, -1
+// DXCHECK: select i1 {{.*}}, i32 {{.*}}, i32 {{.*}}
+// SPVCHECK-NOT: select i1 {{.*}}, i32 {{.*}}, i32 {{.*}}
 uint test_firstbithigh_ulong(uint64_t p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_ulong2
-// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i64
-// CHECK: sub <2 x i32> splat (i32 63), [[FBH]]
+// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i64
+// DXCHECK: sub <2 x i32> splat (i32 63), {{.*}}
+// SPVCHECK-NOT: sub <2 x i32> splat (i32 63), {{.*}}
+// DXCHECK: icmp eq <2 x i32> {{.*}}, splat (i32 -1)
+// SPVCHECK-NOT: icmp eq <2 x i32> {{.*}}, splat (i32 -1)
+// DXCHECK: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}}
+// SPVCHECK-NOT: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}}
 uint2 test_firstbithigh_ulong2(uint64_t2 p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_ulong3
-// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i64
-// CHECK: sub <3 x i32> splat (i32 63), [[FBH]]
+// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i64
+// DXCHECK: sub <3 x i32> splat (i32 63), {{.*}}
+// SPVCHECK-NOT: sub <3 x i32> splat (i32 63), {{.*}}
+// DXCHECK: icmp eq <3 x i32> {{.*}}, splat (i32 -1)
+// SPVCHECK-NOT: icmp eq <3 x i32> {{.*}}, splat (i32 -1)
+// DXCHECK: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}}
+// SPVCHECK-NOT: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}}
 uint3 test_firstbithigh_ulong3(uint64_t3 p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_ulong4
-// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i64
-// CHECK: sub <4 x i32> splat (i32 63), [[FBH]]
+// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i64
+// DXCHECK: sub <4 x i32> splat (i32 63), {{.*}}
+// SPVCHECK-NOT: sub <4 x i32> splat (i32 63), {{.*}}
+// DXCHECK: icmp eq <4 x i32> {{.*}}, splat (i32 -1)
+// SPVCHECK-NOT: icmp eq <4 x i32> {{.*}}, splat (i32 -1)
+// DXCHECK: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
+// SPVCHECK-NOT: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
 uint4 test_firstbithigh_ulong4(uint64_t4 p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_int
-// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbitshigh.i32
-// CHECK: sub i32 31, [[FBH]]
+// CHECK: call i32 @llvm.[[TARGET]].firstbitshigh.i32
+// DXCHECK: sub i32 31, {{.*}}
+// SPVCHECK-NOT: sub i32 31, {{.*}}
+// DXCHECK: icmp eq i32 {{.*}}, -1
+// SPVCHECK-NOT: icmp eq i32 {{.*}}, -1
+// DXCHECK: select i1 {{.*}}, i32 {{.*}}, i32 {{.*}}
+// SPVCHECK-NOT: select i1 {{.*}}, i32 {{.*}}, i32 {{.*}}
 uint test_firstbithigh_int(int p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_int2
-// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i32
-// CHECK: sub <2 x i32> splat (i32 31), [[FBH]]
+// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i32
+// DXCHECK: sub <2 x i32> splat (i32 31), {{.*}}
+// SPVCHECK-NOT: sub <2 x i32> splat (i32 31), {{.*}}
+// DXCHECK: icmp eq <2 x i32> {{.*}}, splat (i32 -1)
+// SPVCHECK-NOT: icmp eq <2 x i32> {{.*}}, splat (i32 -1)
+// DXCHECK: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}}
+// SPVCHECK-NOT: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}}
 uint2 test_firstbithigh_int2(int2 p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_int3
-// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i32
-// CHECK: sub <3 x i32> splat (i32 31), [[FBH]]
+// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i32
+// DXCHECK: sub <3 x i32> splat (i32 31), {{.*}}
+// SPVCHECK-NOT: sub <3 x i32> splat (i32 31), {{.*}}
+// DXCHECK: icmp eq <3 x i32> {{.*}}, splat (i32 -1)
+// SPVCHECK-NOT: icmp eq <3 x i32> {{.*}}, splat (i32 -1)
+// DXCHECK: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}}
+// SPVCHECK-NOT: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}}
 uint3 test_firstbithigh_int3(int3 p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_int4
-// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i32
-// CHECK: sub <4 x i32> splat (i32 31), [[FBH]]
+// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i32
+// DXCHECK: sub <4 x i32> splat (i32 31), {{.*}}
+// SPVCHECK-NOT: sub <4 x i32> splat (i32 31), {{.*}}
+// DXCHECK: icmp eq <4 x i32> {{.*}}, splat (i32 -1)
+// SPVCHECK-NOT: icmp eq <4 x i32> {{.*}}, splat (i32 -1)
+// DXCHECK: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
+// SPVCHECK-NOT: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
 uint4 test_firstbithigh_int4(int4 p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_long
-// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbitshigh.i64
-// CHECK: sub i32 63, [[FBH]]
+// CHECK: call i32 @llvm.[[TARGET]].firstbitshigh.i64
+// DXCHECK: sub i32 63, {{.*}}
+// SPVCHECK-NOT: sub i32 63, {{.*}}
+// DXCHECK: icmp eq i32 {{.*}}, -1
+// SPVCHECK-NOT: icmp eq i32 {{.*}}, -1
+// DXCHECK: select i1 {{.*}}, i32 {{.*}}, i32 {{.*}}
+// SPVCHECK-NOT: select i1 {{.*}}, i32 {{.*}}, i32 {{.*}}
 uint test_firstbithigh_long(int64_t p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_long2
-// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i64
-// CHECK: sub <2 x i32> splat (i32 63), [[FBH]]
+// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i64
+// DXCHECK: sub <2 x i32> splat (i32 63), {{.*}}
+// SPVCHECK-NOT: sub <2 x i32> splat (i32 63), {{.*}}
+// DXCHECK: icmp eq <2 x i32> {{.*}}, splat (i32 -1)
+// SPVCHECK-NOT: icmp eq <2 x i32> {{.*}}, splat (i32 -1)
+// DXCHECK: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}}
+// SPVCHECK-NOT: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}}
 uint2 test_firstbithigh_long2(int64_t2 p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_long3
-// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i64
-// CHECK: sub <3 x i32> splat (i32 63), [[FBH]]
+// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i64
+// DXCHECK: sub <3 x i32> splat (i32 63), {{.*}}
+// SPVCHECK-NOT: sub <3 x i32> splat (i32 63), {{.*}}
+// DXCHECK: icmp eq <3 x i32> {{.*}}, splat (i32 -1)
+// SPVCHECK-NOT: icmp eq <3 x i32> {{.*}}, splat (i32 -1)
+// DXCHECK: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}}
+// SPVCHECK-NOT: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}}
 uint3 test_firstbithigh_long3(int64_t3 p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_long4
-// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i64
-// CHECK: sub <4 x i32> splat (i32 63), [[FBH]]
+// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i64
+// DXCHECK: sub <4 x i32> splat (i32 63), {{.*}}
+// SPVCHECK-NOT: sub <4 x i32> splat (i32 63), {{.*}}
+// DXCHECK: icmp eq <4 x i32> {{.*}}, splat (i32 -1)
+// SPVCHECK-NOT: icmp eq <4 x i32> {{.*}}, splat (i32 -1)
+// DXCHECK: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
+// SPVCHECK-NOT: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
 uint4 test_firstbithigh_long4(int64_t4 p0) {
   return firstbithigh(p0);
 }
 
 // CHECK-LABEL: test_firstbithigh_upcast
-// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i32(<4 x i32> %{{.*}})
-// CHECK: [[SUB:%.*]] = sub <4 x i32> splat (i32 31), [[FBH]]
-// CHECK: [[CONV:%.*]] = zext <4 x i32> [[SUB]] to <4 x i64>
-// CHECK: ret <4 x i64> [[CONV]]
+// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i32(<4 x i32> %{{.*}})
+// DXCHECK: sub <4 x i32> splat (i32 31), {{.*}}
+// SPVCHECK-NOT: sub <4 x i32> splat (i32 31), {{.*}}
+// CHECK: zext <4 x i32> {{.*}} to <4 x i64>
+// CHECK: ret <4 x i64> {{.*}}
 uint64_t4 test_firstbithigh_upcast(uint4 p0) {
   return firstbithigh(p0);
 }

>From 43110da7efe6d0c6a690e407e60f30aa9971ae3e Mon Sep 17 00:00:00 2001
From: Deric Cheung <[email protected]>
Date: Tue, 4 Nov 2025 15:29:31 -0800
Subject: [PATCH 4/8] Wrap RUN lines to 80 cols

---
 clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl b/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl
index 69e1ec8572d33..6e1e28c066656 100644
--- a/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl
@@ -1,9 +1,11 @@
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
-// RUN:   -fnative-int16-type -emit-llvm -o - | FileCheck %s -DTARGET=dx --check-prefixes=CHECK,DXCHECK
+// RUN:   -fnative-int16-type -emit-llvm -o - | FileCheck %s -DTARGET=dx \
+// RUN:   --check-prefixes=CHECK,DXCHECK
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   spirv-unknown-vulkan-compute %s -fnative-half-type \
-// RUN:   -fnative-int16-type -emit-llvm -o - | FileCheck %s -DTARGET=spv --check-prefixes=CHECK,SPVCHECK
+// RUN:   -fnative-int16-type -emit-llvm -o - | FileCheck %s -DTARGET=spv \
+// RUN:   --check-prefixes=CHECK,SPVCHECK
 
 #ifdef __HLSL_ENABLE_16_BIT
 // CHECK-LABEL: test_firstbithigh_ushort

>From fa70428bd3738462140e36a9caf2a06cd1ea8609 Mon Sep 17 00:00:00 2001
From: Deric Cheung <[email protected]>
Date: Tue, 4 Nov 2025 16:17:43 -0800
Subject: [PATCH 5/8] Fix type of cond in select CHECKs in test

---
 .../CodeGenHLSL/builtins/firstbithigh.hlsl    | 72 +++++++++----------
 1 file changed, 36 insertions(+), 36 deletions(-)

diff --git a/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl b/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl
index 6e1e28c066656..ddb82e5a156a8 100644
--- a/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl
@@ -26,8 +26,8 @@ uint test_firstbithigh_ushort(uint16_t p0) {
 // SPVCHECK-NOT: sub <2 x i32> splat (i32 15), {{.*}}
 // DXCHECK: icmp eq <2 x i32> {{.*}}, splat (i32 -1)
 // SPVCHECK-NOT: icmp eq <2 x i32> {{.*}}, splat (i32 -1)
-// DXCHECK: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}}
-// SPVCHECK-NOT: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}}
+// DXCHECK: select <2 x i1> {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}}
+// SPVCHECK-NOT: select <2 x i1> {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}}
 uint2 test_firstbithigh_ushort2(uint16_t2 p0) {
   return firstbithigh(p0);
 }
@@ -38,8 +38,8 @@ uint2 test_firstbithigh_ushort2(uint16_t2 p0) {
 // SPVCHECK-NOT: sub <3 x i32> splat (i32 15), {{.*}}
 // DXCHECK: icmp eq <3 x i32> {{.*}}, splat (i32 -1)
 // SPVCHECK-NOT: icmp eq <3 x i32> {{.*}}, splat (i32 -1)
-// DXCHECK: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}}
-// SPVCHECK-NOT: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}}
+// DXCHECK: select <3 x i1> {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}}
+// SPVCHECK-NOT: select <3 x i1> {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}}
 uint3 test_firstbithigh_ushort3(uint16_t3 p0) {
   return firstbithigh(p0);
 }
@@ -50,8 +50,8 @@ uint3 test_firstbithigh_ushort3(uint16_t3 p0) {
 // SPVCHECK-NOT: sub <4 x i32> splat (i32 15), {{.*}}
 // DXCHECK: icmp eq <4 x i32> {{.*}}, splat (i32 -1)
 // SPVCHECK-NOT: icmp eq <4 x i32> {{.*}}, splat (i32 -1)
-// DXCHECK: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
-// SPVCHECK-NOT: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
+// DXCHECK: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
+// SPVCHECK-NOT: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
 uint4 test_firstbithigh_ushort4(uint16_t4 p0) {
   return firstbithigh(p0);
 }
@@ -74,8 +74,8 @@ uint test_firstbithigh_short(int16_t p0) {
 // SPVCHECK-NOT: sub <2 x i32> splat (i32 15), {{.*}}
 // DXCHECK: icmp eq <2 x i32> {{.*}}, splat (i32 -1)
 // SPVCHECK-NOT: icmp eq <2 x i32> {{.*}}, splat (i32 -1)
-// DXCHECK: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}}
-// SPVCHECK-NOT: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}}
+// DXCHECK: select <2 x i1> {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}}
+// SPVCHECK-NOT: select <2 x i1> {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}}
 uint2 test_firstbithigh_short2(int16_t2 p0) {
   return firstbithigh(p0);
 }
@@ -86,8 +86,8 @@ uint2 test_firstbithigh_short2(int16_t2 p0) {
 // SPVCHECK-NOT: sub <3 x i32> splat (i32 15), {{.*}}
 // DXCHECK: icmp eq <3 x i32> {{.*}}, splat (i32 -1)
 // SPVCHECK-NOT: icmp eq <3 x i32> {{.*}}, splat (i32 -1)
-// DXCHECK: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}}
-// SPVCHECK-NOT: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}}
+// DXCHECK: select <3 x i1> {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}}
+// SPVCHECK-NOT: select <3 x i1> {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}}
 uint3 test_firstbithigh_short3(int16_t3 p0) {
   return firstbithigh(p0);
 }
@@ -98,8 +98,8 @@ uint3 test_firstbithigh_short3(int16_t3 p0) {
 // SPVCHECK-NOT: sub <4 x i32> splat (i32 15), {{.*}}
 // DXCHECK: icmp eq <4 x i32> {{.*}}, splat (i32 -1)
 // SPVCHECK-NOT: icmp eq <4 x i32> {{.*}}, splat (i32 -1)
-// DXCHECK: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
-// SPVCHECK-NOT: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
+// DXCHECK: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
+// SPVCHECK-NOT: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
 uint4 test_firstbithigh_short4(int16_t4 p0) {
   return firstbithigh(p0);
 }
@@ -123,8 +123,8 @@ uint test_firstbithigh_uint(uint p0) {
 // SPVCHECK-NOT: sub <2 x i32> splat (i32 31), {{.*}}
 // DXCHECK: icmp eq <2 x i32> {{.*}}, splat (i32 -1)
 // SPVCHECK-NOT: icmp eq <2 x i32> {{.*}}, splat (i32 -1)
-// DXCHECK: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}}
-// SPVCHECK-NOT: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}}
+// DXCHECK: select <2 x i1> {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}}
+// SPVCHECK-NOT: select <2 x i1> {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}}
 uint2 test_firstbithigh_uint2(uint2 p0) {
   return firstbithigh(p0);
 }
@@ -135,8 +135,8 @@ uint2 test_firstbithigh_uint2(uint2 p0) {
 // SPVCHECK-NOT: sub <3 x i32> splat (i32 31), {{.*}}
 // DXCHECK: icmp eq <3 x i32> {{.*}}, splat (i32 -1)
 // SPVCHECK-NOT: icmp eq <3 x i32> {{.*}}, splat (i32 -1)
-// DXCHECK: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}}
-// SPVCHECK-NOT: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}}
+// DXCHECK: select <3 x i1> {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}}
+// SPVCHECK-NOT: select <3 x i1> {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}}
 uint3 test_firstbithigh_uint3(uint3 p0) {
   return firstbithigh(p0);
 }
@@ -147,8 +147,8 @@ uint3 test_firstbithigh_uint3(uint3 p0) {
 // SPVCHECK-NOT: sub <4 x i32> splat (i32 31), {{.*}}
 // DXCHECK: icmp eq <4 x i32> {{.*}}, splat (i32 -1)
 // SPVCHECK-NOT: icmp eq <4 x i32> {{.*}}, splat (i32 -1)
-// DXCHECK: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
-// SPVCHECK-NOT: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
+// DXCHECK: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
+// SPVCHECK-NOT: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
 uint4 test_firstbithigh_uint4(uint4 p0) {
   return firstbithigh(p0);
 }
@@ -171,8 +171,8 @@ uint test_firstbithigh_ulong(uint64_t p0) {
 // SPVCHECK-NOT: sub <2 x i32> splat (i32 63), {{.*}}
 // DXCHECK: icmp eq <2 x i32> {{.*}}, splat (i32 -1)
 // SPVCHECK-NOT: icmp eq <2 x i32> {{.*}}, splat (i32 -1)
-// DXCHECK: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}}
-// SPVCHECK-NOT: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}}
+// DXCHECK: select <2 x i1> {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}}
+// SPVCHECK-NOT: select <2 x i1> {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}}
 uint2 test_firstbithigh_ulong2(uint64_t2 p0) {
   return firstbithigh(p0);
 }
@@ -183,8 +183,8 @@ uint2 test_firstbithigh_ulong2(uint64_t2 p0) {
 // SPVCHECK-NOT: sub <3 x i32> splat (i32 63), {{.*}}
 // DXCHECK: icmp eq <3 x i32> {{.*}}, splat (i32 -1)
 // SPVCHECK-NOT: icmp eq <3 x i32> {{.*}}, splat (i32 -1)
-// DXCHECK: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}}
-// SPVCHECK-NOT: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}}
+// DXCHECK: select <3 x i1> {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}}
+// SPVCHECK-NOT: select <3 x i1> {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}}
 uint3 test_firstbithigh_ulong3(uint64_t3 p0) {
   return firstbithigh(p0);
 }
@@ -195,8 +195,8 @@ uint3 test_firstbithigh_ulong3(uint64_t3 p0) {
 // SPVCHECK-NOT: sub <4 x i32> splat (i32 63), {{.*}}
 // DXCHECK: icmp eq <4 x i32> {{.*}}, splat (i32 -1)
 // SPVCHECK-NOT: icmp eq <4 x i32> {{.*}}, splat (i32 -1)
-// DXCHECK: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
-// SPVCHECK-NOT: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
+// DXCHECK: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
+// SPVCHECK-NOT: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
 uint4 test_firstbithigh_ulong4(uint64_t4 p0) {
   return firstbithigh(p0);
 }
@@ -219,8 +219,8 @@ uint test_firstbithigh_int(int p0) {
 // SPVCHECK-NOT: sub <2 x i32> splat (i32 31), {{.*}}
 // DXCHECK: icmp eq <2 x i32> {{.*}}, splat (i32 -1)
 // SPVCHECK-NOT: icmp eq <2 x i32> {{.*}}, splat (i32 -1)
-// DXCHECK: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}}
-// SPVCHECK-NOT: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}}
+// DXCHECK: select <2 x i1> {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}}
+// SPVCHECK-NOT: select <2 x i1> {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}}
 uint2 test_firstbithigh_int2(int2 p0) {
   return firstbithigh(p0);
 }
@@ -231,8 +231,8 @@ uint2 test_firstbithigh_int2(int2 p0) {
 // SPVCHECK-NOT: sub <3 x i32> splat (i32 31), {{.*}}
 // DXCHECK: icmp eq <3 x i32> {{.*}}, splat (i32 -1)
 // SPVCHECK-NOT: icmp eq <3 x i32> {{.*}}, splat (i32 -1)
-// DXCHECK: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}}
-// SPVCHECK-NOT: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}}
+// DXCHECK: select <3 x i1> {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}}
+// SPVCHECK-NOT: select <3 x i1> {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}}
 uint3 test_firstbithigh_int3(int3 p0) {
   return firstbithigh(p0);
 }
@@ -243,8 +243,8 @@ uint3 test_firstbithigh_int3(int3 p0) {
 // SPVCHECK-NOT: sub <4 x i32> splat (i32 31), {{.*}}
 // DXCHECK: icmp eq <4 x i32> {{.*}}, splat (i32 -1)
 // SPVCHECK-NOT: icmp eq <4 x i32> {{.*}}, splat (i32 -1)
-// DXCHECK: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
-// SPVCHECK-NOT: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
+// DXCHECK: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
+// SPVCHECK-NOT: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
 uint4 test_firstbithigh_int4(int4 p0) {
   return firstbithigh(p0);
 }
@@ -267,8 +267,8 @@ uint test_firstbithigh_long(int64_t p0) {
 // SPVCHECK-NOT: sub <2 x i32> splat (i32 63), {{.*}}
 // DXCHECK: icmp eq <2 x i32> {{.*}}, splat (i32 -1)
 // SPVCHECK-NOT: icmp eq <2 x i32> {{.*}}, splat (i32 -1)
-// DXCHECK: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}}
-// SPVCHECK-NOT: select i1 {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}}
+// DXCHECK: select <2 x i1> {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}}
+// SPVCHECK-NOT: select <2 x i1> {{.*}}, <2 x i32> {{.*}}, <2 x i32> {{.*}}
 uint2 test_firstbithigh_long2(int64_t2 p0) {
   return firstbithigh(p0);
 }
@@ -279,8 +279,8 @@ uint2 test_firstbithigh_long2(int64_t2 p0) {
 // SPVCHECK-NOT: sub <3 x i32> splat (i32 63), {{.*}}
 // DXCHECK: icmp eq <3 x i32> {{.*}}, splat (i32 -1)
 // SPVCHECK-NOT: icmp eq <3 x i32> {{.*}}, splat (i32 -1)
-// DXCHECK: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}}
-// SPVCHECK-NOT: select i1 {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}}
+// DXCHECK: select <3 x i1> {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}}
+// SPVCHECK-NOT: select <3 x i1> {{.*}}, <3 x i32> {{.*}}, <3 x i32> {{.*}}
 uint3 test_firstbithigh_long3(int64_t3 p0) {
   return firstbithigh(p0);
 }
@@ -291,8 +291,8 @@ uint3 test_firstbithigh_long3(int64_t3 p0) {
 // SPVCHECK-NOT: sub <4 x i32> splat (i32 63), {{.*}}
 // DXCHECK: icmp eq <4 x i32> {{.*}}, splat (i32 -1)
 // SPVCHECK-NOT: icmp eq <4 x i32> {{.*}}, splat (i32 -1)
-// DXCHECK: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
-// SPVCHECK-NOT: select i1 {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
+// DXCHECK: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
+// SPVCHECK-NOT: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
 uint4 test_firstbithigh_long4(int64_t4 p0) {
   return firstbithigh(p0);
 }

>From 41971ee62164516b29922d680dfaed45accc6bda Mon Sep 17 00:00:00 2001
From: Deric Cheung <[email protected]>
Date: Tue, 4 Nov 2025 16:22:11 -0800
Subject: [PATCH 6/8] Add missing check for icmp and select for
 test_firstbithigh_upcast

---
 clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl b/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl
index ddb82e5a156a8..461897cd5b377 100644
--- a/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl
@@ -301,6 +301,10 @@ uint4 test_firstbithigh_long4(int64_t4 p0) {
 // CHECK: call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i32(<4 x i32> %{{.*}})
 // DXCHECK: sub <4 x i32> splat (i32 31), {{.*}}
 // SPVCHECK-NOT: sub <4 x i32> splat (i32 31), {{.*}}
+// DXCHECK: icmp eq <4 x i32> {{.*}}, splat (i32 -1)
+// SPVCHECK-NOT: icmp eq <4 x i32> {{.*}}, splat (i32 -1)
+// DXCHECK: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
+// SPVCHECK-NOT: select <4 x i1> {{.*}}, <4 x i32> {{.*}}, <4 x i32> {{.*}}
 // CHECK: zext <4 x i32> {{.*}} to <4 x i64>
 // CHECK: ret <4 x i64> {{.*}}
 uint64_t4 test_firstbithigh_upcast(uint4 p0) {

>From c6c2fa22e90d8026950c5eda1341cc0a2a5c422a Mon Sep 17 00:00:00 2001
From: Deric Cheung <[email protected]>
Date: Wed, 5 Nov 2025 10:44:08 -0800
Subject: [PATCH 7/8] Combine firstbithigh_impl to remove code duplication

---
 .../lib/Headers/hlsl/hlsl_intrinsic_helpers.h  | 18 +++---------------
 clang/lib/Headers/hlsl/hlsl_intrinsics.h       | 12 ++++++------
 2 files changed, 9 insertions(+), 21 deletions(-)

diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h
index 65d2095e9c5c2..a4e66f84a9937 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h
@@ -148,24 +148,12 @@ template <typename T> constexpr T ldexp_impl(T X, T Exp) {
   return exp2(Exp) * X;
 }
 
-template <typename T, int BitWidth> constexpr uint firstbithigh_impl(T X) {
-  uint FBH = __builtin_hlsl_elementwise_firstbithigh(X);
+template <typename K, typename T, int BitWidth> constexpr uint firstbithigh_impl(T X) {
+  K FBH = __builtin_hlsl_elementwise_firstbithigh(X);
 #if defined(__DIRECTX__)
   // The firstbithigh DXIL ops count bits from the wrong side, so we need to
   // invert it for DirectX.
-  uint Inversion = (BitWidth - 1) - FBH;
-  FBH = select(FBH == -1, FBH, Inversion);
-#endif
-  return FBH;
-}
-
-template <typename T, int N, int BitWidth>
-constexpr vector<uint, N> firstbithigh_impl(vector<T, N> X) {
-  vector<uint, N> FBH = __builtin_hlsl_elementwise_firstbithigh(X);
-#if defined(__DIRECTX__)
-  // The firstbithigh DXIL ops count bits from the wrong side, so we need to
-  // invert it for DirectX.
-  vector<uint, N> Inversion = (BitWidth - 1) - FBH;
+  K Inversion = (BitWidth - 1) - FBH;
   FBH = select(FBH == -1, FBH, Inversion);
 #endif
   return FBH;
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index 192c3a2c974d9..33ed14328ee8a 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -277,7 +277,7 @@ _HLSL_AVAILABILITY(shadermodel, 6.2)
 const inline __detail::enable_if_t<__detail::is_same<int16_t, T>::value ||
                                        __detail::is_same<uint16_t, T>::value,
                                    uint> firstbithigh(T X) {
-  return __detail::firstbithigh_impl<T, 16>(X);
+  return __detail::firstbithigh_impl<uint, T, 16>(X);
 }
 
 template <typename T, int N>
@@ -286,7 +286,7 @@ const
     inline __detail::enable_if_t<__detail::is_same<int16_t, T>::value ||
                                      __detail::is_same<uint16_t, T>::value,
                                  vector<uint, N>> firstbithigh(vector<T, N> X) {
-  return __detail::firstbithigh_impl<T, N, 16>(X);
+  return __detail::firstbithigh_impl<vector<uint, N>, vector<T, N>, 16>(X);
 }
 
 #endif
@@ -295,7 +295,7 @@ template <typename T>
 const inline __detail::enable_if_t<
     __detail::is_same<int, T>::value || __detail::is_same<uint, T>::value, uint>
 firstbithigh(T X) {
-  return __detail::firstbithigh_impl<T, 32>(X);
+  return __detail::firstbithigh_impl<uint, T, 32>(X);
 }
 
 template <typename T, int N>
@@ -303,7 +303,7 @@ const inline __detail::enable_if_t<__detail::is_same<int, T>::value ||
                                        __detail::is_same<uint, T>::value,
                                    vector<uint, N>>
 firstbithigh(vector<T, N> X) {
-  return __detail::firstbithigh_impl<T, N, 32>(X);
+  return __detail::firstbithigh_impl<vector<uint, N>, vector<T, N>, 32>(X);
 }
 
 template <typename T>
@@ -311,7 +311,7 @@ const inline __detail::enable_if_t<__detail::is_same<int64_t, T>::value ||
                                        __detail::is_same<uint64_t, T>::value,
                                    uint>
 firstbithigh(T X) {
-  return __detail::firstbithigh_impl<T, 64>(X);
+  return __detail::firstbithigh_impl<uint, T, 64>(X);
 }
 
 template <typename T, int N>
@@ -319,7 +319,7 @@ const inline __detail::enable_if_t<__detail::is_same<int64_t, T>::value ||
                                        __detail::is_same<uint64_t, T>::value,
                                    vector<uint, N>>
 firstbithigh(vector<T, N> X) {
-  return __detail::firstbithigh_impl<T, N, 64>(X);
+  return __detail::firstbithigh_impl<vector<uint, N>, vector<T, N>, 64>(X);
 }
 
 
//===----------------------------------------------------------------------===//

>From 362a09a0566d9cfe4e5376a1b3768396a5289f14 Mon Sep 17 00:00:00 2001
From: Deric Cheung <[email protected]>
Date: Wed, 5 Nov 2025 10:47:42 -0800
Subject: [PATCH 8/8] Apply clang-format

---
 clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h
index a4e66f84a9937..0a4efe6f8f6de 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h
@@ -148,7 +148,8 @@ template <typename T> constexpr T ldexp_impl(T X, T Exp) {
   return exp2(Exp) * X;
 }
 
-template <typename K, typename T, int BitWidth> constexpr uint firstbithigh_impl(T X) {
+template <typename K, typename T, int BitWidth>
+constexpr uint firstbithigh_impl(T X) {
   K FBH = __builtin_hlsl_elementwise_firstbithigh(X);
 #if defined(__DIRECTX__)
   // The firstbithigh DXIL ops count bits from the wrong side, so we need to

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [HLSL] [DirectX] Invert the result of `firstbithigh` (PR #166419)

Reply via email to