spall (https://github.com/spall) updated pull request https://github.com/llvm/llvm-project/pull/129939
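The patches below first add mixed scalar/vector clamp overloads through the existing GEN_VEC_SCALAR_OVERLOADS macro machinery, then (from patch 03 onward) rework them as constrained templates in a new hlsl_compat_overloads.h and tighten the Sema check so __builtin_hlsl_elementwise_clamp requires all arguments to have the same type. To make the enable_if-based dispatch those headers rely on easier to follow, here is a minimal standalone C++14 sketch of the same technique; the vec type, splat, clamp_impl, and every other name in it are hypothetical stand-ins for illustration only, not the actual HLSL header code.

#include <algorithm>
#include <array>
#include <cstddef>
#include <iostream>
#include <type_traits>

// Toy stand-in for an HLSL vector type (hypothetical, illustration only).
template <typename T, std::size_t N> struct vec {
  std::array<T, N> v;
};

// Element-wise worker, standing in for __builtin_hlsl_elementwise_clamp.
template <typename T, std::size_t N>
vec<T, N> clamp_impl(vec<T, N> x, vec<T, N> lo, vec<T, N> hi) {
  vec<T, N> r;
  for (std::size_t i = 0; i < N; ++i)
    r.v[i] = std::min(std::max(x.v[i], lo.v[i]), hi.v[i]);
  return r;
}

// Broadcast a scalar to every lane of the result vector type.
template <typename T, std::size_t N, typename U> vec<T, N> splat(U s) {
  vec<T, N> r;
  r.v.fill(static_cast<T>(s));
  return r;
}

// All-vector overload: element types may differ and are converted to the
// element type of the first argument before the element-wise call.
template <typename T, typename R, typename S, std::size_t N>
vec<T, N> clamp(vec<T, N> x, vec<R, N> lo, vec<S, N> hi) {
  vec<T, N> l, h;
  for (std::size_t i = 0; i < N; ++i) {
    l.v[i] = static_cast<T>(lo.v[i]);
    h.v[i] = static_cast<T>(hi.v[i]);
  }
  return clamp_impl(x, l, h);
}

// Mixed vector/scalar overload: enabled only when the trailing argument is an
// arithmetic scalar, so once the scalar has been splatted it no longer
// competes with the all-vector overload above.
template <typename T, typename R, typename U, std::size_t N,
          typename = std::enable_if_t<std::is_arithmetic<U>::value>>
vec<T, N> clamp(vec<T, N> x, vec<R, N> lo, U hi) {
  return clamp(x, lo, splat<T, N>(hi));
}

int main() {
  vec<float, 2> x{{1.0f, 5.0f}};
  vec<int, 2> lo{{0, 2}};
  vec<float, 2> r = clamp(x, lo, 4); // vector, vector of another type, scalar
  std::cout << r.v[0] << ' ' << r.v[1] << '\n'; // prints "1 4"
}

The design point this sketch mirrors is that the scalar-accepting overloads are SFINAE-gated on the scalar being arithmetic, so they drop out of overload resolution after the splat and the single element-wise implementation does the real work, which is how the compat-overload header keeps the builtin's "all arguments same type" contract intact.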
>From 23debaf2084f953e60847b8f0814c5d1ee27c726 Mon Sep 17 00:00:00 2001 From: Sarah Spall <sarahsp...@microsoft.com> Date: Tue, 4 Mar 2025 09:53:56 -0800 Subject: [PATCH 01/12] extra scalar vector overloads for clamp --- .../lib/Headers/hlsl/hlsl_alias_intrinsics.h | 99 ++++++++++++------- clang/test/CodeGenHLSL/builtins/clamp.hlsl | 32 ++++++ .../test/SemaHLSL/BuiltIns/clamp-errors.hlsl | 2 +- 3 files changed, 96 insertions(+), 37 deletions(-) diff --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h index 89dfeb475488e..35246e222387e 100644 --- a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h @@ -35,26 +35,44 @@ namespace hlsl { #define _HLSL_16BIT_AVAILABILITY_STAGE(environment, version, stage) #endif -#define GEN_VEC_SCALAR_OVERLOADS(FUNC_NAME, BASE_TYPE, AVAIL) \ - GEN_BOTH_OVERLOADS(FUNC_NAME, BASE_TYPE, BASE_TYPE##2, AVAIL) \ - GEN_BOTH_OVERLOADS(FUNC_NAME, BASE_TYPE, BASE_TYPE##3, AVAIL) \ - GEN_BOTH_OVERLOADS(FUNC_NAME, BASE_TYPE, BASE_TYPE##4, AVAIL) - -#define GEN_BOTH_OVERLOADS(FUNC_NAME, BASE_TYPE, VECTOR_TYPE, AVAIL) \ - IF_TRUE_##AVAIL( \ - _HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)) constexpr VECTOR_TYPE \ - FUNC_NAME(VECTOR_TYPE p0, BASE_TYPE p1) { \ - return __builtin_elementwise_##FUNC_NAME(p0, (VECTOR_TYPE)p1); \ +#define _HLSL_CAT(a,b) a##b +#define _HLSL_VEC_SCALAR_OVERLOADS(NAME, BASE_T, AVAIL) \ + _HLSL_ALL_OVERLOADS(NAME, BASE_T, AVAIL, _HLSL_CAT(_HLSL_NUM_ARGS_,NAME)) + +#define _HLSL_ALL_OVERLOADS(NAME, BASE_T, AVAIL, NUM_ARGS) \ + _HLSL_CAT(_HLSL_BOTH_OVERLOADS_,NUM_ARGS)(NAME, BASE_T, _HLSL_CAT(BASE_T,2), AVAIL) \ + _HLSL_CAT(_HLSL_BOTH_OVERLOADS_,NUM_ARGS)(NAME, BASE_T, _HLSL_CAT(BASE_T,3), AVAIL) \ + _HLSL_CAT(_HLSL_BOTH_OVERLOADS_,NUM_ARGS)(NAME, BASE_T, _HLSL_CAT(BASE_T,4), AVAIL) + +#define _HLSL_BOTH_OVERLOADS_2(NAME, BASE_T, VECTOR_T, AVAIL) \ + _HLSL_CAT(_HLSL_IF_TRUE_,AVAIL)( \ + _HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)) constexpr VECTOR_T \ + NAME(VECTOR_T p0, BASE_T p1) { \ + return _HLSL_CAT(__builtin_elementwise_,NAME)(p0, (VECTOR_T)p1); \ } \ - IF_TRUE_##AVAIL( \ - _HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)) constexpr VECTOR_TYPE \ - FUNC_NAME(BASE_TYPE p0, VECTOR_TYPE p1) { \ - return __builtin_elementwise_##FUNC_NAME((VECTOR_TYPE)p0, p1); \ + _HLSL_CAT(_HLSL_IF_TRUE_,AVAIL)( \ + _HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)) constexpr VECTOR_T \ + NAME(BASE_T p0, VECTOR_T p1) { \ + return _HLSL_CAT(__builtin_elementwise_,NAME)((VECTOR_T)p0, p1); \ } -#define IF_TRUE_0(EXPR) -#define IF_TRUE_1(EXPR) EXPR +#define _HLSL_BOTH_OVERLOADS_3(NAME, BASE_T, VECTOR_T, AVAIL) \ + _HLSL_CAT(_HLSL_IF_TRUE_,AVAIL)(_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)) \ + constexpr VECTOR_T NAME(VECTOR_T p0, VECTOR_T p1, BASE_T p2) { \ + return _HLSL_CAT(__builtin_hlsl_elementwise_,NAME)(p0, p1, (VECTOR_T)p2); \ + } \ + _HLSL_CAT(_HLSL_IF_TRUE_,AVAIL)(_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)) \ + constexpr VECTOR_T NAME(VECTOR_T p0, BASE_T p1, VECTOR_T p2) { \ + return _HLSL_CAT(__builtin_hlsl_elementwise_,NAME)(p0, (VECTOR_T)p1, p2); \ + } + +#define _HLSL_IF_TRUE_0(EXPR) +#define _HLSL_IF_TRUE_1(EXPR) EXPR +#define _HLSL_NUM_ARGS_min 2 +#define _HLSL_NUM_ARGS_max 2 +#define _HLSL_NUM_ARGS_clamp 3 + //===----------------------------------------------------------------------===// // abs builtins //===----------------------------------------------------------------------===// @@ -603,7 +621,8 @@ half3 clamp(half3, half3, half3); _HLSL_16BIT_AVAILABILITY(shadermodel, 
6.2) _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) half4 clamp(half4, half4, half4); - +_HLSL_VEC_SCALAR_OVERLOADS(clamp, half, 1) + #ifdef __HLSL_ENABLE_16_BIT _HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) @@ -617,7 +636,8 @@ int16_t3 clamp(int16_t3, int16_t3, int16_t3); _HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) int16_t4 clamp(int16_t4, int16_t4, int16_t4); - +_HLSL_VEC_SCALAR_OVERLOADS(clamp, int16_t, 1) + _HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) uint16_t clamp(uint16_t, uint16_t, uint16_t); @@ -630,6 +650,7 @@ uint16_t3 clamp(uint16_t3, uint16_t3, uint16_t3); _HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) uint16_t4 clamp(uint16_t4, uint16_t4, uint16_t4); +_HLSL_VEC_SCALAR_OVERLOADS(clamp, uint16_t, 1) #endif _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) @@ -640,6 +661,7 @@ _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) int3 clamp(int3, int3, int3); _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) int4 clamp(int4, int4, int4); +_HLSL_VEC_SCALAR_OVERLOADS(clamp, int, 0) _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) uint clamp(uint, uint, uint); @@ -649,6 +671,7 @@ _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) uint3 clamp(uint3, uint3, uint3); _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) uint4 clamp(uint4, uint4, uint4); +_HLSL_VEC_SCALAR_OVERLOADS(clamp, uint, 0) _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) int64_t clamp(int64_t, int64_t, int64_t); @@ -658,6 +681,7 @@ _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) int64_t3 clamp(int64_t3, int64_t3, int64_t3); _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) int64_t4 clamp(int64_t4, int64_t4, int64_t4); +_HLSL_VEC_SCALAR_OVERLOADS(clamp, int64_t, 0) _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) uint64_t clamp(uint64_t, uint64_t, uint64_t); @@ -667,6 +691,7 @@ _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) uint64_t3 clamp(uint64_t3, uint64_t3, uint64_t3); _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) uint64_t4 clamp(uint64_t4, uint64_t4, uint64_t4); +_HLSL_VEC_SCALAR_OVERLOADS(clamp, uint64_t, 0) _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) float clamp(float, float, float); @@ -676,6 +701,7 @@ _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) float3 clamp(float3, float3, float3); _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) float4 clamp(float4, float4, float4); +_HLSL_VEC_SCALAR_OVERLOADS(clamp, float, 0) _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) double clamp(double, double, double); @@ -685,6 +711,7 @@ _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) double3 clamp(double3, double3, double3); _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) double4 clamp(double4, double4, double4); +_HLSL_VEC_SCALAR_OVERLOADS(clamp, double, 0) //===----------------------------------------------------------------------===// // clip builtins @@ -1597,7 +1624,7 @@ half3 max(half3, half3); _HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) half4 max(half4, half4); -GEN_VEC_SCALAR_OVERLOADS(max, half, 1) +_HLSL_VEC_SCALAR_OVERLOADS(max, half, 1) #ifdef __HLSL_ENABLE_16_BIT _HLSL_AVAILABILITY(shadermodel, 6.2) @@ -1612,7 +1639,7 @@ int16_t3 max(int16_t3, int16_t3); _HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) int16_t4 max(int16_t4, int16_t4); 
-GEN_VEC_SCALAR_OVERLOADS(max, int16_t, 1) +_HLSL_VEC_SCALAR_OVERLOADS(max, int16_t, 1) _HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) @@ -1626,7 +1653,7 @@ uint16_t3 max(uint16_t3, uint16_t3); _HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) uint16_t4 max(uint16_t4, uint16_t4); -GEN_VEC_SCALAR_OVERLOADS(max, uint16_t, 1) +_HLSL_VEC_SCALAR_OVERLOADS(max, uint16_t, 1) #endif _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) @@ -1637,7 +1664,7 @@ _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) int3 max(int3, int3); _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) int4 max(int4, int4); -GEN_VEC_SCALAR_OVERLOADS(max, int, 0) +_HLSL_VEC_SCALAR_OVERLOADS(max, int, 0) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) uint max(uint, uint); @@ -1647,7 +1674,7 @@ _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) uint3 max(uint3, uint3); _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) uint4 max(uint4, uint4); -GEN_VEC_SCALAR_OVERLOADS(max, uint, 0) +_HLSL_VEC_SCALAR_OVERLOADS(max, uint, 0) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) int64_t max(int64_t, int64_t); @@ -1657,7 +1684,7 @@ _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) int64_t3 max(int64_t3, int64_t3); _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) int64_t4 max(int64_t4, int64_t4); -GEN_VEC_SCALAR_OVERLOADS(max, int64_t, 0) +_HLSL_VEC_SCALAR_OVERLOADS(max, int64_t, 0) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) uint64_t max(uint64_t, uint64_t); @@ -1667,7 +1694,7 @@ _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) uint64_t3 max(uint64_t3, uint64_t3); _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) uint64_t4 max(uint64_t4, uint64_t4); -GEN_VEC_SCALAR_OVERLOADS(max, uint64_t, 0) +_HLSL_VEC_SCALAR_OVERLOADS(max, uint64_t, 0) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) float max(float, float); @@ -1677,7 +1704,7 @@ _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) float3 max(float3, float3); _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) float4 max(float4, float4); -GEN_VEC_SCALAR_OVERLOADS(max, float, 0) +_HLSL_VEC_SCALAR_OVERLOADS(max, float, 0) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) double max(double, double); @@ -1687,7 +1714,7 @@ _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) double3 max(double3, double3); _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) double4 max(double4, double4); -GEN_VEC_SCALAR_OVERLOADS(max, double, 0) +_HLSL_VEC_SCALAR_OVERLOADS(max, double, 0) //===----------------------------------------------------------------------===// // min builtins @@ -1710,7 +1737,7 @@ half3 min(half3, half3); _HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) half4 min(half4, half4); -GEN_VEC_SCALAR_OVERLOADS(min, half, 1) +_HLSL_VEC_SCALAR_OVERLOADS(min, half, 1) #ifdef __HLSL_ENABLE_16_BIT _HLSL_AVAILABILITY(shadermodel, 6.2) @@ -1725,7 +1752,7 @@ int16_t3 min(int16_t3, int16_t3); _HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) int16_t4 min(int16_t4, int16_t4); -GEN_VEC_SCALAR_OVERLOADS(min, int16_t, 1) +_HLSL_VEC_SCALAR_OVERLOADS(min, int16_t, 1) _HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) @@ -1739,7 +1766,7 @@ uint16_t3 min(uint16_t3, uint16_t3); _HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) uint16_t4 min(uint16_t4, uint16_t4); -GEN_VEC_SCALAR_OVERLOADS(min, uint16_t, 1) +_HLSL_VEC_SCALAR_OVERLOADS(min, uint16_t, 1) #endif _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) @@ -1750,7 +1777,7 @@ 
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) int3 min(int3, int3); _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) int4 min(int4, int4); -GEN_VEC_SCALAR_OVERLOADS(min, int, 0) +_HLSL_VEC_SCALAR_OVERLOADS(min, int, 0) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) uint min(uint, uint); @@ -1760,7 +1787,7 @@ _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) uint3 min(uint3, uint3); _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) uint4 min(uint4, uint4); -GEN_VEC_SCALAR_OVERLOADS(min, uint, 0) +_HLSL_VEC_SCALAR_OVERLOADS(min, uint, 0) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) float min(float, float); @@ -1770,7 +1797,7 @@ _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) float3 min(float3, float3); _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) float4 min(float4, float4); -GEN_VEC_SCALAR_OVERLOADS(min, float, 0) +_HLSL_VEC_SCALAR_OVERLOADS(min, float, 0) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) int64_t min(int64_t, int64_t); @@ -1780,7 +1807,7 @@ _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) int64_t3 min(int64_t3, int64_t3); _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) int64_t4 min(int64_t4, int64_t4); -GEN_VEC_SCALAR_OVERLOADS(min, int64_t, 0) +_HLSL_VEC_SCALAR_OVERLOADS(min, int64_t, 0) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) uint64_t min(uint64_t, uint64_t); @@ -1790,7 +1817,7 @@ _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) uint64_t3 min(uint64_t3, uint64_t3); _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) uint64_t4 min(uint64_t4, uint64_t4); -GEN_VEC_SCALAR_OVERLOADS(min, uint64_t, 0) +_HLSL_VEC_SCALAR_OVERLOADS(min, uint64_t, 0) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) double min(double, double); @@ -1800,7 +1827,7 @@ _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) double3 min(double3, double3); _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) double4 min(double4, double4); -GEN_VEC_SCALAR_OVERLOADS(min, double, 0) +_HLSL_VEC_SCALAR_OVERLOADS(min, double, 0) //===----------------------------------------------------------------------===// // normalize builtins diff --git a/clang/test/CodeGenHLSL/builtins/clamp.hlsl b/clang/test/CodeGenHLSL/builtins/clamp.hlsl index d01c2a45c43c8..51454d6479708 100644 --- a/clang/test/CodeGenHLSL/builtins/clamp.hlsl +++ b/clang/test/CodeGenHLSL/builtins/clamp.hlsl @@ -28,6 +28,9 @@ int16_t3 test_clamp_short3(int16_t3 p0, int16_t3 p1) { return clamp(p0, p1,p1); // NATIVE_HALF: define [[FNATTRS]] <4 x i16> @_Z17test_clamp_short4 // NATIVE_HALF: call <4 x i16> @llvm.[[TARGET]].sclamp.v4i16 int16_t4 test_clamp_short4(int16_t4 p0, int16_t4 p1) { return clamp(p0, p1,p1); } +// NATIVE_HALF: define [[FNATTRS]] <4 x i16> {{.*}}test_clamp_short4_mismatch +// NATIVE_HALF: call <4 x i16> @llvm.[[TARGET]].sclamp.v4i16 +int16_t4 test_clamp_short4_mismatch(int16_t4 p0, int16_t p1) { return clamp(p0, p0,p1); } // NATIVE_HALF: define [[FNATTRS]] i16 @_Z17test_clamp_ushort // NATIVE_HALF: call i16 @llvm.[[TARGET]].uclamp.i16( @@ -41,6 +44,9 @@ uint16_t3 test_clamp_ushort3(uint16_t3 p0, uint16_t3 p1) { return clamp(p0, p1,p // NATIVE_HALF: define [[FNATTRS]] <4 x i16> @_Z18test_clamp_ushort4 // NATIVE_HALF: call <4 x i16> @llvm.[[TARGET]].uclamp.v4i16 uint16_t4 test_clamp_ushort4(uint16_t4 p0, uint16_t4 p1) { return clamp(p0, p1,p1); } +// NATIVE_HALF: define [[FNATTRS]] <4 x i16> {{.*}}test_clamp_ushort4_mismatch +// NATIVE_HALF: call <4 x i16> @llvm.[[TARGET]].uclamp.v4i16 +uint16_t4 test_clamp_ushort4_mismatch(uint16_t4 p0, uint16_t p1) { return clamp(p0, p0,p1); } #endif // CHECK: define [[FNATTRS]] i32 @_Z14test_clamp_int @@ -55,6 +61,9 @@ int3 
test_clamp_int3(int3 p0, int3 p1) { return clamp(p0, p1,p1); } // CHECK: define [[FNATTRS]] <4 x i32> @_Z15test_clamp_int4 // CHECK: call <4 x i32> @llvm.[[TARGET]].sclamp.v4i32 int4 test_clamp_int4(int4 p0, int4 p1) { return clamp(p0, p1,p1); } +// CHECK: define [[FNATTRS]] <4 x i32> {{.*}}test_clamp_int4_mismatch +// CHECK: call <4 x i32> @llvm.[[TARGET]].sclamp.v4i32 +int4 test_clamp_int4_mismatch(int4 p0, int p1) { return clamp(p0, p0,p1); } // CHECK: define [[FNATTRS]] i32 @_Z15test_clamp_uint // CHECK: call i32 @llvm.[[TARGET]].uclamp.i32( @@ -68,6 +77,9 @@ uint3 test_clamp_uint3(uint3 p0, uint3 p1) { return clamp(p0, p1,p1); } // CHECK: define [[FNATTRS]] <4 x i32> @_Z16test_clamp_uint4 // CHECK: call <4 x i32> @llvm.[[TARGET]].uclamp.v4i32 uint4 test_clamp_uint4(uint4 p0, uint4 p1) { return clamp(p0, p1,p1); } +// CHECK: define [[FNATTRS]] <4 x i32> {{.*}}test_clamp_uint4_mismatch +// CHECK: call <4 x i32> @llvm.[[TARGET]].uclamp.v4i32 +uint4 test_clamp_uint4_mismatch(uint4 p0, uint p1) { return clamp(p0, p0,p1); } // CHECK: define [[FNATTRS]] i64 @_Z15test_clamp_long // CHECK: call i64 @llvm.[[TARGET]].sclamp.i64( @@ -81,6 +93,9 @@ int64_t3 test_clamp_long3(int64_t3 p0, int64_t3 p1) { return clamp(p0, p1,p1); } // CHECK: define [[FNATTRS]] <4 x i64> @_Z16test_clamp_long4 // CHECK: call <4 x i64> @llvm.[[TARGET]].sclamp.v4i64 int64_t4 test_clamp_long4(int64_t4 p0, int64_t4 p1) { return clamp(p0, p1,p1); } +// CHECK: define [[FNATTRS]] <4 x i64> {{.*}}test_clamp_long4_mismatch +// CHECK: call <4 x i64> @llvm.[[TARGET]].sclamp.v4i64 +int64_t4 test_clamp_long4_mismatch(int64_t4 p0, int64_t4 p1) { return clamp(p0, p0,p1); } // CHECK: define [[FNATTRS]] i64 @_Z16test_clamp_ulong // CHECK: call i64 @llvm.[[TARGET]].uclamp.i64( @@ -94,6 +109,9 @@ uint64_t3 test_clamp_ulong3(uint64_t3 p0, uint64_t3 p1) { return clamp(p0, p1,p1 // CHECK: define [[FNATTRS]] <4 x i64> @_Z17test_clamp_ulong4 // CHECK: call <4 x i64> @llvm.[[TARGET]].uclamp.v4i64 uint64_t4 test_clamp_ulong4(uint64_t4 p0, uint64_t4 p1) { return clamp(p0, p1,p1); } +// CHECK: define [[FNATTRS]] <4 x i64> {{.*}}test_clamp_ulong4_mismatch +// CHECK: call <4 x i64> @llvm.[[TARGET]].uclamp.v4i64 +uint64_t4 test_clamp_ulong4_mismatch(uint64_t4 p0, uint64_t4 p1) { return clamp(p0, p0,p1); } // NATIVE_HALF: define [[FNATTRS]] [[FFNATTRS]] half @_Z15test_clamp_half // NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.[[TARGET]].nclamp.f16( @@ -115,6 +133,11 @@ half3 test_clamp_half3(half3 p0, half3 p1) { return clamp(p0, p1,p1); } // NO_HALF: define [[FNATTRS]] [[FFNATTRS]] <4 x float> @_Z16test_clamp_half4 // NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[TARGET]].nclamp.v4f32( half4 test_clamp_half4(half4 p0, half4 p1) { return clamp(p0, p1,p1); } +// NATIVE_HALF: define [[FNATTRS]] [[FFNATTRS]] <4 x half> {{.*}}test_clamp_half4_mismatch +// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.[[TARGET]].nclamp.v4f16 +// NO_HALF: define [[FNATTRS]] [[FFNATTRS]] <4 x float> {{.*}}test_clamp_half4_mismatch +// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[TARGET]].nclamp.v4f32( +half4 test_clamp_half4_mismatch(half4 p0, half p1) { return clamp(p0, p0,p1); } // CHECK: define [[FNATTRS]] [[FFNATTRS]] float @_Z16test_clamp_float // CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].nclamp.f32( @@ -128,6 +151,9 @@ float3 test_clamp_float3(float3 p0, float3 p1) { return clamp(p0, p1,p1); } // CHECK: define [[FNATTRS]] [[FFNATTRS]] <4 x float> @_Z17test_clamp_float4 // 
CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[TARGET]].nclamp.v4f32 float4 test_clamp_float4(float4 p0, float4 p1) { return clamp(p0, p1,p1); } +// CHECK: define [[FNATTRS]] [[FFNATTRS]] <4 x float> {{.*}}test_clamp_float4_mismatch +// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.[[TARGET]].nclamp.v4f32 +float4 test_clamp_float4_mismatch(float4 p0, float p1) { return clamp(p0, p0,p1); } // CHECK: define [[FNATTRS]] [[FFNATTRS]] double @_Z17test_clamp_double // CHECK: call reassoc nnan ninf nsz arcp afn double @llvm.[[TARGET]].nclamp.f64( @@ -141,3 +167,9 @@ double3 test_clamp_double3(double3 p0, double3 p1) { return clamp(p0, p1,p1); } // CHECK: define [[FNATTRS]] [[FFNATTRS]] <4 x double> @_Z18test_clamp_double4 // CHECK: call reassoc nnan ninf nsz arcp afn <4 x double> @llvm.[[TARGET]].nclamp.v4f64 double4 test_clamp_double4(double4 p0, double4 p1) { return clamp(p0, p1,p1); } +// CHECK: define [[FNATTRS]] [[FFNATTRS]] <4 x double> {{.*}}test_clamp_double4_mismatch +// CHECK: call reassoc nnan ninf nsz arcp afn <4 x double> @llvm.[[TARGET]].nclamp.v4f64 +double4 test_clamp_double4_mismatch(double4 p0, double p1) { return clamp(p0, p0,p1); } +// CHECK: define [[FNATTRS]] [[FFNATTRS]] <4 x double> {{.*}}test_clamp_double4_mismatch2 +// CHECK: call reassoc nnan ninf nsz arcp afn <4 x double> @llvm.[[TARGET]].nclamp.v4f64 +double4 test_clamp_double4_mismatch2(double4 p0, double p1) { return clamp(p0, p1,p0); } diff --git a/clang/test/SemaHLSL/BuiltIns/clamp-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/clamp-errors.hlsl index 036f04cdac0b5..a1850d47b105d 100644 --- a/clang/test/SemaHLSL/BuiltIns/clamp-errors.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/clamp-errors.hlsl @@ -22,7 +22,7 @@ float2 test_clamp_no_second_arg(float2 p0) { float2 test_clamp_vector_size_mismatch(float3 p0, float2 p1) { return clamp(p0, p0, p1); - // expected-warning@-1 {{implicit conversion truncates vector: 'float3' (aka 'vector<float, 3>') to 'vector<float, 2>' (vector of 2 'float' values)}} + // expected-error@-1 {{call to 'clamp' is ambiguous}} } float2 test_clamp_builtin_vector_size_mismatch(float3 p0, float2 p1) { >From 44a41ccbcaf7c4874c44802c1cceb5df12ecab26 Mon Sep 17 00:00:00 2001 From: Sarah Spall <sarahsp...@microsoft.com> Date: Wed, 5 Mar 2025 13:21:48 -0800 Subject: [PATCH 02/12] make clang format happy --- .../lib/Headers/hlsl/hlsl_alias_intrinsics.h | 63 ++++++++++--------- 1 file changed, 34 insertions(+), 29 deletions(-) diff --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h index 35246e222387e..af306c9e6428e 100644 --- a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h @@ -35,35 +35,40 @@ namespace hlsl { #define _HLSL_16BIT_AVAILABILITY_STAGE(environment, version, stage) #endif -#define _HLSL_CAT(a,b) a##b -#define _HLSL_VEC_SCALAR_OVERLOADS(NAME, BASE_T, AVAIL) \ - _HLSL_ALL_OVERLOADS(NAME, BASE_T, AVAIL, _HLSL_CAT(_HLSL_NUM_ARGS_,NAME)) - -#define _HLSL_ALL_OVERLOADS(NAME, BASE_T, AVAIL, NUM_ARGS) \ - _HLSL_CAT(_HLSL_BOTH_OVERLOADS_,NUM_ARGS)(NAME, BASE_T, _HLSL_CAT(BASE_T,2), AVAIL) \ - _HLSL_CAT(_HLSL_BOTH_OVERLOADS_,NUM_ARGS)(NAME, BASE_T, _HLSL_CAT(BASE_T,3), AVAIL) \ - _HLSL_CAT(_HLSL_BOTH_OVERLOADS_,NUM_ARGS)(NAME, BASE_T, _HLSL_CAT(BASE_T,4), AVAIL) - -#define _HLSL_BOTH_OVERLOADS_2(NAME, BASE_T, VECTOR_T, AVAIL) \ - _HLSL_CAT(_HLSL_IF_TRUE_,AVAIL)( \ - _HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)) constexpr VECTOR_T \ - NAME(VECTOR_T p0, BASE_T p1) { \ - return 
_HLSL_CAT(__builtin_elementwise_,NAME)(p0, (VECTOR_T)p1); \ +#define _HLSL_CAT(a, b) a##b +#define _HLSL_VEC_SCALAR_OVERLOADS(NAME, BASE_T, AVAIL) \ + _HLSL_ALL_OVERLOADS(NAME, BASE_T, AVAIL, _HLSL_CAT(_HLSL_NUM_ARGS_, NAME)) + +#define _HLSL_ALL_OVERLOADS(NAME, BASE_T, AVAIL, NUM_ARGS) \ + _HLSL_CAT(_HLSL_BOTH_OVERLOADS_, NUM_ARGS) \ + (NAME, BASE_T, _HLSL_CAT(BASE_T, 2), AVAIL) \ + _HLSL_CAT(_HLSL_BOTH_OVERLOADS_, NUM_ARGS)(NAME, BASE_T, \ + _HLSL_CAT(BASE_T, 3), AVAIL) \ + _HLSL_CAT(_HLSL_BOTH_OVERLOADS_, \ + NUM_ARGS)(NAME, BASE_T, _HLSL_CAT(BASE_T, 4), AVAIL) + +#define _HLSL_BOTH_OVERLOADS_2(NAME, BASE_T, VECTOR_T, AVAIL) \ + _HLSL_CAT(_HLSL_IF_TRUE_, AVAIL) \ + (_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)) constexpr VECTOR_T NAME( \ + VECTOR_T p0, BASE_T p1) { \ + return _HLSL_CAT(__builtin_elementwise_, NAME)(p0, (VECTOR_T)p1); \ } \ - _HLSL_CAT(_HLSL_IF_TRUE_,AVAIL)( \ - _HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)) constexpr VECTOR_T \ - NAME(BASE_T p0, VECTOR_T p1) { \ - return _HLSL_CAT(__builtin_elementwise_,NAME)((VECTOR_T)p0, p1); \ + _HLSL_CAT(_HLSL_IF_TRUE_, AVAIL) \ + (_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)) constexpr VECTOR_T NAME( \ + BASE_T p0, VECTOR_T p1) { \ + return _HLSL_CAT(__builtin_elementwise_, NAME)((VECTOR_T)p0, p1); \ } -#define _HLSL_BOTH_OVERLOADS_3(NAME, BASE_T, VECTOR_T, AVAIL) \ - _HLSL_CAT(_HLSL_IF_TRUE_,AVAIL)(_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)) \ - constexpr VECTOR_T NAME(VECTOR_T p0, VECTOR_T p1, BASE_T p2) { \ - return _HLSL_CAT(__builtin_hlsl_elementwise_,NAME)(p0, p1, (VECTOR_T)p2); \ - } \ - _HLSL_CAT(_HLSL_IF_TRUE_,AVAIL)(_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)) \ - constexpr VECTOR_T NAME(VECTOR_T p0, BASE_T p1, VECTOR_T p2) { \ - return _HLSL_CAT(__builtin_hlsl_elementwise_,NAME)(p0, (VECTOR_T)p1, p2); \ +#define _HLSL_BOTH_OVERLOADS_3(NAME, BASE_T, VECTOR_T, AVAIL) \ + _HLSL_CAT(_HLSL_IF_TRUE_, AVAIL) \ + (_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)) constexpr VECTOR_T NAME( \ + VECTOR_T p0, VECTOR_T p1, BASE_T p2) { \ + return _HLSL_CAT(__builtin_hlsl_elementwise_, NAME)(p0, p1, (VECTOR_T)p2); \ + } \ + _HLSL_CAT(_HLSL_IF_TRUE_, AVAIL) \ + (_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)) constexpr VECTOR_T NAME( \ + VECTOR_T p0, BASE_T p1, VECTOR_T p2) { \ + return _HLSL_CAT(__builtin_hlsl_elementwise_, NAME)(p0, (VECTOR_T)p1, p2); \ } #define _HLSL_IF_TRUE_0(EXPR) @@ -72,7 +77,7 @@ namespace hlsl { #define _HLSL_NUM_ARGS_min 2 #define _HLSL_NUM_ARGS_max 2 #define _HLSL_NUM_ARGS_clamp 3 - + //===----------------------------------------------------------------------===// // abs builtins //===----------------------------------------------------------------------===// @@ -622,7 +627,7 @@ _HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) half4 clamp(half4, half4, half4); _HLSL_VEC_SCALAR_OVERLOADS(clamp, half, 1) - + #ifdef __HLSL_ENABLE_16_BIT _HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) @@ -637,7 +642,7 @@ _HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) int16_t4 clamp(int16_t4, int16_t4, int16_t4); _HLSL_VEC_SCALAR_OVERLOADS(clamp, int16_t, 1) - + _HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) uint16_t clamp(uint16_t, uint16_t, uint16_t); >From ac260e6d3a04531bf863ac90b6901762e440ea8c Mon Sep 17 00:00:00 2001 From: Sarah Spall <sarahsp...@microsoft.com> Date: Thu, 6 Mar 2025 14:09:52 -0800 Subject: [PATCH 03/12] implement clamp overloads with templates 
instead of macros --- clang/lib/Headers/CMakeLists.txt | 1 + .../lib/Headers/hlsl/hlsl_alias_intrinsics.h | 100 ++++++------------ .../lib/Headers/hlsl/hlsl_compat_overloads.h | 45 ++++++++ clang/lib/Sema/SemaHLSL.cpp | 10 +- 4 files changed, 90 insertions(+), 66 deletions(-) create mode 100644 clang/lib/Headers/hlsl/hlsl_compat_overloads.h diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index d26de236998ca..acf49e40c447e 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -87,6 +87,7 @@ set(hlsl_h set(hlsl_subdir_files hlsl/hlsl_basic_types.h hlsl/hlsl_alias_intrinsics.h + hlsl/hlsl_compat_overloads.h hlsl/hlsl_intrinsic_helpers.h hlsl/hlsl_intrinsics.h hlsl/hlsl_detail.h diff --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h index af306c9e6428e..a6b30c74c6ae9 100644 --- a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h @@ -9,6 +9,8 @@ #ifndef _HLSL_HLSL_ALIAS_INTRINSICS_H_ #define _HLSL_HLSL_ALIAS_INTRINSICS_H_ +#include "hlsl_compat_overloads.h" + namespace hlsl { // Note: Functions in this file are sorted alphabetically, then grouped by base @@ -35,48 +37,25 @@ namespace hlsl { #define _HLSL_16BIT_AVAILABILITY_STAGE(environment, version, stage) #endif -#define _HLSL_CAT(a, b) a##b -#define _HLSL_VEC_SCALAR_OVERLOADS(NAME, BASE_T, AVAIL) \ - _HLSL_ALL_OVERLOADS(NAME, BASE_T, AVAIL, _HLSL_CAT(_HLSL_NUM_ARGS_, NAME)) - -#define _HLSL_ALL_OVERLOADS(NAME, BASE_T, AVAIL, NUM_ARGS) \ - _HLSL_CAT(_HLSL_BOTH_OVERLOADS_, NUM_ARGS) \ - (NAME, BASE_T, _HLSL_CAT(BASE_T, 2), AVAIL) \ - _HLSL_CAT(_HLSL_BOTH_OVERLOADS_, NUM_ARGS)(NAME, BASE_T, \ - _HLSL_CAT(BASE_T, 3), AVAIL) \ - _HLSL_CAT(_HLSL_BOTH_OVERLOADS_, \ - NUM_ARGS)(NAME, BASE_T, _HLSL_CAT(BASE_T, 4), AVAIL) - -#define _HLSL_BOTH_OVERLOADS_2(NAME, BASE_T, VECTOR_T, AVAIL) \ - _HLSL_CAT(_HLSL_IF_TRUE_, AVAIL) \ - (_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)) constexpr VECTOR_T NAME( \ - VECTOR_T p0, BASE_T p1) { \ - return _HLSL_CAT(__builtin_elementwise_, NAME)(p0, (VECTOR_T)p1); \ - } \ - _HLSL_CAT(_HLSL_IF_TRUE_, AVAIL) \ - (_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)) constexpr VECTOR_T NAME( \ - BASE_T p0, VECTOR_T p1) { \ - return _HLSL_CAT(__builtin_elementwise_, NAME)((VECTOR_T)p0, p1); \ - } +#define GEN_VEC_SCALAR_OVERLOADS(FUNC_NAME, BASE_TYPE, AVAIL) \ + GEN_BOTH_OVERLOADS(FUNC_NAME, BASE_TYPE, BASE_TYPE##2, AVAIL) \ + GEN_BOTH_OVERLOADS(FUNC_NAME, BASE_TYPE, BASE_TYPE##3, AVAIL) \ + GEN_BOTH_OVERLOADS(FUNC_NAME, BASE_TYPE, BASE_TYPE##4, AVAIL) -#define _HLSL_BOTH_OVERLOADS_3(NAME, BASE_T, VECTOR_T, AVAIL) \ - _HLSL_CAT(_HLSL_IF_TRUE_, AVAIL) \ - (_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)) constexpr VECTOR_T NAME( \ - VECTOR_T p0, VECTOR_T p1, BASE_T p2) { \ - return _HLSL_CAT(__builtin_hlsl_elementwise_, NAME)(p0, p1, (VECTOR_T)p2); \ +#define GEN_BOTH_OVERLOADS(FUNC_NAME, BASE_TYPE, VECTOR_TYPE, AVAIL) \ + IF_TRUE_##AVAIL( \ + _HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)) constexpr VECTOR_TYPE \ + FUNC_NAME(VECTOR_TYPE p0, BASE_TYPE p1) { \ + return __builtin_elementwise_##FUNC_NAME(p0, (VECTOR_TYPE)p1); \ } \ - _HLSL_CAT(_HLSL_IF_TRUE_, AVAIL) \ - (_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)) constexpr VECTOR_T NAME( \ - VECTOR_T p0, BASE_T p1, VECTOR_T p2) { \ - return _HLSL_CAT(__builtin_hlsl_elementwise_, NAME)(p0, (VECTOR_T)p1, p2); \ + IF_TRUE_##AVAIL( \ + _HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)) constexpr VECTOR_TYPE \ + FUNC_NAME(BASE_TYPE p0, 
VECTOR_TYPE p1) { \ + return __builtin_elementwise_##FUNC_NAME((VECTOR_TYPE)p0, p1); \ } -#define _HLSL_IF_TRUE_0(EXPR) -#define _HLSL_IF_TRUE_1(EXPR) EXPR - -#define _HLSL_NUM_ARGS_min 2 -#define _HLSL_NUM_ARGS_max 2 -#define _HLSL_NUM_ARGS_clamp 3 +#define IF_TRUE_0(EXPR) +#define IF_TRUE_1(EXPR) EXPR //===----------------------------------------------------------------------===// // abs builtins @@ -626,7 +605,6 @@ half3 clamp(half3, half3, half3); _HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) half4 clamp(half4, half4, half4); -_HLSL_VEC_SCALAR_OVERLOADS(clamp, half, 1) #ifdef __HLSL_ENABLE_16_BIT _HLSL_AVAILABILITY(shadermodel, 6.2) @@ -641,7 +619,6 @@ int16_t3 clamp(int16_t3, int16_t3, int16_t3); _HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) int16_t4 clamp(int16_t4, int16_t4, int16_t4); -_HLSL_VEC_SCALAR_OVERLOADS(clamp, int16_t, 1) _HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) @@ -655,7 +632,6 @@ uint16_t3 clamp(uint16_t3, uint16_t3, uint16_t3); _HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) uint16_t4 clamp(uint16_t4, uint16_t4, uint16_t4); -_HLSL_VEC_SCALAR_OVERLOADS(clamp, uint16_t, 1) #endif _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) @@ -666,7 +642,6 @@ _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) int3 clamp(int3, int3, int3); _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) int4 clamp(int4, int4, int4); -_HLSL_VEC_SCALAR_OVERLOADS(clamp, int, 0) _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) uint clamp(uint, uint, uint); @@ -676,7 +651,6 @@ _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) uint3 clamp(uint3, uint3, uint3); _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) uint4 clamp(uint4, uint4, uint4); -_HLSL_VEC_SCALAR_OVERLOADS(clamp, uint, 0) _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) int64_t clamp(int64_t, int64_t, int64_t); @@ -686,7 +660,6 @@ _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) int64_t3 clamp(int64_t3, int64_t3, int64_t3); _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) int64_t4 clamp(int64_t4, int64_t4, int64_t4); -_HLSL_VEC_SCALAR_OVERLOADS(clamp, int64_t, 0) _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) uint64_t clamp(uint64_t, uint64_t, uint64_t); @@ -696,7 +669,6 @@ _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) uint64_t3 clamp(uint64_t3, uint64_t3, uint64_t3); _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) uint64_t4 clamp(uint64_t4, uint64_t4, uint64_t4); -_HLSL_VEC_SCALAR_OVERLOADS(clamp, uint64_t, 0) _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) float clamp(float, float, float); @@ -706,7 +678,6 @@ _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) float3 clamp(float3, float3, float3); _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) float4 clamp(float4, float4, float4); -_HLSL_VEC_SCALAR_OVERLOADS(clamp, float, 0) _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) double clamp(double, double, double); @@ -716,7 +687,6 @@ _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) double3 clamp(double3, double3, double3); _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) double4 clamp(double4, double4, double4); -_HLSL_VEC_SCALAR_OVERLOADS(clamp, double, 0) //===----------------------------------------------------------------------===// // clip builtins @@ -1629,7 +1599,7 @@ half3 max(half3, half3); _HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) 
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) half4 max(half4, half4); -_HLSL_VEC_SCALAR_OVERLOADS(max, half, 1) +GEN_VEC_SCALAR_OVERLOADS(max, half, 1) #ifdef __HLSL_ENABLE_16_BIT _HLSL_AVAILABILITY(shadermodel, 6.2) @@ -1644,7 +1614,7 @@ int16_t3 max(int16_t3, int16_t3); _HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) int16_t4 max(int16_t4, int16_t4); -_HLSL_VEC_SCALAR_OVERLOADS(max, int16_t, 1) +GEN_VEC_SCALAR_OVERLOADS(max, int16_t, 1) _HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) @@ -1658,7 +1628,7 @@ uint16_t3 max(uint16_t3, uint16_t3); _HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) uint16_t4 max(uint16_t4, uint16_t4); -_HLSL_VEC_SCALAR_OVERLOADS(max, uint16_t, 1) +GEN_VEC_SCALAR_OVERLOADS(max, uint16_t, 1) #endif _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) @@ -1669,7 +1639,7 @@ _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) int3 max(int3, int3); _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) int4 max(int4, int4); -_HLSL_VEC_SCALAR_OVERLOADS(max, int, 0) +GEN_VEC_SCALAR_OVERLOADS(max, int, 0) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) uint max(uint, uint); @@ -1679,7 +1649,7 @@ _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) uint3 max(uint3, uint3); _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) uint4 max(uint4, uint4); -_HLSL_VEC_SCALAR_OVERLOADS(max, uint, 0) +GEN_VEC_SCALAR_OVERLOADS(max, uint, 0) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) int64_t max(int64_t, int64_t); @@ -1689,7 +1659,7 @@ _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) int64_t3 max(int64_t3, int64_t3); _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) int64_t4 max(int64_t4, int64_t4); -_HLSL_VEC_SCALAR_OVERLOADS(max, int64_t, 0) +GEN_VEC_SCALAR_OVERLOADS(max, int64_t, 0) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) uint64_t max(uint64_t, uint64_t); @@ -1699,7 +1669,7 @@ _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) uint64_t3 max(uint64_t3, uint64_t3); _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) uint64_t4 max(uint64_t4, uint64_t4); -_HLSL_VEC_SCALAR_OVERLOADS(max, uint64_t, 0) +GEN_VEC_SCALAR_OVERLOADS(max, uint64_t, 0) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) float max(float, float); @@ -1709,7 +1679,7 @@ _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) float3 max(float3, float3); _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) float4 max(float4, float4); -_HLSL_VEC_SCALAR_OVERLOADS(max, float, 0) +GEN_VEC_SCALAR_OVERLOADS(max, float, 0) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) double max(double, double); @@ -1719,7 +1689,7 @@ _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) double3 max(double3, double3); _HLSL_BUILTIN_ALIAS(__builtin_elementwise_max) double4 max(double4, double4); -_HLSL_VEC_SCALAR_OVERLOADS(max, double, 0) +GEN_VEC_SCALAR_OVERLOADS(max, double, 0) //===----------------------------------------------------------------------===// // min builtins @@ -1742,7 +1712,7 @@ half3 min(half3, half3); _HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) half4 min(half4, half4); -_HLSL_VEC_SCALAR_OVERLOADS(min, half, 1) +GEN_VEC_SCALAR_OVERLOADS(min, half, 1) #ifdef __HLSL_ENABLE_16_BIT _HLSL_AVAILABILITY(shadermodel, 6.2) @@ -1757,7 +1727,7 @@ int16_t3 min(int16_t3, int16_t3); _HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) int16_t4 min(int16_t4, int16_t4); -_HLSL_VEC_SCALAR_OVERLOADS(min, int16_t, 1) +GEN_VEC_SCALAR_OVERLOADS(min, int16_t, 1) _HLSL_AVAILABILITY(shadermodel, 6.2) 
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) @@ -1771,7 +1741,7 @@ uint16_t3 min(uint16_t3, uint16_t3); _HLSL_AVAILABILITY(shadermodel, 6.2) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) uint16_t4 min(uint16_t4, uint16_t4); -_HLSL_VEC_SCALAR_OVERLOADS(min, uint16_t, 1) +GEN_VEC_SCALAR_OVERLOADS(min, uint16_t, 1) #endif _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) @@ -1782,7 +1752,7 @@ _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) int3 min(int3, int3); _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) int4 min(int4, int4); -_HLSL_VEC_SCALAR_OVERLOADS(min, int, 0) +GEN_VEC_SCALAR_OVERLOADS(min, int, 0) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) uint min(uint, uint); @@ -1792,7 +1762,7 @@ _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) uint3 min(uint3, uint3); _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) uint4 min(uint4, uint4); -_HLSL_VEC_SCALAR_OVERLOADS(min, uint, 0) +GEN_VEC_SCALAR_OVERLOADS(min, uint, 0) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) float min(float, float); @@ -1802,7 +1772,7 @@ _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) float3 min(float3, float3); _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) float4 min(float4, float4); -_HLSL_VEC_SCALAR_OVERLOADS(min, float, 0) +GEN_VEC_SCALAR_OVERLOADS(min, float, 0) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) int64_t min(int64_t, int64_t); @@ -1812,7 +1782,7 @@ _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) int64_t3 min(int64_t3, int64_t3); _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) int64_t4 min(int64_t4, int64_t4); -_HLSL_VEC_SCALAR_OVERLOADS(min, int64_t, 0) +GEN_VEC_SCALAR_OVERLOADS(min, int64_t, 0) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) uint64_t min(uint64_t, uint64_t); @@ -1822,7 +1792,7 @@ _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) uint64_t3 min(uint64_t3, uint64_t3); _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) uint64_t4 min(uint64_t4, uint64_t4); -_HLSL_VEC_SCALAR_OVERLOADS(min, uint64_t, 0) +GEN_VEC_SCALAR_OVERLOADS(min, uint64_t, 0) _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) double min(double, double); @@ -1832,7 +1802,7 @@ _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) double3 min(double3, double3); _HLSL_BUILTIN_ALIAS(__builtin_elementwise_min) double4 min(double4, double4); -_HLSL_VEC_SCALAR_OVERLOADS(min, double, 0) +GEN_VEC_SCALAR_OVERLOADS(min, double, 0) //===----------------------------------------------------------------------===// // normalize builtins diff --git a/clang/lib/Headers/hlsl/hlsl_compat_overloads.h b/clang/lib/Headers/hlsl/hlsl_compat_overloads.h new file mode 100644 index 0000000000000..a6b2b1ea667ac --- /dev/null +++ b/clang/lib/Headers/hlsl/hlsl_compat_overloads.h @@ -0,0 +1,45 @@ +//===--- hlsl_compat_overloads.h - Additional HLSL overload definitions for intrinsics --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _HLSL_COMPAT_OVERLOADS_H_ +#define _HLSl_COMPAT_OVERLOADS_H_ + +namespace hlsl { + +// Note: Functions in this file are sorted alphabetically, then grouped by base +// element type, and the element types are sorted by size, then singed integer, +// unsigned integer and floating point. Keeping this ordering consistent will +// help keep this file manageable as it grows. 
+ +//===----------------------------------------------------------------------===// +// clamp builtins overloads +//===----------------------------------------------------------------------===// + +template<typename T, typename R, typename U, uint N> +constexpr __detail::enable_if_t<__detail::is_arithmetic<U>::Value, vector<T,N>> clamp(vector<T,N> p0, vector<R,N> p1, U p2) { + return __builtin_hlsl_elementwise_clamp(p0, (vector<T,N>)p1, (vector<T,N>)p2); +} +template<typename T, typename R, typename U, uint N> +constexpr __detail::enable_if_t<__detail::is_arithmetic<U>::Value, vector<T,N>> clamp(vector<T,N> p0, U p1, vector<R,N> p2) { + return __builtin_hlsl_elementwise_clamp(p0, (vector<T,N>)p1, (vector<T,N>)p2); +} +template<typename T, typename U, typename V, uint N> +constexpr __detail::enable_if_t<__detail::is_arithmetic<U>::Value && __detail::is_arithmetic<V>::Value, vector<T,N>> clamp(vector<T,N> p0, U p1, V p2) { + return __builtin_hlsl_elementwise_clamp(p0, (vector<T,N>)p1, (vector<T,N>)p2); +} +template<typename T, typename R, typename S, uint N> +constexpr vector<T,N> clamp(vector<T,N> p0, vector<R,N> p1, vector<S,N> p2) { + return __builtin_hlsl_elementwise_clamp(p0, (vector<T,N>)p1, (vector<T,N>)p2); +} +template<typename U, typename V, typename W> +constexpr __detail::enable_if_t<__detail::is_arithmetic<U>::Value && __detail::is_arithmetic<V>::Value && __detail::is_arithmetic<W>::Value, U> clamp(U p0, V p1, W p2) { + return __builtin_hlsl_elementwise_clamp(p0, (U) p1, (U) p2); +} + +} // namespace hlsl +#endif // _HLSL_COMPAT_OVERLOADS_H_ diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 2e6a333f3d768..f26f22dc87741 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -2392,8 +2392,16 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { case Builtin::BI__builtin_hlsl_elementwise_clamp: { if (SemaRef.checkArgCount(TheCall, 3)) return true; - if (CheckVectorElementCallArgs(&SemaRef, TheCall)) + if (CheckAnyScalarOrVector(&SemaRef, TheCall, 0) || + !(SemaRef.Context.hasSameUnqualifiedType(TheCall->getArg(0)->getType(), + TheCall->getArg(1)->getType()) && + SemaRef.Context.hasSameUnqualifiedType(TheCall->getArg(0)->getType(), + TheCall->getArg(2)->getType()))) { + SemaRef.Diag(TheCall->getBeginLoc(), diag::err_typecheck_call_different_arg_types) + << TheCall->getArg(0)->getType() << TheCall->getArg(1)->getType() + << TheCall->getBeginLoc() << TheCall->getBeginLoc(); return true; + } if (SemaRef.BuiltinElementwiseTernaryMath( TheCall, /*CheckForFloatArgs*/ TheCall->getArg(0)->getType()->hasFloatingRepresentation())) >From 3311cbd2b0a7dc6f766f1923e5f561cf5cc67806 Mon Sep 17 00:00:00 2001 From: Sarah Spall <sarahsp...@microsoft.com> Date: Mon, 10 Mar 2025 11:49:26 -0700 Subject: [PATCH 04/12] update semantic checking for __builtin_hlsl_elementwise_clamp; update test with new error message --- clang/lib/Sema/SemaHLSL.cpp | 23 ++++++++++----- .../test/SemaHLSL/BuiltIns/clamp-errors.hlsl | 29 ++++++++++--------- 2 files changed, 31 insertions(+), 21 deletions(-) diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index f26f22dc87741..c0ed8f84d61ca 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -2041,6 +2041,20 @@ static bool CheckVectorElementCallArgs(Sema *S, CallExpr *TheCall) { return false; } +static bool CheckAllArgsHaveSameType(Sema *S, CallExpr *TheCall) { + QualType ArgTy0 = TheCall->getArg(0)->getType(); + + for (unsigned I = 1, N = 
TheCall->getNumArgs(); I < N; ++I) { + if (!S->getASTContext().hasSameUnqualifiedType(ArgTy0, TheCall->getArg(I)->getType())) { + S->Diag(TheCall->getBeginLoc(), diag::err_vec_builtin_incompatible_vector) + << TheCall->getDirectCallee() << /*useAllTerminology*/ true + << SourceRange(TheCall->getArg(0)->getBeginLoc(), TheCall->getArg(N-1)->getEndLoc()); + return true; + } + } + return false; +} + static bool CheckArgTypeMatches(Sema *S, Expr *Arg, QualType ExpectedType) { QualType ArgType = Arg->getType(); if (!S->getASTContext().hasSameUnqualifiedType(ArgType, ExpectedType)) { @@ -2393,15 +2407,8 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { if (SemaRef.checkArgCount(TheCall, 3)) return true; if (CheckAnyScalarOrVector(&SemaRef, TheCall, 0) || - !(SemaRef.Context.hasSameUnqualifiedType(TheCall->getArg(0)->getType(), - TheCall->getArg(1)->getType()) && - SemaRef.Context.hasSameUnqualifiedType(TheCall->getArg(0)->getType(), - TheCall->getArg(2)->getType()))) { - SemaRef.Diag(TheCall->getBeginLoc(), diag::err_typecheck_call_different_arg_types) - << TheCall->getArg(0)->getType() << TheCall->getArg(1)->getType() - << TheCall->getBeginLoc() << TheCall->getBeginLoc(); + CheckAllArgsHaveSameType(&SemaRef, TheCall)) return true; - } if (SemaRef.BuiltinElementwiseTernaryMath( TheCall, /*CheckForFloatArgs*/ TheCall->getArg(0)->getType()->hasFloatingRepresentation())) diff --git a/clang/test/SemaHLSL/BuiltIns/clamp-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/clamp-errors.hlsl index a1850d47b105d..cf53a1126dbaf 100644 --- a/clang/test/SemaHLSL/BuiltIns/clamp-errors.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/clamp-errors.hlsl @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -verify-ignore-unexpected +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -verify-ignore-unexpected=note float2 test_no_second_arg(float2 p0) { return __builtin_hlsl_elementwise_clamp(p0); @@ -22,7 +22,8 @@ float2 test_clamp_no_second_arg(float2 p0) { float2 test_clamp_vector_size_mismatch(float3 p0, float2 p1) { return clamp(p0, p0, p1); - // expected-error@-1 {{call to 'clamp' is ambiguous}} + // expected-warning@-1 {{implicit conversion truncates vector: 'float3' (aka 'vector<float, 3>') to 'vector<float, 2>' (vector of 2 'float' values)}} + // expected-warning@-2 {{implicit conversion truncates vector: 'float3' (aka 'vector<float, 3>') to 'vector<float, 2>' (vector of 2 'float' values)}} } float2 test_clamp_builtin_vector_size_mismatch(float3 p0, float2 p1) { @@ -30,44 +31,46 @@ float2 test_clamp_builtin_vector_size_mismatch(float3 p0, float2 p1) { // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must have the same type}} } +// allowed by the overloads in hlsl_compat_overloads.h +// support for this overload might be removed in a future version of hlsl float test_clamp_scalar_mismatch(float p0, half p1) { return clamp(p1, p0, p1); - // expected-error@-1 {{call to 'clamp' is ambiguous}} } +// allowed by the overloads in hlsl_compat_overloads.h +// support for this overload might be removed in a future version of hlsl float2 test_clamp_element_type_mismatch(half2 p0, float2 p1) { return clamp(p1, p0, p1); - // expected-error@-1 {{call to 'clamp' is ambiguous}} } float2 test_builtin_clamp_float2_splat(float p0, float2 p1) { return 
__builtin_hlsl_elementwise_clamp(p0, p1, p1); - // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must be vectors}} + // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must have the same type}} } float3 test_builtin_clamp_float3_splat(float p0, float3 p1) { return __builtin_hlsl_elementwise_clamp(p0, p1, p1); - // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must be vectors}} + // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must have the same type}} } float4 test_builtin_clamp_float4_splat(float p0, float4 p1) { return __builtin_hlsl_elementwise_clamp(p0, p1, p1); - // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must be vectors}} + // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must have the same type}} } float2 test_clamp_float2_int_splat(float2 p0, int p1) { return __builtin_hlsl_elementwise_clamp(p0, p1, p1); - // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must be vectors}} + // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must have the same type}} } float3 test_clamp_float3_int_splat(float3 p0, int p1) { return __builtin_hlsl_elementwise_clamp(p0, p1, p1); - // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must be vectors}} + // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must have the same type}} } float2 test_builtin_clamp_int_vect_to_float_vec_promotion(int2 p0, float p1) { return __builtin_hlsl_elementwise_clamp(p0, p1, p1); - // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must be vectors}} + // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must have the same type}} } float test_builtin_clamp_bool_type_promotion(bool p0) { @@ -77,15 +80,15 @@ float test_builtin_clamp_bool_type_promotion(bool p0) { float builtin_bool_to_float_type_promotion(float p0, bool p1) { return __builtin_hlsl_elementwise_clamp(p0, p0, p1); - // expected-error@-1 {{3rd argument must be a floating point type (was 'bool')}} + // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must have the same type}} } float builtin_bool_to_float_type_promotion2(bool p0, float p1) { return __builtin_hlsl_elementwise_clamp(p1, p0, p1); - // expected-error@-1 {{2nd argument must be a floating point type (was 'bool')}} + // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must have the same type}} } float builtin_clamp_int_to_float_promotion(float p0, int p1) { return __builtin_hlsl_elementwise_clamp(p0, p0, p1); - // expected-error@-1 {{3rd argument must be a floating point type (was 'int')}} + // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must have the same type}} } >From fec28cf6856b542e8f20e300794fae01840ed320 Mon Sep 17 00:00:00 2001 From: Sarah Spall <sarahsp...@microsoft.com> Date: Mon, 10 Mar 2025 11:54:59 -0700 Subject: [PATCH 05/12] make clang format happy --- .../lib/Headers/hlsl/hlsl_compat_overloads.h | 49 ++++++++++++------- clang/lib/Sema/SemaHLSL.cpp | 10 ++-- 2 files changed, 38 insertions(+), 21 deletions(-) diff --git a/clang/lib/Headers/hlsl/hlsl_compat_overloads.h b/clang/lib/Headers/hlsl/hlsl_compat_overloads.h index a6b2b1ea667ac..2e769ec02e1df 100644 --- a/clang/lib/Headers/hlsl/hlsl_compat_overloads.h +++ b/clang/lib/Headers/hlsl/hlsl_compat_overloads.h @@ -1,4 +1,5 @@ -//===--- hlsl_compat_overloads.h - 
Additional HLSL overload definitions for intrinsics --===// +//===--- hlsl_compat_overloads.h - Additional HLSL overload definitions for +//intrinsics --===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -20,26 +21,40 @@ namespace hlsl { // clamp builtins overloads //===----------------------------------------------------------------------===// -template<typename T, typename R, typename U, uint N> -constexpr __detail::enable_if_t<__detail::is_arithmetic<U>::Value, vector<T,N>> clamp(vector<T,N> p0, vector<R,N> p1, U p2) { - return __builtin_hlsl_elementwise_clamp(p0, (vector<T,N>)p1, (vector<T,N>)p2); +template <typename T, typename R, typename U, uint N> +constexpr __detail::enable_if_t<__detail::is_arithmetic<U>::Value, vector<T, N>> +clamp(vector<T, N> p0, vector<R, N> p1, U p2) { + return __builtin_hlsl_elementwise_clamp(p0, (vector<T, N>)p1, + (vector<T, N>)p2); } -template<typename T, typename R, typename U, uint N> -constexpr __detail::enable_if_t<__detail::is_arithmetic<U>::Value, vector<T,N>> clamp(vector<T,N> p0, U p1, vector<R,N> p2) { - return __builtin_hlsl_elementwise_clamp(p0, (vector<T,N>)p1, (vector<T,N>)p2); +template <typename T, typename R, typename U, uint N> +constexpr __detail::enable_if_t<__detail::is_arithmetic<U>::Value, vector<T, N>> +clamp(vector<T, N> p0, U p1, vector<R, N> p2) { + return __builtin_hlsl_elementwise_clamp(p0, (vector<T, N>)p1, + (vector<T, N>)p2); } -template<typename T, typename U, typename V, uint N> -constexpr __detail::enable_if_t<__detail::is_arithmetic<U>::Value && __detail::is_arithmetic<V>::Value, vector<T,N>> clamp(vector<T,N> p0, U p1, V p2) { - return __builtin_hlsl_elementwise_clamp(p0, (vector<T,N>)p1, (vector<T,N>)p2); +template <typename T, typename U, typename V, uint N> +constexpr __detail::enable_if_t<__detail::is_arithmetic<U>::Value && + __detail::is_arithmetic<V>::Value, + vector<T, N>> +clamp(vector<T, N> p0, U p1, V p2) { + return __builtin_hlsl_elementwise_clamp(p0, (vector<T, N>)p1, + (vector<T, N>)p2); } -template<typename T, typename R, typename S, uint N> -constexpr vector<T,N> clamp(vector<T,N> p0, vector<R,N> p1, vector<S,N> p2) { - return __builtin_hlsl_elementwise_clamp(p0, (vector<T,N>)p1, (vector<T,N>)p2); +template <typename T, typename R, typename S, uint N> +constexpr vector<T, N> clamp(vector<T, N> p0, vector<R, N> p1, + vector<S, N> p2) { + return __builtin_hlsl_elementwise_clamp(p0, (vector<T, N>)p1, + (vector<T, N>)p2); } -template<typename U, typename V, typename W> -constexpr __detail::enable_if_t<__detail::is_arithmetic<U>::Value && __detail::is_arithmetic<V>::Value && __detail::is_arithmetic<W>::Value, U> clamp(U p0, V p1, W p2) { - return __builtin_hlsl_elementwise_clamp(p0, (U) p1, (U) p2); +template <typename U, typename V, typename W> +constexpr __detail::enable_if_t<__detail::is_arithmetic<U>::Value && + __detail::is_arithmetic<V>::Value && + __detail::is_arithmetic<W>::Value, + U> +clamp(U p0, V p1, W p2) { + return __builtin_hlsl_elementwise_clamp(p0, (U)p1, (U)p2); } - + } // namespace hlsl #endif // _HLSL_COMPAT_OVERLOADS_H_ diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index c0ed8f84d61ca..564eed7bd2989 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -2045,10 +2045,12 @@ static bool CheckAllArgsHaveSameType(Sema *S, CallExpr *TheCall) { QualType ArgTy0 = TheCall->getArg(0)->getType(); for (unsigned I = 1, N = TheCall->getNumArgs(); I < 
N; ++I) { - if (!S->getASTContext().hasSameUnqualifiedType(ArgTy0, TheCall->getArg(I)->getType())) { + if (!S->getASTContext().hasSameUnqualifiedType( + ArgTy0, TheCall->getArg(I)->getType())) { S->Diag(TheCall->getBeginLoc(), diag::err_vec_builtin_incompatible_vector) - << TheCall->getDirectCallee() << /*useAllTerminology*/ true - << SourceRange(TheCall->getArg(0)->getBeginLoc(), TheCall->getArg(N-1)->getEndLoc()); + << TheCall->getDirectCallee() << /*useAllTerminology*/ true + << SourceRange(TheCall->getArg(0)->getBeginLoc(), + TheCall->getArg(N - 1)->getEndLoc()); return true; } } @@ -2407,7 +2409,7 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { if (SemaRef.checkArgCount(TheCall, 3)) return true; if (CheckAnyScalarOrVector(&SemaRef, TheCall, 0) || - CheckAllArgsHaveSameType(&SemaRef, TheCall)) + CheckAllArgsHaveSameType(&SemaRef, TheCall)) return true; if (SemaRef.BuiltinElementwiseTernaryMath( TheCall, /*CheckForFloatArgs*/ >From 101beb2f016131d669ea676359c7f57c33e0d917 Mon Sep 17 00:00:00 2001 From: Sarah Spall <sarahsp...@microsoft.com> Date: Mon, 10 Mar 2025 12:23:25 -0700 Subject: [PATCH 06/12] make clang format happy again --- clang/lib/Headers/hlsl/hlsl_compat_overloads.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Headers/hlsl/hlsl_compat_overloads.h b/clang/lib/Headers/hlsl/hlsl_compat_overloads.h index 2e769ec02e1df..673180f7a5802 100644 --- a/clang/lib/Headers/hlsl/hlsl_compat_overloads.h +++ b/clang/lib/Headers/hlsl/hlsl_compat_overloads.h @@ -1,5 +1,5 @@ //===--- hlsl_compat_overloads.h - Additional HLSL overload definitions for -//intrinsics --===// +// intrinsics --===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. >From 69dab196e89f908972229a5f440ca6cc088b7eeb Mon Sep 17 00:00:00 2001 From: Sarah Spall <sarahsp...@microsoft.com> Date: Tue, 11 Mar 2025 12:42:13 -0700 Subject: [PATCH 07/12] self review + respond to pr comments --- clang/lib/Headers/hlsl.h | 1 + .../lib/Headers/hlsl/hlsl_alias_intrinsics.h | 2 -- clang/lib/Sema/SemaHLSL.cpp | 1 + clang/test/CodeGenHLSL/builtins/clamp.hlsl | 20 +++++++++++++++++++ .../test/SemaHLSL/BuiltIns/clamp-errors.hlsl | 20 +++++++++++++++++++ 5 files changed, 42 insertions(+), 2 deletions(-) diff --git a/clang/lib/Headers/hlsl.h b/clang/lib/Headers/hlsl.h index 4be68eb84a34a..e4c43ab5ee539 100644 --- a/clang/lib/Headers/hlsl.h +++ b/clang/lib/Headers/hlsl.h @@ -23,6 +23,7 @@ // HLSL standard library function declarations/definitions. 
#include "hlsl/hlsl_alias_intrinsics.h" #include "hlsl/hlsl_intrinsics.h" +#include "hlsl/hlsl_compat_overloads.h" #if defined(__clang__) #pragma clang diagnostic pop diff --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h index a6b30c74c6ae9..89dfeb475488e 100644 --- a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h @@ -9,8 +9,6 @@ #ifndef _HLSL_HLSL_ALIAS_INTRINSICS_H_ #define _HLSL_HLSL_ALIAS_INTRINSICS_H_ -#include "hlsl_compat_overloads.h" - namespace hlsl { // Note: Functions in this file are sorted alphabetically, then grouped by base diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 564eed7bd2989..b55b8f9e43efa 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -2042,6 +2042,7 @@ static bool CheckVectorElementCallArgs(Sema *S, CallExpr *TheCall) { } static bool CheckAllArgsHaveSameType(Sema *S, CallExpr *TheCall) { + assert(TheCall->getNumArgs() > 1); QualType ArgTy0 = TheCall->getArg(0)->getType(); for (unsigned I = 1, N = TheCall->getNumArgs(); I < N; ++I) { diff --git a/clang/test/CodeGenHLSL/builtins/clamp.hlsl b/clang/test/CodeGenHLSL/builtins/clamp.hlsl index 51454d6479708..a3483898a0052 100644 --- a/clang/test/CodeGenHLSL/builtins/clamp.hlsl +++ b/clang/test/CodeGenHLSL/builtins/clamp.hlsl @@ -173,3 +173,23 @@ double4 test_clamp_double4_mismatch(double4 p0, double p1) { return clamp(p0, p0 // CHECK: define [[FNATTRS]] [[FFNATTRS]] <4 x double> {{.*}}test_clamp_double4_mismatch2 // CHECK: call reassoc nnan ninf nsz arcp afn <4 x double> @llvm.[[TARGET]].nclamp.v4f64 double4 test_clamp_double4_mismatch2(double4 p0, double p1) { return clamp(p0, p1,p0); } + +// CHECK: define [[FNATTRS]] <2 x i32> {{.*}}_overloads1 +// CHECK: call <2 x i32> @llvm.[[TARGET]].sclamp.v2i32 +int2 test_overloads1(int2 p0, float2 p1, uint p2) { return clamp(p0, p1, p2); } + +// CHECK: define [[FNATTRS]] [[FFNATTRS]] <2 x float> {{.*}}test_overloads2 +// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[TARGET]].nclamp.v2f32 +float2 test_overloads2(float2 p0, uint p1, int2 p2) { return clamp(p0, p1, p2); } + +// CHECK: define [[FNATTRS]] <3 x i32> {{.*}}test_overloads3 +// CHECK: call <3 x i32> @llvm.[[TARGET]].uclamp.v3i32 +uint3 test_overloads3(uint3 p0, int p1, float p2) { return clamp(p0, p1, p2); } + +// CHECK: define [[FNATTRS]] [[FFNATTRS]] <4 x double> {{.*}}test_overloads4 +// CHECK: call reassoc nnan ninf nsz arcp afn <4 x double> @llvm.[[TARGET]].nclamp.v4f64 +double4 test_overloads4(double4 p0, float4 p1, int4 p2) { return clamp(p0, p1, p2); } + +// CHECK: define [[FNATTRS]] i32 {{.*}}test_overloads5 +// CHECK: call i32 @llvm.[[TARGET]].sclamp.i32( +int test_overloads5(int p0, uint p1, double p2) { return clamp(p0, p1, p2); } diff --git a/clang/test/SemaHLSL/BuiltIns/clamp-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/clamp-errors.hlsl index cf53a1126dbaf..58002047c9454 100644 --- a/clang/test/SemaHLSL/BuiltIns/clamp-errors.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/clamp-errors.hlsl @@ -20,6 +20,26 @@ float2 test_clamp_no_second_arg(float2 p0) { // expected-error@-1 {{no matching function for call to 'clamp'}} } +float test_scalar_first_arg(float p0, float2 p1) { + return clamp(p0, p1, p1); + // expected-error@-1 {{call to 'clamp' is ambiguous}} +} + +float test_scalar_first_arg2(float p0, float2 p1) { + return clamp(p0, p0, p1); + // expected-error@-1 {{call to 'clamp' is ambiguous}} +} + +float2 test_scalar_first_arg3(float 
p0, float2 p1) { + return clamp(p0, p0, p1); + // expected-error@-1 {{call to 'clamp' is ambiguous}} +} + +float3 test_thing(float3 p0, float2 p1) { + return clamp(p0, p0, p1); + // expected-error@-1 {{cannot initialize return object of type 'float3' (aka 'vector<float, 3>') with an rvalue of type 'vector<float, 2>' (vector of 2 'float' values)}} +} + float2 test_clamp_vector_size_mismatch(float3 p0, float2 p1) { return clamp(p0, p0, p1); // expected-warning@-1 {{implicit conversion truncates vector: 'float3' (aka 'vector<float, 3>') to 'vector<float, 2>' (vector of 2 'float' values)}} >From 47c90c4be48c0c54dbfa803cb15fada50061dfa5 Mon Sep 17 00:00:00 2001 From: Sarah Spall <sarahsp...@microsoft.com> Date: Tue, 11 Mar 2025 15:13:18 -0700 Subject: [PATCH 08/12] add vector size checks to clamp templates --- clang/lib/Headers/hlsl/hlsl_compat_overloads.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/lib/Headers/hlsl/hlsl_compat_overloads.h b/clang/lib/Headers/hlsl/hlsl_compat_overloads.h index 673180f7a5802..15747b4ad0668 100644 --- a/clang/lib/Headers/hlsl/hlsl_compat_overloads.h +++ b/clang/lib/Headers/hlsl/hlsl_compat_overloads.h @@ -22,27 +22,27 @@ namespace hlsl { //===----------------------------------------------------------------------===// template <typename T, typename R, typename U, uint N> -constexpr __detail::enable_if_t<__detail::is_arithmetic<U>::Value, vector<T, N>> +constexpr __detail::enable_if_t<__detail::is_arithmetic<U>::Value && (N > 1 && N <= 4), vector<T, N>> clamp(vector<T, N> p0, vector<R, N> p1, U p2) { return __builtin_hlsl_elementwise_clamp(p0, (vector<T, N>)p1, (vector<T, N>)p2); } template <typename T, typename R, typename U, uint N> -constexpr __detail::enable_if_t<__detail::is_arithmetic<U>::Value, vector<T, N>> +constexpr __detail::enable_if_t<__detail::is_arithmetic<U>::Value && (N > 1 && N <= 4), vector<T, N>> clamp(vector<T, N> p0, U p1, vector<R, N> p2) { return __builtin_hlsl_elementwise_clamp(p0, (vector<T, N>)p1, (vector<T, N>)p2); } template <typename T, typename U, typename V, uint N> constexpr __detail::enable_if_t<__detail::is_arithmetic<U>::Value && - __detail::is_arithmetic<V>::Value, + __detail::is_arithmetic<V>::Value && (N > 1 && N <= 4), vector<T, N>> clamp(vector<T, N> p0, U p1, V p2) { return __builtin_hlsl_elementwise_clamp(p0, (vector<T, N>)p1, (vector<T, N>)p2); } template <typename T, typename R, typename S, uint N> -constexpr vector<T, N> clamp(vector<T, N> p0, vector<R, N> p1, +constexpr __detail::enable_if_t<(N > 1 && N <= 4), vector<T, N>> clamp(vector<T, N> p0, vector<R, N> p1, vector<S, N> p2) { return __builtin_hlsl_elementwise_clamp(p0, (vector<T, N>)p1, (vector<T, N>)p2); >From dd0765a5d99256df3d3ee7cfacd9e9dbc0e1cdf6 Mon Sep 17 00:00:00 2001 From: Sarah Spall <sarahsp...@microsoft.com> Date: Tue, 11 Mar 2025 15:24:32 -0700 Subject: [PATCH 09/12] make clang format happy --- clang/lib/Headers/hlsl.h | 2 +- clang/lib/Headers/hlsl/hlsl_compat_overloads.h | 13 ++++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/clang/lib/Headers/hlsl.h b/clang/lib/Headers/hlsl.h index e4c43ab5ee539..d233f6092ffcd 100644 --- a/clang/lib/Headers/hlsl.h +++ b/clang/lib/Headers/hlsl.h @@ -22,8 +22,8 @@ // HLSL standard library function declarations/definitions. 
#include "hlsl/hlsl_alias_intrinsics.h" -#include "hlsl/hlsl_intrinsics.h" #include "hlsl/hlsl_compat_overloads.h" +#include "hlsl/hlsl_intrinsics.h" #if defined(__clang__) #pragma clang diagnostic pop diff --git a/clang/lib/Headers/hlsl/hlsl_compat_overloads.h b/clang/lib/Headers/hlsl/hlsl_compat_overloads.h index 15747b4ad0668..80008f108ba20 100644 --- a/clang/lib/Headers/hlsl/hlsl_compat_overloads.h +++ b/clang/lib/Headers/hlsl/hlsl_compat_overloads.h @@ -22,28 +22,31 @@ namespace hlsl { //===----------------------------------------------------------------------===// template <typename T, typename R, typename U, uint N> -constexpr __detail::enable_if_t<__detail::is_arithmetic<U>::Value && (N > 1 && N <= 4), vector<T, N>> +constexpr __detail::enable_if_t< + __detail::is_arithmetic<U>::Value && (N > 1 && N <= 4), vector<T, N>> clamp(vector<T, N> p0, vector<R, N> p1, U p2) { return __builtin_hlsl_elementwise_clamp(p0, (vector<T, N>)p1, (vector<T, N>)p2); } template <typename T, typename R, typename U, uint N> -constexpr __detail::enable_if_t<__detail::is_arithmetic<U>::Value && (N > 1 && N <= 4), vector<T, N>> +constexpr __detail::enable_if_t< + __detail::is_arithmetic<U>::Value && (N > 1 && N <= 4), vector<T, N>> clamp(vector<T, N> p0, U p1, vector<R, N> p2) { return __builtin_hlsl_elementwise_clamp(p0, (vector<T, N>)p1, (vector<T, N>)p2); } template <typename T, typename U, typename V, uint N> constexpr __detail::enable_if_t<__detail::is_arithmetic<U>::Value && - __detail::is_arithmetic<V>::Value && (N > 1 && N <= 4), + __detail::is_arithmetic<V>::Value && + (N > 1 && N <= 4), vector<T, N>> clamp(vector<T, N> p0, U p1, V p2) { return __builtin_hlsl_elementwise_clamp(p0, (vector<T, N>)p1, (vector<T, N>)p2); } template <typename T, typename R, typename S, uint N> -constexpr __detail::enable_if_t<(N > 1 && N <= 4), vector<T, N>> clamp(vector<T, N> p0, vector<R, N> p1, - vector<S, N> p2) { +constexpr __detail::enable_if_t<(N > 1 && N <= 4), vector<T, N>> +clamp(vector<T, N> p0, vector<R, N> p1, vector<S, N> p2) { return __builtin_hlsl_elementwise_clamp(p0, (vector<T, N>)p1, (vector<T, N>)p2); } >From 819cae525644f74353b72a49399880c5a4f26f94 Mon Sep 17 00:00:00 2001 From: Sarah Spall <sarahsp...@microsoft.com> Date: Tue, 11 Mar 2025 15:47:22 -0700 Subject: [PATCH 10/12] test for float5 error --- clang/test/SemaHLSL/BuiltIns/clamp-errors.hlsl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/clang/test/SemaHLSL/BuiltIns/clamp-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/clamp-errors.hlsl index 58002047c9454..745516932442e 100644 --- a/clang/test/SemaHLSL/BuiltIns/clamp-errors.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/clamp-errors.hlsl @@ -40,6 +40,14 @@ float3 test_thing(float3 p0, float2 p1) { // expected-error@-1 {{cannot initialize return object of type 'float3' (aka 'vector<float, 3>') with an rvalue of type 'vector<float, 2>' (vector of 2 'float' values)}} } +typedef float float5 __attribute__((ext_vector_type(5))); + +// check vectors of wrong size are rejected +float5 vec_too_big(float5 p0) { + return clamp(p0, p0, p0); + // expected-error@-1 {{call to 'clamp' is ambiguous}} +} + float2 test_clamp_vector_size_mismatch(float3 p0, float2 p1) { return clamp(p0, p0, p1); // expected-warning@-1 {{implicit conversion truncates vector: 'float3' (aka 'vector<float, 3>') to 'vector<float, 2>' (vector of 2 'float' values)}} >From 8e01d99b985649deeb3febc56fd45c081b046d80 Mon Sep 17 00:00:00 2001 From: Sarah Spall <sarahsp...@microsoft.com> Date: Thu, 13 Mar 2025 11:06:00 -0700 Subject: [PATCH 
11/12] fix two tests --- clang/test/CodeGenHLSL/builtins/clamp.hlsl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/test/CodeGenHLSL/builtins/clamp.hlsl b/clang/test/CodeGenHLSL/builtins/clamp.hlsl index a3483898a0052..f04b6096fa220 100644 --- a/clang/test/CodeGenHLSL/builtins/clamp.hlsl +++ b/clang/test/CodeGenHLSL/builtins/clamp.hlsl @@ -95,7 +95,7 @@ int64_t3 test_clamp_long3(int64_t3 p0, int64_t3 p1) { return clamp(p0, p1,p1); } int64_t4 test_clamp_long4(int64_t4 p0, int64_t4 p1) { return clamp(p0, p1,p1); } // CHECK: define [[FNATTRS]] <4 x i64> {{.*}}test_clamp_long4_mismatch // CHECK: call <4 x i64> @llvm.[[TARGET]].sclamp.v4i64 -int64_t4 test_clamp_long4_mismatch(int64_t4 p0, int64_t4 p1) { return clamp(p0, p0,p1); } +int64_t4 test_clamp_long4_mismatch(int64_t4 p0, int64_t p1) { return clamp(p0, p0,p1); } // CHECK: define [[FNATTRS]] i64 @_Z16test_clamp_ulong // CHECK: call i64 @llvm.[[TARGET]].uclamp.i64( @@ -111,7 +111,7 @@ uint64_t3 test_clamp_ulong3(uint64_t3 p0, uint64_t3 p1) { return clamp(p0, p1,p1 uint64_t4 test_clamp_ulong4(uint64_t4 p0, uint64_t4 p1) { return clamp(p0, p1,p1); } // CHECK: define [[FNATTRS]] <4 x i64> {{.*}}test_clamp_ulong4_mismatch // CHECK: call <4 x i64> @llvm.[[TARGET]].uclamp.v4i64 -uint64_t4 test_clamp_ulong4_mismatch(uint64_t4 p0, uint64_t4 p1) { return clamp(p0, p0,p1); } +uint64_t4 test_clamp_ulong4_mismatch(uint64_t4 p0, uint64_t p1) { return clamp(p0, p0,p1); } // NATIVE_HALF: define [[FNATTRS]] [[FFNATTRS]] half @_Z15test_clamp_half // NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.[[TARGET]].nclamp.f16( >From 4ed05077627897c64eacc8b78db1a1272c64d21b Mon Sep 17 00:00:00 2001 From: Sarah Spall <sarahsp...@microsoft.com> Date: Fri, 14 Mar 2025 17:03:05 -0700 Subject: [PATCH 12/12] change clamp templates so they call clamp instead of __builtin_hlsl_elementwise_clamp, to ensure clamp can't be called on 16 bit types in a shader model before 6.2. 
Add tests to show this --- .../lib/Headers/hlsl/hlsl_compat_overloads.h | 14 ++++------ .../SemaHLSL/BuiltIns/clamp-errors-16bit.hlsl | 27 +++++++++++++++++++ 2 files changed, 32 insertions(+), 9 deletions(-) create mode 100644 clang/test/SemaHLSL/BuiltIns/clamp-errors-16bit.hlsl diff --git a/clang/lib/Headers/hlsl/hlsl_compat_overloads.h b/clang/lib/Headers/hlsl/hlsl_compat_overloads.h index 80008f108ba20..3a939c02464e8 100644 --- a/clang/lib/Headers/hlsl/hlsl_compat_overloads.h +++ b/clang/lib/Headers/hlsl/hlsl_compat_overloads.h @@ -25,15 +25,13 @@ template <typename T, typename R, typename U, uint N> constexpr __detail::enable_if_t< __detail::is_arithmetic<U>::Value && (N > 1 && N <= 4), vector<T, N>> clamp(vector<T, N> p0, vector<R, N> p1, U p2) { - return __builtin_hlsl_elementwise_clamp(p0, (vector<T, N>)p1, - (vector<T, N>)p2); + return clamp(p0, (vector<T, N>)p1, (vector<T, N>)p2); } template <typename T, typename R, typename U, uint N> constexpr __detail::enable_if_t< __detail::is_arithmetic<U>::Value && (N > 1 && N <= 4), vector<T, N>> clamp(vector<T, N> p0, U p1, vector<R, N> p2) { - return __builtin_hlsl_elementwise_clamp(p0, (vector<T, N>)p1, - (vector<T, N>)p2); + return clamp(p0, (vector<T, N>)p1, (vector<T, N>)p2); } template <typename T, typename U, typename V, uint N> constexpr __detail::enable_if_t<__detail::is_arithmetic<U>::Value && @@ -41,14 +39,12 @@ constexpr __detail::enable_if_t<__detail::is_arithmetic<U>::Value && (N > 1 && N <= 4), vector<T, N>> clamp(vector<T, N> p0, U p1, V p2) { - return __builtin_hlsl_elementwise_clamp(p0, (vector<T, N>)p1, - (vector<T, N>)p2); + return clamp(p0, (vector<T, N>)p1, (vector<T, N>)p2); } template <typename T, typename R, typename S, uint N> constexpr __detail::enable_if_t<(N > 1 && N <= 4), vector<T, N>> clamp(vector<T, N> p0, vector<R, N> p1, vector<S, N> p2) { - return __builtin_hlsl_elementwise_clamp(p0, (vector<T, N>)p1, - (vector<T, N>)p2); + return clamp(p0, (vector<T, N>)p1, (vector<T, N>)p2); } template <typename U, typename V, typename W> constexpr __detail::enable_if_t<__detail::is_arithmetic<U>::Value && @@ -56,7 +52,7 @@ constexpr __detail::enable_if_t<__detail::is_arithmetic<U>::Value && __detail::is_arithmetic<W>::Value, U> clamp(U p0, V p1, W p2) { - return __builtin_hlsl_elementwise_clamp(p0, (U)p1, (U)p2); + return clamp(p0, (U)p1, (U)p2); } } // namespace hlsl diff --git a/clang/test/SemaHLSL/BuiltIns/clamp-errors-16bit.hlsl b/clang/test/SemaHLSL/BuiltIns/clamp-errors-16bit.hlsl new file mode 100644 index 0000000000000..976dbaaa065b2 --- /dev/null +++ b/clang/test/SemaHLSL/BuiltIns/clamp-errors-16bit.hlsl @@ -0,0 +1,27 @@ +// RUN: not %clang_dxc -enable-16bit-types -T cs_6_0 -HV 202x %s 2>&1 | FileCheck %s + +// check we error on 16 bit type if shader model is too old +// CHECK: '-enable-16bit-types' option requires target HLSL Version >= 2018 and shader model >= 6.2, but HLSL Version is 'hlsl202x' and shader model is '6.0' +int16_t test_int16_t_error(int16_t p0, int p1) { + return clamp(p0, p0, p1); +} + +int16_t3 test_int16_t3_error(int16_t3 p0, int3 p1) { + return clamp(p0, p0, p1); +} + +half test_half_error(half p0, int p1) { + return clamp(p0, p1, p1); +} + +half3 test_half3_error(half3 p0, int3 p1) { + return clamp(p0, p0, p1); +} + +uint16_t test_uint16_t_error(uint16_t p0, int p1) { + return clamp(p0, p0, p1); +} + +uint16_t3 test_uint16_t3_error(uint16_t3 p0, int3 p1) { + return clamp(p0, p1, p1); +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org 
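A small sketch of what the last patch buys (illustrative only; the function name and argument types below are made up and are not part of the patch series): because the compat templates now forward to the attributed clamp overloads instead of calling __builtin_hlsl_elementwise_clamp directly, a mixed-type call whose element type is 16-bit resolves through an overload that carries _HLSL_16BIT_AVAILABILITY(shadermodel, 6.2), so it is subject to the same shader model 6.2 checking as a direct clamp(half3, half3, half3) call.

// Illustrative sketch, not part of the patch series above.
// clamp(half3, int, int) matches the compat template
//   clamp(vector<T, N> p0, U p1, V p2)
// which casts p1 and p2 to half3 and forwards to the
// clamp(half3, half3, half3) overload from hlsl_alias_intrinsics.h;
// that overload is annotated with _HLSL_16BIT_AVAILABILITY(shadermodel, 6.2).
half3 clamp_half3_mixed(half3 v, int lo, int hi) {
  return clamp(v, lo, hi);
}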
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits