[clang] Implement the `fmod` intrinsic (PR #130320)
@@ -22,56 +22,144 @@ // // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \ -// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ +// RUN: -emit-llvm -o - | FileCheck %s \ // RUN: -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTYPE=half kmpeng wrote: Code updated with suggested changes. https://github.com/llvm/llvm-project/pull/130320 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Implement the `fmod` intrinsic (PR #130320)
https://github.com/kmpeng updated https://github.com/llvm/llvm-project/pull/130320 >From bdb66b5a68090e304647ddbbeb403ac408fcea65 Mon Sep 17 00:00:00 2001 From: kmpeng Date: Tue, 25 Feb 2025 14:50:09 -0800 Subject: [PATCH 1/8] start implementation --- clang/lib/Headers/hlsl/hlsl_detail.h | 14 ++ 1 file changed, 14 insertions(+) diff --git a/clang/lib/Headers/hlsl/hlsl_detail.h b/clang/lib/Headers/hlsl/hlsl_detail.h index 39254a3cc3a0a..e7e910e30957e 100644 --- a/clang/lib/Headers/hlsl/hlsl_detail.h +++ b/clang/lib/Headers/hlsl/hlsl_detail.h @@ -97,6 +97,20 @@ constexpr vector reflect_vec_impl(vector I, vector N) { #endif } +template +constexpr enable_if_t::value || is_same::value, T> +fmod_vec_impl(vector X, vector Y) { +#if !defined(__DirectX__) + return __builtin_elementwise_fmod(X, Y); +#else + vector div = X / Y; + vector result = __builtin_hlsl_elementwise_frac(__builtin_elementwise_abs(div)) * Y; + vector condition = (div >= -div); + vector realResult = __builtin_hlsl_select(condition, result, -result); + return realResult; +#endif +} + } // namespace __detail } // namespace hlsl #endif //_HLSL_HLSL_DETAILS_H_ >From fe73beac17b99b0f7836ffdb7050e04595d96dba Mon Sep 17 00:00:00 2001 From: kmpeng Date: Wed, 26 Feb 2025 15:57:59 -0800 Subject: [PATCH 2/8] finished implementation, working on fmod.hlsl tests --- clang/lib/Headers/hlsl/hlsl_detail.h | 14 +++--- clang/test/CodeGenHLSL/builtins/fmod.hlsl | 32 +++ 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/clang/lib/Headers/hlsl/hlsl_detail.h b/clang/lib/Headers/hlsl/hlsl_detail.h index e7e910e30957e..bc2dd463e0404 100644 --- a/clang/lib/Headers/hlsl/hlsl_detail.h +++ b/clang/lib/Headers/hlsl/hlsl_detail.h @@ -97,6 +97,12 @@ constexpr vector reflect_vec_impl(vector I, vector N) { #endif } +template +constexpr enable_if_t::value || is_same::value, T> +fmod_impl(T X, T Y) { + return __builtin_elementwise_fmod(X, Y); +} + template constexpr enable_if_t::value || is_same::value, T> fmod_vec_impl(vector 
X, vector Y) { @@ -104,10 +110,10 @@ fmod_vec_impl(vector X, vector Y) { return __builtin_elementwise_fmod(X, Y); #else vector div = X / Y; - vector result = __builtin_hlsl_elementwise_frac(__builtin_elementwise_abs(div)) * Y; - vector condition = (div >= -div); - vector realResult = __builtin_hlsl_select(condition, result, -result); - return realResult; + vector ge = div >= -div; + vector frc = __builtin_hlsl_elementwise_frac(__builtin_elementwise_abs(div)); + vector realFrc = __builtin_hlsl_select(ge, frc, -frc); + return realFrc * Y; #endif } diff --git a/clang/test/CodeGenHLSL/builtins/fmod.hlsl b/clang/test/CodeGenHLSL/builtins/fmod.hlsl index b62967114d456..22376638bd093 100644 --- a/clang/test/CodeGenHLSL/builtins/fmod.hlsl +++ b/clang/test/CodeGenHLSL/builtins/fmod.hlsl @@ -36,42 +36,42 @@ // CHECK: define [[FNATTRS]] [[TYPE]] @ -// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn [[TYPE]] -// CHECK: ret [[TYPE]] %fmod +// CHECK: call reassoc nnan ninf nsz arcp afn [[FNATTRS]] [[TYPE]] @{{.*}}([[TYPE]] noundef nofpclass(nan inf) %{{.*}}, [[TYPE]] noundef nofpclass(nan inf) %{{.*}}) #{{.*}} +// CHECK: ret [[TYPE]] %call half test_fmod_half(half p0, half p1) { return fmod(p0, p1); } // CHECK: define [[FNATTRS]] <2 x [[TYPE]]> @ -// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> -// CHECK: ret <2 x [[TYPE]]> %fmod +// CHECK: call reassoc nnan ninf nsz arcp afn [[FNATTRS]] [[TYPE]] @{{.*}}(<2 x [[TYPE]]> noundef nofpclass(nan inf) %{{.*}}, <2 x [[TYPE]]> noundef nofpclass(nan inf) %{{.*}}) #{{.*}} +// CHECK: ret <2 x [[TYPE]]> %splat.splat half2 test_fmod_half2(half2 p0, half2 p1) { return fmod(p0, p1); } // CHECK: define [[FNATTRS]] <3 x [[TYPE]]> @ -// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn <3 x [[TYPE]]> -// CHECK: ret <3 x [[TYPE]]> %fmod +// CHECK: call reassoc nnan ninf nsz arcp afn [[FNATTRS]] [[TYPE]] @{{.*}}(<3 x [[TYPE]]> noundef nofpclass(nan inf) %{{.*}}, <3 x [[TYPE]]> noundef nofpclass(nan inf) %{{.*}} #{{.*}} +// 
CHECK: ret <3 x [[TYPE]]> %splat.splat half3 test_fmod_half3(half3 p0, half3 p1) { return fmod(p0, p1); } // CHECK: define [[FNATTRS]] <4 x [[TYPE]]> @ -// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn <4 x [[TYPE]]> -// CHECK: ret <4 x [[TYPE]]> %fmod +// CHECK: call reassoc nnan ninf nsz arcp afn [[FNATTRS]] [[TYPE]] @{{.*}}(<4 x [[TYPE]]> noundef nofpclass(nan inf) %{{.*}}, <4 x [[TYPE]]> noundef nofpclass(nan inf) %{{.*}}) #{{.*}} +// CHECK: ret <4 x [[TYPE]]> %splat.splat half4 test_fmod_half4(half4 p0, half4 p1) { return fmod(p0, p1); } // CHECK: define [[FNATTRS]] float @ -// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn float -// CHECK: ret float %fmod +// CHECK: call reassoc nnan ninf nsz arcp afn [[FNATTRS]] float @{{.*}}(float noundef nofpclass(nan inf) %{{.*}}, float noundef nofpclass(nan inf) %{{.*}}) #{{.*}} +// CHECK: re
[clang] Implement the `fmod` intrinsic (PR #130320)
@@ -22,56 +22,144 @@ // // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \ -// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ +// RUN: -emit-llvm -o - | FileCheck %s \ // RUN: -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTYPE=half // // -- No Native Half support test --- // // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ -// RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \ +// RUN: spirv-unknown-vulkan-compute %s -emit-llvm \ // RUN: -o - | FileCheck %s \ // RUN: -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTYPE=float +// DXCHECK: define [[FNATTRS]] [[TYPE]] @ kmpeng wrote: Code updated with suggested changes. https://github.com/llvm/llvm-project/pull/130320 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Implement the `fmod` intrinsic (PR #130320)
https://github.com/kmpeng created https://github.com/llvm/llvm-project/pull/130320 Replaced the current `fmod` definition with a templatized version, implemented `fmod` algorithm for DirectX targets that matches the DXC implementation, added corresponding tests in `clang/test/CodeGenHLSL/builtins/fmod.hlsl` and `clang/test/SemaHLSL/BuiltIns/fmod-errors.hlsl`. >From bdb66b5a68090e304647ddbbeb403ac408fcea65 Mon Sep 17 00:00:00 2001 From: kmpeng Date: Tue, 25 Feb 2025 14:50:09 -0800 Subject: [PATCH 1/7] start implementation --- clang/lib/Headers/hlsl/hlsl_detail.h | 14 ++ 1 file changed, 14 insertions(+) diff --git a/clang/lib/Headers/hlsl/hlsl_detail.h b/clang/lib/Headers/hlsl/hlsl_detail.h index 39254a3cc3a0a..e7e910e30957e 100644 --- a/clang/lib/Headers/hlsl/hlsl_detail.h +++ b/clang/lib/Headers/hlsl/hlsl_detail.h @@ -97,6 +97,20 @@ constexpr vector reflect_vec_impl(vector I, vector N) { #endif } +template +constexpr enable_if_t::value || is_same::value, T> +fmod_vec_impl(vector X, vector Y) { +#if !defined(__DirectX__) + return __builtin_elementwise_fmod(X, Y); +#else + vector div = X / Y; + vector result = __builtin_hlsl_elementwise_frac(__builtin_elementwise_abs(div)) * Y; + vector condition = (div >= -div); + vector realResult = __builtin_hlsl_select(condition, result, -result); + return realResult; +#endif +} + } // namespace __detail } // namespace hlsl #endif //_HLSL_HLSL_DETAILS_H_ >From fe73beac17b99b0f7836ffdb7050e04595d96dba Mon Sep 17 00:00:00 2001 From: kmpeng Date: Wed, 26 Feb 2025 15:57:59 -0800 Subject: [PATCH 2/7] finished implementation, working on fmod.hlsl tests --- clang/lib/Headers/hlsl/hlsl_detail.h | 14 +++--- clang/test/CodeGenHLSL/builtins/fmod.hlsl | 32 +++ 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/clang/lib/Headers/hlsl/hlsl_detail.h b/clang/lib/Headers/hlsl/hlsl_detail.h index e7e910e30957e..bc2dd463e0404 100644 --- a/clang/lib/Headers/hlsl/hlsl_detail.h +++ b/clang/lib/Headers/hlsl/hlsl_detail.h @@ -97,6 +97,12 @@ 
constexpr vector reflect_vec_impl(vector I, vector N) { #endif } +template +constexpr enable_if_t::value || is_same::value, T> +fmod_impl(T X, T Y) { + return __builtin_elementwise_fmod(X, Y); +} + template constexpr enable_if_t::value || is_same::value, T> fmod_vec_impl(vector X, vector Y) { @@ -104,10 +110,10 @@ fmod_vec_impl(vector X, vector Y) { return __builtin_elementwise_fmod(X, Y); #else vector div = X / Y; - vector result = __builtin_hlsl_elementwise_frac(__builtin_elementwise_abs(div)) * Y; - vector condition = (div >= -div); - vector realResult = __builtin_hlsl_select(condition, result, -result); - return realResult; + vector ge = div >= -div; + vector frc = __builtin_hlsl_elementwise_frac(__builtin_elementwise_abs(div)); + vector realFrc = __builtin_hlsl_select(ge, frc, -frc); + return realFrc * Y; #endif } diff --git a/clang/test/CodeGenHLSL/builtins/fmod.hlsl b/clang/test/CodeGenHLSL/builtins/fmod.hlsl index b62967114d456..22376638bd093 100644 --- a/clang/test/CodeGenHLSL/builtins/fmod.hlsl +++ b/clang/test/CodeGenHLSL/builtins/fmod.hlsl @@ -36,42 +36,42 @@ // CHECK: define [[FNATTRS]] [[TYPE]] @ -// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn [[TYPE]] -// CHECK: ret [[TYPE]] %fmod +// CHECK: call reassoc nnan ninf nsz arcp afn [[FNATTRS]] [[TYPE]] @{{.*}}([[TYPE]] noundef nofpclass(nan inf) %{{.*}}, [[TYPE]] noundef nofpclass(nan inf) %{{.*}}) #{{.*}} +// CHECK: ret [[TYPE]] %call half test_fmod_half(half p0, half p1) { return fmod(p0, p1); } // CHECK: define [[FNATTRS]] <2 x [[TYPE]]> @ -// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> -// CHECK: ret <2 x [[TYPE]]> %fmod +// CHECK: call reassoc nnan ninf nsz arcp afn [[FNATTRS]] [[TYPE]] @{{.*}}(<2 x [[TYPE]]> noundef nofpclass(nan inf) %{{.*}}, <2 x [[TYPE]]> noundef nofpclass(nan inf) %{{.*}}) #{{.*}} +// CHECK: ret <2 x [[TYPE]]> %splat.splat half2 test_fmod_half2(half2 p0, half2 p1) { return fmod(p0, p1); } // CHECK: define [[FNATTRS]] <3 x [[TYPE]]> @ -// CHECK: %fmod 
= frem reassoc nnan ninf nsz arcp afn <3 x [[TYPE]]> -// CHECK: ret <3 x [[TYPE]]> %fmod +// CHECK: call reassoc nnan ninf nsz arcp afn [[FNATTRS]] [[TYPE]] @{{.*}}(<3 x [[TYPE]]> noundef nofpclass(nan inf) %{{.*}}, <3 x [[TYPE]]> noundef nofpclass(nan inf) %{{.*}} #{{.*}} +// CHECK: ret <3 x [[TYPE]]> %splat.splat half3 test_fmod_half3(half3 p0, half3 p1) { return fmod(p0, p1); } // CHECK: define [[FNATTRS]] <4 x [[TYPE]]> @ -// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn <4 x [[TYPE]]> -// CHECK: ret <4 x [[TYPE]]> %fmod +// CHECK: call reassoc nnan ninf nsz arcp afn [[FNATTRS]] [[TYPE]] @{{.*}}(<4 x [[TYPE]]> noundef nofpclass(nan inf) %{{.*}}, <4 x [[TYPE]]> noundef nofpclass(nan inf) %{{.*}}) #{{.*}} +// CHECK: ret <4 x [[TYPE]]> %splat.splat half4 test_fmod_half4(half4 p0, half4 p1) { return fmod(p0, p1); } // CHECK: define [[FNA
[clang] Implement the `fmod` intrinsic (PR #130320)
@@ -22,56 +22,144 @@ // // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \ -// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ +// RUN: -emit-llvm -o - | FileCheck %s \ // RUN: -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTYPE=half // // -- No Native Half support test --- // // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ -// RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \ +// RUN: spirv-unknown-vulkan-compute %s -emit-llvm \ // RUN: -o - | FileCheck %s \ // RUN: -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTYPE=float +// DXCHECK: define [[FNATTRS]] [[TYPE]] @ +// DXCHECK: %div1.i = fdiv reassoc nnan ninf nsz arcp afn [[TYPE]] kmpeng wrote: @Icohedron Would you recommend running the DirectX tests with -O1 then if we want to check the operands? Currently the dependent instructions are not necessarily receiving the same operands/outputs from the instructions they depend on (e.g. `%fneg.i` depends on `%div1.i`, but `%div1.i` becomes `%7` and then `%7` is passed to `%fneg.i` instead). Running with -O1 solves this, but it also optimizes away the first `fneg` instruction I check for. https://github.com/llvm/llvm-project/pull/130320 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Implement the `fmod` intrinsic (PR #130320)
@@ -22,56 +22,136 @@ // // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \ -// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ -// RUN: -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTYPE=half +// RUN: -emit-llvm -o - | FileCheck %s \ +// RUN: -DTYPE=half --check-prefixes=CHECK,SPVCHECK // // -- No Native Half support test --- // // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ -// RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \ +// RUN: spirv-unknown-vulkan-compute %s -emit-llvm \ // RUN: -o - | FileCheck %s \ -// RUN: -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTYPE=float +// RUN: -DTYPE=float --check-prefixes=CHECK,SPVCHECK -// CHECK: define [[FNATTRS]] [[TYPE]] @ -// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn [[TYPE]] -// CHECK: ret [[TYPE]] %fmod +// CHECK-LABEL: test_fmod_half +// DXCHECK: %div1.i = fdiv reassoc nnan ninf nsz arcp afn [[TYPE]] [[X:%.*]], [[Y:%.*]] +// DXCHECK: %fneg.i = fneg reassoc nnan ninf nsz arcp afn [[TYPE]] [[DIV1_I:%.*]] +// DXCHECK: %cmp.i = fcmp reassoc nnan ninf nsz arcp afn oge [[TYPE]] [[DIV1_I_2:%.*]], %fneg.i +// DXNATIVE_HALF: %elt.abs.i = call reassoc nnan ninf nsz arcp afn [[TYPE]] @llvm.fabs.f16([[TYPE]] [[DIV1_I_3:%.*]]) +// DXNO_HALF: %elt.abs.i = call reassoc nnan ninf nsz arcp afn [[TYPE]] @llvm.fabs.f32([[TYPE]] [[DIV1_I_3:%.*]]) +// DXNATIVE_HALF: %hlsl.frac.i = call reassoc nnan ninf nsz arcp afn [[TYPE]] @llvm.dx.frac.f16([[TYPE]] %elt.abs.i) +// DXNO_HALF: %hlsl.frac.i = call reassoc nnan ninf nsz arcp afn [[TYPE]] @llvm.dx.frac.f32([[TYPE]] %elt.abs.i) +// DXCHECK: %fneg2.i = fneg reassoc nnan ninf nsz arcp afn [[TYPE]] [[HLSL_FRAC_I:%.*]] +// DXCHECK: %hlsl.select.i = select reassoc nnan ninf nsz arcp afn i1 [[CMP_I:%.*]], [[TYPE]] [[HLSL_FRAC_I_2:%.*]], [[TYPE]] %fneg2.i +// DXCHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn [[TYPE]] %hlsl.select.i, [[Y_2:%.*]] +// DXCHECK: 
ret [[TYPE]] %mul.i +// SPVCHECK: %fmod.i = frem reassoc nnan ninf nsz arcp afn [[TYPE]] [[X:%.*]], [[Y:%.*]] +// SPVCHECK: ret [[TYPE]] %fmod.i half test_fmod_half(half p0, half p1) { return fmod(p0, p1); } -// CHECK: define [[FNATTRS]] <2 x [[TYPE]]> @ -// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> -// CHECK: ret <2 x [[TYPE]]> %fmod +// CHECK-LABEL: test_fmod_half2 +// DXCHECK: %div1.i = fdiv reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> [[X:%.*]], [[Y:%.*]] +// DXCHECK: %fneg.i = fneg reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> [[DIV1_I:%.*]] +// DXCHECK: %cmp.i = fcmp reassoc nnan ninf nsz arcp afn oge <2 x [[TYPE]]> [[DIV1_I_2:%.*]], %fneg.i +// DXNATIVE_HALF: %elt.abs.i = call reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> @llvm.fabs.v2f16(<2 x [[TYPE]]> [[DIV1_I_3:%.*]]) +// DXNO_HALF: %elt.abs.i = call reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> @llvm.fabs.v2f32(<2 x [[TYPE]]> [[DIV1_I_3:%.*]]) +// DXNATIVE_HALF: %hlsl.frac.i = call reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> @llvm.dx.frac.v2f16(<2 x [[TYPE]]> %elt.abs.i) +// DXNO_HALF: %hlsl.frac.i = call reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> @llvm.dx.frac.v2f32(<2 x [[TYPE]]> %elt.abs.i) +// DXCHECK: %fneg2.i = fneg reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> [[HLSL_FRAC_I:%.*]] +// DXCHECK: %hlsl.select.i = select reassoc nnan ninf nsz arcp afn <2 x i1> [[CMP_I:%.*]], <2 x [[TYPE]]> [[HLSL_FRAC_I_2:%.*]], <2 x [[TYPE]]> %fneg2.i +// DXCHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> %hlsl.select.i, [[Y_2:%.*]] +// DXCHECK: ret <2 x [[TYPE]]> %mul.i +// SPVCHECK: %fmod.i = frem reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> [[X:%.*]], [[Y:%.*]] +// SPVCHECK: ret <2 x [[TYPE]]> %fmod.i half2 test_fmod_half2(half2 p0, half2 p1) { return fmod(p0, p1); } -// CHECK: define [[FNATTRS]] <3 x [[TYPE]]> @ -// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn <3 x [[TYPE]]> -// CHECK: ret <3 x [[TYPE]]> %fmod +// CHECK-LABEL: test_fmod_half3 +// DXCHECK: 
%div1.i = fdiv reassoc nnan ninf nsz arcp afn <3 x [[TYPE]]> [[X:%.*]], [[Y:%.*]] +// DXCHECK: %fneg.i = fneg reassoc nnan ninf nsz arcp afn <3 x [[TYPE]]> [[DIV1_I:%.*]] kmpeng wrote: Do you think I need to add DXCHECKs for the other operators that are like the `DIV1_I` ones? Adding it for `HLSL_FRAC_I` wouldn't be much of a problem, but `CMP_I` would require a lot of lines. ```llvm %cmp.i = fcmp reassoc nnan ninf nsz arcp afn oge <2 x half> %6, %fneg.i %sext.i = sext <2 x i1> %cmp.i to <2 x i16> %insertvec.i = shufflevector <2 x i1> %cmp.i, <2 x i1> poison, <8 x i32> %8 = bitcast <8 x i1> %insertvec.i to i8 ... %load_bits.i = load i8, ptr %ge.i, align 1 %10 = bitcast i8 %load_bits.i to <8 x i1> %extractvec.i = shufflevector <8 x i1> %10, <8 x i1> poison, <2 x i32> ... %hlsl.select.i = select reassoc nnan ninf nsz arcp afn <2 x i1> %extractvec.i, <2 x h
[clang] Implement the `fmod` intrinsic (PR #130320)
https://github.com/kmpeng updated https://github.com/llvm/llvm-project/pull/130320 >From bdb66b5a68090e304647ddbbeb403ac408fcea65 Mon Sep 17 00:00:00 2001 From: kmpeng Date: Tue, 25 Feb 2025 14:50:09 -0800 Subject: [PATCH 1/9] start implementation --- clang/lib/Headers/hlsl/hlsl_detail.h | 14 ++ 1 file changed, 14 insertions(+) diff --git a/clang/lib/Headers/hlsl/hlsl_detail.h b/clang/lib/Headers/hlsl/hlsl_detail.h index 39254a3cc3a0a..e7e910e30957e 100644 --- a/clang/lib/Headers/hlsl/hlsl_detail.h +++ b/clang/lib/Headers/hlsl/hlsl_detail.h @@ -97,6 +97,20 @@ constexpr vector reflect_vec_impl(vector I, vector N) { #endif } +template +constexpr enable_if_t::value || is_same::value, T> +fmod_vec_impl(vector X, vector Y) { +#if !defined(__DirectX__) + return __builtin_elementwise_fmod(X, Y); +#else + vector div = X / Y; + vector result = __builtin_hlsl_elementwise_frac(__builtin_elementwise_abs(div)) * Y; + vector condition = (div >= -div); + vector realResult = __builtin_hlsl_select(condition, result, -result); + return realResult; +#endif +} + } // namespace __detail } // namespace hlsl #endif //_HLSL_HLSL_DETAILS_H_ >From fe73beac17b99b0f7836ffdb7050e04595d96dba Mon Sep 17 00:00:00 2001 From: kmpeng Date: Wed, 26 Feb 2025 15:57:59 -0800 Subject: [PATCH 2/9] finished implementation, working on fmod.hlsl tests --- clang/lib/Headers/hlsl/hlsl_detail.h | 14 +++--- clang/test/CodeGenHLSL/builtins/fmod.hlsl | 32 +++ 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/clang/lib/Headers/hlsl/hlsl_detail.h b/clang/lib/Headers/hlsl/hlsl_detail.h index e7e910e30957e..bc2dd463e0404 100644 --- a/clang/lib/Headers/hlsl/hlsl_detail.h +++ b/clang/lib/Headers/hlsl/hlsl_detail.h @@ -97,6 +97,12 @@ constexpr vector reflect_vec_impl(vector I, vector N) { #endif } +template +constexpr enable_if_t::value || is_same::value, T> +fmod_impl(T X, T Y) { + return __builtin_elementwise_fmod(X, Y); +} + template constexpr enable_if_t::value || is_same::value, T> fmod_vec_impl(vector 
X, vector Y) { @@ -104,10 +110,10 @@ fmod_vec_impl(vector X, vector Y) { return __builtin_elementwise_fmod(X, Y); #else vector div = X / Y; - vector result = __builtin_hlsl_elementwise_frac(__builtin_elementwise_abs(div)) * Y; - vector condition = (div >= -div); - vector realResult = __builtin_hlsl_select(condition, result, -result); - return realResult; + vector ge = div >= -div; + vector frc = __builtin_hlsl_elementwise_frac(__builtin_elementwise_abs(div)); + vector realFrc = __builtin_hlsl_select(ge, frc, -frc); + return realFrc * Y; #endif } diff --git a/clang/test/CodeGenHLSL/builtins/fmod.hlsl b/clang/test/CodeGenHLSL/builtins/fmod.hlsl index b62967114d456..22376638bd093 100644 --- a/clang/test/CodeGenHLSL/builtins/fmod.hlsl +++ b/clang/test/CodeGenHLSL/builtins/fmod.hlsl @@ -36,42 +36,42 @@ // CHECK: define [[FNATTRS]] [[TYPE]] @ -// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn [[TYPE]] -// CHECK: ret [[TYPE]] %fmod +// CHECK: call reassoc nnan ninf nsz arcp afn [[FNATTRS]] [[TYPE]] @{{.*}}([[TYPE]] noundef nofpclass(nan inf) %{{.*}}, [[TYPE]] noundef nofpclass(nan inf) %{{.*}}) #{{.*}} +// CHECK: ret [[TYPE]] %call half test_fmod_half(half p0, half p1) { return fmod(p0, p1); } // CHECK: define [[FNATTRS]] <2 x [[TYPE]]> @ -// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> -// CHECK: ret <2 x [[TYPE]]> %fmod +// CHECK: call reassoc nnan ninf nsz arcp afn [[FNATTRS]] [[TYPE]] @{{.*}}(<2 x [[TYPE]]> noundef nofpclass(nan inf) %{{.*}}, <2 x [[TYPE]]> noundef nofpclass(nan inf) %{{.*}}) #{{.*}} +// CHECK: ret <2 x [[TYPE]]> %splat.splat half2 test_fmod_half2(half2 p0, half2 p1) { return fmod(p0, p1); } // CHECK: define [[FNATTRS]] <3 x [[TYPE]]> @ -// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn <3 x [[TYPE]]> -// CHECK: ret <3 x [[TYPE]]> %fmod +// CHECK: call reassoc nnan ninf nsz arcp afn [[FNATTRS]] [[TYPE]] @{{.*}}(<3 x [[TYPE]]> noundef nofpclass(nan inf) %{{.*}}, <3 x [[TYPE]]> noundef nofpclass(nan inf) %{{.*}} #{{.*}} +// 
CHECK: ret <3 x [[TYPE]]> %splat.splat half3 test_fmod_half3(half3 p0, half3 p1) { return fmod(p0, p1); } // CHECK: define [[FNATTRS]] <4 x [[TYPE]]> @ -// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn <4 x [[TYPE]]> -// CHECK: ret <4 x [[TYPE]]> %fmod +// CHECK: call reassoc nnan ninf nsz arcp afn [[FNATTRS]] [[TYPE]] @{{.*}}(<4 x [[TYPE]]> noundef nofpclass(nan inf) %{{.*}}, <4 x [[TYPE]]> noundef nofpclass(nan inf) %{{.*}}) #{{.*}} +// CHECK: ret <4 x [[TYPE]]> %splat.splat half4 test_fmod_half4(half4 p0, half4 p1) { return fmod(p0, p1); } // CHECK: define [[FNATTRS]] float @ -// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn float -// CHECK: ret float %fmod +// CHECK: call reassoc nnan ninf nsz arcp afn [[FNATTRS]] float @{{.*}}(float noundef nofpclass(nan inf) %{{.*}}, float noundef nofpclass(nan inf) %{{.*}}) #{{.*}} +// CHECK: re
[clang] Implement the `fmod` intrinsic (PR #130320)
@@ -22,56 +22,144 @@ // // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \ -// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ +// RUN: -emit-llvm -o - | FileCheck %s \ // RUN: -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTYPE=half // // -- No Native Half support test --- // // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ -// RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \ +// RUN: spirv-unknown-vulkan-compute %s -emit-llvm \ // RUN: -o - | FileCheck %s \ // RUN: -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTYPE=float +// DXCHECK: define [[FNATTRS]] [[TYPE]] @ +// DXCHECK: %div1.i = fdiv reassoc nnan ninf nsz arcp afn [[TYPE]] kmpeng wrote: Code updated with suggested changes. https://github.com/llvm/llvm-project/pull/130320 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Implement the `fmod` intrinsic (PR #130320)
https://github.com/kmpeng edited https://github.com/llvm/llvm-project/pull/130320 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Implement the `fmod` intrinsic (PR #130320)
@@ -22,56 +22,144 @@ // // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \ -// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ +// RUN: -emit-llvm -o - | FileCheck %s \ // RUN: -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTYPE=half // // -- No Native Half support test --- // // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ -// RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \ +// RUN: spirv-unknown-vulkan-compute %s -emit-llvm \ // RUN: -o - | FileCheck %s \ // RUN: -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTYPE=float +// DXCHECK: define [[FNATTRS]] [[TYPE]] @ +// DXCHECK: %div1.i = fdiv reassoc nnan ninf nsz arcp afn [[TYPE]] kmpeng wrote: `%div1.i` is used as an operand in a lot of the following instructions and a different register is used for each instruction (`%7`, `%8`, `%9`). Would it be better practice to replace all of these register names with `[[DIV1_I:%.*]]` so it's clear they're all `%div1.i`, or would it be better to replace the first one with `[[DIV1_I:%.*]]`, the second with `[[DIV1_I_2:%.*]]`, etc.? https://github.com/llvm/llvm-project/pull/130320 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Implement the `fmod` intrinsic (PR #130320)
https://github.com/kmpeng edited https://github.com/llvm/llvm-project/pull/130320 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Implement the `fmod` intrinsic (PR #130320)
https://github.com/kmpeng edited https://github.com/llvm/llvm-project/pull/130320 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Implement the `fmod` intrinsic (PR #130320)
@@ -22,56 +22,136 @@ // // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \ -// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ -// RUN: -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTYPE=half +// RUN: -emit-llvm -o - | FileCheck %s \ +// RUN: -DTYPE=half --check-prefixes=CHECK,SPVCHECK // // -- No Native Half support test --- // // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ -// RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \ +// RUN: spirv-unknown-vulkan-compute %s -emit-llvm \ // RUN: -o - | FileCheck %s \ -// RUN: -DFNATTRS="spir_func noundef nofpclass(nan inf)" -DTYPE=float +// RUN: -DTYPE=float --check-prefixes=CHECK,SPVCHECK -// CHECK: define [[FNATTRS]] [[TYPE]] @ -// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn [[TYPE]] -// CHECK: ret [[TYPE]] %fmod +// CHECK-LABEL: test_fmod_half +// DXCHECK: %div1.i = fdiv reassoc nnan ninf nsz arcp afn [[TYPE]] [[X:%.*]], [[Y:%.*]] +// DXCHECK: %fneg.i = fneg reassoc nnan ninf nsz arcp afn [[TYPE]] [[DIV1_I:%.*]] +// DXCHECK: %cmp.i = fcmp reassoc nnan ninf nsz arcp afn oge [[TYPE]] [[DIV1_I_2:%.*]], %fneg.i +// DXNATIVE_HALF: %elt.abs.i = call reassoc nnan ninf nsz arcp afn [[TYPE]] @llvm.fabs.f16([[TYPE]] [[DIV1_I_3:%.*]]) +// DXNO_HALF: %elt.abs.i = call reassoc nnan ninf nsz arcp afn [[TYPE]] @llvm.fabs.f32([[TYPE]] [[DIV1_I_3:%.*]]) +// DXNATIVE_HALF: %hlsl.frac.i = call reassoc nnan ninf nsz arcp afn [[TYPE]] @llvm.dx.frac.f16([[TYPE]] %elt.abs.i) +// DXNO_HALF: %hlsl.frac.i = call reassoc nnan ninf nsz arcp afn [[TYPE]] @llvm.dx.frac.f32([[TYPE]] %elt.abs.i) +// DXCHECK: %fneg2.i = fneg reassoc nnan ninf nsz arcp afn [[TYPE]] [[HLSL_FRAC_I:%.*]] +// DXCHECK: %hlsl.select.i = select reassoc nnan ninf nsz arcp afn i1 [[CMP_I:%.*]], [[TYPE]] [[HLSL_FRAC_I_2:%.*]], [[TYPE]] %fneg2.i +// DXCHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn [[TYPE]] %hlsl.select.i, [[Y_2:%.*]] +// DXCHECK: 
ret [[TYPE]] %mul.i +// SPVCHECK: %fmod.i = frem reassoc nnan ninf nsz arcp afn [[TYPE]] [[X:%.*]], [[Y:%.*]] +// SPVCHECK: ret [[TYPE]] %fmod.i half test_fmod_half(half p0, half p1) { return fmod(p0, p1); } -// CHECK: define [[FNATTRS]] <2 x [[TYPE]]> @ -// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> -// CHECK: ret <2 x [[TYPE]]> %fmod +// CHECK-LABEL: test_fmod_half2 +// DXCHECK: %div1.i = fdiv reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> [[X:%.*]], [[Y:%.*]] +// DXCHECK: %fneg.i = fneg reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> [[DIV1_I:%.*]] +// DXCHECK: %cmp.i = fcmp reassoc nnan ninf nsz arcp afn oge <2 x [[TYPE]]> [[DIV1_I_2:%.*]], %fneg.i +// DXNATIVE_HALF: %elt.abs.i = call reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> @llvm.fabs.v2f16(<2 x [[TYPE]]> [[DIV1_I_3:%.*]]) +// DXNO_HALF: %elt.abs.i = call reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> @llvm.fabs.v2f32(<2 x [[TYPE]]> [[DIV1_I_3:%.*]]) +// DXNATIVE_HALF: %hlsl.frac.i = call reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> @llvm.dx.frac.v2f16(<2 x [[TYPE]]> %elt.abs.i) +// DXNO_HALF: %hlsl.frac.i = call reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> @llvm.dx.frac.v2f32(<2 x [[TYPE]]> %elt.abs.i) +// DXCHECK: %fneg2.i = fneg reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> [[HLSL_FRAC_I:%.*]] +// DXCHECK: %hlsl.select.i = select reassoc nnan ninf nsz arcp afn <2 x i1> [[CMP_I:%.*]], <2 x [[TYPE]]> [[HLSL_FRAC_I_2:%.*]], <2 x [[TYPE]]> %fneg2.i +// DXCHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> %hlsl.select.i, [[Y_2:%.*]] +// DXCHECK: ret <2 x [[TYPE]]> %mul.i +// SPVCHECK: %fmod.i = frem reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> [[X:%.*]], [[Y:%.*]] +// SPVCHECK: ret <2 x [[TYPE]]> %fmod.i half2 test_fmod_half2(half2 p0, half2 p1) { return fmod(p0, p1); } -// CHECK: define [[FNATTRS]] <3 x [[TYPE]]> @ -// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn <3 x [[TYPE]]> -// CHECK: ret <3 x [[TYPE]]> %fmod +// CHECK-LABEL: test_fmod_half3 +// DXCHECK: 
%div1.i = fdiv reassoc nnan ninf nsz arcp afn <3 x [[TYPE]]> [[X:%.*]], [[Y:%.*]] +// DXCHECK: %fneg.i = fneg reassoc nnan ninf nsz arcp afn <3 x [[TYPE]]> [[DIV1_I:%.*]] kmpeng wrote: Changes updated with Farzon's suggestions to include just the [relevant instructions for fmod](https://github.com/llvm/llvm-project/issues/99118#:~:text=%251%20%3D%20fdiv%20fast,5%2C%20%25p2) and their exact matches with the exception of `-D`. https://github.com/llvm/llvm-project/pull/130320 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Implement the `fmod` intrinsic (PR #130320)
https://github.com/kmpeng updated https://github.com/llvm/llvm-project/pull/130320 >From fd3db87977a52d7560d9eec15522c8afb85c9171 Mon Sep 17 00:00:00 2001 From: kmpeng Date: Tue, 25 Feb 2025 14:50:09 -0800 Subject: [PATCH 01/10] start implementation --- clang/lib/Headers/hlsl/hlsl_detail.h | 14 ++ 1 file changed, 14 insertions(+) diff --git a/clang/lib/Headers/hlsl/hlsl_detail.h b/clang/lib/Headers/hlsl/hlsl_detail.h index c691d85283de4..c6dd5494f149f 100644 --- a/clang/lib/Headers/hlsl/hlsl_detail.h +++ b/clang/lib/Headers/hlsl/hlsl_detail.h @@ -45,6 +45,20 @@ template struct is_arithmetic { static const bool Value = __is_arithmetic(T); }; +template +constexpr enable_if_t::value || is_same::value, T> +fmod_vec_impl(vector X, vector Y) { +#if !defined(__DirectX__) + return __builtin_elementwise_fmod(X, Y); +#else + vector div = X / Y; + vector result = __builtin_hlsl_elementwise_frac(__builtin_elementwise_abs(div)) * Y; + vector condition = (div >= -div); + vector realResult = __builtin_hlsl_select(condition, result, -result); + return realResult; +#endif +} + } // namespace __detail } // namespace hlsl #endif //_HLSL_HLSL_DETAILS_H_ >From 69e1d874bb2d679151e2a44e55a8a9d38dafa581 Mon Sep 17 00:00:00 2001 From: kmpeng Date: Wed, 26 Feb 2025 15:57:59 -0800 Subject: [PATCH 02/10] finished implementation, working on fmod.hlsl tests --- clang/lib/Headers/hlsl/hlsl_detail.h | 14 +++--- clang/test/CodeGenHLSL/builtins/fmod.hlsl | 32 +++ 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/clang/lib/Headers/hlsl/hlsl_detail.h b/clang/lib/Headers/hlsl/hlsl_detail.h index c6dd5494f149f..f43d449ce8b7b 100644 --- a/clang/lib/Headers/hlsl/hlsl_detail.h +++ b/clang/lib/Headers/hlsl/hlsl_detail.h @@ -45,6 +45,12 @@ template struct is_arithmetic { static const bool Value = __is_arithmetic(T); }; +template +constexpr enable_if_t::value || is_same::value, T> +fmod_impl(T X, T Y) { + return __builtin_elementwise_fmod(X, Y); +} + template constexpr enable_if_t::value || 
is_same::value, T> fmod_vec_impl(vector X, vector Y) { @@ -52,10 +58,10 @@ fmod_vec_impl(vector X, vector Y) { return __builtin_elementwise_fmod(X, Y); #else vector div = X / Y; - vector result = __builtin_hlsl_elementwise_frac(__builtin_elementwise_abs(div)) * Y; - vector condition = (div >= -div); - vector realResult = __builtin_hlsl_select(condition, result, -result); - return realResult; + vector ge = div >= -div; + vector frc = __builtin_hlsl_elementwise_frac(__builtin_elementwise_abs(div)); + vector realFrc = __builtin_hlsl_select(ge, frc, -frc); + return realFrc * Y; #endif } diff --git a/clang/test/CodeGenHLSL/builtins/fmod.hlsl b/clang/test/CodeGenHLSL/builtins/fmod.hlsl index b62967114d456..22376638bd093 100644 --- a/clang/test/CodeGenHLSL/builtins/fmod.hlsl +++ b/clang/test/CodeGenHLSL/builtins/fmod.hlsl @@ -36,42 +36,42 @@ // CHECK: define [[FNATTRS]] [[TYPE]] @ -// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn [[TYPE]] -// CHECK: ret [[TYPE]] %fmod +// CHECK: call reassoc nnan ninf nsz arcp afn [[FNATTRS]] [[TYPE]] @{{.*}}([[TYPE]] noundef nofpclass(nan inf) %{{.*}}, [[TYPE]] noundef nofpclass(nan inf) %{{.*}}) #{{.*}} +// CHECK: ret [[TYPE]] %call half test_fmod_half(half p0, half p1) { return fmod(p0, p1); } // CHECK: define [[FNATTRS]] <2 x [[TYPE]]> @ -// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn <2 x [[TYPE]]> -// CHECK: ret <2 x [[TYPE]]> %fmod +// CHECK: call reassoc nnan ninf nsz arcp afn [[FNATTRS]] [[TYPE]] @{{.*}}(<2 x [[TYPE]]> noundef nofpclass(nan inf) %{{.*}}, <2 x [[TYPE]]> noundef nofpclass(nan inf) %{{.*}}) #{{.*}} +// CHECK: ret <2 x [[TYPE]]> %splat.splat half2 test_fmod_half2(half2 p0, half2 p1) { return fmod(p0, p1); } // CHECK: define [[FNATTRS]] <3 x [[TYPE]]> @ -// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn <3 x [[TYPE]]> -// CHECK: ret <3 x [[TYPE]]> %fmod +// CHECK: call reassoc nnan ninf nsz arcp afn [[FNATTRS]] [[TYPE]] @{{.*}}(<3 x [[TYPE]]> noundef nofpclass(nan inf) %{{.*}}, <3 x [[TYPE]]> noundef 
nofpclass(nan inf) %{{.*}} #{{.*}} +// CHECK: ret <3 x [[TYPE]]> %splat.splat half3 test_fmod_half3(half3 p0, half3 p1) { return fmod(p0, p1); } // CHECK: define [[FNATTRS]] <4 x [[TYPE]]> @ -// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn <4 x [[TYPE]]> -// CHECK: ret <4 x [[TYPE]]> %fmod +// CHECK: call reassoc nnan ninf nsz arcp afn [[FNATTRS]] [[TYPE]] @{{.*}}(<4 x [[TYPE]]> noundef nofpclass(nan inf) %{{.*}}, <4 x [[TYPE]]> noundef nofpclass(nan inf) %{{.*}}) #{{.*}} +// CHECK: ret <4 x [[TYPE]]> %splat.splat half4 test_fmod_half4(half4 p0, half4 p1) { return fmod(p0, p1); } // CHECK: define [[FNATTRS]] float @ -// CHECK: %fmod = frem reassoc nnan ninf nsz arcp afn float -// CHECK: ret float %fmod +// CHECK: call reassoc nnan ninf nsz arcp afn [[FNATTRS]] float @{{.*}}(float noundef nofpclass(nan inf) %{{.*}}, float noundef nofpclass(na
[clang] [HLSL] Add bounds checks for the HLSL `fmod` vector arguments and return types (PR #131035)
https://github.com/kmpeng created https://github.com/llvm/llvm-project/pull/131035 Fixes #131024. - Fixes template for scalar and vector `fmod` intrinsic overloads - Fixes `fmod` Sema test >From b33ab9d6a3c87c59ca9b2d3e8e5001f1d79bb620 Mon Sep 17 00:00:00 2001 From: kmpeng Date: Tue, 11 Mar 2025 16:47:27 -0700 Subject: [PATCH] add bounds checks for the hlsl fmod vector arguments and return types --- .../lib/Headers/hlsl/hlsl_intrinsic_helpers.h | 4 +- clang/lib/Headers/hlsl/hlsl_intrinsics.h | 22 -- clang/test/SemaHLSL/BuiltIns/fmod-errors.hlsl | 44 ++- 3 files changed, 53 insertions(+), 17 deletions(-) diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h index 5f7c047dbf340..89ab664e90ba9 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h @@ -58,9 +58,7 @@ constexpr vector reflect_vec_impl(vector I, vector N) { #endif } -template -constexpr enable_if_t::value || is_same::value, T> -fmod_impl(T X, T Y) { +template constexpr T fmod_impl(T X, T Y) { #if !defined(__DIRECTX__) return __builtin_elementwise_fmod(X, Y); #else diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h index 5459cbeb34fd0..a48a8e998a015 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h @@ -129,19 +129,33 @@ const inline float distance(__detail::HLSL_FIXED_VECTOR X, /// Return the floating-point remainder of the x parameter divided by the y /// parameter. 
+template _HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -const inline half fmod(half X, half Y) { return __detail::fmod_impl(X, Y); } +const inline __detail::enable_if_t<__detail::is_arithmetic::Value && + __detail::is_same::value, + T> fmod(T X, T Y) { + return __detail::fmod_impl(X, Y); +} -const inline float fmod(float X, float Y) { return __detail::fmod_impl(X, Y); } +template +const inline __detail::enable_if_t< +__detail::is_arithmetic::Value && __detail::is_same::value, T> +fmod(T X, T Y) { + return __detail::fmod_impl(X, Y); +} template _HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -const inline vector fmod(vector X, vector Y) { +const inline __detail::HLSL_FIXED_VECTOR fmod( +__detail::HLSL_FIXED_VECTOR X, +__detail::HLSL_FIXED_VECTOR Y) { return __detail::fmod_vec_impl(X, Y); } template -const inline vector fmod(vector X, vector Y) { +const inline __detail::HLSL_FIXED_VECTOR +fmod(__detail::HLSL_FIXED_VECTOR X, + __detail::HLSL_FIXED_VECTOR Y) { return __detail::fmod_vec_impl(X, Y); } diff --git a/clang/test/SemaHLSL/BuiltIns/fmod-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/fmod-errors.hlsl index 86f5a6f7bea9c..fc931139e523d 100644 --- a/clang/test/SemaHLSL/BuiltIns/fmod-errors.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/fmod-errors.hlsl @@ -3,8 +3,8 @@ float test_no_second_arg(float2 p0) { return fmod(p0); // expected-error@-1 {{no matching function for call to 'fmod'}} - // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 2 arguments, but 1 was provided}} - // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 2 arguments, but 1 was provided}} + // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires 2 arguments, but 1 was provided}} + // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires 2 arguments, but 1 was provided}} // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires 2 arguments, 
but 1 was provided}} // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires 2 arguments, but 1 was provided}} } @@ -12,22 +12,46 @@ float test_no_second_arg(float2 p0) { float test_too_many_arg(float2 p0) { return fmod(p0, p0, p0); // expected-error@-1 {{no matching function for call to 'fmod'}} - // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 2 arguments, but 3 were provided}} - // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 2 arguments, but 3 were provided}} + // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires 2 arguments, but 3 were provided}} + // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires 2 arguments, but 3 were provided}} // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires 2 arguments, but 3 were provided}} // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires 2 arguments, but 3 were provided}} } float test_double_inputs(double p0, double p1) { return fmod(p0, p1); - // expected-error@-1 {{call to 'fmod' is
[clang] [HLSL] Add bounds checks for the HLSL `fmod` vector arguments and return types (PR #131035)
https://github.com/kmpeng updated https://github.com/llvm/llvm-project/pull/131035 >From 72625f987846d33c11e57ec4e42e98bf211f3389 Mon Sep 17 00:00:00 2001 From: kmpeng Date: Tue, 11 Mar 2025 16:47:27 -0700 Subject: [PATCH] add bounds checks for the hlsl fmod vector arguments and return types --- .../lib/Headers/hlsl/hlsl_intrinsic_helpers.h | 4 +- clang/lib/Headers/hlsl/hlsl_intrinsics.h | 22 -- clang/test/SemaHLSL/BuiltIns/fmod-errors.hlsl | 44 ++- 3 files changed, 53 insertions(+), 17 deletions(-) diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h index 5f7c047dbf340..89ab664e90ba9 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h @@ -58,9 +58,7 @@ constexpr vector reflect_vec_impl(vector I, vector N) { #endif } -template -constexpr enable_if_t::value || is_same::value, T> -fmod_impl(T X, T Y) { +template constexpr T fmod_impl(T X, T Y) { #if !defined(__DIRECTX__) return __builtin_elementwise_fmod(X, Y); #else diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h index 5459cbeb34fd0..a48a8e998a015 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h @@ -129,19 +129,33 @@ const inline float distance(__detail::HLSL_FIXED_VECTOR X, /// Return the floating-point remainder of the x parameter divided by the y /// parameter. 
+template _HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -const inline half fmod(half X, half Y) { return __detail::fmod_impl(X, Y); } +const inline __detail::enable_if_t<__detail::is_arithmetic::Value && + __detail::is_same::value, + T> fmod(T X, T Y) { + return __detail::fmod_impl(X, Y); +} -const inline float fmod(float X, float Y) { return __detail::fmod_impl(X, Y); } +template +const inline __detail::enable_if_t< +__detail::is_arithmetic::Value && __detail::is_same::value, T> +fmod(T X, T Y) { + return __detail::fmod_impl(X, Y); +} template _HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) -const inline vector fmod(vector X, vector Y) { +const inline __detail::HLSL_FIXED_VECTOR fmod( +__detail::HLSL_FIXED_VECTOR X, +__detail::HLSL_FIXED_VECTOR Y) { return __detail::fmod_vec_impl(X, Y); } template -const inline vector fmod(vector X, vector Y) { +const inline __detail::HLSL_FIXED_VECTOR +fmod(__detail::HLSL_FIXED_VECTOR X, + __detail::HLSL_FIXED_VECTOR Y) { return __detail::fmod_vec_impl(X, Y); } diff --git a/clang/test/SemaHLSL/BuiltIns/fmod-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/fmod-errors.hlsl index 86f5a6f7bea9c..fc931139e523d 100644 --- a/clang/test/SemaHLSL/BuiltIns/fmod-errors.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/fmod-errors.hlsl @@ -3,8 +3,8 @@ float test_no_second_arg(float2 p0) { return fmod(p0); // expected-error@-1 {{no matching function for call to 'fmod'}} - // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 2 arguments, but 1 was provided}} - // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 2 arguments, but 1 was provided}} + // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires 2 arguments, but 1 was provided}} + // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires 2 arguments, but 1 was provided}} // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires 2 arguments, 
but 1 was provided}} // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires 2 arguments, but 1 was provided}} } @@ -12,22 +12,46 @@ float test_no_second_arg(float2 p0) { float test_too_many_arg(float2 p0) { return fmod(p0, p0, p0); // expected-error@-1 {{no matching function for call to 'fmod'}} - // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 2 arguments, but 3 were provided}} - // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 2 arguments, but 3 were provided}} + // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires 2 arguments, but 3 were provided}} + // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires 2 arguments, but 3 were provided}} // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires 2 arguments, but 3 were provided}} // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires 2 arguments, but 3 were provided}} } float test_double_inputs(double p0, double p1) { return fmod(p0, p1); - // expected-error@-1 {{call to 'fmod' is ambiguous}} - // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}} - // expected-note@hlsl/hl
[clang] [llvm] [HLSL] Implement the `smoothstep` intrinsic (PR #132288)
https://github.com/kmpeng deleted https://github.com/llvm/llvm-project/pull/132288 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HLSL] Implement the `smoothstep` intrinsic (PR #132288)
https://github.com/kmpeng created https://github.com/llvm/llvm-project/pull/132288 Closes #99156. Tasks completed: - Implement `smoothstep` using HLSL source in `hlsl_intrinsics.h` - Implement the `smoothstep` SPIR-V target built-in in `clang/include/clang/Basic/BuiltinsSPIRV.td` - Add sema checks for `smoothstep` to `CheckSPIRVBuiltinFunctionCall` in `clang/lib/Sema/SemaSPIRV.cpp` - Add codegen for spv `smoothstep` to `EmitSPIRVBuiltinExpr` in `CGBuiltin.cpp` - Add codegen tests to `clang/test/CodeGenHLSL/builtins/smoothstep.hlsl` - Add spv codegen test to `clang/test/CodeGenSPIRV/Builtins/smoothstep.c` - Add sema tests to `clang/test/SemaHLSL/BuiltIns/smoothstep-errors.hlsl` - Add spv sema tests to `clang/test/SemaSPIRV/BuiltIns/smoothstep-errors.c` - Create the `int_spv_smoothstep` intrinsic in `IntrinsicsSPIRV.td` - In SPIRVInstructionSelector.cpp create the `smoothstep` lowering and map it to `int_spv_smoothstep` in `SPIRVInstructionSelector::selectIntrinsic` - Create SPIR-V backend test case in `llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smoothstep.ll` - Create SPIR-V backend test case in `llvm/test/CodeGen/SPIRV/opencl/smoothstep.ll` >From 0a5da660c5aae053d87d556e59f98c121d916b79 Mon Sep 17 00:00:00 2001 From: kmpeng Date: Tue, 18 Mar 2025 13:25:10 -0700 Subject: [PATCH 1/5] create int_spv_smoothstep intrinsic, create smoothstep lowering & map to int_spv_smoothstep, create SPIR-V backend test cases --- llvm/include/llvm/IR/IntrinsicsSPIRV.td | 1 + .../Target/SPIRV/SPIRVInstructionSelector.cpp | 2 + .../SPIRV/hlsl-intrinsics/smoothstep.ll | 60 ++ llvm/test/CodeGen/SPIRV/opencl/smoothstep.ll | 61 +++ 4 files changed, 124 insertions(+) create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smoothstep.ll create mode 100644 llvm/test/CodeGen/SPIRV/opencl/smoothstep.ll diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index 4a0e10db2f1e4..7760961de7b6c 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ 
b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -75,6 +75,7 @@ let TargetPrefix = "spv" in { def int_spv_reflect : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>], [IntrNoMem]>; def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>; def int_spv_saturate : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; + def int_spv_smoothstep : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; def int_spv_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [LLVMMatchType<0>, llvm_anyfloat_ty], [IntrNoMem]>; def int_spv_fdot : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 2aba950037ec3..73b06027823f3 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -3121,6 +3121,8 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg, return selectExtInst(ResVReg, ResType, I, CL::rsqrt, GL::InverseSqrt); case Intrinsic::spv_sign: return selectSign(ResVReg, ResType, I); + case Intrinsic::spv_smoothstep: +return selectExtInst(ResVReg, ResType, I, CL::smoothstep, GL::SmoothStep); case Intrinsic::spv_firstbituhigh: // There is no CL equivalent of FindUMsb return selectFirstBitHigh(ResVReg, ResType, I, /*IsSigned=*/false); case Intrinsic::spv_firstbitshigh: // There is no CL equivalent of FindSMsb diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smoothstep.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smoothstep.ll new file mode 100644 index 0..09f93ab7955d3 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smoothstep.ll @@ -0,0 +1,60 @@ +; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val 
%} + +; Make sure SPIRV operation function calls for smoothstep are lowered correctly. + +; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450" +; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16 +; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4 +; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32 +; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4 + +define noundef half @smoothstep_half(half noundef %a, half noundef %b, half noundef %c) { +entry: + ; CHECK: %[[#]] = OpFunction %[[#float_16]] None %[[#]] + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#float_16]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#float_16]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#float_16]] + ; CHECK: %[[#]] = OpExtInst %[[#float_16]] %[[#op_ext_glsl]] SmoothStep %[[#arg0]] %[[#arg1]] %[[#arg2]] + %spv.smoothstep = call half @
[clang] [llvm] [HLSL] Implement the `smoothstep` intrinsic (PR #132288)
@@ -101,6 +101,44 @@ bool SemaSPIRV::CheckSPIRVBuiltinFunctionCall(unsigned BuiltinID, TheCall->setType(RetTy); break; } + case SPIRV::BI__builtin_spirv_smoothstep: { +if (SemaRef.checkArgCount(TheCall, 3)) + return true; + +ExprResult A = TheCall->getArg(0); +QualType ArgTyA = A.get()->getType(); +auto *VTyA = ArgTyA->getAs(); +if (!(ArgTyA->isScalarType() || VTyA)) { kmpeng wrote: Would using `SemaRef.BuiltinElementwiseTernaryMath` with `ArgTyRestr=Sema::EltwiseBuiltinArgTyRestriction::FloatTy` be appropriate here for the first check? https://github.com/llvm/llvm-project/pull/132288 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HLSL] Implement the `smoothstep` intrinsic (PR #132288)
@@ -0,0 +1,44 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 + +// RUN: %clang_cc1 -O1 -triple spirv-pc-vulkan-compute %s -emit-llvm -o - | FileCheck %s + +typedef float float2 __attribute__((ext_vector_type(2))); +typedef float float3 __attribute__((ext_vector_type(3))); +typedef float float4 __attribute__((ext_vector_type(4))); + +// CHECK-LABEL: define spir_func float @test_smoothstep_float( kmpeng wrote: Yes. It also supports int types so will add both to the tests. https://github.com/llvm/llvm-project/pull/132288 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HLSL] Implement the `smoothstep` intrinsic (PR #132288)
@@ -101,6 +101,44 @@ bool SemaSPIRV::CheckSPIRVBuiltinFunctionCall(unsigned BuiltinID, TheCall->setType(RetTy); break; } + case SPIRV::BI__builtin_spirv_smoothstep: { +if (SemaRef.checkArgCount(TheCall, 3)) + return true; + +ExprResult A = TheCall->getArg(0); +QualType ArgTyA = A.get()->getType(); +auto *VTyA = ArgTyA->getAs(); +if (!(ArgTyA->isScalarType() || VTyA)) { kmpeng wrote: Code updated with suggested changes. https://github.com/llvm/llvm-project/pull/132288 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HLSL] Implement the `smoothstep` intrinsic (PR #132288)
https://github.com/kmpeng deleted https://github.com/llvm/llvm-project/pull/132288 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HLSL] Implement the `smoothstep` intrinsic (PR #132288)
https://github.com/kmpeng updated https://github.com/llvm/llvm-project/pull/132288 >From 0a5da660c5aae053d87d556e59f98c121d916b79 Mon Sep 17 00:00:00 2001 From: kmpeng Date: Tue, 18 Mar 2025 13:25:10 -0700 Subject: [PATCH 1/6] create int_spv_smoothstep intrinsic, create smoothstep lowering & map to int_spv_smoothstep, create SPIR-V backend test cases --- llvm/include/llvm/IR/IntrinsicsSPIRV.td | 1 + .../Target/SPIRV/SPIRVInstructionSelector.cpp | 2 + .../SPIRV/hlsl-intrinsics/smoothstep.ll | 60 ++ llvm/test/CodeGen/SPIRV/opencl/smoothstep.ll | 61 +++ 4 files changed, 124 insertions(+) create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smoothstep.ll create mode 100644 llvm/test/CodeGen/SPIRV/opencl/smoothstep.ll diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index 4a0e10db2f1e4..7760961de7b6c 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -75,6 +75,7 @@ let TargetPrefix = "spv" in { def int_spv_reflect : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>], [IntrNoMem]>; def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>; def int_spv_saturate : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; + def int_spv_smoothstep : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; def int_spv_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [LLVMMatchType<0>, llvm_anyfloat_ty], [IntrNoMem]>; def int_spv_fdot : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 2aba950037ec3..73b06027823f3 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -3121,6 +3121,8 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg, 
return selectExtInst(ResVReg, ResType, I, CL::rsqrt, GL::InverseSqrt); case Intrinsic::spv_sign: return selectSign(ResVReg, ResType, I); + case Intrinsic::spv_smoothstep: +return selectExtInst(ResVReg, ResType, I, CL::smoothstep, GL::SmoothStep); case Intrinsic::spv_firstbituhigh: // There is no CL equivalent of FindUMsb return selectFirstBitHigh(ResVReg, ResType, I, /*IsSigned=*/false); case Intrinsic::spv_firstbitshigh: // There is no CL equivalent of FindSMsb diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smoothstep.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smoothstep.ll new file mode 100644 index 0..09f93ab7955d3 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smoothstep.ll @@ -0,0 +1,60 @@ +; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; Make sure SPIRV operation function calls for smoothstep are lowered correctly. 
+ +; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450" +; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16 +; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4 +; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32 +; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4 + +define noundef half @smoothstep_half(half noundef %a, half noundef %b, half noundef %c) { +entry: + ; CHECK: %[[#]] = OpFunction %[[#float_16]] None %[[#]] + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#float_16]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#float_16]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#float_16]] + ; CHECK: %[[#]] = OpExtInst %[[#float_16]] %[[#op_ext_glsl]] SmoothStep %[[#arg0]] %[[#arg1]] %[[#arg2]] + %spv.smoothstep = call half @llvm.spv.smoothstep.f16(half %a, half %b, half %c) + ret half %spv.smoothstep +} + +define noundef float @smoothstep_float(float noundef %a, float noundef %b, float noundef %c) { +entry: + ; CHECK: %[[#]] = OpFunction %[[#float_32]] None %[[#]] + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#float_32]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#float_32]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#float_32]] + ; CHECK: %[[#]] = OpExtInst %[[#float_32]] %[[#op_ext_glsl]] SmoothStep %[[#arg0]] %[[#arg1]] %[[#arg2]] + %spv.smoothstep = call float @llvm.spv.smoothstep.f32(float %a, float %b, float %c) + ret float %spv.smoothstep +} + +define noundef <4 x half> @smoothstep_half4(<4 x half> noundef %a, <4 x half> noundef %b, <4 x half> noundef %c) { +entry: + ; CHECK: %[[#]] = OpFunction %[[#vec4_float_16]] None %[[#]] + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_16]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#vec4_float_16]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#vec4_float_16]] + ; CHECK: %[[#]] = OpExtInst
[clang] [llvm] [HLSL] Implement the `smoothstep` intrinsic (PR #132288)
@@ -20955,6 +20955,25 @@ Value *CodeGenFunction::EmitSPIRVBuiltinExpr(unsigned BuiltinID, /*ReturnType=*/I->getType(), Intrinsic::spv_reflect, ArrayRef{I, N}, nullptr, "spv.reflect"); } + case SPIRV::BI__builtin_spirv_smoothstep: { +Value *Min = EmitScalarExpr(E->getArg(0)); +Value *Max = EmitScalarExpr(E->getArg(1)); +Value *X = EmitScalarExpr(E->getArg(2)); +assert(E->getArg(0)->getType()->hasFloatingRepresentation() && + E->getArg(1)->getType()->hasFloatingRepresentation() && + E->getArg(2)->getType()->hasFloatingRepresentation() && + "SmoothStep operands must have a float representation"); +assert((E->getArg(0)->getType()->isScalarType() || kmpeng wrote: Code updated with suggested changes. https://github.com/llvm/llvm-project/pull/132288 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HLSL] Implement the `smoothstep` intrinsic (PR #132288)
@@ -20955,6 +20955,25 @@ Value *CodeGenFunction::EmitSPIRVBuiltinExpr(unsigned BuiltinID, /*ReturnType=*/I->getType(), Intrinsic::spv_reflect, ArrayRef{I, N}, nullptr, "spv.reflect"); } + case SPIRV::BI__builtin_spirv_smoothstep: { +Value *Min = EmitScalarExpr(E->getArg(0)); +Value *Max = EmitScalarExpr(E->getArg(1)); +Value *X = EmitScalarExpr(E->getArg(2)); +assert(E->getArg(0)->getType()->hasFloatingRepresentation() && + E->getArg(1)->getType()->hasFloatingRepresentation() && + E->getArg(2)->getType()->hasFloatingRepresentation() && + "SmoothStep operands must have a float representation"); +assert((E->getArg(0)->getType()->isScalarType() || +E->getArg(0)->getType()->isVectorType()) && + (E->getArg(1)->getType()->isScalarType() || +E->getArg(1)->getType()->isVectorType()) && + (E->getArg(2)->getType()->isScalarType() || +E->getArg(2)->getType()->isVectorType()) && + "SmoothStep operands must be a scalar or vector"); +return Builder.CreateIntrinsic( +/*ReturnType=*/Min->getType(), Intrinsic::spv_smoothstep, +ArrayRef{Min, Max, X}, nullptr, "spv.smoothstep"); kmpeng wrote: Code updated with suggested changes. https://github.com/llvm/llvm-project/pull/132288 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [HLSL] Implement the `lit` intrinsic (PR #134171)
https://github.com/kmpeng edited https://github.com/llvm/llvm-project/pull/134171 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HLSL] Implement the `smoothstep` intrinsic (PR #132288)
@@ -0,0 +1,18 @@ +// RUN: %clang_cc1 %s -triple spirv-pc-vulkan-compute -verify + +typedef float float2 __attribute__((ext_vector_type(2))); + +float2 test_no_second_arg(float2 p0) { + return __builtin_spirv_smoothstep(p0); + // expected-error@-1 {{too few arguments to function call, expected 3, have 1}} +} + +float2 test_no_third_arg(float2 p0) { + return __builtin_spirv_smoothstep(p0, p0); + // expected-error@-1 {{too few arguments to function call, expected 3, have 2}} +} + +float2 test_too_many_arg(float2 p0) { + return __builtin_spirv_smoothstep(p0, p0, p0, p0); + // expected-error@-1 {{too many arguments to function call, expected 3, have 4}} +} kmpeng wrote: Code updated with suggested changes. https://github.com/llvm/llvm-project/pull/132288 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HLSL] Implement the `smoothstep` intrinsic (PR #132288)
https://github.com/kmpeng updated https://github.com/llvm/llvm-project/pull/132288 >From 0a5da660c5aae053d87d556e59f98c121d916b79 Mon Sep 17 00:00:00 2001 From: kmpeng Date: Tue, 18 Mar 2025 13:25:10 -0700 Subject: [PATCH 1/7] create int_spv_smoothstep intrinsic, create smoothstep lowering & map to int_spv_smoothstep, create SPIR-V backend test cases --- llvm/include/llvm/IR/IntrinsicsSPIRV.td | 1 + .../Target/SPIRV/SPIRVInstructionSelector.cpp | 2 + .../SPIRV/hlsl-intrinsics/smoothstep.ll | 60 ++ llvm/test/CodeGen/SPIRV/opencl/smoothstep.ll | 61 +++ 4 files changed, 124 insertions(+) create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smoothstep.ll create mode 100644 llvm/test/CodeGen/SPIRV/opencl/smoothstep.ll diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index 4a0e10db2f1e4..7760961de7b6c 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -75,6 +75,7 @@ let TargetPrefix = "spv" in { def int_spv_reflect : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>], [IntrNoMem]>; def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>; def int_spv_saturate : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; + def int_spv_smoothstep : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; def int_spv_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [LLVMMatchType<0>, llvm_anyfloat_ty], [IntrNoMem]>; def int_spv_fdot : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 2aba950037ec3..73b06027823f3 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -3121,6 +3121,8 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg, 
return selectExtInst(ResVReg, ResType, I, CL::rsqrt, GL::InverseSqrt); case Intrinsic::spv_sign: return selectSign(ResVReg, ResType, I); + case Intrinsic::spv_smoothstep: +return selectExtInst(ResVReg, ResType, I, CL::smoothstep, GL::SmoothStep); case Intrinsic::spv_firstbituhigh: // There is no CL equivalent of FindUMsb return selectFirstBitHigh(ResVReg, ResType, I, /*IsSigned=*/false); case Intrinsic::spv_firstbitshigh: // There is no CL equivalent of FindSMsb diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smoothstep.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smoothstep.ll new file mode 100644 index 0..09f93ab7955d3 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smoothstep.ll @@ -0,0 +1,60 @@ +; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; Make sure SPIRV operation function calls for smoothstep are lowered correctly. 
+ +; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450" +; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16 +; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4 +; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32 +; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4 + +define noundef half @smoothstep_half(half noundef %a, half noundef %b, half noundef %c) { +entry: + ; CHECK: %[[#]] = OpFunction %[[#float_16]] None %[[#]] + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#float_16]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#float_16]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#float_16]] + ; CHECK: %[[#]] = OpExtInst %[[#float_16]] %[[#op_ext_glsl]] SmoothStep %[[#arg0]] %[[#arg1]] %[[#arg2]] + %spv.smoothstep = call half @llvm.spv.smoothstep.f16(half %a, half %b, half %c) + ret half %spv.smoothstep +} + +define noundef float @smoothstep_float(float noundef %a, float noundef %b, float noundef %c) { +entry: + ; CHECK: %[[#]] = OpFunction %[[#float_32]] None %[[#]] + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#float_32]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#float_32]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#float_32]] + ; CHECK: %[[#]] = OpExtInst %[[#float_32]] %[[#op_ext_glsl]] SmoothStep %[[#arg0]] %[[#arg1]] %[[#arg2]] + %spv.smoothstep = call float @llvm.spv.smoothstep.f32(float %a, float %b, float %c) + ret float %spv.smoothstep +} + +define noundef <4 x half> @smoothstep_half4(<4 x half> noundef %a, <4 x half> noundef %b, <4 x half> noundef %c) { +entry: + ; CHECK: %[[#]] = OpFunction %[[#vec4_float_16]] None %[[#]] + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_16]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#vec4_float_16]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#vec4_float_16]] + ; CHECK: %[[#]] = OpExtInst
[clang] [HLSL] Implement the `lit` intrinsic (PR #134171)
https://github.com/kmpeng edited https://github.com/llvm/llvm-project/pull/134171 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [HLSL] Implement the `lit` intrinsic (PR #134171)
https://github.com/kmpeng created https://github.com/llvm/llvm-project/pull/134171 Closes #99135. Tasks completed: - Wrote implementation in `hlsl_intrinsics.h`/`hlsl_intrinsic_helpers.h` that matches DXC - Created overloads in `hlsl_compat_overloads.h` that take doubles/ints and explicitly converts them to floats, like DXC does - Added codegen tests to `clang/test/CodeGenHLSL/builtins/lit.hlsl` and `lit-overloads.hlsl` - Added sema tests to `clang/test/SemaHLSL/BuiltIns/lit-errors.hlsl` and `lit-errors-16bit.hlsl` >From 719bb94279f64f134c826faa22898e4e549bb23c Mon Sep 17 00:00:00 2001 From: kmpeng Date: Thu, 27 Mar 2025 14:39:27 -0700 Subject: [PATCH 1/6] finished lit implementation, added codegen and sema tests --- .../lib/Headers/hlsl/hlsl_intrinsic_helpers.h | 12 + clang/lib/Headers/hlsl/hlsl_intrinsics.h | 31 clang/test/CodeGenHLSL/builtins/lit.hlsl | 36 + clang/test/SemaHLSL/BuiltIns/lit-errors.hlsl | 50 +++ 4 files changed, 129 insertions(+) create mode 100644 clang/test/CodeGenHLSL/builtins/lit.hlsl create mode 100644 clang/test/SemaHLSL/BuiltIns/lit-errors.hlsl diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h index 8cdd63d7e07bb..619d4c59f8074 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h @@ -101,6 +101,18 @@ constexpr vector smoothstep_vec_impl(vector Min, vector Max, #endif } +template +constexpr vector lit_impl(T N_dot_l, T N_dot_h, T M) { + bool Cond1 = N_dot_l < 0; + T ClampedP1 = select(Cond1, 0, N_dot_l); + vector Result = {1, ClampedP1, 0, 1}; + bool CombinedCond = or (Cond1, (N_dot_h < 0)); + T LogP2 = log(N_dot_h); + T Exp = exp(LogP2 * M); + Result[2] = select(CombinedCond, 0, Exp); + return Result; +} + } // namespace __detail } // namespace hlsl diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h index fd799b8d874ae..5b6692d2a4281 100644 --- 
a/clang/lib/Headers/hlsl/hlsl_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h @@ -253,6 +253,37 @@ const inline float length(__detail::HLSL_FIXED_VECTOR X) { return __detail::length_vec_impl(X); } +//===--===// +// lit builtins +//===--===// + +/// \fn vector lit(T x, T y) +/// \brief Returns a lighting coefficient vector. +/// \param N_dot_l The dot product of the normalized surface normal and the +/// light vector. +/// \param N_dot_h The dot product of the half-angle vector and the surface +/// normal. +/// \param M A specular exponent. +/// +/// This function returns a lighting coefficient vector (ambient, diffuse, +/// specular, 1). + +template +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +const inline __detail::enable_if_t<__detail::is_arithmetic::Value && + __detail::is_same::value, + vector> lit(T N_dot_l, T N_dot_h, T M) { + return __detail::lit_impl(N_dot_l, N_dot_h, M); +} + +template +const inline __detail::enable_if_t<__detail::is_arithmetic::Value && + __detail::is_same::value, + vector> +lit(T N_dot_l, T N_dot_h, T M) { + return __detail::lit_impl(N_dot_l, N_dot_h, M); +} + //===--===// // D3DCOLORtoUBYTE4 builtin //===--===// diff --git a/clang/test/CodeGenHLSL/builtins/lit.hlsl b/clang/test/CodeGenHLSL/builtins/lit.hlsl new file mode 100644 index 0..3fb0a2c5b5d6f --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/lit.hlsl @@ -0,0 +1,36 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.3-library %s -fnative-half-type -emit-llvm -O1 -o - | FileCheck %s + +// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x half> @_Z13test_lit_halfDhDhDh( +// CHECK-SAME: half noundef nofpclass(nan inf) [[N_DOT_L:%.*]], half noundef nofpclass(nan inf) [[N_DOT_H:%.*]], half noundef nofpclass(nan inf) [[M:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT:[[CMP_I:%.*]] = fcmp reassoc 
nnan ninf nsz arcp afn olt half [[N_DOT_L]], 0xH +// CHECK-NEXT:[[HLSL_SELECT_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn half @llvm.maxnum.f16(half [[N_DOT_L]], half 0xH) +// CHECK-NEXT:[[VECINIT2_I:%.*]] = insertelement <4 x half> , half [[HLSL_SELECT_I]], i64 1 +// CHECK-NEXT:[[CMP4_I:%.*]] = fcmp reassoc nnan ninf nsz arcp afn olt half [[N_DOT_H]], 0xH +// CHECK-NEXT:[[HLSL_OR_I:%.*]] = or i1 [[CMP_I]], [[CMP4_I]] +// CHECK-NEXT:[
[clang] [HLSL] Implement the `lit` intrinsic (PR #134171)
https://github.com/kmpeng edited https://github.com/llvm/llvm-project/pull/134171 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [HLSL] Implement the `lit` intrinsic (PR #134171)
https://github.com/kmpeng edited https://github.com/llvm/llvm-project/pull/134171 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [HLSL] Implement the `lit` intrinsic (PR #134171)
https://github.com/kmpeng deleted https://github.com/llvm/llvm-project/pull/134171 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [HLSL] Implement the `lit` intrinsic (PR #134171)
@@ -280,6 +280,22 @@ constexpr bool4 isinf(double4 V) { return isinf((float4)V); } _DXC_COMPAT_TERNARY_DOUBLE_OVERLOADS(lerp) _DXC_COMPAT_TERNARY_INTEGER_OVERLOADS(lerp) +//===--===// +// lit builtins overloads +//===--===// + +template +constexpr __detail::enable_if_t<__detail::is_arithmetic::Value && +(__detail::is_same::value || kmpeng wrote: The ambiguous error doesn't appear anymore. But removing the templates lets vector half and float inputs pass so I don't think we can do that. https://github.com/llvm/llvm-project/pull/134171 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [HLSL] Implement the `lit` intrinsic (PR #134171)
https://github.com/kmpeng updated https://github.com/llvm/llvm-project/pull/134171 >From 719bb94279f64f134c826faa22898e4e549bb23c Mon Sep 17 00:00:00 2001 From: kmpeng Date: Thu, 27 Mar 2025 14:39:27 -0700 Subject: [PATCH 1/7] finished lit implementation, added codegen and sema tests --- .../lib/Headers/hlsl/hlsl_intrinsic_helpers.h | 12 + clang/lib/Headers/hlsl/hlsl_intrinsics.h | 31 clang/test/CodeGenHLSL/builtins/lit.hlsl | 36 + clang/test/SemaHLSL/BuiltIns/lit-errors.hlsl | 50 +++ 4 files changed, 129 insertions(+) create mode 100644 clang/test/CodeGenHLSL/builtins/lit.hlsl create mode 100644 clang/test/SemaHLSL/BuiltIns/lit-errors.hlsl diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h index 8cdd63d7e07bb..619d4c59f8074 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h @@ -101,6 +101,18 @@ constexpr vector smoothstep_vec_impl(vector Min, vector Max, #endif } +template +constexpr vector lit_impl(T N_dot_l, T N_dot_h, T M) { + bool Cond1 = N_dot_l < 0; + T ClampedP1 = select(Cond1, 0, N_dot_l); + vector Result = {1, ClampedP1, 0, 1}; + bool CombinedCond = or (Cond1, (N_dot_h < 0)); + T LogP2 = log(N_dot_h); + T Exp = exp(LogP2 * M); + Result[2] = select(CombinedCond, 0, Exp); + return Result; +} + } // namespace __detail } // namespace hlsl diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h index fd799b8d874ae..5b6692d2a4281 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h @@ -253,6 +253,37 @@ const inline float length(__detail::HLSL_FIXED_VECTOR X) { return __detail::length_vec_impl(X); } +//===--===// +// lit builtins +//===--===// + +/// \fn vector lit(T x, T y) +/// \brief Returns a lighting coefficient vector. +/// \param N_dot_l The dot product of the normalized surface normal and the +/// light vector. 
+/// \param N_dot_h The dot product of the half-angle vector and the surface +/// normal. +/// \param M A specular exponent. +/// +/// This function returns a lighting coefficient vector (ambient, diffuse, +/// specular, 1). + +template +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +const inline __detail::enable_if_t<__detail::is_arithmetic::Value && + __detail::is_same::value, + vector> lit(T N_dot_l, T N_dot_h, T M) { + return __detail::lit_impl(N_dot_l, N_dot_h, M); +} + +template +const inline __detail::enable_if_t<__detail::is_arithmetic::Value && + __detail::is_same::value, + vector> +lit(T N_dot_l, T N_dot_h, T M) { + return __detail::lit_impl(N_dot_l, N_dot_h, M); +} + //===--===// // D3DCOLORtoUBYTE4 builtin //===--===// diff --git a/clang/test/CodeGenHLSL/builtins/lit.hlsl b/clang/test/CodeGenHLSL/builtins/lit.hlsl new file mode 100644 index 0..3fb0a2c5b5d6f --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/lit.hlsl @@ -0,0 +1,36 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.3-library %s -fnative-half-type -emit-llvm -O1 -o - | FileCheck %s + +// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x half> @_Z13test_lit_halfDhDhDh( +// CHECK-SAME: half noundef nofpclass(nan inf) [[N_DOT_L:%.*]], half noundef nofpclass(nan inf) [[N_DOT_H:%.*]], half noundef nofpclass(nan inf) [[M:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT:[[CMP_I:%.*]] = fcmp reassoc nnan ninf nsz arcp afn olt half [[N_DOT_L]], 0xH +// CHECK-NEXT:[[HLSL_SELECT_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn half @llvm.maxnum.f16(half [[N_DOT_L]], half 0xH) +// CHECK-NEXT:[[VECINIT2_I:%.*]] = insertelement <4 x half> , half [[HLSL_SELECT_I]], i64 1 +// CHECK-NEXT:[[CMP4_I:%.*]] = fcmp reassoc nnan ninf nsz arcp afn olt half [[N_DOT_H]], 0xH +// CHECK-NEXT:[[HLSL_OR_I:%.*]] = or i1 [[CMP_I]], [[CMP4_I]] +// 
CHECK-NEXT:[[ELT_LOG_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn half @llvm.log.f16(half [[N_DOT_H]]) +// CHECK-NEXT:[[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn half [[ELT_LOG_I]], [[M]] +// CHECK-NEXT:[[ELT_EXP_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn half @llvm.exp.f16(half [[MUL_I]]) +// CHECK-NEXT:[[HLSL_SELECT7_I:%.*]] = select reassoc nnan ninf nsz arcp afn i1 [[HLSL_OR_I]], half 0xH, half [[ELT_EXP_I]]
[clang] [HLSL] Implement the `lit` intrinsic (PR #134171)
@@ -253,6 +253,37 @@ const inline float length(__detail::HLSL_FIXED_VECTOR X) { return __detail::length_vec_impl(X); } +//===--===// +// lit builtins +//===--===// + +/// \fn vector lit(T NDotL, T NDotH, T M) +/// \brief Returns a lighting coefficient vector. +/// \param NDotL The dot product of the normalized surface normal and the +/// light vector. +/// \param NDotH The dot product of the half-angle vector and the surface +/// normal. +/// \param M A specular exponent. +/// +/// This function returns a lighting coefficient vector (ambient, diffuse, +/// specular, 1). + +template +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +const inline __detail::enable_if_t<__detail::is_arithmetic::Value && kmpeng wrote: > I believe you should not worry about implicit truncation here. Hm ok, curious what other people's thoughts are on this. I was just trying to match the behavior of DXC since calling with vector inputs on DXC doesn't compile. I still think `_DXC_COMPAT_TERNARY_DOUBLE_OVERLOADS(lit)` doesn't work regardless because the return type is always a vector of 4. https://github.com/llvm/llvm-project/pull/134171 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [HLSL] Implement the `lit` intrinsic (PR #134171)
@@ -0,0 +1,36 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.3-library %s -fnative-half-type -emit-llvm -O1 -o - | FileCheck %s + +// CHECK-LABEL: test_lit_half +// CHECK-SAME: half noundef nofpclass(nan inf) [[NDOTL:%.*]], half noundef nofpclass(nan inf) [[NDOTH:%.*]], half noundef nofpclass(nan inf) [[M:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT:[[CMP_I:%.*]] = fcmp reassoc nnan ninf nsz arcp afn olt half [[NDOTL]], 0xH +// CHECK-NEXT:[[HLSL_SELECT_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn half @llvm.maxnum.f16(half [[NDOTL]], half 0xH) +// CHECK-NEXT:[[VECINIT2_I:%.*]] = insertelement <4 x half> , half [[HLSL_SELECT_I]], i64 1 +// CHECK-NEXT:[[CMP4_I:%.*]] = fcmp reassoc nnan ninf nsz arcp afn olt half [[NDOTH]], 0xH +// CHECK-NEXT:[[HLSL_OR_I:%.*]] = or i1 [[CMP_I]], [[CMP4_I]] +// CHECK-NEXT:[[ELT_LOG_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn half @llvm.log.f16(half [[NDOTH]]) +// CHECK-NEXT:[[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn half [[ELT_LOG_I]], [[M]] +// CHECK-NEXT:[[ELT_EXP_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn half @llvm.exp.f16(half [[MUL_I]]) +// CHECK-NEXT:[[HLSL_SELECT7_I:%.*]] = select reassoc nnan ninf nsz arcp afn i1 [[HLSL_OR_I]], half 0xH, half [[ELT_EXP_I]] +// CHECK-NEXT:[[VECINS_I:%.*]] = insertelement <4 x half> [[VECINIT2_I]], half [[HLSL_SELECT7_I]], i64 2 +// CHECK-NEXT:ret <4 x half> [[VECINS_I]] +// kmpeng wrote: Code updated https://github.com/llvm/llvm-project/pull/134171 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [HLSL] Implement the `lit` intrinsic (PR #134171)
https://github.com/kmpeng edited https://github.com/llvm/llvm-project/pull/134171 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [HLSL] Implement the `lit` intrinsic (PR #134171)
https://github.com/kmpeng edited https://github.com/llvm/llvm-project/pull/134171 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [HLSL] Implement the `lit` intrinsic (PR #134171)
@@ -101,6 +101,16 @@ constexpr vector smoothstep_vec_impl(vector Min, vector Max, #endif } +template constexpr vector lit_impl(T NDotL, T NDotH, T M) { + bool DiffuseCond = NDotL < 0; + T Diffuse = select(DiffuseCond, 0, NDotL); + vector Result = {1, Diffuse, 0, 1}; + bool SpecularCond = or (DiffuseCond, (NDotH < 0)); kmpeng wrote: Code updated https://github.com/llvm/llvm-project/pull/134171 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [HLSL] Implement the `lit` intrinsic (PR #134171)
https://github.com/kmpeng updated https://github.com/llvm/llvm-project/pull/134171 >From 719bb94279f64f134c826faa22898e4e549bb23c Mon Sep 17 00:00:00 2001 From: kmpeng Date: Thu, 27 Mar 2025 14:39:27 -0700 Subject: [PATCH 1/9] finished lit implementation, added codegen and sema tests --- .../lib/Headers/hlsl/hlsl_intrinsic_helpers.h | 12 + clang/lib/Headers/hlsl/hlsl_intrinsics.h | 31 clang/test/CodeGenHLSL/builtins/lit.hlsl | 36 + clang/test/SemaHLSL/BuiltIns/lit-errors.hlsl | 50 +++ 4 files changed, 129 insertions(+) create mode 100644 clang/test/CodeGenHLSL/builtins/lit.hlsl create mode 100644 clang/test/SemaHLSL/BuiltIns/lit-errors.hlsl diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h index 8cdd63d7e07bb..619d4c59f8074 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h @@ -101,6 +101,18 @@ constexpr vector smoothstep_vec_impl(vector Min, vector Max, #endif } +template +constexpr vector lit_impl(T N_dot_l, T N_dot_h, T M) { + bool Cond1 = N_dot_l < 0; + T ClampedP1 = select(Cond1, 0, N_dot_l); + vector Result = {1, ClampedP1, 0, 1}; + bool CombinedCond = or (Cond1, (N_dot_h < 0)); + T LogP2 = log(N_dot_h); + T Exp = exp(LogP2 * M); + Result[2] = select(CombinedCond, 0, Exp); + return Result; +} + } // namespace __detail } // namespace hlsl diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h index fd799b8d874ae..5b6692d2a4281 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h @@ -253,6 +253,37 @@ const inline float length(__detail::HLSL_FIXED_VECTOR X) { return __detail::length_vec_impl(X); } +//===--===// +// lit builtins +//===--===// + +/// \fn vector lit(T x, T y) +/// \brief Returns a lighting coefficient vector. +/// \param N_dot_l The dot product of the normalized surface normal and the +/// light vector. 
+/// \param N_dot_h The dot product of the half-angle vector and the surface +/// normal. +/// \param M A specular exponent. +/// +/// This function returns a lighting coefficient vector (ambient, diffuse, +/// specular, 1). + +template +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +const inline __detail::enable_if_t<__detail::is_arithmetic::Value && + __detail::is_same::value, + vector> lit(T N_dot_l, T N_dot_h, T M) { + return __detail::lit_impl(N_dot_l, N_dot_h, M); +} + +template +const inline __detail::enable_if_t<__detail::is_arithmetic::Value && + __detail::is_same::value, + vector> +lit(T N_dot_l, T N_dot_h, T M) { + return __detail::lit_impl(N_dot_l, N_dot_h, M); +} + //===--===// // D3DCOLORtoUBYTE4 builtin //===--===// diff --git a/clang/test/CodeGenHLSL/builtins/lit.hlsl b/clang/test/CodeGenHLSL/builtins/lit.hlsl new file mode 100644 index 0..3fb0a2c5b5d6f --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/lit.hlsl @@ -0,0 +1,36 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.3-library %s -fnative-half-type -emit-llvm -O1 -o - | FileCheck %s + +// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x half> @_Z13test_lit_halfDhDhDh( +// CHECK-SAME: half noundef nofpclass(nan inf) [[N_DOT_L:%.*]], half noundef nofpclass(nan inf) [[N_DOT_H:%.*]], half noundef nofpclass(nan inf) [[M:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT:[[CMP_I:%.*]] = fcmp reassoc nnan ninf nsz arcp afn olt half [[N_DOT_L]], 0xH +// CHECK-NEXT:[[HLSL_SELECT_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn half @llvm.maxnum.f16(half [[N_DOT_L]], half 0xH) +// CHECK-NEXT:[[VECINIT2_I:%.*]] = insertelement <4 x half> , half [[HLSL_SELECT_I]], i64 1 +// CHECK-NEXT:[[CMP4_I:%.*]] = fcmp reassoc nnan ninf nsz arcp afn olt half [[N_DOT_H]], 0xH +// CHECK-NEXT:[[HLSL_OR_I:%.*]] = or i1 [[CMP_I]], [[CMP4_I]] +// 
CHECK-NEXT:[[ELT_LOG_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn half @llvm.log.f16(half [[N_DOT_H]]) +// CHECK-NEXT:[[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn half [[ELT_LOG_I]], [[M]] +// CHECK-NEXT:[[ELT_EXP_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn half @llvm.exp.f16(half [[MUL_I]]) +// CHECK-NEXT:[[HLSL_SELECT7_I:%.*]] = select reassoc nnan ninf nsz arcp afn i1 [[HLSL_OR_I]], half 0xH, half [[ELT_EXP_I]]
[clang] [HLSL] Implement the `lit` intrinsic (PR #134171)
@@ -253,6 +253,37 @@ const inline float length(__detail::HLSL_FIXED_VECTOR X) { return __detail::length_vec_impl(X); } +//===--===// +// lit builtins +//===--===// + +/// \fn vector lit(T NDotL, T NDotH, T M) +/// \brief Returns a lighting coefficient vector. +/// \param NDotL The dot product of the normalized surface normal and the +/// light vector. +/// \param NDotH The dot product of the half-angle vector and the surface +/// normal. +/// \param M A specular exponent. +/// +/// This function returns a lighting coefficient vector (ambient, diffuse, +/// specular, 1). + +template +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +const inline __detail::enable_if_t<__detail::is_arithmetic::Value && kmpeng wrote: > If DXC does implicit cast then i think we should as well. DXC doesn't implicit cast so going to leave the code as is https://github.com/llvm/llvm-project/pull/134171 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [HLSL] Implement the `lit` intrinsic (PR #134171)
@@ -280,6 +280,22 @@ constexpr bool4 isinf(double4 V) { return isinf((float4)V); } _DXC_COMPAT_TERNARY_DOUBLE_OVERLOADS(lerp) _DXC_COMPAT_TERNARY_INTEGER_OVERLOADS(lerp) +//===--===// +// lit builtins overloads +//===--===// + +template +constexpr __detail::enable_if_t<__detail::is_arithmetic::Value && +(__detail::is_same::value || kmpeng wrote: Code updated to allow the bool overload https://github.com/llvm/llvm-project/pull/134171 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [HLSL] Implement the `lit` intrinsic (PR #134171)
https://github.com/kmpeng edited https://github.com/llvm/llvm-project/pull/134171 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [HLSL] Implement the `lit` intrinsic (PR #134171)
@@ -280,6 +280,22 @@ constexpr bool4 isinf(double4 V) { return isinf((float4)V); } _DXC_COMPAT_TERNARY_DOUBLE_OVERLOADS(lerp) _DXC_COMPAT_TERNARY_INTEGER_OVERLOADS(lerp) +//===--===// +// lit builtins overloads +//===--===// + +template +constexpr __detail::enable_if_t<__detail::is_arithmetic::Value && +(__detail::is_same::value || kmpeng wrote: The ambiguous error doesn't appear, but the half definition in `hlsl_intrinsics.h` doesn't seem to be used anymore when we're only checking `__detail::is_arithmetic` in the overloads. The tablegen shows half inputs being cast to floats and then the float definition of `lit` being called. https://github.com/llvm/llvm-project/pull/134171 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HLSL] Implement the `smoothstep` intrinsic (PR #132288)
@@ -101,6 +101,52 @@ bool SemaSPIRV::CheckSPIRVBuiltinFunctionCall(unsigned BuiltinID, TheCall->setType(RetTy); break; } + case SPIRV::BI__builtin_spirv_smoothstep: { +if (SemaRef.checkArgCount(TheCall, 3)) + return true; + +// check if the all arguments have floating representation +ExprResult A = TheCall->getArg(0); +QualType ArgTyA = A.get()->getType(); +if (!ArgTyA->hasFloatingRepresentation()) { + SemaRef.Diag(A.get()->getBeginLoc(), + diag::err_typecheck_convert_incompatible) kmpeng wrote: Don't we also want to say the [api supports matrices](https://github.com/llvm/llvm-project/pull/132288#discussion_r2008116154) though? I don't think I can do that with `err_builtin_invalid_arg_type` unless I edit `DiagnosticSemaKinds.td`—the closest I can get is this message: ``` 1st argument must be a scalar or vector of floating-point types (was 'int') ``` https://github.com/llvm/llvm-project/pull/132288 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HLSL] Implement the `smoothstep` intrinsic (PR #132288)
https://github.com/kmpeng updated https://github.com/llvm/llvm-project/pull/132288 >From 0a5da660c5aae053d87d556e59f98c121d916b79 Mon Sep 17 00:00:00 2001 From: kmpeng Date: Tue, 18 Mar 2025 13:25:10 -0700 Subject: [PATCH 1/8] create int_spv_smoothstep intrinsic, create smoothstep lowering & map to int_spv_smoothstep, create SPIR-V backend test cases --- llvm/include/llvm/IR/IntrinsicsSPIRV.td | 1 + .../Target/SPIRV/SPIRVInstructionSelector.cpp | 2 + .../SPIRV/hlsl-intrinsics/smoothstep.ll | 60 ++ llvm/test/CodeGen/SPIRV/opencl/smoothstep.ll | 61 +++ 4 files changed, 124 insertions(+) create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smoothstep.ll create mode 100644 llvm/test/CodeGen/SPIRV/opencl/smoothstep.ll diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index 4a0e10db2f1e4..7760961de7b6c 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -75,6 +75,7 @@ let TargetPrefix = "spv" in { def int_spv_reflect : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>], [IntrNoMem]>; def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>; def int_spv_saturate : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; + def int_spv_smoothstep : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; def int_spv_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [LLVMMatchType<0>, llvm_anyfloat_ty], [IntrNoMem]>; def int_spv_fdot : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 2aba950037ec3..73b06027823f3 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -3121,6 +3121,8 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg, 
return selectExtInst(ResVReg, ResType, I, CL::rsqrt, GL::InverseSqrt); case Intrinsic::spv_sign: return selectSign(ResVReg, ResType, I); + case Intrinsic::spv_smoothstep: +return selectExtInst(ResVReg, ResType, I, CL::smoothstep, GL::SmoothStep); case Intrinsic::spv_firstbituhigh: // There is no CL equivalent of FindUMsb return selectFirstBitHigh(ResVReg, ResType, I, /*IsSigned=*/false); case Intrinsic::spv_firstbitshigh: // There is no CL equivalent of FindSMsb diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smoothstep.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smoothstep.ll new file mode 100644 index 0..09f93ab7955d3 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smoothstep.ll @@ -0,0 +1,60 @@ +; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; Make sure SPIRV operation function calls for smoothstep are lowered correctly. 
+ +; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450" +; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16 +; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4 +; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32 +; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4 + +define noundef half @smoothstep_half(half noundef %a, half noundef %b, half noundef %c) { +entry: + ; CHECK: %[[#]] = OpFunction %[[#float_16]] None %[[#]] + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#float_16]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#float_16]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#float_16]] + ; CHECK: %[[#]] = OpExtInst %[[#float_16]] %[[#op_ext_glsl]] SmoothStep %[[#arg0]] %[[#arg1]] %[[#arg2]] + %spv.smoothstep = call half @llvm.spv.smoothstep.f16(half %a, half %b, half %c) + ret half %spv.smoothstep +} + +define noundef float @smoothstep_float(float noundef %a, float noundef %b, float noundef %c) { +entry: + ; CHECK: %[[#]] = OpFunction %[[#float_32]] None %[[#]] + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#float_32]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#float_32]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#float_32]] + ; CHECK: %[[#]] = OpExtInst %[[#float_32]] %[[#op_ext_glsl]] SmoothStep %[[#arg0]] %[[#arg1]] %[[#arg2]] + %spv.smoothstep = call float @llvm.spv.smoothstep.f32(float %a, float %b, float %c) + ret float %spv.smoothstep +} + +define noundef <4 x half> @smoothstep_half4(<4 x half> noundef %a, <4 x half> noundef %b, <4 x half> noundef %c) { +entry: + ; CHECK: %[[#]] = OpFunction %[[#vec4_float_16]] None %[[#]] + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_16]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#vec4_float_16]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#vec4_float_16]] + ; CHECK: %[[#]] = OpExtInst
[clang] [llvm] [HLSL] Implement the `smoothstep` intrinsic (PR #132288)
@@ -0,0 +1,183 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ +// RUN: -emit-llvm -O1 -o - | FileCheck %s +// RUN: %clang_cc1 -finclude-default-header -triple \ +// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \ +// RUN: -emit-llvm -O1 -o - | FileCheck %s --check-prefix=SPVCHECK + +// CHECK-LABEL: define noundef nofpclass(nan inf) half @_Z20test_smoothstep_halfDhDhDh( +// CHECK-SAME: half noundef nofpclass(nan inf) [[MIN:%.*]], half noundef nofpclass(nan inf) [[MAX:%.*]], half noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT:[[SUB_I:%.*]] = fsub reassoc nnan ninf nsz arcp afn half [[X]], [[MIN]] +// CHECK-NEXT:[[SUB1_I:%.*]] = fsub reassoc nnan ninf nsz arcp afn half [[MAX]], [[MIN]] +// CHECK-NEXT:[[DIV_I:%.*]] = fdiv reassoc nnan ninf nsz arcp afn half [[SUB_I]], [[SUB1_I]] +// CHECK-NEXT:[[HLSL_SATURATE_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn half @llvm.dx.saturate.f16(half [[DIV_I]]) +// CHECK-NEXT:[[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn half [[HLSL_SATURATE_I]], 0xH4000 +// CHECK-NEXT:[[SUB2_I:%.*]] = fsub reassoc nnan ninf nsz arcp afn half 0xH4200, [[MUL_I]] +// CHECK-NEXT:[[TMP0:%.*]] = fmul reassoc nnan ninf nsz arcp afn half [[HLSL_SATURATE_I]], [[HLSL_SATURATE_I]] +// CHECK-NEXT:[[MUL4_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn half [[TMP0]], [[SUB2_I]] +// CHECK-NEXT:ret half [[MUL4_I]] +// +// SPVCHECK-LABEL: define spir_func noundef nofpclass(nan inf) half @_Z20test_smoothstep_halfDhDhDh( +// SPVCHECK-SAME: half noundef nofpclass(nan inf) [[MIN:%.*]], half noundef nofpclass(nan inf) [[MAX:%.*]], half noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// SPVCHECK-NEXT: [[ENTRY:.*:]] +// SPVCHECK-NEXT:[[CONV_I:%.*]] = fpext reassoc nnan 
ninf nsz arcp afn half [[MIN]] to double +// SPVCHECK-NEXT:[[CONV1_I:%.*]] = fpext reassoc nnan ninf nsz arcp afn half [[MAX]] to double +// SPVCHECK-NEXT:[[CONV2_I:%.*]] = fpext reassoc nnan ninf nsz arcp afn half [[X]] to double +// SPVCHECK-NEXT:[[SPV_SMOOTHSTEP_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn double @llvm.spv.smoothstep.f64(double [[CONV_I]], double [[CONV1_I]], double [[CONV2_I]]) kmpeng wrote: Yes, I agree. This is what I was asking about earlier. I'm not sure why this is happening. https://github.com/llvm/llvm-project/pull/132288 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HLSL] Implement the `smoothstep` intrinsic (PR #132288)
https://github.com/kmpeng edited https://github.com/llvm/llvm-project/pull/132288 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HLSL] Implement the `smoothstep` intrinsic (PR #132288)
@@ -101,6 +101,52 @@ bool SemaSPIRV::CheckSPIRVBuiltinFunctionCall(unsigned BuiltinID, TheCall->setType(RetTy); break; } + case SPIRV::BI__builtin_spirv_smoothstep: { +if (SemaRef.checkArgCount(TheCall, 3)) + return true; + +// check if the all arguments have floating representation +ExprResult A = TheCall->getArg(0); kmpeng wrote: I'm already checking `hasFloatingRepresentation` for all of the args and emitting the corresponding diag. Do you want me to move the individual checks into a for loop? https://github.com/llvm/llvm-project/pull/132288 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HLSL] Implement the `smoothstep` intrinsic (PR #132288)
https://github.com/kmpeng updated https://github.com/llvm/llvm-project/pull/132288 >From 0a5da660c5aae053d87d556e59f98c121d916b79 Mon Sep 17 00:00:00 2001 From: kmpeng Date: Tue, 18 Mar 2025 13:25:10 -0700 Subject: [PATCH 1/7] create int_spv_smoothstep intrinsic, create smoothstep lowering & map to int_spv_smoothstep, create SPIR-V backend test cases --- llvm/include/llvm/IR/IntrinsicsSPIRV.td | 1 + .../Target/SPIRV/SPIRVInstructionSelector.cpp | 2 + .../SPIRV/hlsl-intrinsics/smoothstep.ll | 60 ++ llvm/test/CodeGen/SPIRV/opencl/smoothstep.ll | 61 +++ 4 files changed, 124 insertions(+) create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smoothstep.ll create mode 100644 llvm/test/CodeGen/SPIRV/opencl/smoothstep.ll diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index 4a0e10db2f1e4..7760961de7b6c 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -75,6 +75,7 @@ let TargetPrefix = "spv" in { def int_spv_reflect : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>], [IntrNoMem]>; def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>; def int_spv_saturate : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; + def int_spv_smoothstep : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; def int_spv_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [LLVMMatchType<0>, llvm_anyfloat_ty], [IntrNoMem]>; def int_spv_fdot : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 2aba950037ec3..73b06027823f3 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -3121,6 +3121,8 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg, 
return selectExtInst(ResVReg, ResType, I, CL::rsqrt, GL::InverseSqrt); case Intrinsic::spv_sign: return selectSign(ResVReg, ResType, I); + case Intrinsic::spv_smoothstep: +return selectExtInst(ResVReg, ResType, I, CL::smoothstep, GL::SmoothStep); case Intrinsic::spv_firstbituhigh: // There is no CL equivalent of FindUMsb return selectFirstBitHigh(ResVReg, ResType, I, /*IsSigned=*/false); case Intrinsic::spv_firstbitshigh: // There is no CL equivalent of FindSMsb diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smoothstep.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smoothstep.ll new file mode 100644 index 0..09f93ab7955d3 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smoothstep.ll @@ -0,0 +1,60 @@ +; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; Make sure SPIRV operation function calls for smoothstep are lowered correctly. 
+ +; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450" +; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16 +; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4 +; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32 +; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4 + +define noundef half @smoothstep_half(half noundef %a, half noundef %b, half noundef %c) { +entry: + ; CHECK: %[[#]] = OpFunction %[[#float_16]] None %[[#]] + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#float_16]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#float_16]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#float_16]] + ; CHECK: %[[#]] = OpExtInst %[[#float_16]] %[[#op_ext_glsl]] SmoothStep %[[#arg0]] %[[#arg1]] %[[#arg2]] + %spv.smoothstep = call half @llvm.spv.smoothstep.f16(half %a, half %b, half %c) + ret half %spv.smoothstep +} + +define noundef float @smoothstep_float(float noundef %a, float noundef %b, float noundef %c) { +entry: + ; CHECK: %[[#]] = OpFunction %[[#float_32]] None %[[#]] + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#float_32]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#float_32]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#float_32]] + ; CHECK: %[[#]] = OpExtInst %[[#float_32]] %[[#op_ext_glsl]] SmoothStep %[[#arg0]] %[[#arg1]] %[[#arg2]] + %spv.smoothstep = call float @llvm.spv.smoothstep.f32(float %a, float %b, float %c) + ret float %spv.smoothstep +} + +define noundef <4 x half> @smoothstep_half4(<4 x half> noundef %a, <4 x half> noundef %b, <4 x half> noundef %c) { +entry: + ; CHECK: %[[#]] = OpFunction %[[#vec4_float_16]] None %[[#]] + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_16]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#vec4_float_16]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#vec4_float_16]] + ; CHECK: %[[#]] = OpExtInst
[clang] [llvm] [HLSL] Implement the `smoothstep` intrinsic (PR #132288)
https://github.com/kmpeng edited https://github.com/llvm/llvm-project/pull/132288 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HLSL] Implement the `smoothstep` intrinsic (PR #132288)
https://github.com/kmpeng edited https://github.com/llvm/llvm-project/pull/132288 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HLSL] Implement the `smoothstep` intrinsic (PR #132288)
https://github.com/kmpeng deleted https://github.com/llvm/llvm-project/pull/132288 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HLSL] Implement the `smoothstep` intrinsic (PR #132288)
@@ -0,0 +1,60 @@ +; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} kmpeng wrote: Seems like this change produces this error in the SPIR-V tests. ``` error: line 1: Capability Kernel is not allowed by Vulkan 1.2 specification (or requires extension) OpCapability Kernel ``` Still kind of new to this—do you know what's going on here? https://github.com/llvm/llvm-project/pull/132288 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HLSL] Implement the `smoothstep` intrinsic (PR #132288)
https://github.com/kmpeng edited https://github.com/llvm/llvm-project/pull/132288 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HLSL] Implement the `smoothstep` intrinsic (PR #132288)
https://github.com/kmpeng edited https://github.com/llvm/llvm-project/pull/132288 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HLSL] Implement the `smoothstep` intrinsic (PR #132288)
https://github.com/kmpeng updated https://github.com/llvm/llvm-project/pull/132288 >From 1d28510653b39fcabe45ad37197674bdd0217add Mon Sep 17 00:00:00 2001 From: kmpeng Date: Tue, 18 Mar 2025 13:25:10 -0700 Subject: [PATCH 01/10] create int_spv_smoothstep intrinsic, create smoothstep lowering & map to int_spv_smoothstep, create SPIR-V backend test cases --- llvm/include/llvm/IR/IntrinsicsSPIRV.td | 1 + .../Target/SPIRV/SPIRVInstructionSelector.cpp | 2 + .../SPIRV/hlsl-intrinsics/smoothstep.ll | 60 ++ llvm/test/CodeGen/SPIRV/opencl/smoothstep.ll | 61 +++ 4 files changed, 124 insertions(+) create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smoothstep.ll create mode 100644 llvm/test/CodeGen/SPIRV/opencl/smoothstep.ll diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index 906bdfdcd4eb1..4389b86745d7f 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -75,6 +75,7 @@ let TargetPrefix = "spv" in { def int_spv_reflect : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>], [IntrNoMem]>; def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>; def int_spv_saturate : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; + def int_spv_smoothstep : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; def int_spv_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [LLVMMatchType<0>, llvm_anyfloat_ty], [IntrNoMem]>; def int_spv_fdot : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index fd598904d7ba5..644223ccdc0a7 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -3083,6 +3083,8 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg, 
return selectExtInst(ResVReg, ResType, I, CL::rsqrt, GL::InverseSqrt); case Intrinsic::spv_sign: return selectSign(ResVReg, ResType, I); + case Intrinsic::spv_smoothstep: +return selectExtInst(ResVReg, ResType, I, CL::smoothstep, GL::SmoothStep); case Intrinsic::spv_firstbituhigh: // There is no CL equivalent of FindUMsb return selectFirstBitHigh(ResVReg, ResType, I, /*IsSigned=*/false); case Intrinsic::spv_firstbitshigh: // There is no CL equivalent of FindSMsb diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smoothstep.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smoothstep.ll new file mode 100644 index 0..09f93ab7955d3 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smoothstep.ll @@ -0,0 +1,60 @@ +; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; Make sure SPIRV operation function calls for smoothstep are lowered correctly. 
+ +; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450" +; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16 +; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4 +; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32 +; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4 + +define noundef half @smoothstep_half(half noundef %a, half noundef %b, half noundef %c) { +entry: + ; CHECK: %[[#]] = OpFunction %[[#float_16]] None %[[#]] + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#float_16]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#float_16]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#float_16]] + ; CHECK: %[[#]] = OpExtInst %[[#float_16]] %[[#op_ext_glsl]] SmoothStep %[[#arg0]] %[[#arg1]] %[[#arg2]] + %spv.smoothstep = call half @llvm.spv.smoothstep.f16(half %a, half %b, half %c) + ret half %spv.smoothstep +} + +define noundef float @smoothstep_float(float noundef %a, float noundef %b, float noundef %c) { +entry: + ; CHECK: %[[#]] = OpFunction %[[#float_32]] None %[[#]] + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#float_32]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#float_32]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#float_32]] + ; CHECK: %[[#]] = OpExtInst %[[#float_32]] %[[#op_ext_glsl]] SmoothStep %[[#arg0]] %[[#arg1]] %[[#arg2]] + %spv.smoothstep = call float @llvm.spv.smoothstep.f32(float %a, float %b, float %c) + ret float %spv.smoothstep +} + +define noundef <4 x half> @smoothstep_half4(<4 x half> noundef %a, <4 x half> noundef %b, <4 x half> noundef %c) { +entry: + ; CHECK: %[[#]] = OpFunction %[[#vec4_float_16]] None %[[#]] + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_16]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#vec4_float_16]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#vec4_float_16]] + ; CHECK: %[[#]] = OpExtIns
[clang] [llvm] [HLSL] Implement the `smoothstep` intrinsic (PR #132288)
@@ -0,0 +1,60 @@ +; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} kmpeng wrote: Code updated with `--target-env vulkan1.2` added to both `hlsl-intrinsics/smoothstep.ll` and `opencl/smoothstep.ll`. Let me know if that looks good! https://github.com/llvm/llvm-project/pull/132288 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HLSL] Implement the `smoothstep` intrinsic (PR #132288)
@@ -101,6 +101,52 @@ bool SemaSPIRV::CheckSPIRVBuiltinFunctionCall(unsigned BuiltinID, TheCall->setType(RetTy); break; } + case SPIRV::BI__builtin_spirv_smoothstep: { +if (SemaRef.checkArgCount(TheCall, 3)) + return true; + +// check if the all arguments have floating representation +ExprResult A = TheCall->getArg(0); +QualType ArgTyA = A.get()->getType(); +if (!ArgTyA->hasFloatingRepresentation()) { + SemaRef.Diag(A.get()->getBeginLoc(), + diag::err_typecheck_convert_incompatible) kmpeng wrote: Code updated. https://github.com/llvm/llvm-project/pull/132288 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HLSL] Implement the `smoothstep` intrinsic (PR #132288)
@@ -0,0 +1,60 @@ +; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} kmpeng wrote: Would you also like this added to the runs in `opencl/smoothstep.ll`? https://github.com/llvm/llvm-project/pull/132288 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HLSL] Implement the `smoothstep` intrinsic (PR #132288)
https://github.com/kmpeng updated https://github.com/llvm/llvm-project/pull/132288 >From 7853b27b8fffa6d2c0393a4004abaac9a7954608 Mon Sep 17 00:00:00 2001 From: kmpeng Date: Tue, 18 Mar 2025 13:25:10 -0700 Subject: [PATCH 01/10] create int_spv_smoothstep intrinsic, create smoothstep lowering & map to int_spv_smoothstep, create SPIR-V backend test cases --- llvm/include/llvm/IR/IntrinsicsSPIRV.td | 1 + .../Target/SPIRV/SPIRVInstructionSelector.cpp | 2 + .../SPIRV/hlsl-intrinsics/smoothstep.ll | 60 ++ llvm/test/CodeGen/SPIRV/opencl/smoothstep.ll | 61 +++ 4 files changed, 124 insertions(+) create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smoothstep.ll create mode 100644 llvm/test/CodeGen/SPIRV/opencl/smoothstep.ll diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index 4a0e10db2f1e4..7760961de7b6c 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -75,6 +75,7 @@ let TargetPrefix = "spv" in { def int_spv_reflect : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>], [IntrNoMem]>; def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>; def int_spv_saturate : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; + def int_spv_smoothstep : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; def int_spv_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [LLVMMatchType<0>, llvm_anyfloat_ty], [IntrNoMem]>; def int_spv_fdot : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index bd54590c87cac..1ba022b416808 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -3121,6 +3121,8 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg, 
return selectExtInst(ResVReg, ResType, I, CL::rsqrt, GL::InverseSqrt); case Intrinsic::spv_sign: return selectSign(ResVReg, ResType, I); + case Intrinsic::spv_smoothstep: +return selectExtInst(ResVReg, ResType, I, CL::smoothstep, GL::SmoothStep); case Intrinsic::spv_firstbituhigh: // There is no CL equivalent of FindUMsb return selectFirstBitHigh(ResVReg, ResType, I, /*IsSigned=*/false); case Intrinsic::spv_firstbitshigh: // There is no CL equivalent of FindSMsb diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smoothstep.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smoothstep.ll new file mode 100644 index 0..09f93ab7955d3 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smoothstep.ll @@ -0,0 +1,60 @@ +; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; Make sure SPIRV operation function calls for smoothstep are lowered correctly. 
+ +; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450" +; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16 +; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4 +; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32 +; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4 + +define noundef half @smoothstep_half(half noundef %a, half noundef %b, half noundef %c) { +entry: + ; CHECK: %[[#]] = OpFunction %[[#float_16]] None %[[#]] + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#float_16]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#float_16]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#float_16]] + ; CHECK: %[[#]] = OpExtInst %[[#float_16]] %[[#op_ext_glsl]] SmoothStep %[[#arg0]] %[[#arg1]] %[[#arg2]] + %spv.smoothstep = call half @llvm.spv.smoothstep.f16(half %a, half %b, half %c) + ret half %spv.smoothstep +} + +define noundef float @smoothstep_float(float noundef %a, float noundef %b, float noundef %c) { +entry: + ; CHECK: %[[#]] = OpFunction %[[#float_32]] None %[[#]] + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#float_32]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#float_32]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#float_32]] + ; CHECK: %[[#]] = OpExtInst %[[#float_32]] %[[#op_ext_glsl]] SmoothStep %[[#arg0]] %[[#arg1]] %[[#arg2]] + %spv.smoothstep = call float @llvm.spv.smoothstep.f32(float %a, float %b, float %c) + ret float %spv.smoothstep +} + +define noundef <4 x half> @smoothstep_half4(<4 x half> noundef %a, <4 x half> noundef %b, <4 x half> noundef %c) { +entry: + ; CHECK: %[[#]] = OpFunction %[[#vec4_float_16]] None %[[#]] + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_16]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#vec4_float_16]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#vec4_float_16]] + ; CHECK: %[[#]] = OpExtIns
[clang] [llvm] [HLSL] Implement the `smoothstep` intrinsic (PR #132288)
@@ -101,6 +101,52 @@ bool SemaSPIRV::CheckSPIRVBuiltinFunctionCall(unsigned BuiltinID, TheCall->setType(RetTy); break; } + case SPIRV::BI__builtin_spirv_smoothstep: { +if (SemaRef.checkArgCount(TheCall, 3)) + return true; + +// check if the all arguments have floating representation +ExprResult A = TheCall->getArg(0); kmpeng wrote: Code updated to move the checks into a for loop. https://github.com/llvm/llvm-project/pull/132288 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HLSL] Implement the `smoothstep` intrinsic (PR #132288)
https://github.com/kmpeng edited https://github.com/llvm/llvm-project/pull/132288 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HLSL] Implement the `smoothstep` intrinsic (PR #132288)
https://github.com/kmpeng updated https://github.com/llvm/llvm-project/pull/132288 >From 7853b27b8fffa6d2c0393a4004abaac9a7954608 Mon Sep 17 00:00:00 2001 From: kmpeng Date: Tue, 18 Mar 2025 13:25:10 -0700 Subject: [PATCH 1/8] create int_spv_smoothstep intrinsic, create smoothstep lowering & map to int_spv_smoothstep, create SPIR-V backend test cases --- llvm/include/llvm/IR/IntrinsicsSPIRV.td | 1 + .../Target/SPIRV/SPIRVInstructionSelector.cpp | 2 + .../SPIRV/hlsl-intrinsics/smoothstep.ll | 60 ++ llvm/test/CodeGen/SPIRV/opencl/smoothstep.ll | 61 +++ 4 files changed, 124 insertions(+) create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smoothstep.ll create mode 100644 llvm/test/CodeGen/SPIRV/opencl/smoothstep.ll diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index 4a0e10db2f1e4..7760961de7b6c 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -75,6 +75,7 @@ let TargetPrefix = "spv" in { def int_spv_reflect : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>], [IntrNoMem]>; def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>; def int_spv_saturate : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; + def int_spv_smoothstep : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; def int_spv_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [LLVMMatchType<0>, llvm_anyfloat_ty], [IntrNoMem]>; def int_spv_fdot : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index bd54590c87cac..1ba022b416808 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -3121,6 +3121,8 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg, 
return selectExtInst(ResVReg, ResType, I, CL::rsqrt, GL::InverseSqrt); case Intrinsic::spv_sign: return selectSign(ResVReg, ResType, I); + case Intrinsic::spv_smoothstep: +return selectExtInst(ResVReg, ResType, I, CL::smoothstep, GL::SmoothStep); case Intrinsic::spv_firstbituhigh: // There is no CL equivalent of FindUMsb return selectFirstBitHigh(ResVReg, ResType, I, /*IsSigned=*/false); case Intrinsic::spv_firstbitshigh: // There is no CL equivalent of FindSMsb diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smoothstep.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smoothstep.ll new file mode 100644 index 0..09f93ab7955d3 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smoothstep.ll @@ -0,0 +1,60 @@ +; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; Make sure SPIRV operation function calls for smoothstep are lowered correctly. 
+ +; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450" +; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16 +; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4 +; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32 +; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4 + +define noundef half @smoothstep_half(half noundef %a, half noundef %b, half noundef %c) { +entry: + ; CHECK: %[[#]] = OpFunction %[[#float_16]] None %[[#]] + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#float_16]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#float_16]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#float_16]] + ; CHECK: %[[#]] = OpExtInst %[[#float_16]] %[[#op_ext_glsl]] SmoothStep %[[#arg0]] %[[#arg1]] %[[#arg2]] + %spv.smoothstep = call half @llvm.spv.smoothstep.f16(half %a, half %b, half %c) + ret half %spv.smoothstep +} + +define noundef float @smoothstep_float(float noundef %a, float noundef %b, float noundef %c) { +entry: + ; CHECK: %[[#]] = OpFunction %[[#float_32]] None %[[#]] + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#float_32]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#float_32]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#float_32]] + ; CHECK: %[[#]] = OpExtInst %[[#float_32]] %[[#op_ext_glsl]] SmoothStep %[[#arg0]] %[[#arg1]] %[[#arg2]] + %spv.smoothstep = call float @llvm.spv.smoothstep.f32(float %a, float %b, float %c) + ret float %spv.smoothstep +} + +define noundef <4 x half> @smoothstep_half4(<4 x half> noundef %a, <4 x half> noundef %b, <4 x half> noundef %c) { +entry: + ; CHECK: %[[#]] = OpFunction %[[#vec4_float_16]] None %[[#]] + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_16]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#vec4_float_16]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#vec4_float_16]] + ; CHECK: %[[#]] = OpExtInst
[clang] [llvm] [HLSL] Implement the `smoothstep` intrinsic (PR #132288)
https://github.com/kmpeng edited https://github.com/llvm/llvm-project/pull/132288 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HLSL] Implement the `smoothstep` intrinsic (PR #132288)
@@ -0,0 +1,60 @@ +; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} kmpeng wrote: @s-perron Changing the target environments to `--target-env vulkan1.3` for hlsl-intrinsics and `--target-env spv1.6` for opencl seems to fix the issues. Do these changes work for you? https://github.com/llvm/llvm-project/pull/132288 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HLSL] Implement the `smoothstep` intrinsic (PR #132288)
https://github.com/kmpeng edited https://github.com/llvm/llvm-project/pull/132288 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HLSL] Implement the `smoothstep` intrinsic (PR #132288)
https://github.com/kmpeng updated https://github.com/llvm/llvm-project/pull/132288 >From 1d28510653b39fcabe45ad37197674bdd0217add Mon Sep 17 00:00:00 2001 From: kmpeng Date: Tue, 18 Mar 2025 13:25:10 -0700 Subject: [PATCH 01/11] create int_spv_smoothstep intrinsic, create smoothstep lowering & map to int_spv_smoothstep, create SPIR-V backend test cases --- llvm/include/llvm/IR/IntrinsicsSPIRV.td | 1 + .../Target/SPIRV/SPIRVInstructionSelector.cpp | 2 + .../SPIRV/hlsl-intrinsics/smoothstep.ll | 60 ++ llvm/test/CodeGen/SPIRV/opencl/smoothstep.ll | 61 +++ 4 files changed, 124 insertions(+) create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smoothstep.ll create mode 100644 llvm/test/CodeGen/SPIRV/opencl/smoothstep.ll diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index 906bdfdcd4eb1..4389b86745d7f 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -75,6 +75,7 @@ let TargetPrefix = "spv" in { def int_spv_reflect : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>], [IntrNoMem]>; def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>; def int_spv_saturate : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; + def int_spv_smoothstep : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; def int_spv_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [LLVMMatchType<0>, llvm_anyfloat_ty], [IntrNoMem]>; def int_spv_fdot : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index fd598904d7ba5..644223ccdc0a7 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -3083,6 +3083,8 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg, 
return selectExtInst(ResVReg, ResType, I, CL::rsqrt, GL::InverseSqrt); case Intrinsic::spv_sign: return selectSign(ResVReg, ResType, I); + case Intrinsic::spv_smoothstep: +return selectExtInst(ResVReg, ResType, I, CL::smoothstep, GL::SmoothStep); case Intrinsic::spv_firstbituhigh: // There is no CL equivalent of FindUMsb return selectFirstBitHigh(ResVReg, ResType, I, /*IsSigned=*/false); case Intrinsic::spv_firstbitshigh: // There is no CL equivalent of FindSMsb diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smoothstep.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smoothstep.ll new file mode 100644 index 0..09f93ab7955d3 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smoothstep.ll @@ -0,0 +1,60 @@ +; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; Make sure SPIRV operation function calls for smoothstep are lowered correctly. 
+ +; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450" +; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16 +; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4 +; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32 +; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4 + +define noundef half @smoothstep_half(half noundef %a, half noundef %b, half noundef %c) { +entry: + ; CHECK: %[[#]] = OpFunction %[[#float_16]] None %[[#]] + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#float_16]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#float_16]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#float_16]] + ; CHECK: %[[#]] = OpExtInst %[[#float_16]] %[[#op_ext_glsl]] SmoothStep %[[#arg0]] %[[#arg1]] %[[#arg2]] + %spv.smoothstep = call half @llvm.spv.smoothstep.f16(half %a, half %b, half %c) + ret half %spv.smoothstep +} + +define noundef float @smoothstep_float(float noundef %a, float noundef %b, float noundef %c) { +entry: + ; CHECK: %[[#]] = OpFunction %[[#float_32]] None %[[#]] + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#float_32]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#float_32]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#float_32]] + ; CHECK: %[[#]] = OpExtInst %[[#float_32]] %[[#op_ext_glsl]] SmoothStep %[[#arg0]] %[[#arg1]] %[[#arg2]] + %spv.smoothstep = call float @llvm.spv.smoothstep.f32(float %a, float %b, float %c) + ret float %spv.smoothstep +} + +define noundef <4 x half> @smoothstep_half4(<4 x half> noundef %a, <4 x half> noundef %b, <4 x half> noundef %c) { +entry: + ; CHECK: %[[#]] = OpFunction %[[#vec4_float_16]] None %[[#]] + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_16]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#vec4_float_16]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#vec4_float_16]] + ; CHECK: %[[#]] = OpExtIns
[clang] [HLSL] Implement the `lit` intrinsic (PR #134171)
https://github.com/kmpeng updated https://github.com/llvm/llvm-project/pull/134171 >From 719bb94279f64f134c826faa22898e4e549bb23c Mon Sep 17 00:00:00 2001 From: kmpeng Date: Thu, 27 Mar 2025 14:39:27 -0700 Subject: [PATCH 1/8] finished lit implementation, added codegen and sema tests --- .../lib/Headers/hlsl/hlsl_intrinsic_helpers.h | 12 + clang/lib/Headers/hlsl/hlsl_intrinsics.h | 31 clang/test/CodeGenHLSL/builtins/lit.hlsl | 36 + clang/test/SemaHLSL/BuiltIns/lit-errors.hlsl | 50 +++ 4 files changed, 129 insertions(+) create mode 100644 clang/test/CodeGenHLSL/builtins/lit.hlsl create mode 100644 clang/test/SemaHLSL/BuiltIns/lit-errors.hlsl diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h index 8cdd63d7e07bb..619d4c59f8074 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h @@ -101,6 +101,18 @@ constexpr vector smoothstep_vec_impl(vector Min, vector Max, #endif } +template +constexpr vector lit_impl(T N_dot_l, T N_dot_h, T M) { + bool Cond1 = N_dot_l < 0; + T ClampedP1 = select(Cond1, 0, N_dot_l); + vector Result = {1, ClampedP1, 0, 1}; + bool CombinedCond = or (Cond1, (N_dot_h < 0)); + T LogP2 = log(N_dot_h); + T Exp = exp(LogP2 * M); + Result[2] = select(CombinedCond, 0, Exp); + return Result; +} + } // namespace __detail } // namespace hlsl diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h index fd799b8d874ae..5b6692d2a4281 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h @@ -253,6 +253,37 @@ const inline float length(__detail::HLSL_FIXED_VECTOR X) { return __detail::length_vec_impl(X); } +//===--===// +// lit builtins +//===--===// + +/// \fn vector lit(T x, T y) +/// \brief Returns a lighting coefficient vector. +/// \param N_dot_l The dot product of the normalized surface normal and the +/// light vector. 
+/// \param N_dot_h The dot product of the half-angle vector and the surface +/// normal. +/// \param M A specular exponent. +/// +/// This function returns a lighting coefficient vector (ambient, diffuse, +/// specular, 1). + +template +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +const inline __detail::enable_if_t<__detail::is_arithmetic::Value && + __detail::is_same::value, + vector> lit(T N_dot_l, T N_dot_h, T M) { + return __detail::lit_impl(N_dot_l, N_dot_h, M); +} + +template +const inline __detail::enable_if_t<__detail::is_arithmetic::Value && + __detail::is_same::value, + vector> +lit(T N_dot_l, T N_dot_h, T M) { + return __detail::lit_impl(N_dot_l, N_dot_h, M); +} + //===--===// // D3DCOLORtoUBYTE4 builtin //===--===// diff --git a/clang/test/CodeGenHLSL/builtins/lit.hlsl b/clang/test/CodeGenHLSL/builtins/lit.hlsl new file mode 100644 index 0..3fb0a2c5b5d6f --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/lit.hlsl @@ -0,0 +1,36 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.3-library %s -fnative-half-type -emit-llvm -O1 -o - | FileCheck %s + +// CHECK-LABEL: define noundef nofpclass(nan inf) <4 x half> @_Z13test_lit_halfDhDhDh( +// CHECK-SAME: half noundef nofpclass(nan inf) [[N_DOT_L:%.*]], half noundef nofpclass(nan inf) [[N_DOT_H:%.*]], half noundef nofpclass(nan inf) [[M:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT:[[CMP_I:%.*]] = fcmp reassoc nnan ninf nsz arcp afn olt half [[N_DOT_L]], 0xH +// CHECK-NEXT:[[HLSL_SELECT_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn half @llvm.maxnum.f16(half [[N_DOT_L]], half 0xH) +// CHECK-NEXT:[[VECINIT2_I:%.*]] = insertelement <4 x half> , half [[HLSL_SELECT_I]], i64 1 +// CHECK-NEXT:[[CMP4_I:%.*]] = fcmp reassoc nnan ninf nsz arcp afn olt half [[N_DOT_H]], 0xH +// CHECK-NEXT:[[HLSL_OR_I:%.*]] = or i1 [[CMP_I]], [[CMP4_I]] +// 
CHECK-NEXT:[[ELT_LOG_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn half @llvm.log.f16(half [[N_DOT_H]]) +// CHECK-NEXT:[[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn half [[ELT_LOG_I]], [[M]] +// CHECK-NEXT:[[ELT_EXP_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn half @llvm.exp.f16(half [[MUL_I]]) +// CHECK-NEXT:[[HLSL_SELECT7_I:%.*]] = select reassoc nnan ninf nsz arcp afn i1 [[HLSL_OR_I]], half 0xH, half [[ELT_EXP_I]]
[clang] [llvm] [HLSL] Implement the `smoothstep` intrinsic (PR #132288)
@@ -322,5 +322,53 @@ reflect(__detail::HLSL_FIXED_VECTOR I, __detail::HLSL_FIXED_VECTOR N) { return __detail::reflect_vec_impl(I, N); } + +//===--===// +// smoothstep builtin +//===--===// + +/// \fn T smoothstep(T Min, T Max, T X) +/// \brief Returns a smooth Hermite interpolation between 0 and 1, if \a X is in +/// the range [\a Min, \a Max]. +/// \param Min The minimum range of the x parameter. +/// \param Max The maximum range of the x parameter. +/// \param X The specified value to be interpolated. +/// +/// The return value is 0.0 if \a X ≤ \a Min and 1.0 if \a X ≥ \a Max. When \a +/// Min < \a X < \a Max, the function performs smooth Hermite interpolation +/// between 0 and 1. + +template +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +const inline __detail::enable_if_t<__detail::is_arithmetic::Value && + __detail::is_same::value, + T> smoothstep(T Min, T Max, T X) { + return __detail::smoothstep_impl(Min, Max, X); +} + +template +const inline __detail::enable_if_t< kmpeng wrote: I'm not sure how to do this when clang-format is an automatic process. The other intrinsics are also formatted the same way as mine. https://github.com/llvm/llvm-project/pull/132288 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [HLSL] Implement the `lit` intrinsic (PR #134171)
@@ -253,6 +253,37 @@ const inline float length(__detail::HLSL_FIXED_VECTOR X) { return __detail::length_vec_impl(X); } +//===--===// +// lit builtins +//===--===// + +/// \fn vector lit(T NDotL, T NDotH, T M) +/// \brief Returns a lighting coefficient vector. +/// \param NDotL The dot product of the normalized surface normal and the +/// light vector. +/// \param NDotH The dot product of the half-angle vector and the surface +/// normal. +/// \param M A specular exponent. +/// +/// This function returns a lighting coefficient vector (ambient, diffuse, +/// specular, 1). + +template +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +const inline __detail::enable_if_t<__detail::is_arithmetic::Value && + __detail::is_same::value, + vector> lit(T NDotL, T NDotH, T M) { + return __detail::lit_impl(NDotL, NDotH, M); +} + +template +const inline __detail::enable_if_t<__detail::is_arithmetic::Value && + __detail::is_same::value, + vector> kmpeng wrote: I've tried but clang-format keeps putting it back to this :\ https://github.com/llvm/llvm-project/pull/134171 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [HLSL] Implement the `lit` intrinsic (PR #134171)
@@ -101,6 +101,16 @@ constexpr vector smoothstep_vec_impl(vector Min, vector Max, #endif } +template constexpr vector lit_impl(T NDotL, T NDotH, T M) { + bool DiffuseCond = NDotL < 0; + T Diffuse = select(DiffuseCond, 0, NDotL); + vector Result = {1, Diffuse, 0, 1}; + bool SpecularCond = or (DiffuseCond, (NDotH < 0)); kmpeng wrote: Adding the manual space is making it fail :\ Should I tell clang-format to ignore it? And how do I do that? https://github.com/llvm/llvm-project/pull/134171 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [HLSL] Implement the `lit` intrinsic (PR #134171)
kmpeng wrote: I currently don't have `-fnative-half-type` enabled in that file, so yes, this test case ends up having `float` input/return types (though it's not necessarily being converted in the function, if that's what you're asking). With `-fnative-half-type` enabled, it doesn't convert to `float`. https://github.com/llvm/llvm-project/pull/134171 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [HLSL] Implement the `lit` intrinsic (PR #134171)
https://github.com/kmpeng edited https://github.com/llvm/llvm-project/pull/134171 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [HLSL] Implement the `lit` intrinsic (PR #134171)
https://github.com/kmpeng edited https://github.com/llvm/llvm-project/pull/134171 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [HLSL] Implement the `lit` intrinsic (PR #134171)
https://github.com/kmpeng edited https://github.com/llvm/llvm-project/pull/134171 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [HLSL] Implement the `lit` intrinsic (PR #134171)
https://github.com/kmpeng edited https://github.com/llvm/llvm-project/pull/134171 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits