https://github.com/kcloudy0717 updated https://github.com/llvm/llvm-project/pull/205735
>From de9cdb6c854b65fbcf0f87c93b3ddfba1bc90ac2 Mon Sep 17 00:00:00 2001 From: Kai Huang <[email protected]> Date: Sun, 29 Mar 2026 00:25:42 +0800 Subject: [PATCH] [HLSL][DXIL][SPIRV] QuadReadLaneAt intrinsic support --- clang/include/clang/Basic/Builtins.td | 6 + clang/include/clang/Basic/HLSLIntrinsics.td | 16 ++ clang/lib/CodeGen/CGHLSLBuiltins.cpp | 9 + clang/lib/CodeGen/CGHLSLRuntime.h | 1 + clang/lib/Sema/SemaHLSL.cpp | 3 +- .../CodeGenHLSL/builtins/QuadReadLaneAt.hlsl | 185 ++++++++++++++++++ .../BuiltIns/QuadReadLaneAt-errors.hlsl | 38 ++++ llvm/include/llvm/IR/IntrinsicsDirectX.td | 1 + llvm/include/llvm/IR/IntrinsicsSPIRV.td | 1 + llvm/lib/Target/DirectX/DXIL.td | 10 + llvm/lib/Target/DirectX/DXILShaderFlags.cpp | 1 + .../DirectX/DirectXTargetTransformInfo.cpp | 1 + llvm/lib/Target/SPIRV/SPIRVInstrInfo.td | 1 + .../Target/SPIRV/SPIRVInstructionSelector.cpp | 3 + llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp | 1 + llvm/test/CodeGen/DirectX/QuadReadLaneAt.ll | 95 +++++++++ .../QuadReadLaneAt.constant.ll | 62 ++++++ .../hlsl-intrinsics/QuadReadLaneAt.uniform.ll | 65 ++++++ 18 files changed, 498 insertions(+), 1 deletion(-) create mode 100644 clang/test/CodeGenHLSL/builtins/QuadReadLaneAt.hlsl create mode 100644 clang/test/SemaHLSL/BuiltIns/QuadReadLaneAt-errors.hlsl create mode 100644 llvm/test/CodeGen/DirectX/QuadReadLaneAt.ll create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/QuadReadLaneAt.constant.ll create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/QuadReadLaneAt.uniform.ll diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 63cdb787bea16..b37725498fbdc 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -5555,6 +5555,12 @@ def HLSLWavePrefixProduct : LangBuiltin<"HLSL_LANG"> { let Prototype = "void(...)"; } +def HLSLQuadReadLaneAt : LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_quad_read_lane_at"]; + let Attributes = [NoThrow, Const]; + let Prototype = "void(...)"; +} + def HLSLQuadReadAcrossX : LangBuiltin<"HLSL_LANG"> { let Spellings = ["__builtin_hlsl_quad_read_across_x"]; let Attributes = [NoThrow, Const]; diff --git a/clang/include/clang/Basic/HLSLIntrinsics.td b/clang/include/clang/Basic/HLSLIntrinsics.td index 99259046940f1..5dd2cd47b9756 100644 --- a/clang/include/clang/Basic/HLSLIntrinsics.td +++ b/clang/include/clang/Basic/HLSLIntrinsics.td @@ -1322,6 +1322,22 @@ def hlsl_pow : HLSLTwoArgBuiltin<"pow", "__builtin_elementwise_pow"> { let VaryingMatDims = []; } +// Returns the value from the lane with the specified index in the quad. +def hlsl_quad_read_lane_at : HLSLBuiltin<"QuadReadLaneAt", "__builtin_hlsl_quad_read_lane_at"> { + let Doc = [{ +\brief Returns the value from the lane with the specified index in the quad. +\param Val The value to read. +\param Index The lane index. +}]; + let ReturnType = Varying; + let Args = [Varying, UIntTy]; + let VaryingTypes = AllTypesWithBool; + let VaryingScalar = 1; + let VaryingVecSizes = [2, 3, 4]; + let VaryingMatDims = []; + let IsConvergent = 1; +} + // Reads the value from the lane across the X axis of the quad. def hlsl_quad_read_across_x : HLSLOneArgBuiltin<"QuadReadAcrossX", diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp index 20a2119e28ce1..ce3af3ffd0467 100644 --- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp +++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp @@ -1557,6 +1557,15 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, return EmitIntrinsicCall(IID, {OpExpr->getType()}, ArrayRef{OpExpr}, "hlsl.wave.prefix.product"); } + case Builtin::BI__builtin_hlsl_quad_read_lane_at: { + Value *OpExpr = EmitScalarExpr(E->getArg(0)); + Value *OpIndex = EmitScalarExpr(E->getArg(1)); + return EmitRuntimeCall( + Intrinsic::getOrInsertDeclaration( + &CGM.getModule(), CGM.getHLSLRuntime().getQuadReadLaneAtIntrinsic(), + {OpExpr->getType()}), + ArrayRef{OpExpr, OpIndex}, "hlsl.quad.read.lane.at"); + } case Builtin::BI__builtin_hlsl_quad_read_across_x: { Value *OpExpr = EmitScalarExpr(E->getArg(0)); Intrinsic::ID ID = CGM.getHLSLRuntime().getQuadReadAcrossXIntrinsic(); diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h index a126d4612a5f4..a75ffdba04a8d 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -160,6 +160,7 @@ class CGHLSLRuntime { GENERATE_HLSL_INTRINSIC_FUNCTION(WaveIsFirstLane, wave_is_first_lane) GENERATE_HLSL_INTRINSIC_FUNCTION(WaveGetLaneCount, wave_get_lane_count) GENERATE_HLSL_INTRINSIC_FUNCTION(WaveReadLaneAt, wave_readlane) + GENERATE_HLSL_INTRINSIC_FUNCTION(QuadReadLaneAt, quad_read_lane_at) GENERATE_HLSL_INTRINSIC_FUNCTION(QuadReadAcrossX, quad_read_across_x) GENERATE_HLSL_INTRINSIC_FUNCTION(QuadReadAcrossY, quad_read_across_y) GENERATE_HLSL_INTRINSIC_FUNCTION(QuadReadAcrossDiagonal, diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 075dc97b0aef2..55c96e4f525a8 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -4613,7 +4613,8 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { break; } - case Builtin::BI__builtin_hlsl_wave_read_lane_at: { + case Builtin::BI__builtin_hlsl_wave_read_lane_at: + case Builtin::BI__builtin_hlsl_quad_read_lane_at: { if (SemaRef.checkArgCount(TheCall, 2)) return true; diff --git a/clang/test/CodeGenHLSL/builtins/QuadReadLaneAt.hlsl b/clang/test/CodeGenHLSL/builtins/QuadReadLaneAt.hlsl new file mode 100644 index 0000000000000..9242b5075dc10 --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/QuadReadLaneAt.hlsl @@ -0,0 +1,185 @@ +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: dxil-pc-shadermodel6.3-compute %s -fnative-half-type -fnative-int16-type \ +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ +// RUN: --check-prefixes=CHECK,CHECK-NATIVE_HALF -DTARGET=dx -DCC="" +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: dxil-pc-shadermodel6.3-compute %s -emit-llvm -disable-llvm-passes \ +// RUN: -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NO_HALF -DTARGET=dx -DCC="" + +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type -fnative-int16-type \ +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ +// RUN: --check-prefixes=CHECK,CHECK-NATIVE_HALF -DTARGET=spv -DCC="spir_func " +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \ +// RUN: -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NO_HALF -DTARGET=spv -DCC="spir_func " + +// CHECK: %[[RET:.*]] = call [[CC]]i1 @llvm.[[TARGET]].quad.read.lane.at.i1(i1 %[[VAR:.*]], i32 %[[#]]) +// CHECK: ret i1 %[[RET]] +bool test_bool(bool expr, uint idx) { return QuadReadLaneAt(expr, idx); } + +// CHECK: %[[RET:.*]] = call [[CC]]<2 x i1> @llvm.[[TARGET]].quad.read.lane.at.v2i1(<2 x i1> %[[VAR:.*]], i32 %[[#]]) +// CHECK: ret <2 x i1> %[[RET]] +bool2 test_bool2(bool2 expr, uint idx) { return QuadReadLaneAt(expr, idx); } + +// CHECK: %[[RET:.*]] = call [[CC]]<3 x i1> @llvm.[[TARGET]].quad.read.lane.at.v3i1(<3 x i1> %[[VAR:.*]], i32 %[[#]]) +// CHECK: ret <3 x i1> %[[RET]] +bool3 test_bool3(bool3 expr, uint idx) { return QuadReadLaneAt(expr, idx); } + +// CHECK: %[[RET:.*]] = call [[CC]]<4 x i1> @llvm.[[TARGET]].quad.read.lane.at.v4i1(<4 x i1> %[[VAR:.*]], i32 %[[#]]) +// CHECK: ret <4 x i1> %[[RET]] +bool4 test_bool4(bool4 expr, uint idx) { return QuadReadLaneAt(expr, idx); } + +// CHECK: %[[RET:.*]] = call [[CC]]i32 @llvm.[[TARGET]].quad.read.lane.at.i32(i32 %[[#]], i32 %[[#]]) +// CHECK: ret i32 %[[RET]] +int test_int(int expr, uint idx) { return QuadReadLaneAt(expr, idx); } + +// CHECK: %[[RET:.*]] = call [[CC]]<2 x i32> @llvm.[[TARGET]].quad.read.lane.at.v2i32(<2 x i32> %[[#]], i32 %[[#]]) +// CHECK: ret <2 x i32> %[[RET]] +int2 test_int2(int2 expr, uint idx) { return QuadReadLaneAt(expr, idx); } + +// CHECK: %[[RET:.*]] = call [[CC]]<3 x i32> @llvm.[[TARGET]].quad.read.lane.at.v3i32(<3 x i32> %[[#]], i32 %[[#]]) +// CHECK: ret <3 x i32> %[[RET]] +int3 test_int3(int3 expr, uint idx) { return QuadReadLaneAt(expr, idx); } + +// CHECK: %[[RET:.*]] = call [[CC]]<4 x i32> @llvm.[[TARGET]].quad.read.lane.at.v4i32(<4 x i32> %[[#]], i32 %[[#]]) +// CHECK: ret <4 x i32> %[[RET]] +int4 test_int4(int4 expr, uint idx) { return QuadReadLaneAt(expr, idx); } + +// CHECK: %[[RET:.*]] = call [[CC]]i32 @llvm.[[TARGET]].quad.read.lane.at.i32(i32 %[[#]], i32 %[[#]]) +// CHECK: ret i32 %[[RET]] +uint test_uint(uint expr, uint idx) { return QuadReadLaneAt(expr, idx); } + +// CHECK: %[[RET:.*]] = call [[CC]]<2 x i32> @llvm.[[TARGET]].quad.read.lane.at.v2i32(<2 x i32> %[[#]], i32 %[[#]]) +// CHECK: ret <2 x i32> %[[RET]] +uint2 test_uint2(uint2 expr, uint idx) { return QuadReadLaneAt(expr, idx); } + +// CHECK: %[[RET:.*]] = call [[CC]]<3 x i32> @llvm.[[TARGET]].quad.read.lane.at.v3i32(<3 x i32> %[[#]], i32 %[[#]]) +// CHECK: ret <3 x i32> %[[RET]] +uint3 test_uint3(uint3 expr, uint idx) { return QuadReadLaneAt(expr, idx); } + +// CHECK: %[[RET:.*]] = call [[CC]]<4 x i32> @llvm.[[TARGET]].quad.read.lane.at.v4i32(<4 x i32> %[[#]], i32 %[[#]]) +// CHECK: ret <4 x i32> %[[RET]] +uint4 test_uint4(uint4 expr, uint idx) { return QuadReadLaneAt(expr, idx); } + +// CHECK: %[[RET:.*]] = call [[CC]]i64 @llvm.[[TARGET]].quad.read.lane.at.i64(i64 %[[#]], i32 %[[#]]) +// CHECK: ret i64 %[[RET]] +int64_t test_int64_t(int64_t expr, uint idx) { return QuadReadLaneAt(expr, idx); } + +// CHECK: %[[RET:.*]] = call [[CC]]<2 x i64> @llvm.[[TARGET]].quad.read.lane.at.v2i64(<2 x i64> %[[#]], i32 %[[#]]) +// CHECK: ret <2 x i64> %[[RET]] +int64_t2 test_int64_t2(int64_t2 expr, uint idx) { return QuadReadLaneAt(expr, idx); } + +// CHECK: %[[RET:.*]] = call [[CC]]<3 x i64> @llvm.[[TARGET]].quad.read.lane.at.v3i64(<3 x i64> %[[#]], i32 %[[#]]) +// CHECK: ret <3 x i64> %[[RET]] +int64_t3 test_int64_t3(int64_t3 expr, uint idx) { return QuadReadLaneAt(expr, idx); } + +// CHECK: %[[RET:.*]] = call [[CC]]<4 x i64> @llvm.[[TARGET]].quad.read.lane.at.v4i64(<4 x i64> %[[#]], i32 %[[#]]) +// CHECK: ret <4 x i64> %[[RET]] +int64_t4 test_int64_t4(int64_t4 expr, uint idx) { return QuadReadLaneAt(expr, idx); } + +// CHECK: %[[RET:.*]] = call [[CC]]i64 @llvm.[[TARGET]].quad.read.lane.at.i64(i64 %[[#]], i32 %[[#]]) +// CHECK: ret i64 %[[RET]] +uint64_t test_uint64_t(uint64_t expr, uint idx) { return QuadReadLaneAt(expr, idx); } + +// CHECK: %[[RET:.*]] = call [[CC]]<2 x i64> @llvm.[[TARGET]].quad.read.lane.at.v2i64(<2 x i64> %[[#]], i32 %[[#]]) +// CHECK: ret <2 x i64> %[[RET]] +uint64_t2 test_uint64_t2(uint64_t2 expr, uint idx) { return QuadReadLaneAt(expr, idx); } + +// CHECK: %[[RET:.*]] = call [[CC]]<3 x i64> @llvm.[[TARGET]].quad.read.lane.at.v3i64(<3 x i64> %[[#]], i32 %[[#]]) +// CHECK: ret <3 x i64> %[[RET]] +uint64_t3 test_uint64_t3(uint64_t3 expr, uint idx) { return QuadReadLaneAt(expr, idx); } + +// CHECK: %[[RET:.*]] = call [[CC]]<4 x i64> @llvm.[[TARGET]].quad.read.lane.at.v4i64(<4 x i64> %[[#]], i32 %[[#]]) +// CHECK: ret <4 x i64> %[[RET]] +uint64_t4 test_uint64_t4(uint64_t4 expr, uint idx) { return QuadReadLaneAt(expr, idx); } + +// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]float @llvm.[[TARGET]].quad.read.lane.at.f32(float %[[#]], i32 %[[#]]) +// CHECK: ret float %[[RET]] +float test_float(float expr, uint idx) { return QuadReadLaneAt(expr, idx); } + +// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<2 x float> @llvm.[[TARGET]].quad.read.lane.at.v2f32(<2 x float> %[[#]], i32 %[[#]]) +// CHECK: ret <2 x float> %[[RET]] +float2 test_float2(float2 expr, uint idx) { return QuadReadLaneAt(expr, idx); } + +// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<3 x float> @llvm.[[TARGET]].quad.read.lane.at.v3f32(<3 x float> %[[#]], i32 %[[#]]) +// CHECK: ret <3 x float> %[[RET]] +float3 test_float3(float3 expr, uint idx) { return QuadReadLaneAt(expr, idx); } + +// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<4 x float> @llvm.[[TARGET]].quad.read.lane.at.v4f32(<4 x float> %[[#]], i32 %[[#]]) +// CHECK: ret <4 x float> %[[RET]] +float4 test_float4(float4 expr, uint idx) { return QuadReadLaneAt(expr, idx); } + +// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]double @llvm.[[TARGET]].quad.read.lane.at.f64(double %[[#]], i32 %[[#]]) +// CHECK: ret double %[[RET]] +double test_double(double expr, uint idx) { return QuadReadLaneAt(expr, idx); } + +// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<2 x double> @llvm.[[TARGET]].quad.read.lane.at.v2f64(<2 x double> %[[#]], i32 %[[#]]) +// CHECK: ret <2 x double> %[[RET]] +double2 test_double2(double2 expr, uint idx) { return QuadReadLaneAt(expr, idx); } + +// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<3 x double> @llvm.[[TARGET]].quad.read.lane.at.v3f64(<3 x double> %[[#]], i32 %[[#]]) +// CHECK: ret <3 x double> %[[RET]] +double3 test_double3(double3 expr, uint idx) { return QuadReadLaneAt(expr, idx); } + +// CHECK: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<4 x double> @llvm.[[TARGET]].quad.read.lane.at.v4f64(<4 x double> %[[#]], i32 %[[#]]) +// CHECK: ret <4 x double> %[[RET]] +double4 test_double4(double4 expr, uint idx) { return QuadReadLaneAt(expr, idx); } + +// CHECK-NATIVE_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]half @llvm.[[TARGET]].quad.read.lane.at.f16(half %[[#]], i32 %[[#]]) +// CHECK-NATIVE_HALF: ret half %[[RET]] +// CHECK-NO_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]float @llvm.[[TARGET]].quad.read.lane.at.f32(float %[[#]], i32 %[[#]]) +// CHECK-NO_HALF: ret float %[[RET]] +half test_half(half expr, uint idx) { return QuadReadLaneAt(expr, idx); } + +// CHECK-NATIVE_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<2 x half> @llvm.[[TARGET]].quad.read.lane.at.v2f16(<2 x half> %[[#]], i32 %[[#]]) +// CHECK-NATIVE_HALF: ret <2 x half> %[[RET]] +// CHECK-NO_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<2 x float> @llvm.[[TARGET]].quad.read.lane.at.v2f32(<2 x float> %[[#]], i32 %[[#]]) +// CHECK-NO_HALF: ret <2 x float> %[[RET]] +half2 test_half2(half2 expr, uint idx) { return QuadReadLaneAt(expr, idx); } + +// CHECK-NATIVE_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<3 x half> @llvm.[[TARGET]].quad.read.lane.at.v3f16(<3 x half> %[[#]], i32 %[[#]]) +// CHECK-NATIVE_HALF: ret <3 x half> %[[RET]] +// CHECK-NO_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<3 x float> @llvm.[[TARGET]].quad.read.lane.at.v3f32(<3 x float> %[[#]], i32 %[[#]]) +// CHECK-NO_HALF: ret <3 x float> %[[RET]] +half3 test_half3(half3 expr, uint idx) { return QuadReadLaneAt(expr, idx); } + +// CHECK-NATIVE_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<4 x half> @llvm.[[TARGET]].quad.read.lane.at.v4f16(<4 x half> %[[#]], i32 %[[#]]) +// CHECK-NATIVE_HALF: ret <4 x half> %[[RET]] +// CHECK-NO_HALF: %[[RET:.*]] = call reassoc nnan ninf nsz arcp afn [[CC]]<4 x float> @llvm.[[TARGET]].quad.read.lane.at.v4f32(<4 x float> %[[#]], i32 %[[#]]) +// CHECK-NO_HALF: ret <4 x float> %[[RET]] +half4 test_half4(half4 expr, uint idx) { return QuadReadLaneAt(expr, idx); } + +#ifdef __HLSL_ENABLE_16_BIT +// CHECK-NATIVE_HALF: %[[RET:.*]] = call [[CC]]i16 @llvm.[[TARGET]].quad.read.lane.at.i16(i16 %[[#]], i32 %[[#]]) +// CHECK-NATIVE_HALF: ret i16 %[[RET]] +int16_t test_int16_t(int16_t expr, uint idx) { return QuadReadLaneAt(expr, idx); } + +// CHECK-NATIVE_HALF: %[[RET:.*]] = call [[CC]]<2 x i16> @llvm.[[TARGET]].quad.read.lane.at.v2i16(<2 x i16> %[[#]], i32 %[[#]]) +// CHECK-NATIVE_HALF: ret <2 x i16> %[[RET]] +int16_t2 test_int16_t2(int16_t2 expr, uint idx) { return QuadReadLaneAt(expr, idx); } + +// CHECK-NATIVE_HALF: %[[RET:.*]] = call [[CC]]<3 x i16> @llvm.[[TARGET]].quad.read.lane.at.v3i16(<3 x i16> %[[#]], i32 %[[#]]) +// CHECK-NATIVE_HALF: ret <3 x i16> %[[RET]] +int16_t3 test_int16_t3(int16_t3 expr, uint idx) { return QuadReadLaneAt(expr, idx); } + +// CHECK-NATIVE_HALF: %[[RET:.*]] = call [[CC]]<4 x i16> @llvm.[[TARGET]].quad.read.lane.at.v4i16(<4 x i16> %[[#]], i32 %[[#]]) +// CHECK-NATIVE_HALF: ret <4 x i16> %[[RET]] +int16_t4 test_int16_t4(int16_t4 expr, uint idx) { return QuadReadLaneAt(expr, idx); } + +// CHECK-NATIVE_HALF: %[[RET:.*]] = call [[CC]]i16 @llvm.[[TARGET]].quad.read.lane.at.i16(i16 %[[#]], i32 %[[#]]) +// CHECK-NATIVE_HALF: ret i16 %[[RET]] +uint16_t test_uint16_t(uint16_t expr, uint idx) { return QuadReadLaneAt(expr, idx); } + +// CHECK-NATIVE_HALF: %[[RET:.*]] = call [[CC]]<2 x i16> @llvm.[[TARGET]].quad.read.lane.at.v2i16(<2 x i16> %[[#]], i32 %[[#]]) +// CHECK-NATIVE_HALF: ret <2 x i16> %[[RET]] +uint16_t2 test_uint16_t2(uint16_t2 expr, uint idx) { return QuadReadLaneAt(expr, idx); } + +// CHECK-NATIVE_HALF: %[[RET:.*]] = call [[CC]]<3 x i16> @llvm.[[TARGET]].quad.read.lane.at.v3i16(<3 x i16> %[[#]], i32 %[[#]]) +// CHECK-NATIVE_HALF: ret <3 x i16> %[[RET]] +uint16_t3 test_uint16_t3(uint16_t3 expr, uint idx) { return QuadReadLaneAt(expr, idx); } + +// CHECK-NATIVE_HALF: %[[RET:.*]] = call [[CC]]<4 x i16> @llvm.[[TARGET]].quad.read.lane.at.v4i16(<4 x i16> %[[#]], i32 %[[#]]) +// CHECK-NATIVE_HALF: ret <4 x i16> %[[RET]] +uint16_t4 test_uint16_t4(uint16_t4 expr, uint idx) { return QuadReadLaneAt(expr, idx); } +#endif diff --git a/clang/test/SemaHLSL/BuiltIns/QuadReadLaneAt-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/QuadReadLaneAt-errors.hlsl new file mode 100644 index 0000000000000..38d22b1f44772 --- /dev/null +++ b/clang/test/SemaHLSL/BuiltIns/QuadReadLaneAt-errors.hlsl @@ -0,0 +1,38 @@ +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -verify + +bool test_too_few_arg() { + return __builtin_hlsl_quad_read_lane_at(); + // expected-error@-1 {{too few arguments to function call, expected 2, have 0}} +} + +float2 test_too_few_arg_1(float2 p0) { + return __builtin_hlsl_quad_read_lane_at(p0); + // expected-error@-1 {{too few arguments to function call, expected 2, have 1}} +} + +float2 test_too_many_arg(float2 p0) { + return __builtin_hlsl_quad_read_lane_at(p0, p0, p0); + // expected-error@-1 {{too many arguments to function call, expected 2, have 3}} +} + +float3 test_index_double_type_check(float3 p0, double idx) { + return __builtin_hlsl_quad_read_lane_at(p0, idx); + // expected-error@-1 {{passing 'double' to parameter of incompatible type 'unsigned int'}} +} + +float3 test_index_int3_type_check(float3 p0, int3 idxs) { + return __builtin_hlsl_quad_read_lane_at(p0, idxs); + // expected-error@-1 {{passing 'int3' (aka 'vector<int, 3>') to parameter of incompatible type 'unsigned int'}} +} + +struct S { float f; }; + +float3 test_index_S_type_check(float3 p0, S idx) { + return __builtin_hlsl_quad_read_lane_at(p0, idx); + // expected-error@-1 {{passing 'S' to parameter of incompatible type 'unsigned int'}} +} + +S test_expr_struct_type_check(S p0, int idx) { + return __builtin_hlsl_quad_read_lane_at(p0, idx); + // expected-error@-1 {{invalid operand of type 'S' where a scalar or vector is required}} +} diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index af360dfc78965..cbcce3bd6b6bc 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -277,6 +277,7 @@ def int_dx_wave_prefix_sum : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType def int_dx_wave_prefix_usum : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem, IntrTriviallyScalarizable]>; def int_dx_wave_prefix_product : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem, IntrTriviallyScalarizable]>; def int_dx_wave_prefix_uproduct : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem, IntrTriviallyScalarizable]>; +def int_dx_quad_read_lane_at : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrConvergent, IntrNoMem, IntrTriviallyScalarizable]>; def int_dx_quad_read_across_x : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem, IntrTriviallyScalarizable]>; def int_dx_quad_read_across_y : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem, IntrTriviallyScalarizable]>; def int_dx_quad_read_across_diagonal : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem, IntrTriviallyScalarizable]>; diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index 6e4cf8f7e72dc..8c76ef3f9e6c7 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -166,6 +166,7 @@ def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty] : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrConvergent]>; def int_spv_wave_prefix_sum : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>; def int_spv_wave_prefix_product : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>; + def int_spv_quad_read_lane_at : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; def int_spv_quad_read_across_x : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>; def int_spv_quad_read_across_y : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>; def int_spv_quad_read_across_diagonal : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>; diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td index 299d2d113b6bf..7ab9f99a911bb 100644 --- a/llvm/lib/Target/DirectX/DXIL.td +++ b/llvm/lib/Target/DirectX/DXIL.td @@ -1310,6 +1310,16 @@ def WavePrefixOp : DXILOp<121, wavePrefixOp> { let attributes = [Attributes<DXIL1_0, []>]; } +def QuadReadLaneAt : DXILOp<122, quadReadLaneAt> { + let Doc = "returns the value from the specified lane in the quad"; + let intrinsics = [IntrinSelect<int_dx_quad_read_lane_at>]; + let arguments = [OverloadTy, Int32Ty]; + let result = OverloadTy; + let overloads = [Overloads< + DXIL1_0, [HalfTy, FloatTy, DoubleTy, Int1Ty, Int16Ty, Int32Ty, Int64Ty]>]; + let stages = [Stages<DXIL1_0, [all_stages]>]; +} + def QuadOp : DXILOp<123, quadOp> { let Doc = "returns the value from another lane within the quad by swapping values in a direction"; let intrinsics = [ diff --git a/llvm/lib/Target/DirectX/DXILShaderFlags.cpp b/llvm/lib/Target/DirectX/DXILShaderFlags.cpp index 3a9ff9f62361a..da699667efcd1 100644 --- a/llvm/lib/Target/DirectX/DXILShaderFlags.cpp +++ b/llvm/lib/Target/DirectX/DXILShaderFlags.cpp @@ -107,6 +107,7 @@ static bool checkWaveOps(Intrinsic::ID IID) { case Intrinsic::dx_wave_prefix_product: case Intrinsic::dx_wave_prefix_uproduct: // Quad Op Variants + case Intrinsic::dx_quad_read_lane_at: case Intrinsic::dx_quad_read_across_x: case Intrinsic::dx_quad_read_across_y: case Intrinsic::dx_quad_read_across_diagonal: diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp index af1d7bc452126..292007796b682 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp +++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp @@ -19,6 +19,7 @@ bool DirectXTTIImpl::isTargetIntrinsicWithScalarOpAtArg( Intrinsic::ID ID, unsigned ScalarOpdIdx) const { switch (ID) { case Intrinsic::dx_wave_readlane: + case Intrinsic::dx_quad_read_lane_at: return ScalarOpdIdx == 1; default: return false; diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td index 66e5d2f6a626e..64379cf98987a 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td +++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td @@ -855,6 +855,7 @@ def OpGroupNonUniformBitwiseXor: OpGroupNUGroup<"BitwiseXor", 361>; def OpGroupNonUniformLogicalAnd: OpGroupNUGroup<"LogicalAnd", 362>; def OpGroupNonUniformLogicalOr: OpGroupNUGroup<"LogicalOr", 363>; def OpGroupNonUniformLogicalXor: OpGroupNUGroup<"LogicalXor", 364>; +def OpGroupNonUniformQuadBroadcast : OpGroupNU4<"QuadBroadcast", 365>; def OpGroupNonUniformQuadSwap: OpGroupNU4<"QuadSwap", 366>; // SPV_KHR_subgroup_rotate diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index cd99015a61ba9..70cc025186aff 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -5382,6 +5382,9 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg, return selectWaveExclusiveScanSum(ResVReg, ResType, I); case Intrinsic::spv_wave_prefix_product: return selectWaveExclusiveScanProduct(ResVReg, ResType, I); + case Intrinsic::spv_quad_read_lane_at: + return selectWaveOpInst(ResVReg, ResType, I, + SPIRV::OpGroupNonUniformQuadBroadcast); case Intrinsic::spv_quad_read_across_x: { return selectQuadSwap(ResVReg, ResType, I, /*Direction*/ 0); } diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp index aed16fd785af8..697b47d07866d 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp @@ -1832,6 +1832,7 @@ void addInstrRequirements(const MachineInstr &MI, } break; } + case SPIRV::OpGroupNonUniformQuadBroadcast: case SPIRV::OpGroupNonUniformQuadSwap: Reqs.addCapability(SPIRV::Capability::GroupNonUniformQuad); break; diff --git a/llvm/test/CodeGen/DirectX/QuadReadLaneAt.ll b/llvm/test/CodeGen/DirectX/QuadReadLaneAt.ll new file mode 100644 index 0000000000000..1ee2703c94ae0 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/QuadReadLaneAt.ll @@ -0,0 +1,95 @@ +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s + +; Test that for scalar values, QuadReadLaneAt maps down to the DirectX op + +define noundef i1 @quad_read_lane_at_bool(i1 noundef %expr, i32 noundef %idx) { +entry: +; CHECK: call i1 @dx.op.quadReadLaneAt.i1(i32 122, i1 %expr, i32 %idx) + %ret = call i1 @llvm.dx.quad.read.lane.at.i1(i1 %expr, i32 %idx) + ret i1 %ret +} + +define noundef half @quad_read_lane_at_half(half noundef %expr, i32 noundef %idx) { +entry: +; CHECK: call half @dx.op.quadReadLaneAt.f16(i32 122, half %expr, i32 %idx) + %ret = call half @llvm.dx.quad.read.lane.at.f16(half %expr, i32 %idx) + ret half %ret +} + +define noundef float @quad_read_lane_at_float(float noundef %expr, i32 noundef %idx) { +entry: +; CHECK: call float @dx.op.quadReadLaneAt.f32(i32 122, float %expr, i32 %idx) + %ret = call float @llvm.dx.quad.read.lane.at.f32(float %expr, i32 %idx) + ret float %ret +} + +define noundef double @quad_read_lane_at_double(double noundef %expr, i32 noundef %idx) { +entry: +; CHECK: call double @dx.op.quadReadLaneAt.f64(i32 122, double %expr, i32 %idx) + %ret = call double @llvm.dx.quad.read.lane.at.f64(double %expr, i32 %idx) + ret double %ret +} + +define noundef i16 @quad_read_lane_at_i16(i16 noundef %expr, i32 noundef %idx) { +entry: +; CHECK: call i16 @dx.op.quadReadLaneAt.i16(i32 122, i16 %expr, i32 %idx) + %ret = call i16 @llvm.dx.quad.read.lane.at.i16(i16 %expr, i32 %idx) + ret i16 %ret +} + +define noundef i32 @quad_read_lane_at_i32(i32 noundef %expr, i32 noundef %idx) { +entry: +; CHECK: call i32 @dx.op.quadReadLaneAt.i32(i32 122, i32 %expr, i32 %idx) + %ret = call i32 @llvm.dx.quad.read.lane.at.i32(i32 %expr, i32 %idx) + ret i32 %ret +} + +define noundef i64 @quad_read_lane_at_i64(i64 noundef %expr, i32 noundef %idx) { +entry: +; CHECK: call i64 @dx.op.quadReadLaneAt.i64(i32 122, i64 %expr, i32 %idx) + %ret = call i64 @llvm.dx.quad.read.lane.at.i64(i64 %expr, i32 %idx) + ret i64 %ret +} + +declare i1 @llvm.dx.quad.read.lane.at.i1(i1, i32) +declare half @llvm.dx.quad.read.lane.at.f16(half, i32) +declare float @llvm.dx.quad.read.lane.at.f32(float, i32) +declare double @llvm.dx.quad.read.lane.at.f64(double, i32) + +declare i16 @llvm.dx.quad.read.lane.at.i16(i16, i32) +declare i32 @llvm.dx.quad.read.lane.at.i32(i32, i32) +declare i64 @llvm.dx.quad.read.lane.at.i64(i64, i32) + +; Test that for vector values, QuadReadLaneAt scalarizes and maps down to the +; DirectX op + +define noundef <2 x half> @quad_read_lane_at_v2half(<2 x half> noundef %expr, i32 noundef %idx) { +entry: +; CHECK: call half @dx.op.quadReadLaneAt.f16(i32 122, half %expr.i0, i32 %idx) +; CHECK: call half @dx.op.quadReadLaneAt.f16(i32 122, half %expr.i1, i32 %idx) + %ret = call <2 x half> @llvm.dx.quad.read.lane.at.v2f16(<2 x half> %expr, i32 %idx) + ret <2 x half> %ret +} + +define noundef <3 x i32> @quad_read_lane_at_v3i32(<3 x i32> noundef %expr, i32 noundef %idx) { +entry: +; CHECK: call i32 @dx.op.quadReadLaneAt.i32(i32 122, i32 %expr.i0, i32 %idx) +; CHECK: call i32 @dx.op.quadReadLaneAt.i32(i32 122, i32 %expr.i1, i32 %idx) +; CHECK: call i32 @dx.op.quadReadLaneAt.i32(i32 122, i32 %expr.i2, i32 %idx) + %ret = call <3 x i32> @llvm.dx.quad.read.lane.at.v3i32(<3 x i32> %expr, i32 %idx) + ret <3 x i32> %ret +} + +define noundef <4 x double> @quad_read_lane_at_v4f64(<4 x double> noundef %expr, i32 noundef %idx) { +entry: +; CHECK: call double @dx.op.quadReadLaneAt.f64(i32 122, double %expr.i0, i32 %idx) +; CHECK: call double @dx.op.quadReadLaneAt.f64(i32 122, double %expr.i1, i32 %idx) +; CHECK: call double @dx.op.quadReadLaneAt.f64(i32 122, double %expr.i2, i32 %idx) +; CHECK: call double @dx.op.quadReadLaneAt.f64(i32 122, double %expr.i3, i32 %idx) + %ret = call <4 x double> @llvm.dx.quad.read.lane.at.v4f64(<4 x double> %expr, i32 %idx) + ret <4 x double> %ret +} + +declare <2 x half> @llvm.dx.quad.read.lane.at.v2f16(<2 x half>, i32) +declare <3 x i32> @llvm.dx.quad.read.lane.at.v3i32(<3 x i32>, i32) +declare <4 x double> @llvm.dx.quad.read.lane.at.v4f64(<4 x double>, i32) diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/QuadReadLaneAt.constant.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/QuadReadLaneAt.constant.ll new file mode 100644 index 0000000000000..0e4258ba65a6d --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/QuadReadLaneAt.constant.ll @@ -0,0 +1,62 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-vulkan1.3 %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan1.3 %s -o - -filetype=obj | spirv-val --target-env vulkan1.3 %} + +; Test lowering to spir-v backend for various types and scalar/vector +; This tests pre SPIRV 1.5 where index must be a constant + +; CHECK: OpCapability GroupNonUniformQuad + +; CHECK-DAG: %[[#bool:]] = OpTypeBool +; CHECK-DAG: %[[#f16:]] = OpTypeFloat 16 +; CHECK-DAG: %[[#f32:]] = OpTypeFloat 32 +; CHECK-DAG: %[[#uint:]] = OpTypeInt 32 0 +; CHECK-DAG: %[[#v4_half:]] = OpTypeVector %[[#f16]] 4 +; CHECK-DAG: %[[#scope:]] = OpConstant %[[#uint]] 3 +; CHECK-DAG: %[[#uint_0:]] = OpConstant %[[#uint]] 0 + +; CHECK-LABEL: Begin function test_bool +; CHECK: %[[#bexpr:]] = OpFunctionParameter %[[#bool]] +define internal i1 @test_bool(i1 %bexpr) { +entry: +; CHECK: %[[#bret:]] = OpGroupNonUniformQuadBroadcast %[[#bool]] %[[#scope]] %[[#bexpr]] %[[#uint_0]] + %0 = call i1 @llvm.spv.quad.read.lane.at.i1(i1 %bexpr, i32 0) + ret i1 %0 +} + +; CHECK-LABEL: Begin function test_float +; CHECK: %[[#fexpr:]] = OpFunctionParameter %[[#f32]] +define internal float @test_float(float %fexpr) { +entry: +; CHECK: %[[#fret:]] = OpGroupNonUniformQuadBroadcast %[[#f32]] %[[#scope]] %[[#fexpr]] %[[#uint_0]] + %0 = call float @llvm.spv.quad.read.lane.at.f32(float %fexpr, i32 0) + ret float %0 +} + +; CHECK-LABEL: Begin function test_int +; CHECK: %[[#iexpr:]] = OpFunctionParameter %[[#uint]] +define internal i32 @test_int(i32 %iexpr) { +entry: +; CHECK: %[[#iret:]] = OpGroupNonUniformQuadBroadcast %[[#uint]] %[[#scope]] %[[#iexpr]] %[[#uint_0]] + %0 = call i32 @llvm.spv.quad.read.lane.at.i32(i32 %iexpr, i32 0) + ret i32 %0 +} + +; CHECK-LABEL: Begin function test_vhalf +; CHECK: %[[#vbexpr:]] = OpFunctionParameter %[[#v4_half]] +define internal <4 x half> @test_vhalf(<4 x half> %vbexpr) { +entry: +; CHECK: %[[#vhalfret:]] = OpGroupNonUniformQuadBroadcast %[[#v4_half]] %[[#scope]] %[[#vbexpr]] %[[#uint_0]] + %0 = call <4 x half> @llvm.spv.quad.read.lane.at.v4half(<4 x half> %vbexpr, i32 0) + ret <4 x half> %0 +} + +define void @main() #0 { + ret void +} + +declare i1 @llvm.spv.quad.read.lane.at.i1(i1, i32) +declare float @llvm.spv.quad.read.lane.at.f32(float, i32) +declare i32 @llvm.spv.quad.read.lane.at.i32(i32, i32) +declare <4 x half> @llvm.spv.quad.read.lane.at.v4half(<4 x half>, i32) + +attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/QuadReadLaneAt.uniform.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/QuadReadLaneAt.uniform.ll new file mode 100644 index 0000000000000..f3818a6828f86 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/QuadReadLaneAt.uniform.ll @@ -0,0 +1,65 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv1.5-vulkan-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-unknown %s -o - -filetype=obj | spirv-val %} + +; Test lowering to spir-v backend for various types and scalar/vector +; This tests SPIRV 1.5 where index must be dynamically uniform + +; CHECK: OpCapability GroupNonUniformQuad + +; CHECK-DAG: %[[#bool:]] = OpTypeBool +; CHECK-DAG: %[[#f16:]] = OpTypeFloat 16 +; CHECK-DAG: %[[#f32:]] = OpTypeFloat 32 +; CHECK-DAG: %[[#uint:]] = OpTypeInt 32 0 +; CHECK-DAG: %[[#v4_half:]] = OpTypeVector %[[#f16]] 4 +; CHECK-DAG: %[[#scope:]] = OpConstant %[[#uint]] 3 + +; CHECK-LABEL: Begin function test_bool +; CHECK: %[[#bexpr:]] = OpFunctionParameter %[[#bool]] +; CHECK: %[[#idx:]] = OpFunctionParameter %[[#uint]] +define internal i1 @test_bool(i1 %bexpr, i32 %idx) { +entry: +; CHECK: %[[#bret:]] = OpGroupNonUniformQuadBroadcast %[[#bool]] %[[#scope]] %[[#bexpr]] %[[#idx]] + %0 = call i1 @llvm.spv.quad.read.lane.at.i1(i1 %bexpr, i32 %idx) + ret i1 %0 +} + +; CHECK-LABEL: Begin function test_float +; CHECK: %[[#fexpr:]] = OpFunctionParameter %[[#f32]] +; CHECK: %[[#idx:]] = OpFunctionParameter %[[#uint]] +define internal float @test_float(float %fexpr, i32 %idx) { +entry: +; CHECK: %[[#fret:]] = OpGroupNonUniformQuadBroadcast %[[#f32]] %[[#scope]] %[[#fexpr]] %[[#idx]] + %0 = call float @llvm.spv.quad.read.lane.at.f32(float %fexpr, i32 %idx) + ret float %0 +} + +; CHECK-LABEL: Begin function test_int +; CHECK: %[[#iexpr:]] = OpFunctionParameter %[[#uint]] +; CHECK: %[[#idx:]] = OpFunctionParameter %[[#uint]] +define internal i32 @test_int(i32 %iexpr, i32 %idx) { +entry: +; CHECK: %[[#iret:]] = OpGroupNonUniformQuadBroadcast %[[#uint]] %[[#scope]] %[[#iexpr]] %[[#idx]] + %0 = call i32 @llvm.spv.quad.read.lane.at.i32(i32 %iexpr, i32 %idx) + ret i32 %0 +} + +; CHECK-LABEL: Begin function test_vhalf +; CHECK: %[[#vbexpr:]] = OpFunctionParameter %[[#v4_half]] +; CHECK: %[[#idx:]] = OpFunctionParameter %[[#uint]] +define internal <4 x half> @test_vhalf(<4 x half> %vbexpr, i32 %idx) { +entry: +; CHECK: %[[#vhalfret:]] = OpGroupNonUniformQuadBroadcast %[[#v4_half]] %[[#scope]] %[[#vbexpr]] %[[#idx]] + %0 = call <4 x half> @llvm.spv.quad.read.lane.at.v4half(<4 x half> %vbexpr, i32 %idx) + ret <4 x half> %0 +} + +define void @main() #0 { + ret void +} + +declare i1 @llvm.spv.quad.read.lane.at.i1(i1, i32) +declare float @llvm.spv.quad.read.lane.at.f32(float, i32) +declare i32 @llvm.spv.quad.read.lane.at.i32(i32, i32) +declare <4 x half> @llvm.spv.quad.read.lane.at.v4half(<4 x half>, i32) + +attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" } _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
