https://github.com/farzonl updated https://github.com/llvm/llvm-project/pull/89130
>From f1406f8ee7ef0db485186606d5c373322596765a Mon Sep 17 00:00:00 2001 From: Farzon Lotfi <farzonlo...@microsoft.com> Date: Wed, 17 Apr 2024 15:25:33 -0400 Subject: [PATCH 1/2] [SPIRV][HLSL] Add mad intrinsic lowering for spirv --- clang/lib/CodeGen/CGBuiltin.cpp | 52 ++- clang/lib/CodeGen/CGHLSLRuntime.cpp | 15 + clang/lib/CodeGen/CGHLSLRuntime.h | 9 +- clang/lib/CodeGen/CodeGenFunction.h | 1 + clang/test/CodeGenHLSL/builtins/mad.hlsl | 240 ++++++++------ llvm/test/CodeGen/DirectX/fmad.ll | 12 +- .../CodeGen/SPIRV/hlsl-intrinsics/fmad.ll | 12 +- .../CodeGen/SPIRV/hlsl-intrinsics/imad.ll | 302 ++++++++++++++++++ 8 files changed, 521 insertions(+), 122 deletions(-) create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/imad.ll diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index a05874e63c73c2..3f9f5aa9b42ed0 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18186,6 +18186,40 @@ Intrinsic::ID getDotProductIntrinsic(QualType QT, int elementCount) { return Intrinsic::dx_udot; } +Value *CodeGenFunction::EmitHLSLMadIntrinsic(const CallExpr *E) { + Value *M = EmitScalarExpr(E->getArg(0)); + Value *A = EmitScalarExpr(E->getArg(1)); + Value *B = EmitScalarExpr(E->getArg(2)); + if (E->getArg(0)->getType()->hasFloatingRepresentation()) + return Builder.CreateIntrinsic( + /*ReturnType*/ M->getType(), Intrinsic::fmuladd, + ArrayRef<Value *>{M, A, B}, nullptr, "hlsl.fmad"); + + auto EmitHLSLIMadDirectX = [E, M, A, B, this]() -> Value * { + if (E->getArg(0)->getType()->hasSignedIntegerRepresentation()) + return Builder.CreateIntrinsic( + /*ReturnType*/ M->getType(), Intrinsic::dx_imad, + ArrayRef<Value *>{M, A, B}, nullptr, "dx.imad"); + assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation()); + return Builder.CreateIntrinsic( + /*ReturnType=*/M->getType(), Intrinsic::dx_umad, + ArrayRef<Value *>{M, A, B}, nullptr, "dx.umad"); + }; + + auto EmitHLSLIMadGeneric = [E, M, A, B, this]() -> Value * { + if (E->getArg(0)->getType()->hasSignedIntegerRepresentation()) { + Value *Mul = Builder.CreateNSWMul(M, A); + return Builder.CreateNSWAdd(Mul, B); + } + assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation()); + Value *Mul = Builder.CreateNUWMul(M, A); + return Builder.CreateNUWAdd(Mul, B); + }; + + return CGM.getHLSLRuntime().emitHLSLIntrinsic( + EmitHLSLIMadDirectX, EmitHLSLIMadGeneric, EmitHLSLIMadGeneric); +} + Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { if (!getLangOpts().HLSL) @@ -18291,23 +18325,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, ArrayRef<Value *>{Op0}, nullptr, "dx.isinf"); } case Builtin::BI__builtin_hlsl_mad: { - Value *M = EmitScalarExpr(E->getArg(0)); - Value *A = EmitScalarExpr(E->getArg(1)); - Value *B = EmitScalarExpr(E->getArg(2)); - if (E->getArg(0)->getType()->hasFloatingRepresentation()) { - return Builder.CreateIntrinsic( - /*ReturnType*/ M->getType(), Intrinsic::fmuladd, - ArrayRef<Value *>{M, A, B}, nullptr, "dx.fmad"); - } - if (E->getArg(0)->getType()->hasSignedIntegerRepresentation()) { - return Builder.CreateIntrinsic( - /*ReturnType*/ M->getType(), Intrinsic::dx_imad, - ArrayRef<Value *>{M, A, B}, nullptr, "dx.imad"); - } - assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation()); - return Builder.CreateIntrinsic( - /*ReturnType=*/M->getType(), Intrinsic::dx_umad, - ArrayRef<Value *>{M, A, B}, nullptr, "dx.umad"); + return EmitHLSLMadIntrinsic(E); } case Builtin::BI__builtin_hlsl_elementwise_rcp: { 
Value *Op0 = EmitScalarExpr(E->getArg(0)); diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp index 5e6a3dd4878f46..1c801a4d1b06a0 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.cpp +++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp @@ -119,6 +119,21 @@ llvm::Triple::ArchType CGHLSLRuntime::getArch() { return CGM.getTarget().getTriple().getArch(); } +Value * +CGHLSLRuntime::emitHLSLIntrinsic(llvm::function_ref<Value *()> DxilEmitter, + llvm::function_ref<Value *()> SPIRVEmitter, + llvm::function_ref<Value *()> GenericEmitter) { + llvm::Triple::ArchType Arch = getArch(); + switch (Arch) { + case llvm::Triple::dxil: + return DxilEmitter(); + case llvm::Triple::spirv: + return SPIRVEmitter(); + default: + return GenericEmitter(); + } +} + void CGHLSLRuntime::addConstant(VarDecl *D, Buffer &CB) { if (D->getStorageClass() == SC_Static) { // For static inside cbuffer, take as global static. diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h index 506b364f5b2ec7..923cf2140c13f0 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -50,6 +50,7 @@ namespace llvm { class GlobalVariable; class Function; class StructType; +class Value; } // namespace llvm namespace clang { @@ -79,7 +80,13 @@ class CGHLSLRuntime { //===----------------------------------------------------------------------===// // End of reserved area for HLSL intrinsic getters. //===----------------------------------------------------------------------===// - + llvm::Value *emitHLSLIntrinsic( + llvm::function_ref<llvm::Value *()> DxilEmitter, + llvm::function_ref<llvm::Value *()> SPIRVEmitter, + llvm::function_ref<llvm::Value *()> GenericEmitter = + []() -> llvm::Value * { + llvm_unreachable("Intrinsic not supported by target architecture."); + }); struct BufferResBinding { // The ID like 2 in register(b2, space1). 
std::optional<unsigned> Reg; diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index ff1873325d409f..f0e923949f2047 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4548,6 +4548,7 @@ class CodeGenFunction : public CodeGenTypeCache { llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitHLSLBuiltinExpr(unsigned BuiltinID, const CallExpr *E); + llvm::Value *EmitHLSLMadIntrinsic(const CallExpr *E); llvm::Value *EmitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx, const CallExpr *E); llvm::Value *EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E); diff --git a/clang/test/CodeGenHLSL/builtins/mad.hlsl b/clang/test/CodeGenHLSL/builtins/mad.hlsl index 749eac6d64736d..bd4f38067a5c59 100644 --- a/clang/test/CodeGenHLSL/builtins/mad.hlsl +++ b/clang/test/CodeGenHLSL/builtins/mad.hlsl @@ -1,182 +1,238 @@ // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ // RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ -// RUN: --check-prefixes=CHECK,NATIVE_HALF +// RUN: --check-prefixes=CHECK,DXIL_CHECK,DXIL_NATIVE_HALF,NATIVE_HALF // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ -// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF +// RUN: -o - | FileCheck %s --check-prefixes=CHECK,DXIL_CHECK,NO_HALF + +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \ +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ +// RUN: --check-prefixes=CHECK,NATIVE_HALF,SPIR_NATIVE_HALF,SPIR_CHECK +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \ +// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF,SPIR_CHECK #ifdef __HLSL_ENABLE_16_BIT -// NATIVE_HALF: %dx.umad = call i16 @llvm.dx.umad.i16(i16 %0, i16 %1, i16 %2) -// NATIVE_HALF: ret i16 %dx.umad +// DXIL_NATIVE_HALF: %dx.umad = call i16 @llvm.dx.umad.i16(i16 %0, i16 %1, i16 %2) +// DXIL_NATIVE_HALF: ret i16 %dx.umad +// SPIR_NATIVE_HALF: mul nuw i16 %{{.*}}, %{{.*}} +// SPIR_NATIVE_HALF: add nuw i16 %{{.*}}, %{{.*}} uint16_t test_mad_uint16_t(uint16_t p0, uint16_t p1, uint16_t p2) { return mad(p0, p1, p2); } -// NATIVE_HALF: %dx.umad = call <2 x i16> @llvm.dx.umad.v2i16(<2 x i16> %0, <2 x i16> %1, <2 x i16> %2) -// NATIVE_HALF: ret <2 x i16> %dx.umad +// DXIL_NATIVE_HALF: %dx.umad = call <2 x i16> @llvm.dx.umad.v2i16(<2 x i16> %0, <2 x i16> %1, <2 x i16> %2) +// DXIL_NATIVE_HALF: ret <2 x i16> %dx.umad +// SPIR_NATIVE_HALF: mul nuw <2 x i16> %{{.*}}, %{{.*}} +// SPIR_NATIVE_HALF: add nuw <2 x i16> %{{.*}}, %{{.*}} uint16_t2 test_mad_uint16_t2(uint16_t2 p0, uint16_t2 p1, uint16_t2 p2) { return mad(p0, p1, p2); } -// NATIVE_HALF: %dx.umad = call <3 x i16> @llvm.dx.umad.v3i16(<3 x i16> %0, <3 x i16> %1, <3 x i16> %2) -// NATIVE_HALF: ret <3 x i16> %dx.umad +// DXIL_NATIVE_HALF: %dx.umad = call <3 x i16> @llvm.dx.umad.v3i16(<3 x i16> %0, <3 x i16> %1, <3 x i16> %2) +// DXIL_NATIVE_HALF: ret <3 x i16> %dx.umad +// SPIR_NATIVE_HALF: mul nuw <3 x i16> %{{.*}}, %{{.*}} +// SPIR_NATIVE_HALF: add nuw <3 x i16> %{{.*}}, %{{.*}} uint16_t3 test_mad_uint16_t3(uint16_t3 p0, uint16_t3 p1, uint16_t3 p2) { return mad(p0, p1, p2); } -// 
NATIVE_HALF: %dx.umad = call <4 x i16> @llvm.dx.umad.v4i16(<4 x i16> %0, <4 x i16> %1, <4 x i16> %2) -// NATIVE_HALF: ret <4 x i16> %dx.umad +// DXIL_NATIVE_HALF: %dx.umad = call <4 x i16> @llvm.dx.umad.v4i16(<4 x i16> %0, <4 x i16> %1, <4 x i16> %2) +// DXIL_NATIVE_HALF: ret <4 x i16> %dx.umad +// SPIR_NATIVE_HALF: mul nuw <4 x i16> %{{.*}}, %{{.*}} +// SPIR_NATIVE_HALF: add nuw <4 x i16> %{{.*}}, %{{.*}} uint16_t4 test_mad_uint16_t4(uint16_t4 p0, uint16_t4 p1, uint16_t4 p2) { return mad(p0, p1, p2); } -// NATIVE_HALF: %dx.imad = call i16 @llvm.dx.imad.i16(i16 %0, i16 %1, i16 %2) -// NATIVE_HALF: ret i16 %dx.imad +// DXIL_NATIVE_HALF: %dx.imad = call i16 @llvm.dx.imad.i16(i16 %0, i16 %1, i16 %2) +// DXIL_NATIVE_HALF: ret i16 %dx.imad +// SPIR_NATIVE_HALF: mul nsw i16 %{{.*}}, %{{.*}} +// SPIR_NATIVE_HALF: add nsw i16 %{{.*}}, %{{.*}} int16_t test_mad_int16_t(int16_t p0, int16_t p1, int16_t p2) { return mad(p0, p1, p2); } -// NATIVE_HALF: %dx.imad = call <2 x i16> @llvm.dx.imad.v2i16(<2 x i16> %0, <2 x i16> %1, <2 x i16> %2) -// NATIVE_HALF: ret <2 x i16> %dx.imad +// DXIL_NATIVE_HALF: %dx.imad = call <2 x i16> @llvm.dx.imad.v2i16(<2 x i16> %0, <2 x i16> %1, <2 x i16> %2) +// DXIL_NATIVE_HALF: ret <2 x i16> %dx.imad +// SPIR_NATIVE_HALF: mul nsw <2 x i16> %{{.*}}, %{{.*}} +// SPIR_NATIVE_HALF: add nsw <2 x i16> %{{.*}}, %{{.*}} int16_t2 test_mad_int16_t2(int16_t2 p0, int16_t2 p1, int16_t2 p2) { return mad(p0, p1, p2); } -// NATIVE_HALF: %dx.imad = call <3 x i16> @llvm.dx.imad.v3i16(<3 x i16> %0, <3 x i16> %1, <3 x i16> %2) -// NATIVE_HALF: ret <3 x i16> %dx.imad +// DXIL_NATIVE_HALF: %dx.imad = call <3 x i16> @llvm.dx.imad.v3i16(<3 x i16> %0, <3 x i16> %1, <3 x i16> %2) +// DXIL_NATIVE_HALF: ret <3 x i16> %dx.imad +// SPIR_NATIVE_HALF: mul nsw <3 x i16> %{{.*}}, %{{.*}} +// SPIR_NATIVE_HALF: add nsw <3 x i16> %{{.*}}, %{{.*}} int16_t3 test_mad_int16_t3(int16_t3 p0, int16_t3 p1, int16_t3 p2) { return mad(p0, p1, p2); } -// NATIVE_HALF: %dx.imad = call <4 x i16> @llvm.dx.imad.v4i16(<4 x i16> %0, <4 x i16> %1, <4 x i16> %2) -// NATIVE_HALF: ret <4 x i16> %dx.imad +// DXIL_NATIVE_HALF: %dx.imad = call <4 x i16> @llvm.dx.imad.v4i16(<4 x i16> %0, <4 x i16> %1, <4 x i16> %2) +// DXIL_NATIVE_HALF: ret <4 x i16> %dx.imad +// SPIR_NATIVE_HALF: mul nsw <4 x i16> %{{.*}}, %{{.*}} +// SPIR_NATIVE_HALF: add nsw <4 x i16> %{{.*}}, %{{.*}} int16_t4 test_mad_int16_t4(int16_t4 p0, int16_t4 p1, int16_t4 p2) { return mad(p0, p1, p2); } #endif // __HLSL_ENABLE_16_BIT -// NATIVE_HALF: %dx.fmad = call half @llvm.fmuladd.f16(half %0, half %1, half %2) -// NATIVE_HALF: ret half %dx.fmad -// NO_HALF: %dx.fmad = call float @llvm.fmuladd.f32(float %0, float %1, float %2) -// NO_HALF: ret float %dx.fmad +// NATIVE_HALF: %hlsl.fmad = call half @llvm.fmuladd.f16(half %0, half %1, half %2) +// NATIVE_HALF: ret half %hlsl.fmad +// NO_HALF: %hlsl.fmad = call float @llvm.fmuladd.f32(float %0, float %1, float %2) +// NO_HALF: ret float %hlsl.fmad half test_mad_half(half p0, half p1, half p2) { return mad(p0, p1, p2); } -// NATIVE_HALF: %dx.fmad = call <2 x half> @llvm.fmuladd.v2f16(<2 x half> %0, <2 x half> %1, <2 x half> %2) -// NATIVE_HALF: ret <2 x half> %dx.fmad -// NO_HALF: %dx.fmad = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %0, <2 x float> %1, <2 x float> %2) -// NO_HALF: ret <2 x float> %dx.fmad +// NATIVE_HALF: %hlsl.fmad = call <2 x half> @llvm.fmuladd.v2f16(<2 x half> %0, <2 x half> %1, <2 x half> %2) +// NATIVE_HALF: ret <2 x half> %hlsl.fmad +// NO_HALF: %hlsl.fmad = call <2 x float> 
@llvm.fmuladd.v2f32(<2 x float> %0, <2 x float> %1, <2 x float> %2) +// NO_HALF: ret <2 x float> %hlsl.fmad half2 test_mad_half2(half2 p0, half2 p1, half2 p2) { return mad(p0, p1, p2); } -// NATIVE_HALF: %dx.fmad = call <3 x half> @llvm.fmuladd.v3f16(<3 x half> %0, <3 x half> %1, <3 x half> %2) -// NATIVE_HALF: ret <3 x half> %dx.fmad -// NO_HALF: %dx.fmad = call <3 x float> @llvm.fmuladd.v3f32(<3 x float> %0, <3 x float> %1, <3 x float> %2) -// NO_HALF: ret <3 x float> %dx.fmad +// NATIVE_HALF: %hlsl.fmad = call <3 x half> @llvm.fmuladd.v3f16(<3 x half> %0, <3 x half> %1, <3 x half> %2) +// NATIVE_HALF: ret <3 x half> %hlsl.fmad +// NO_HALF: %hlsl.fmad = call <3 x float> @llvm.fmuladd.v3f32(<3 x float> %0, <3 x float> %1, <3 x float> %2) +// NO_HALF: ret <3 x float> %hlsl.fmad half3 test_mad_half3(half3 p0, half3 p1, half3 p2) { return mad(p0, p1, p2); } -// NATIVE_HALF: %dx.fmad = call <4 x half> @llvm.fmuladd.v4f16(<4 x half> %0, <4 x half> %1, <4 x half> %2) -// NATIVE_HALF: ret <4 x half> %dx.fmad -// NO_HALF: %dx.fmad = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %0, <4 x float> %1, <4 x float> %2) -// NO_HALF: ret <4 x float> %dx.fmad +// NATIVE_HALF: %hlsl.fmad = call <4 x half> @llvm.fmuladd.v4f16(<4 x half> %0, <4 x half> %1, <4 x half> %2) +// NATIVE_HALF: ret <4 x half> %hlsl.fmad +// NO_HALF: %hlsl.fmad = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %0, <4 x float> %1, <4 x float> %2) +// NO_HALF: ret <4 x float> %hlsl.fmad half4 test_mad_half4(half4 p0, half4 p1, half4 p2) { return mad(p0, p1, p2); } -// CHECK: %dx.fmad = call float @llvm.fmuladd.f32(float %0, float %1, float %2) -// CHECK: ret float %dx.fmad +// CHECK: %hlsl.fmad = call float @llvm.fmuladd.f32(float %0, float %1, float %2) +// CHECK: ret float %hlsl.fmad float test_mad_float(float p0, float p1, float p2) { return mad(p0, p1, p2); } -// CHECK: %dx.fmad = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %0, <2 x float> %1, <2 x float> %2) -// CHECK: ret <2 x float> %dx.fmad +// CHECK: %hlsl.fmad = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %0, <2 x float> %1, <2 x float> %2) +// CHECK: ret <2 x float> %hlsl.fmad float2 test_mad_float2(float2 p0, float2 p1, float2 p2) { return mad(p0, p1, p2); } -// CHECK: %dx.fmad = call <3 x float> @llvm.fmuladd.v3f32(<3 x float> %0, <3 x float> %1, <3 x float> %2) -// CHECK: ret <3 x float> %dx.fmad +// CHECK: %hlsl.fmad = call <3 x float> @llvm.fmuladd.v3f32(<3 x float> %0, <3 x float> %1, <3 x float> %2) +// CHECK: ret <3 x float> %hlsl.fmad float3 test_mad_float3(float3 p0, float3 p1, float3 p2) { return mad(p0, p1, p2); } -// CHECK: %dx.fmad = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %0, <4 x float> %1, <4 x float> %2) -// CHECK: ret <4 x float> %dx.fmad +// CHECK: %hlsl.fmad = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %0, <4 x float> %1, <4 x float> %2) +// CHECK: ret <4 x float> %hlsl.fmad float4 test_mad_float4(float4 p0, float4 p1, float4 p2) { return mad(p0, p1, p2); } -// CHECK: %dx.fmad = call double @llvm.fmuladd.f64(double %0, double %1, double %2) -// CHECK: ret double %dx.fmad +// CHECK: %hlsl.fmad = call double @llvm.fmuladd.f64(double %0, double %1, double %2) +// CHECK: ret double %hlsl.fmad double test_mad_double(double p0, double p1, double p2) { return mad(p0, p1, p2); } -// CHECK: %dx.fmad = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %0, <2 x double> %1, <2 x double> %2) -// CHECK: ret <2 x double> %dx.fmad +// CHECK: %hlsl.fmad = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %0, <2 x double> %1, <2 x double> 
%2) +// CHECK: ret <2 x double> %hlsl.fmad double2 test_mad_double2(double2 p0, double2 p1, double2 p2) { return mad(p0, p1, p2); } -// CHECK: %dx.fmad = call <3 x double> @llvm.fmuladd.v3f64(<3 x double> %0, <3 x double> %1, <3 x double> %2) -// CHECK: ret <3 x double> %dx.fmad +// CHECK: %hlsl.fmad = call <3 x double> @llvm.fmuladd.v3f64(<3 x double> %0, <3 x double> %1, <3 x double> %2) +// CHECK: ret <3 x double> %hlsl.fmad double3 test_mad_double3(double3 p0, double3 p1, double3 p2) { return mad(p0, p1, p2); } -// CHECK: %dx.fmad = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> %0, <4 x double> %1, <4 x double> %2) -// CHECK: ret <4 x double> %dx.fmad +// CHECK: %hlsl.fmad = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> %0, <4 x double> %1, <4 x double> %2) +// CHECK: ret <4 x double> %hlsl.fmad double4 test_mad_double4(double4 p0, double4 p1, double4 p2) { return mad(p0, p1, p2); } -// CHECK: %dx.imad = call i32 @llvm.dx.imad.i32(i32 %0, i32 %1, i32 %2) -// CHECK: ret i32 %dx.imad +// DXIL_CHECK: %dx.imad = call i32 @llvm.dx.imad.i32(i32 %0, i32 %1, i32 %2) +// DXIL_CHECK: ret i32 %dx.imad +// SPIR_CHECK: mul nsw i32 %{{.*}}, %{{.*}} +// SPIR_CHECK: add nsw i32 %{{.*}}, %{{.*}} int test_mad_int(int p0, int p1, int p2) { return mad(p0, p1, p2); } -// CHECK: %dx.imad = call <2 x i32> @llvm.dx.imad.v2i32(<2 x i32> %0, <2 x i32> %1, <2 x i32> %2) -// CHECK: ret <2 x i32> %dx.imad +// DXIL_CHECK: %dx.imad = call <2 x i32> @llvm.dx.imad.v2i32(<2 x i32> %0, <2 x i32> %1, <2 x i32> %2) +// DXIL_CHECK: ret <2 x i32> %dx.imad +// SPIR_CHECK: mul nsw <2 x i32> %{{.*}}, %{{.*}} +// SPIR_CHECK: add nsw <2 x i32> %{{.*}}, %{{.*}} int2 test_mad_int2(int2 p0, int2 p1, int2 p2) { return mad(p0, p1, p2); } -// CHECK: %dx.imad = call <3 x i32> @llvm.dx.imad.v3i32(<3 x i32> %0, <3 x i32> %1, <3 x i32> %2) -// CHECK: ret <3 x i32> %dx.imad +// DXIL_CHECK: %dx.imad = call <3 x i32> @llvm.dx.imad.v3i32(<3 x i32> %0, <3 x i32> %1, <3 x i32> %2) +// DXIL_CHECK: ret <3 x i32> %dx.imad +// SPIR_CHECK: mul nsw <3 x i32> %{{.*}}, %{{.*}} +// SPIR_CHECK: add nsw <3 x i32> %{{.*}}, %{{.*}} int3 test_mad_int3(int3 p0, int3 p1, int3 p2) { return mad(p0, p1, p2); } -// CHECK: %dx.imad = call <4 x i32> @llvm.dx.imad.v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) -// CHECK: ret <4 x i32> %dx.imad +// DXIL_CHECK: %dx.imad = call <4 x i32> @llvm.dx.imad.v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) +// DXIL_CHECK: ret <4 x i32> %dx.imad +// SPIR_CHECK: mul nsw <4 x i32> %{{.*}}, %{{.*}} +// SPIR_CHECK: add nsw <4 x i32> %{{.*}}, %{{.*}} int4 test_mad_int4(int4 p0, int4 p1, int4 p2) { return mad(p0, p1, p2); } -// CHECK: %dx.imad = call i64 @llvm.dx.imad.i64(i64 %0, i64 %1, i64 %2) -// CHECK: ret i64 %dx.imad +// DXIL_CHECK: %dx.imad = call i64 @llvm.dx.imad.i64(i64 %0, i64 %1, i64 %2) +// DXIL_CHECK: ret i64 %dx.imad +// SPIR_CHECK: mul nsw i64 %{{.*}}, %{{.*}} +// SPIR_CHECK: add nsw i64 %{{.*}}, %{{.*}} int64_t test_mad_int64_t(int64_t p0, int64_t p1, int64_t p2) { return mad(p0, p1, p2); } -// CHECK: %dx.imad = call <2 x i64> @llvm.dx.imad.v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) -// CHECK: ret <2 x i64> %dx.imad +// DXIL_CHECK: %dx.imad = call <2 x i64> @llvm.dx.imad.v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) +// DXIL_CHECK: ret <2 x i64> %dx.imad +// SPIR_CHECK: mul nsw <2 x i64> %{{.*}}, %{{.*}} +// SPIR_CHECK: add nsw <2 x i64> %{{.*}}, %{{.*}} int64_t2 test_mad_int64_t2(int64_t2 p0, int64_t2 p1, int64_t2 p2) { return mad(p0, p1, p2); } -// CHECK: %dx.imad = call <3 x i64> 
@llvm.dx.imad.v3i64(<3 x i64> %0, <3 x i64> %1, <3 x i64> %2) -// CHECK: ret <3 x i64> %dx.imad +// DXIL_CHECK: %dx.imad = call <3 x i64> @llvm.dx.imad.v3i64(<3 x i64> %0, <3 x i64> %1, <3 x i64> %2) +// DXIL_CHECK: ret <3 x i64> %dx.imad +// SPIR_CHECK: mul nsw <3 x i64> %{{.*}}, %{{.*}} +// SPIR_CHECK: add nsw <3 x i64> %{{.*}}, %{{.*}} int64_t3 test_mad_int64_t3(int64_t3 p0, int64_t3 p1, int64_t3 p2) { return mad(p0, p1, p2); } -// CHECK: %dx.imad = call <4 x i64> @llvm.dx.imad.v4i64(<4 x i64> %0, <4 x i64> %1, <4 x i64> %2) -// CHECK: ret <4 x i64> %dx.imad +// DXIL_CHECK: %dx.imad = call <4 x i64> @llvm.dx.imad.v4i64(<4 x i64> %0, <4 x i64> %1, <4 x i64> %2) +// DXIL_CHECK: ret <4 x i64> %dx.imad +// SPIR_CHECK: mul nsw <4 x i64> %{{.*}}, %{{.*}} +// SPIR_CHECK: add nsw <4 x i64> %{{.*}}, %{{.*}} int64_t4 test_mad_int64_t4(int64_t4 p0, int64_t4 p1, int64_t4 p2) { return mad(p0, p1, p2); } -// CHECK: %dx.umad = call i32 @llvm.dx.umad.i32(i32 %0, i32 %1, i32 %2) -// CHECK: ret i32 %dx.umad +// DXIL_CHECK: %dx.umad = call i32 @llvm.dx.umad.i32(i32 %0, i32 %1, i32 %2) +// DXIL_CHECK: ret i32 %dx.umad +// SPIR_CHECK: mul nuw i32 %{{.*}}, %{{.*}} +// SPIR_CHECK: add nuw i32 %{{.*}}, %{{.*}} uint test_mad_uint(uint p0, uint p1, uint p2) { return mad(p0, p1, p2); } -// CHECK: %dx.umad = call <2 x i32> @llvm.dx.umad.v2i32(<2 x i32> %0, <2 x i32> %1, <2 x i32> %2) -// CHECK: ret <2 x i32> %dx.umad +// DXIL_CHECK: %dx.umad = call <2 x i32> @llvm.dx.umad.v2i32(<2 x i32> %0, <2 x i32> %1, <2 x i32> %2) +// DXIL_CHECK: ret <2 x i32> %dx.umad +// SPIR_CHECK: mul nuw <2 x i32> %{{.*}}, %{{.*}} +// SPIR_CHECK: add nuw <2 x i32> %{{.*}}, %{{.*}} uint2 test_mad_uint2(uint2 p0, uint2 p1, uint2 p2) { return mad(p0, p1, p2); } -// CHECK: %dx.umad = call <3 x i32> @llvm.dx.umad.v3i32(<3 x i32> %0, <3 x i32> %1, <3 x i32> %2) -// CHECK: ret <3 x i32> %dx.umad +// DXIL_CHECK: %dx.umad = call <3 x i32> @llvm.dx.umad.v3i32(<3 x i32> %0, <3 x i32> %1, <3 x i32> %2) +// DXIL_CHECK: ret <3 x i32> %dx.umad +// SPIR_CHECK: mul nuw <3 x i32> %{{.*}}, %{{.*}} +// SPIR_CHECK: add nuw <3 x i32> %{{.*}}, %{{.*}} uint3 test_mad_uint3(uint3 p0, uint3 p1, uint3 p2) { return mad(p0, p1, p2); } -// CHECK: %dx.umad = call <4 x i32> @llvm.dx.umad.v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) -// CHECK: ret <4 x i32> %dx.umad +// DXIL_CHECK: %dx.umad = call <4 x i32> @llvm.dx.umad.v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) +// DXIL_CHECK: ret <4 x i32> %dx.umad +// SPIR_CHECK: mul nuw <4 x i32> %{{.*}}, %{{.*}} +// SPIR_CHECK: add nuw <4 x i32> %{{.*}}, %{{.*}} uint4 test_mad_uint4(uint4 p0, uint4 p1, uint4 p2) { return mad(p0, p1, p2); } -// CHECK: %dx.umad = call i64 @llvm.dx.umad.i64(i64 %0, i64 %1, i64 %2) -// CHECK: ret i64 %dx.umad +// DXIL_CHECK: %dx.umad = call i64 @llvm.dx.umad.i64(i64 %0, i64 %1, i64 %2) +// DXIL_CHECK: ret i64 %dx.umad +// SPIR_CHECK: mul nuw i64 %{{.*}}, %{{.*}} +// SPIR_CHECK: add nuw i64 %{{.*}}, %{{.*}} uint64_t test_mad_uint64_t(uint64_t p0, uint64_t p1, uint64_t p2) { return mad(p0, p1, p2); } -// CHECK: %dx.umad = call <2 x i64> @llvm.dx.umad.v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) -// CHECK: ret <2 x i64> %dx.umad +// DXIL_CHECK: %dx.umad = call <2 x i64> @llvm.dx.umad.v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) +// DXIL_CHECK: ret <2 x i64> %dx.umad +// SPIR_CHECK: mul nuw <2 x i64> %{{.*}}, %{{.*}} +// SPIR_CHECK: add nuw <2 x i64> %{{.*}}, %{{.*}} uint64_t2 test_mad_uint64_t2(uint64_t2 p0, uint64_t2 p1, uint64_t2 p2) { return mad(p0, p1, p2); } -// CHECK: %dx.umad = call 
<3 x i64> @llvm.dx.umad.v3i64(<3 x i64> %0, <3 x i64> %1, <3 x i64> %2) -// CHECK: ret <3 x i64> %dx.umad +// DXIL_CHECK: %dx.umad = call <3 x i64> @llvm.dx.umad.v3i64(<3 x i64> %0, <3 x i64> %1, <3 x i64> %2) +// DXIL_CHECK: ret <3 x i64> %dx.umad +// SPIR_CHECK: mul nuw <3 x i64> %{{.*}}, %{{.*}} +// SPIR_CHECK: add nuw <3 x i64> %{{.*}}, %{{.*}} uint64_t3 test_mad_uint64_t3(uint64_t3 p0, uint64_t3 p1, uint64_t3 p2) { return mad(p0, p1, p2); } -// CHECK: %dx.umad = call <4 x i64> @llvm.dx.umad.v4i64(<4 x i64> %0, <4 x i64> %1, <4 x i64> %2) -// CHECK: ret <4 x i64> %dx.umad +// DXIL_CHECK: %dx.umad = call <4 x i64> @llvm.dx.umad.v4i64(<4 x i64> %0, <4 x i64> %1, <4 x i64> %2) +// DXIL_CHECK: ret <4 x i64> %dx.umad +// SPIR_CHECK: mul nuw <4 x i64> %{{.*}}, %{{.*}} +// SPIR_CHECK: add nuw <4 x i64> %{{.*}}, %{{.*}} uint64_t4 test_mad_uint64_t4(uint64_t4 p0, uint64_t4 p1, uint64_t4 p2) { return mad(p0, p1, p2); } -// CHECK: %dx.fmad = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %splat.splat, <2 x float> %1, <2 x float> %2) -// CHECK: ret <2 x float> %dx.fmad +// CHECK: %hlsl.fmad = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %splat.splat, <2 x float> %1, <2 x float> %2) +// CHECK: ret <2 x float> %hlsl.fmad float2 test_mad_float2_splat(float p0, float2 p1, float2 p2) { return mad(p0, p1, p2); } -// CHECK: %dx.fmad = call <3 x float> @llvm.fmuladd.v3f32(<3 x float> %splat.splat, <3 x float> %1, <3 x float> %2) -// CHECK: ret <3 x float> %dx.fmad +// CHECK: %hlsl.fmad = call <3 x float> @llvm.fmuladd.v3f32(<3 x float> %splat.splat, <3 x float> %1, <3 x float> %2) +// CHECK: ret <3 x float> %hlsl.fmad float3 test_mad_float3_splat(float p0, float3 p1, float3 p2) { return mad(p0, p1, p2); } -// CHECK: %dx.fmad = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %splat.splat, <4 x float> %1, <4 x float> %2) -// CHECK: ret <4 x float> %dx.fmad +// CHECK: %hlsl.fmad = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %splat.splat, <4 x float> %1, <4 x float> %2) +// CHECK: ret <4 x float> %hlsl.fmad float4 test_mad_float4_splat(float p0, float4 p1, float4 p2) { return mad(p0, p1, p2); } // CHECK: %conv = sitofp i32 %2 to float // CHECK: %splat.splatinsert = insertelement <2 x float> poison, float %conv, i64 0 // CHECK: %splat.splat = shufflevector <2 x float> %splat.splatinsert, <2 x float> poison, <2 x i32> zeroinitializer -// CHECK: %dx.fmad = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %0, <2 x float> %1, <2 x float> %splat.splat) -// CHECK: ret <2 x float> %dx.fmad +// CHECK: %hlsl.fmad = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %0, <2 x float> %1, <2 x float> %splat.splat) +// CHECK: ret <2 x float> %hlsl.fmad float2 test_mad_float2_int_splat(float2 p0, float2 p1, int p2) { return mad(p0, p1, p2); } @@ -184,8 +240,8 @@ float2 test_mad_float2_int_splat(float2 p0, float2 p1, int p2) { // CHECK: %conv = sitofp i32 %2 to float // CHECK: %splat.splatinsert = insertelement <3 x float> poison, float %conv, i64 0 // CHECK: %splat.splat = shufflevector <3 x float> %splat.splatinsert, <3 x float> poison, <3 x i32> zeroinitializer -// CHECK: %dx.fmad = call <3 x float> @llvm.fmuladd.v3f32(<3 x float> %0, <3 x float> %1, <3 x float> %splat.splat) -// CHECK: ret <3 x float> %dx.fmad +// CHECK: %hlsl.fmad = call <3 x float> @llvm.fmuladd.v3f32(<3 x float> %0, <3 x float> %1, <3 x float> %splat.splat) +// CHECK: ret <3 x float> %hlsl.fmad float3 test_mad_float3_int_splat(float3 p0, float3 p1, int p2) { return mad(p0, p1, p2); } diff --git a/llvm/test/CodeGen/DirectX/fmad.ll 
b/llvm/test/CodeGen/DirectX/fmad.ll index 693e237e70dc02..e1f4e5cd50c4f0 100644 --- a/llvm/test/CodeGen/DirectX/fmad.ll +++ b/llvm/test/CodeGen/DirectX/fmad.ll @@ -21,8 +21,8 @@ entry: %0 = load half, ptr %p0.addr, align 2 %1 = load half, ptr %p1.addr, align 2 %2 = load half, ptr %p2.addr, align 2 - %dx.fmad = call half @llvm.fmuladd.f16(half %0, half %1, half %2) - ret half %dx.fmad + %hlsl.fmad = call half @llvm.fmuladd.f16(half %0, half %1, half %2) + ret half %hlsl.fmad } ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) @@ -40,8 +40,8 @@ entry: %0 = load float, ptr %p0.addr, align 4 %1 = load float, ptr %p1.addr, align 4 %2 = load float, ptr %p2.addr, align 4 - %dx.fmad = call float @llvm.fmuladd.f32(float %0, float %1, float %2) - ret float %dx.fmad + %hlsl.fmad = call float @llvm.fmuladd.f32(float %0, float %1, float %2) + ret float %hlsl.fmad } ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) @@ -59,8 +59,8 @@ entry: %0 = load double, ptr %p0.addr, align 8 %1 = load double, ptr %p1.addr, align 8 %2 = load double, ptr %p2.addr, align 8 - %dx.fmad = call double @llvm.fmuladd.f64(double %0, double %1, double %2) - ret double %dx.fmad + %hlsl.fmad = call double @llvm.fmuladd.f64(double %0, double %1, double %2) + ret double %hlsl.fmad } ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/fmad.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/fmad.ll index a3fec10a9e4bc9..ce9b8f09daead1 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/fmad.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/fmad.ll @@ -6,22 +6,22 @@ define noundef half @fmad_half(half noundef %a, half noundef %b, half noundef %c) #0 { entry: ; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] Fma %[[#]] %[[#]] %[[#]] - %dx.fmad = call half @llvm.fmuladd.f16(half %a, half %b, half %c) - ret half %dx.fmad + %hlsl.fmad = call half @llvm.fmuladd.f16(half %a, half %b, half %c) + ret half %hlsl.fmad } define noundef float @fmad_float(float noundef %a, float noundef %b, float noundef %c) #0 { entry: ; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] Fma %[[#]] %[[#]] %[[#]] - %dx.fmad = call float @llvm.fmuladd.f32(float %a, float %b, float %c) - ret float %dx.fmad + %hlsl.fmad = call float @llvm.fmuladd.f32(float %a, float %b, float %c) + ret float %hlsl.fmad } define noundef double @fmad_double(double noundef %a, double noundef %b, double noundef %c) { entry: ; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] Fma %[[#]] %[[#]] %[[#]] - %dx.fmad = call double @llvm.fmuladd.f64(double %a, double %b, double %c) - ret double %dx.fmad + %hlsl.fmad = call double @llvm.fmuladd.f64(double %a, double %b, double %c) + ret double %hlsl.fmad } declare half @llvm.fmuladd.f16(half, half, half) diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/imad.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/imad.ll new file mode 100644 index 00000000000000..b854412b6ec12a --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/imad.ll @@ -0,0 +1,302 @@ +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: %[[#int_16:]] = OpTypeInt 16 0 +; CHECK-DAG: %[[#vec2_16:]] = OpTypeVector %[[#int_16]] 2 +; CHECK-DAG: %[[#vec3_16:]] = OpTypeVector %[[#int_16]] 3 +; CHECK-DAG: %[[#vec4_16:]] = OpTypeVector %[[#int_16]] 4 +; CHECK-DAG: %[[#int_32:]] = OpTypeInt 32 0 +; CHECK-DAG: 
%[[#vec2_32:]] = OpTypeVector %[[#int_32]] 2 +; CHECK-DAG: %[[#vec3_32:]] = OpTypeVector %[[#int_32]] 3 +; CHECK-DAG: %[[#vec4_32:]] = OpTypeVector %[[#int_32]] 4 +; CHECK-DAG: %[[#int_64:]] = OpTypeInt 64 0 +; CHECK-DAG: %[[#vec2_64:]] = OpTypeVector %[[#int_64]] 2 +; CHECK-DAG: %[[#vec3_64:]] = OpTypeVector %[[#int_64]] 3 +; CHECK-DAG: %[[#vec4_64:]] = OpTypeVector %[[#int_64]] 4 + +define spir_func noundef i16 @test_mad_uint16_t(i16 noundef %p0, i16 noundef %p1, i16 noundef %p2) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#mul:]] = OpIMul %[[#int_16]] %[[#arg0]] %[[#arg1]] + ; CHECK: OpIAdd %[[#int_16]] %[[#mul]] %[[#arg2]] + %3 = mul nuw i16 %p0, %p1 + %4 = add nuw i16 %3, %p2 + ret i16 %4 +} + +define spir_func noundef <2 x i16> @test_mad_uint16_t2(<2 x i16> noundef %p0, <2 x i16> noundef %p1, <2 x i16> noundef %p2) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#mul:]] = OpIMul %[[#vec2_16]] %[[#arg0]] %[[#arg1]] + ; CHECK: OpIAdd %[[#vec2_16]] %[[#mul]] %[[#arg2]] + %3 = mul nuw <2 x i16> %p0, %p1 + %4 = add nuw <2 x i16> %3, %p2 + ret <2 x i16> %4 +} + +define spir_func noundef <3 x i16> @test_mad_uint16_t3(<3 x i16> noundef %p0, <3 x i16> noundef %p1, <3 x i16> noundef %p2) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#mul:]] = OpIMul %[[#vec3_16]] %[[#arg0]] %[[#arg1]] + ; CHECK: OpIAdd %[[#vec3_16]] %[[#mul]] %[[#arg2]] + %3 = mul nuw <3 x i16> %p0, %p1 + %4 = add nuw <3 x i16> %3, %p2 + ret <3 x i16> %4 +} + +define spir_func noundef <4 x i16> @test_mad_uint16_t4(<4 x i16> noundef %p0, <4 x i16> noundef %p1, <4 x i16> noundef %p2) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#mul:]] = OpIMul %[[#vec4_16]] %[[#arg0]] %[[#arg1]] + ; CHECK: OpIAdd %[[#vec4_16]] %[[#mul]] %[[#arg2]] + %3 = mul nuw <4 x i16> %p0, %p1 + %4 = add nuw <4 x i16> %3, %p2 + ret <4 x i16> %4 +} + +define spir_func noundef i16 @test_mad_int16_t(i16 noundef %p0, i16 noundef %p1, i16 noundef %p2) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#mul:]] = OpIMul %[[#int_16]] %[[#arg0]] %[[#arg1]] + ; CHECK: OpIAdd %[[#int_16]] %[[#mul]] %[[#arg2]] + %3 = mul nsw i16 %p0, %p1 + %4 = add nsw i16 %3, %p2 + ret i16 %4 +} + +define spir_func noundef <2 x i16> @test_mad_int16_t2(<2 x i16> noundef %p0, <2 x i16> noundef %p1, <2 x i16> noundef %p2) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#mul:]] = OpIMul %[[#vec2_16]] %[[#arg0]] %[[#arg1]] + ; CHECK: OpIAdd %[[#vec2_16]] %[[#mul]] %[[#arg2]] + %3 = mul nsw <2 x i16> %p0, %p1 + %4 = add nsw <2 x i16> %3, %p2 + ret <2 x i16> %4 +} + +define spir_func noundef <3 x i16> @test_mad_int16_t3(<3 x i16> noundef %p0, <3 x i16> noundef %p1, <3 x i16> noundef %p2) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]] + ; 
CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#mul:]] = OpIMul %[[#vec3_16]] %[[#arg0]] %[[#arg1]] + ; CHECK: OpIAdd %[[#vec3_16]] %[[#mul]] %[[#arg2]] + %3 = mul nsw <3 x i16> %p0, %p1 + %4 = add nsw <3 x i16> %3, %p2 + ret <3 x i16> %4 +} + +define spir_func noundef <4 x i16> @test_mad_int16_t4(<4 x i16> noundef %p0, <4 x i16> noundef %p1, <4 x i16> noundef %p2) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#mul:]] = OpIMul %[[#vec4_16]] %[[#arg0]] %[[#arg1]] + ; CHECK: OpIAdd %[[#vec4_16]] %[[#mul]] %[[#arg2]] + %3 = mul nsw <4 x i16> %p0, %p1 + %4 = add nsw <4 x i16> %3, %p2 + ret <4 x i16> %4 +} +define spir_func noundef i32 @test_mad_int(i32 noundef %p0, i32 noundef %p1, i32 noundef %p2) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#mul:]] = OpIMul %[[#int_32]] %[[#arg0]] %[[#arg1]] + ; CHECK: OpIAdd %[[#int_32]] %[[#mul]] %[[#arg2]] + %3 = mul nsw i32 %p0, %p1 + %4 = add nsw i32 %3, %p2 + ret i32 %4 +} + +define spir_func noundef <2 x i32> @test_mad_int2(<2 x i32> noundef %p0, <2 x i32> noundef %p1, <2 x i32> noundef %p2) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#mul:]] = OpIMul %[[#vec2_32]] %[[#arg0]] %[[#arg1]] + ; CHECK: OpIAdd %[[#vec2_32]] %[[#mul]] %[[#arg2]] + %3 = mul nsw <2 x i32> %p0, %p1 + %4 = add nsw <2 x i32> %3, %p2 + ret <2 x i32> %4 +} + +define spir_func noundef <3 x i32> @test_mad_int3(<3 x i32> noundef %p0, <3 x i32> noundef %p1, <3 x i32> noundef %p2) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#mul:]] = OpIMul %[[#vec3_32]] %[[#arg0]] %[[#arg1]] + ; CHECK: OpIAdd %[[#vec3_32]] %[[#mul]] %[[#arg2]] + %3 = mul nsw <3 x i32> %p0, %p1 + %4 = add nsw <3 x i32> %3, %p2 + ret <3 x i32> %4 +} + +define spir_func noundef <4 x i32> @test_mad_int4(<4 x i32> noundef %p0, <4 x i32> noundef %p1, <4 x i32> noundef %p2) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#mul:]] = OpIMul %[[#vec4_32]] %[[#arg0]] %[[#arg1]] + ; CHECK: OpIAdd %[[#vec4_32]] %[[#mul]] %[[#arg2]] + %3 = mul nsw <4 x i32> %p0, %p1 + %4 = add nsw <4 x i32> %3, %p2 + ret <4 x i32> %4 +} + +define spir_func noundef i64 @test_mad_int64_t(i64 noundef %p0, i64 noundef %p1, i64 noundef %p2) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#mul:]] = OpIMul %[[#int_64]] %[[#arg0]] %[[#arg1]] + ; CHECK: OpIAdd %[[#int_64]] %[[#mul]] %[[#arg2]] + %3 = mul nsw i64 %p0, %p1 + %4 = add nsw i64 %3, %p2 + ret i64 %4 +} + +define spir_func noundef <2 x i64> @test_mad_int64_t2(<2 x i64> noundef %p0, <2 x i64> noundef %p1, <2 x i64> noundef %p2) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#mul:]] = OpIMul %[[#vec2_64]] %[[#arg0]] %[[#arg1]] + ; CHECK: OpIAdd %[[#vec2_64]] 
%[[#mul]] %[[#arg2]] + %3 = mul nsw <2 x i64> %p0, %p1 + %4 = add nsw <2 x i64> %3, %p2 + ret <2 x i64> %4 +} + +define spir_func noundef <3 x i64> @test_mad_int64_t3(<3 x i64> noundef %p0, <3 x i64> noundef %p1, <3 x i64> noundef %p2) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#mul:]] = OpIMul %[[#vec3_64]] %[[#arg0]] %[[#arg1]] + ; CHECK: OpIAdd %[[#vec3_64]] %[[#mul]] %[[#arg2]] + %3 = mul nsw <3 x i64> %p0, %p1 + %4 = add nsw <3 x i64> %3, %p2 + ret <3 x i64> %4 +} + +define spir_func noundef <4 x i64> @test_mad_int64_t4(<4 x i64> noundef %p0, <4 x i64> noundef %p1, <4 x i64> noundef %p2) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#mul:]] = OpIMul %[[#vec4_64]] %[[#arg0]] %[[#arg1]] + ; CHECK: OpIAdd %[[#vec4_64]] %[[#mul]] %[[#arg2]] + %3 = mul nsw <4 x i64> %p0, %p1 + %4 = add nsw <4 x i64> %3, %p2 + ret <4 x i64> %4 +} + +define spir_func noundef i32 @test_mad_uint(i32 noundef %p0, i32 noundef %p1, i32 noundef %p2) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#mul:]] = OpIMul %[[#int_32]] %[[#arg0]] %[[#arg1]] + ; CHECK: OpIAdd %[[#int_32]] %[[#mul]] %[[#arg2]] + %3 = mul nuw i32 %p0, %p1 + %4 = add nuw i32 %3, %p2 + ret i32 %4 +} + +define spir_func noundef <2 x i32> @test_mad_uint2(<2 x i32> noundef %p0, <2 x i32> noundef %p1, <2 x i32> noundef %p2) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#mul:]] = OpIMul %[[#vec2_32]] %[[#arg0]] %[[#arg1]] + ; CHECK: OpIAdd %[[#vec2_32]] %[[#mul]] %[[#arg2]] + %3 = mul nuw <2 x i32> %p0, %p1 + %4 = add nuw <2 x i32> %3, %p2 + ret <2 x i32> %4 +} + +define spir_func noundef <3 x i32> @test_mad_uint3(<3 x i32> noundef %p0, <3 x i32> noundef %p1, <3 x i32> noundef %p2) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#mul:]] = OpIMul %[[#vec3_32]] %[[#arg0]] %[[#arg1]] + ; CHECK: OpIAdd %[[#vec3_32]] %[[#mul]] %[[#arg2]] + %3 = mul nuw <3 x i32> %p0, %p1 + %4 = add nuw <3 x i32> %3, %p2 + ret <3 x i32> %4 +} + +define spir_func noundef <4 x i32> @test_mad_uint4(<4 x i32> noundef %p0, <4 x i32> noundef %p1, <4 x i32> noundef %p2) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#mul:]] = OpIMul %[[#vec4_32]] %[[#arg0]] %[[#arg1]] + ; CHECK: OpIAdd %[[#vec4_32]] %[[#mul]] %[[#arg2]] + %3 = mul nuw <4 x i32> %p0, %p1 + %4 = add nuw <4 x i32> %3, %p2 + ret <4 x i32> %4 +} + +define spir_func noundef i64 @test_mad_uint64_t(i64 noundef %p0, i64 noundef %p1, i64 noundef %p2) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#mul:]] = OpIMul %[[#int_64]] %[[#arg0]] %[[#arg1]] + ; CHECK: OpIAdd %[[#int_64]] %[[#mul]] %[[#arg2]] + %3 = mul nuw i64 %p0, %p1 + %4 = add nuw i64 %3, %p2 + ret i64 %4 +} + +define spir_func noundef <2 x i64> 
@test_mad_uint64_t2(<2 x i64> noundef %p0, <2 x i64> noundef %p1, <2 x i64> noundef %p2) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#mul:]] = OpIMul %[[#vec2_64]] %[[#arg0]] %[[#arg1]] + ; CHECK: OpIAdd %[[#vec2_64]] %[[#mul]] %[[#arg2]] + %3 = mul nuw <2 x i64> %p0, %p1 + %4 = add nuw <2 x i64> %3, %p2 + ret <2 x i64> %4 +} + +define spir_func noundef <3 x i64> @test_mad_uint64_t3(<3 x i64> noundef %p0, <3 x i64> noundef %p1, <3 x i64> noundef %p2) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#mul:]] = OpIMul %[[#vec3_64]] %[[#arg0]] %[[#arg1]] + ; CHECK: OpIAdd %[[#vec3_64]] %[[#mul]] %[[#arg2]] + %3 = mul nuw <3 x i64> %p0, %p1 + %4 = add nuw <3 x i64> %3, %p2 + ret <3 x i64> %4 +} + +define spir_func noundef <4 x i64> @test_mad_uint64_t4(<4 x i64> noundef %p0, <4 x i64> noundef %p1, <4 x i64> noundef %p2) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#arg2:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#mul:]] = OpIMul %[[#vec4_64]] %[[#arg0]] %[[#arg1]] + ; CHECK: OpIAdd %[[#vec4_64]] %[[#mul]] %[[#arg2]] + %3 = mul nuw <4 x i64> %p0, %p1 + %4 = add nuw <4 x i64> %3, %p2 + ret <4 x i64> %4 +} >From 218206dde75a89447193cc46d4d2a04c7230a7e3 Mon Sep 17 00:00:00 2001 From: Farzon Lotfi <farzonlo...@microsoft.com> Date: Thu, 18 Apr 2024 17:04:48 -0400 Subject: [PATCH 2/2] add a means of registering emitters per target --- clang/lib/CodeGen/CGBuiltin.cpp | 8 ++-- clang/lib/CodeGen/CGHLSLRuntime.cpp | 34 +++++++++++------ clang/lib/CodeGen/CGHLSLRuntime.h | 57 +++++++++++++++++++++++++---- 3 files changed, 78 insertions(+), 21 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 3f9f5aa9b42ed0..040aad1819d154 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18215,9 +18215,11 @@ Value *CodeGenFunction::EmitHLSLMadIntrinsic(const CallExpr *E) { Value *Mul = Builder.CreateNUWMul(M, A); return Builder.CreateNUWAdd(Mul, B); }; - - return CGM.getHLSLRuntime().emitHLSLIntrinsic( - EmitHLSLIMadDirectX, EmitHLSLIMadGeneric, EmitHLSLIMadGeneric); + CGM.getHLSLRuntime().registerHLSLTargetIntrinsic( + Builtin::BI__builtin_hlsl_mad, llvm::Triple::dxil, EmitHLSLIMadDirectX); + CGM.getHLSLRuntime().registerHLSLGenericIntrinsic( + Builtin::BI__builtin_hlsl_mad, EmitHLSLIMadGeneric); + return CGM.getHLSLRuntime().emitHLSLIntrinsic(Builtin::BI__builtin_hlsl_mad); } Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp index 1c801a4d1b06a0..3a52d4e207b1e7 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.cpp +++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp @@ -119,19 +119,31 @@ llvm::Triple::ArchType CGHLSLRuntime::getArch() { return CGM.getTarget().getTriple().getArch(); } -Value * -CGHLSLRuntime::emitHLSLIntrinsic(llvm::function_ref<Value *()> DxilEmitter, - llvm::function_ref<Value *()> SPIRVEmitter, - llvm::function_ref<Value *()> GenericEmitter) { +void CGHLSLRuntime::registerHLSLTargetIntrinsic( + Builtin::ID Id, llvm::Triple::ArchType Arch, + llvm::function_ref<llvm::Value *()> IntrinsicImpl) { + if (!IntrinsicCodeGen.count(Id)) + IntrinsicCodeGen[Id] = CGHLSLIntrinsic(); + 
IntrinsicCodeGen[Id].targetImplementations[Arch] = IntrinsicImpl; +} +void CGHLSLRuntime::registerHLSLGenericIntrinsic( + Builtin::ID Id, llvm::function_ref<llvm::Value *()> IntrinsicImpl) { + if (!IntrinsicCodeGen.count(Id)) + IntrinsicCodeGen[Id] = CGHLSLIntrinsic(); + IntrinsicCodeGen[Id].genericImplementation = IntrinsicImpl; +} + +llvm::Value *CGHLSLRuntime::emitHLSLIntrinsic(Builtin::ID id) { + auto it = IntrinsicCodeGen.find(id); + assert(it != IntrinsicCodeGen.end() && + "HLSL intrinsics need to be registered before use."); llvm::Triple::ArchType Arch = getArch(); - switch (Arch) { - case llvm::Triple::dxil: - return DxilEmitter(); - case llvm::Triple::spirv: - return SPIRVEmitter(); - default: - return GenericEmitter(); + auto targets = it->second.targetImplementations; + auto targetIt = targets.find(Arch); + if (targetIt == targets.end()) { + return it->second.genericImplementation(); } + return targetIt->second(); } void CGHLSLRuntime::addConstant(VarDecl *D, Buffer &CB) { if (D->getStorageClass() == SC_Static) { // For static inside cbuffer, take as global static. diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h index 923cf2140c13f0..a731ee4ad898c4 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -51,6 +51,41 @@ class GlobalVariable; class Function; class StructType; class Value; + +template <> struct DenseMapInfo<clang::Builtin::ID> { + static clang::Builtin::ID getEmptyKey() { return clang::Builtin::NotBuiltin; } + + static clang::Builtin::ID getTombstoneKey() { + return clang::Builtin::FirstTSBuiltin; + } + + static unsigned getHashValue(clang::Builtin::ID Val) { + return static_cast<unsigned>(Val); + } + + static bool isEqual(clang::Builtin::ID LHS, clang::Builtin::ID RHS) { + return LHS == RHS; + } +}; + +template <> struct DenseMapInfo<llvm::Triple::ArchType> { + static llvm::Triple::ArchType getEmptyKey() { + return llvm::Triple::ArchType::UnknownArch; + } + + static llvm::Triple::ArchType getTombstoneKey() { + return llvm::Triple::ArchType::LastArchType; + } + + static unsigned getHashValue(llvm::Triple::ArchType Val) { + return static_cast<unsigned>(Val); + } + + static bool isEqual(llvm::Triple::ArchType LHS, llvm::Triple::ArchType RHS) { + return LHS == RHS; + } +}; + } // namespace llvm namespace clang { @@ -67,6 +102,15 @@ namespace CodeGen { class CodeGenModule; +struct CGHLSLIntrinsic { + llvm::DenseMap<llvm::Triple::ArchType, llvm::function_ref<llvm::Value *()>> + targetImplementations; + llvm::function_ref<llvm::Value *()> genericImplementation = + []() -> llvm::Value * { + llvm_unreachable("Intrinsic not supported by target architecture."); + }; +}; + class CGHLSLRuntime { public: //===----------------------------------------------------------------------===// @@ -80,13 +124,11 @@ class CGHLSLRuntime { //===----------------------------------------------------------------------===// // End of reserved area for HLSL intrinsic getters. 
//===----------------------------------------------------------------------===// - llvm::Value *emitHLSLIntrinsic( - llvm::function_ref<llvm::Value *()> DxilEmitter, - llvm::function_ref<llvm::Value *()> SPIRVEmitter, - llvm::function_ref<llvm::Value *()> GenericEmitter = - []() -> llvm::Value * { - llvm_unreachable("Intrinsic not supported by target architecture."); - }); + void registerHLSLTargetIntrinsic(Builtin::ID, llvm::Triple::ArchType, + llvm::function_ref<llvm::Value *()>); + void registerHLSLGenericIntrinsic(Builtin::ID, + llvm::function_ref<llvm::Value *()>); + llvm::Value *emitHLSLIntrinsic(Builtin::ID); struct BufferResBinding { // The ID like 2 in register(b2, space1). std::optional<unsigned> Reg; @@ -137,6 +179,7 @@ class CGHLSLRuntime { void addBufferDecls(const DeclContext *DC, Buffer &CB); llvm::Triple::ArchType getArch(); llvm::SmallVector<Buffer> Buffers; + llvm::DenseMap<clang::Builtin::ID, CGHLSLIntrinsic> IntrinsicCodeGen; }; } // namespace CodeGen
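For reviewers skimming PATCH 2, the following is a minimal, self-contained C++ sketch of the register-then-dispatch pattern it introduces: each builtin gets a map of per-architecture emitters plus a generic fallback, and emission first looks for a target-specific entry before falling back. This is illustrative only and does not use the clang/LLVM types from the patch; ExampleArch, IntrinsicRegistry, and the builtin ID constant are hypothetical stand-ins, and std::function replaces llvm::function_ref purely to keep the sketch compilable on its own.

#include <cassert>
#include <functional>
#include <iostream>
#include <map>
#include <string>

enum class ExampleArch { DXIL, SPIRV, Other };

struct IntrinsicEmitters {
  // Per-target emitters, analogous to CGHLSLIntrinsic::targetImplementations.
  std::map<ExampleArch, std::function<std::string()>> TargetImpls;
  // Fallback, analogous to CGHLSLIntrinsic::genericImplementation.
  std::function<std::string()> GenericImpl = [] {
    assert(false && "Intrinsic not supported by target architecture.");
    return std::string();
  };
};

class IntrinsicRegistry {
  std::map<int, IntrinsicEmitters> Intrinsics; // keyed by a builtin ID

public:
  void registerTarget(int BuiltinID, ExampleArch Arch,
                      std::function<std::string()> Impl) {
    Intrinsics[BuiltinID].TargetImpls[Arch] = std::move(Impl);
  }
  void registerGeneric(int BuiltinID, std::function<std::string()> Impl) {
    Intrinsics[BuiltinID].GenericImpl = std::move(Impl);
  }
  // Prefer a target-specific emitter; otherwise use the generic fallback.
  std::string emit(int BuiltinID, ExampleArch Arch) const {
    auto It = Intrinsics.find(BuiltinID);
    assert(It != Intrinsics.end() && "intrinsic must be registered before use");
    auto TargetIt = It->second.TargetImpls.find(Arch);
    if (TargetIt == It->second.TargetImpls.end())
      return It->second.GenericImpl();
    return TargetIt->second();
  }
};

int main() {
  constexpr int BuiltinMad = 42; // stand-in for Builtin::BI__builtin_hlsl_mad
  IntrinsicRegistry Registry;
  Registry.registerTarget(BuiltinMad, ExampleArch::DXIL,
                          [] { return std::string("call @llvm.dx.imad"); });
  Registry.registerGeneric(
      BuiltinMad, [] { return std::string("mul nsw ... ; add nsw ..."); });

  // DXIL has a dedicated emitter; SPIR-V falls back to the generic lowering.
  std::cout << Registry.emit(BuiltinMad, ExampleArch::DXIL) << "\n";
  std::cout << Registry.emit(BuiltinMad, ExampleArch::SPIRV) << "\n";
  return 0;
}

One design note: llvm::function_ref, which the actual patch stores in the DenseMap, is non-owning, so the emitter lambdas registered in EmitHLSLMadIntrinsic must remain alive for the duration of the emitHLSLIntrinsic call; in the patch that holds because registration and emission happen within the same function call.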