https://github.com/bob80905 updated https://github.com/llvm/llvm-project/pull/101256
>From 7027cf254ae1b6acfdfbbf5dbeda3c4d6a4b3c43 Mon Sep 17 00:00:00 2001 From: Joshua Batista <jbati...@microsoft.com> Date: Fri, 26 Jul 2024 15:41:01 -0700 Subject: [PATCH 1/8] first attempt --- clang/docs/LanguageExtensions.rst | 1 + clang/docs/ReleaseNotes.rst | 1 + clang/include/clang/Basic/Builtins.td | 6 ++++++ clang/lib/CodeGen/CGBuiltin.cpp | 3 +++ clang/lib/Sema/SemaChecking.cpp | 1 + clang/test/CodeGenHLSL/builtins/length.hlsl | 1 + clang/test/SemaHLSL/BuiltIns/length-errors.hlsl | 1 + 7 files changed, 14 insertions(+) create mode 100644 clang/test/CodeGenHLSL/builtins/length.hlsl create mode 100644 clang/test/SemaHLSL/BuiltIns/length-errors.hlsl diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index a747464582e77..45f081081a371 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -664,6 +664,7 @@ Unless specified otherwise operation(±0) = ±0 and operation(±infinity) = ±in T __builtin_elementwise_cosh(T x) return the hyperbolic cosine of angle x in radians floating point types T __builtin_elementwise_tanh(T x) return the hyperbolic tangent of angle x in radians floating point types T __builtin_elementwise_floor(T x) return the largest integral value less than or equal to x floating point types + T __builtin_elementwise_length(T x) return the length of the specified floating-point vector floating point types T __builtin_elementwise_log(T x) return the natural logarithm of x floating point types T __builtin_elementwise_log2(T x) return the base 2 logarithm of x floating point types T __builtin_elementwise_log10(T x) return the base 10 logarithm of x floating point types diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index dad44f45a847f..46f40889b4b33 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -243,6 +243,7 @@ DWARF Support in Clang Floating Point Support in Clang ------------------------------- +- Add 
``__builtin_elementwise_length``builtin for floating point types only. Fixed Point Support in Clang ---------------------------- diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 4133f6ff40cf3..d6122a484c094 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -1248,6 +1248,12 @@ def ElementwiseBitreverse : Builtin { let Prototype = "void(...)"; } +def ElementwiseLength : Builtin, F16F128MathTemplate { + let Spellings = ["__builtin_elementwise_length"]; + let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr]; + let Prototype = "T(float, T)"; +} + def ElementwiseMax : Builtin { let Spellings = ["__builtin_elementwise_max"]; let Attributes = [NoThrow, Const, CustomTypeChecking]; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index f0651c280ff95..38c7cc8ab5a78 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -3815,6 +3815,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_elementwise_exp2: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::exp2, "elt.exp2")); + case Builtin::BI__builtin_elementwise_length: + return RValue::get(emitBuiltinWithOneOverloadedType<1>( + *this, E, llvm::Intrinsic::length, "elt.length")); case Builtin::BI__builtin_elementwise_log: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::log, "elt.log")); diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index bb30b1e289a1c..09e3b17571528 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2684,6 +2684,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, case Builtin::BI__builtin_elementwise_exp: case Builtin::BI__builtin_elementwise_exp2: case Builtin::BI__builtin_elementwise_floor: + case 
Builtin::BI__builtin_elementwise_length: case Builtin::BI__builtin_elementwise_log: case Builtin::BI__builtin_elementwise_log2: case Builtin::BI__builtin_elementwise_log10: diff --git a/clang/test/CodeGenHLSL/builtins/length.hlsl b/clang/test/CodeGenHLSL/builtins/length.hlsl new file mode 100644 index 0000000000000..2f259b79aa7e2 --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/length.hlsl @@ -0,0 +1 @@ +s \ No newline at end of file diff --git a/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl new file mode 100644 index 0000000000000..2f259b79aa7e2 --- /dev/null +++ b/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl @@ -0,0 +1 @@ +s \ No newline at end of file >From fc20777ddb0d2e083fa92f3c1673e87874f8f935 Mon Sep 17 00:00:00 2001 From: Joshua Batista <jbati...@microsoft.com> Date: Tue, 30 Jul 2024 15:05:57 -0700 Subject: [PATCH 2/8] sema and codegen hlsl passes --- clang/include/clang/Basic/Builtins.td | 12 +-- clang/lib/CodeGen/CGBuiltin.cpp | 19 ++++- clang/lib/CodeGen/CGHLSLRuntime.h | 1 + clang/lib/Headers/hlsl/hlsl_intrinsics.h | 33 +++++++++ clang/lib/Sema/SemaChecking.cpp | 1 - clang/lib/Sema/SemaHLSL.cpp | 22 ++++++ clang/test/CodeGenHLSL/builtins/length.hlsl | 74 ++++++++++++++++++- .../test/SemaHLSL/BuiltIns/length-errors.hlsl | 32 +++++++- llvm/include/llvm/IR/IntrinsicsDirectX.td | 2 + llvm/include/llvm/IR/IntrinsicsSPIRV.td | 2 + .../Target/DirectX/DXILIntrinsicExpansion.cpp | 39 ++++++++++ .../Target/SPIRV/SPIRVInstructionSelector.cpp | 2 +- 12 files changed, 226 insertions(+), 13 deletions(-) diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index d6122a484c094..0baadf0d196b2 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -1248,12 +1248,6 @@ def ElementwiseBitreverse : Builtin { let Prototype = "void(...)"; } -def ElementwiseLength : Builtin, F16F128MathTemplate { - let Spellings = 
["__builtin_elementwise_length"]; - let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr]; - let Prototype = "T(float, T)"; -} - def ElementwiseMax : Builtin { let Spellings = ["__builtin_elementwise_max"]; let Attributes = [NoThrow, Const, CustomTypeChecking]; @@ -4713,6 +4707,12 @@ def HLSLIsinf : LangBuiltin<"HLSL_LANG"> { let Prototype = "void(...)"; } +def HLSLLength : LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_elementwise_length"]; + let Attributes = [NoThrow, Const]; + let Prototype = "void(...)"; +} + def HLSLLerp : LangBuiltin<"HLSL_LANG"> { let Spellings = ["__builtin_hlsl_lerp"]; let Attributes = [NoThrow, Const]; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 38c7cc8ab5a78..a28073ca9ccc5 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -3815,9 +3815,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_elementwise_exp2: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::exp2, "elt.exp2")); - case Builtin::BI__builtin_elementwise_length: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, llvm::Intrinsic::length, "elt.length")); case Builtin::BI__builtin_elementwise_log: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::log, "elt.log")); @@ -18463,6 +18460,22 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getLerpIntrinsic(), ArrayRef<Value *>{X, Y, S}, nullptr, "hlsl.lerp"); } + case Builtin::BI__builtin_hlsl_elementwise_length: { + Value *X = EmitScalarExpr(E->getArg(0)); + + if (!E->getArg(0)->getType()->hasFloatingRepresentation()) + llvm_unreachable("length operand must have a float representation"); + // if the operand is a scalar, we can use the fabs llvm intrinsic directly + if (!E->getArg(0)->getType()->isVectorType()) { + llvm::Type 
*ResultType = ConvertType(E->getType()); + Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); + return Builder.CreateCall(F, X); + } + return Builder.CreateIntrinsic( + /*ReturnType=*/X->getType()->getScalarType(), + CGM.getHLSLRuntime().getLengthIntrinsic(), + ArrayRef<Value *>{X}, nullptr, "hlsl.length"); + } case Builtin::BI__builtin_hlsl_elementwise_frac: { Value *Op0 = EmitScalarExpr(E->getArg(0)); if (!E->getArg(0)->getType()->hasFloatingRepresentation()) diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h index 8c067f4963955..3f2dc0ae7b84d 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -75,6 +75,7 @@ class CGHLSLRuntime { GENERATE_HLSL_INTRINSIC_FUNCTION(All, all) GENERATE_HLSL_INTRINSIC_FUNCTION(Any, any) GENERATE_HLSL_INTRINSIC_FUNCTION(Frac, frac) + GENERATE_HLSL_INTRINSIC_FUNCTION(Length, length) GENERATE_HLSL_INTRINSIC_FUNCTION(Lerp, lerp) GENERATE_HLSL_INTRINSIC_FUNCTION(Rsqrt, rsqrt) GENERATE_HLSL_INTRINSIC_FUNCTION(ThreadId, thread_id) diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h index 09f26a4588c14..21ac25bba1acb 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h @@ -908,6 +908,39 @@ float3 lerp(float3, float3, float3); _HLSL_BUILTIN_ALIAS(__builtin_hlsl_lerp) float4 lerp(float4, float4, float4); + +//===----------------------------------------------------------------------===// +// length builtins +//===----------------------------------------------------------------------===// + +/// \fn T length(T x) +/// \brief Returns the length of the specified floating-point vector. +/// \param x [in] The vector of floats, or a scalar float. +/// +/// Length is based on the following formula: sqrt(x[0]^2 + x[1]^2 + ...). 
+ +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_length) +half length(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_length) +half length(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_length) +half length(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_length) +half length(half4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_length) +float length(float); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_length) +float length(float2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_length) +float length(float3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_length) +float length(float4); + //===----------------------------------------------------------------------===// // log builtins //===----------------------------------------------------------------------===// diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 09e3b17571528..bb30b1e289a1c 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2684,7 +2684,6 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, case Builtin::BI__builtin_elementwise_exp: case Builtin::BI__builtin_elementwise_exp2: case Builtin::BI__builtin_elementwise_floor: - case Builtin::BI__builtin_elementwise_length: case Builtin::BI__builtin_elementwise_log: case Builtin::BI__builtin_elementwise_log2: case Builtin::BI__builtin_elementwise_log10: diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 9940bc5b4a606..624cbd3777bb8 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -1076,6 +1076,28 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { return true; break; } + case Builtin::BI__builtin_hlsl_elementwise_length: { + if (SemaRef.checkArgCount(TheCall, 1)) + return 
true; + if (SemaRef.PrepareBuiltinElementwiseMathOneArgCall(TheCall)) + return true; + + ExprResult A = TheCall->getArg(0); + QualType ArgTyA = A.get()->getType(); + QualType RetTy; + + if (auto *VTy = ArgTyA->getAs<VectorType>()) + RetTy = VTy->getElementType(); + else + RetTy = TheCall->getArg(0)->getType(); + + TheCall->setType(RetTy); + + + if (CheckFloatOrHalfRepresentations(&SemaRef, TheCall)) + return true; + break; + } case Builtin::BI__builtin_hlsl_mad: { if (SemaRef.checkArgCount(TheCall, 3)) return true; diff --git a/clang/test/CodeGenHLSL/builtins/length.hlsl b/clang/test/CodeGenHLSL/builtins/length.hlsl index 2f259b79aa7e2..0af669f36e6ba 100644 --- a/clang/test/CodeGenHLSL/builtins/length.hlsl +++ b/clang/test/CodeGenHLSL/builtins/length.hlsl @@ -1 +1,73 @@ -s \ No newline at end of file +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ +// RUN: --check-prefixes=CHECK,NATIVE_HALF +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ +// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF + +// NATIVE_HALF: define noundef half @ +// NATIVE_HALF: call half @llvm.fabs.f16(half +// NO_HALF: call float @llvm.fabs.f32(float +// NATIVE_HALF: ret half +// NO_HALF: ret float +half test_length_half(half p0) +{ + return length(p0); +} +// NATIVE_HALF: define noundef half @ +// NATIVE_HALF: %hlsl.length = call half @llvm.dx.length.v2f16 +// NO_HALF: %hlsl.length = call float @llvm.dx.length.v2f32( +// NATIVE_HALF: ret half %hlsl.length +// NO_HALF: ret float %hlsl.length +half test_length_half2(half2 p0) +{ + return length(p0); +} +// NATIVE_HALF: define noundef half @ +// NATIVE_HALF: %hlsl.length = call half @llvm.dx.length.v3f16 +// NO_HALF: %hlsl.length = call float @llvm.dx.length.v3f32( +// NATIVE_HALF: ret half %hlsl.length +// 
NO_HALF: ret float %hlsl.length +half test_length_half3(half3 p0) +{ + return length(p0); +} +// NATIVE_HALF: define noundef half @ +// NATIVE_HALF: %hlsl.length = call half @llvm.dx.length.v4f16 +// NO_HALF: %hlsl.length = call float @llvm.dx.length.v4f32( +// NATIVE_HALF: ret half %hlsl.length +// NO_HALF: ret float %hlsl.length +half test_length_half4(half4 p0) +{ + return length(p0); +} + +// CHECK: define noundef float @ +// CHECK: call float @llvm.fabs.f32(float +// CHECK: ret float +float test_length_float(float p0) +{ + return length(p0); +} +// CHECK: define noundef float @ +// CHECK: %hlsl.length = call float @llvm.dx.length.v2f32( +// CHECK: ret float %hlsl.length +float test_length_float2(float2 p0) +{ + return length(p0); +} +// CHECK: define noundef float @ +// CHECK: %hlsl.length = call float @llvm.dx.length.v3f32( +// CHECK: ret float %hlsl.length +float test_length_float3(float3 p0) +{ + return length(p0); +} +// CHECK: define noundef float @ +// CHECK: %hlsl.length = call float @llvm.dx.length.v4f32( +// CHECK: ret float %hlsl.length +float test_length_float4(float4 p0) +{ + return length(p0); +} \ No newline at end of file diff --git a/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl index 2f259b79aa7e2..781c344f0da17 100644 --- a/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl @@ -1 +1,31 @@ -s \ No newline at end of file +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm -disable-llvm-passes -verify -verify-ignore-unexpected + +bool test_too_few_arg() +{ + return __builtin_hlsl_elementwise_length(); + // expected-error@-1 {{too few arguments to function call, expected 1, have 0}} +} + +bool2 test_too_many_arg(float2 p0) +{ + return __builtin_hlsl_elementwise_length(p0, p0); + // expected-error@-1 {{too many arguments to function call, expected 1, have 2}} +} + +bool 
builtin_bool_to_float_type_promotion(bool p1) +{ + return __builtin_hlsl_elementwise_length(p1); + // expected-error@-1 {passing 'bool' to parameter of incompatible type 'float'}} +} + +bool builtin_length_int_to_float_promotion(int p1) +{ + return __builtin_hlsl_elementwise_length(p1); + // expected-error@-1 {{passing 'int' to parameter of incompatible type 'float'}} +} + +bool2 builtin_length_int2_to_float2_promotion(int2 p1) +{ + return __builtin_hlsl_elementwise_length(p1); + // expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(float)))) float' (vector of 2 'float' values)}} +} \ No newline at end of file diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index a7f212da2f5b6..47c01f899a926 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -55,6 +55,8 @@ def int_dx_isinf : def int_dx_lerp : Intrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>,LLVMMatchType<0>], [IntrNoMem, IntrWillReturn] >; +def int_dx_length : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], [llvm_anyfloat_ty]>; + def int_dx_imad : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>; def int_dx_umad : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>; def int_dx_rcp : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index ef6ddf12c32f6..c91fe859d7cc2 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -63,5 +63,7 @@ let TargetPrefix = "spv" in { def int_spv_frac : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]>; def int_spv_lerp : Intrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>,LLVMMatchType<0>], [IntrNoMem, 
IntrWillReturn] >; + def int_spv_length : Intrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>,LLVMMatchType<0>], + [IntrNoMem, IntrWillReturn] >; def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]>; } diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp index 4b162a35365c8..7ef5b9eae9310 100644 --- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp +++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp @@ -42,6 +42,7 @@ static bool isIntrinsicExpansion(Function &F) { case Intrinsic::dx_clamp: case Intrinsic::dx_uclamp: case Intrinsic::dx_lerp: + case Intrinsic::dx_length: case Intrinsic::dx_sdot: case Intrinsic::dx_udot: return true; @@ -157,6 +158,42 @@ static bool expandAnyIntrinsic(CallInst *Orig) { return true; } +static bool expandLengthIntrinsic(CallInst *Orig) { + Value *X = Orig->getOperand(0); + IRBuilder<> Builder(Orig->getParent()); + Builder.SetInsertPoint(Orig); + Type *Ty = X->getType(); + Type *EltTy = Ty->getScalarType(); + + // Though dx.length does work on scalar type, we can optimize it to just emit + // fabs, in CGBuiltin.cpp. We shouldn't see a scalar type here because + // CGBuiltin.cpp should have emitted a fabs call. 
+ assert(Ty->isVectorTy() && "dx.length only works on vector type"); + Value *Elt = Builder.CreateExtractElement(X, (uint64_t)0); + auto *XVec = dyn_cast<FixedVectorType>(Ty); + unsigned size = XVec->getNumElements(); + if (size > 1) { + Value *Sum = Builder.CreateFMul(Elt, Elt); + for (unsigned i = 1; i < size; i++) { + Elt = Builder.CreateExtractElement(X, i); + Value *Mul = Builder.CreateFMul(Elt, Elt); + Sum = Builder.CreateFAdd(Sum, Mul); + } + Value *Result = Builder.CreateIntrinsic( + EltTy, Intrinsic::sqrt, ArrayRef<Value *>{Sum}, nullptr, "elt.sqrt"); + + Orig->replaceAllUsesWith(Result); + Orig->eraseFromParent(); + return true; + } else { + Value *Result = Builder.CreateIntrinsic( + EltTy, Intrinsic::fabs, ArrayRef<Value *>{Elt}, nullptr, "elt.abs"); + Orig->replaceAllUsesWith(Result); + Orig->eraseFromParent(); + return true; + } +} + static bool expandLerpIntrinsic(CallInst *Orig) { Value *X = Orig->getOperand(0); Value *Y = Orig->getOperand(1); @@ -280,6 +317,8 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) { return expandClampIntrinsic(Orig, F.getIntrinsicID()); case Intrinsic::dx_lerp: return expandLerpIntrinsic(Orig); + case Intrinsic::dx_length: + return expandLengthIntrinsic(Orig); case Intrinsic::dx_sdot: case Intrinsic::dx_udot: return expandIntegerDot(Orig, F.getIntrinsicID()); diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 8391e0dec9a39..0f0b7fee96559 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -280,7 +280,7 @@ void SPIRVInstructionSelector::setupMF(MachineFunction &MF, GISelKnownBits *KB, CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) { - MMI = &MF.getMMI().getObjFileInfo<SPIRVMachineModuleInfo>(); + // MMI = &MF.getMMI().getObjFileInfo<SPIRVMachineModuleInfo>(); MRI = &MF.getRegInfo(); GR.setCurrentFunc(MF); InstructionSelector::setupMF(MF, 
KB, CoverageInfo, PSI, BFI); >From 2fa4ffdc63e699e2b0e3c44e5dfb95284dbc5f6b Mon Sep 17 00:00:00 2001 From: Joshua Batista <jbati...@microsoft.com> Date: Tue, 30 Jul 2024 15:25:47 -0700 Subject: [PATCH 3/8] clangformat --- clang/lib/CodeGen/CGBuiltin.cpp | 6 +++--- clang/lib/Headers/hlsl/hlsl_intrinsics.h | 1 - clang/lib/Sema/SemaHLSL.cpp | 9 ++++----- clang/test/CodeGenHLSL/builtins/length.hlsl | 2 +- .../test/SemaHLSL/BuiltIns/length-errors.hlsl | 2 +- llvm/include/llvm/IR/IntrinsicsDirectX.td | 1 - llvm/include/llvm/IR/IntrinsicsSPIRV.td | 2 -- .../Target/DirectX/DXILIntrinsicExpansion.cpp | 18 +++++++++--------- 8 files changed, 18 insertions(+), 23 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index a28073ca9ccc5..9b0ab3d361f1b 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18462,7 +18462,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, } case Builtin::BI__builtin_hlsl_elementwise_length: { Value *X = EmitScalarExpr(E->getArg(0)); - + if (!E->getArg(0)->getType()->hasFloatingRepresentation()) llvm_unreachable("length operand must have a float representation"); // if the operand is a scalar, we can use the fabs llvm intrinsic directly @@ -18473,8 +18473,8 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, } return Builder.CreateIntrinsic( /*ReturnType=*/X->getType()->getScalarType(), - CGM.getHLSLRuntime().getLengthIntrinsic(), - ArrayRef<Value *>{X}, nullptr, "hlsl.length"); + CGM.getHLSLRuntime().getLengthIntrinsic(), ArrayRef<Value *>{X}, + nullptr, "hlsl.length"); } case Builtin::BI__builtin_hlsl_elementwise_frac: { Value *Op0 = EmitScalarExpr(E->getArg(0)); diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h index 21ac25bba1acb..03e74ef35b5bf 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h @@ -908,7 +908,6 @@ float3 lerp(float3, float3, 
float3); _HLSL_BUILTIN_ALIAS(__builtin_hlsl_lerp) float4 lerp(float4, float4, float4); - //===----------------------------------------------------------------------===// // length builtins //===----------------------------------------------------------------------===// diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 75be844227672..183ff40c04642 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -1083,19 +1083,18 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { if (SemaRef.checkArgCount(TheCall, 1)) return true; if (SemaRef.PrepareBuiltinElementwiseMathOneArgCall(TheCall)) - return true; + return true; ExprResult A = TheCall->getArg(0); QualType ArgTyA = A.get()->getType(); QualType RetTy; - if (auto *VTy = ArgTyA->getAs<VectorType>()) + if (auto *VTy = ArgTyA->getAs<VectorType>()) RetTy = VTy->getElementType(); - else + else RetTy = TheCall->getArg(0)->getType(); - TheCall->setType(RetTy); - + TheCall->setType(RetTy); if (CheckFloatOrHalfRepresentations(&SemaRef, TheCall)) return true; diff --git a/clang/test/CodeGenHLSL/builtins/length.hlsl b/clang/test/CodeGenHLSL/builtins/length.hlsl index 0af669f36e6ba..ff9238d2d82e3 100644 --- a/clang/test/CodeGenHLSL/builtins/length.hlsl +++ b/clang/test/CodeGenHLSL/builtins/length.hlsl @@ -70,4 +70,4 @@ float test_length_float3(float3 p0) float test_length_float4(float4 p0) { return length(p0); -} \ No newline at end of file +} diff --git a/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl index 781c344f0da17..85c317a1ee0ec 100644 --- a/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl @@ -28,4 +28,4 @@ bool2 builtin_length_int2_to_float2_promotion(int2 p1) { return __builtin_hlsl_elementwise_length(p1); // expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * 
sizeof(float)))) float' (vector of 2 'float' values)}} -} \ No newline at end of file +} diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index 47c01f899a926..312c3862f240d 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -56,7 +56,6 @@ def int_dx_lerp : Intrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType [IntrNoMem, IntrWillReturn] >; def int_dx_length : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], [llvm_anyfloat_ty]>; - def int_dx_imad : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>; def int_dx_umad : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>; def int_dx_rcp : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index c91fe859d7cc2..ef6ddf12c32f6 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -63,7 +63,5 @@ let TargetPrefix = "spv" in { def int_spv_frac : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]>; def int_spv_lerp : Intrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>,LLVMMatchType<0>], [IntrNoMem, IntrWillReturn] >; - def int_spv_length : Intrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>,LLVMMatchType<0>], - [IntrNoMem, IntrWillReturn] >; def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]>; } diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp index 7ef5b9eae9310..8c3dd54b9af18 100644 --- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp +++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp @@ -181,17 +181,17 @@ static bool expandLengthIntrinsic(CallInst *Orig) { } Value *Result = Builder.CreateIntrinsic( EltTy, Intrinsic::sqrt, 
ArrayRef<Value *>{Sum}, nullptr, "elt.sqrt"); - - Orig->replaceAllUsesWith(Result); + + Orig->replaceAllUsesWith(Result); Orig->eraseFromParent(); - return true; + return true; } else { - Value *Result = Builder.CreateIntrinsic( - EltTy, Intrinsic::fabs, ArrayRef<Value *>{Elt}, nullptr, "elt.abs"); - Orig->replaceAllUsesWith(Result); - Orig->eraseFromParent(); - return true; - } + Value *Result = Builder.CreateIntrinsic( + EltTy, Intrinsic::fabs, ArrayRef<Value *>{Elt}, nullptr, "elt.abs"); + Orig->replaceAllUsesWith(Result); + Orig->eraseFromParent(); + return true; + } } static bool expandLerpIntrinsic(CallInst *Orig) { >From fa0601208ccbbdc586db1741b0143f8e9fa88bdd Mon Sep 17 00:00:00 2001 From: Joshua Batista <jbati...@microsoft.com> Date: Tue, 30 Jul 2024 17:03:40 -0700 Subject: [PATCH 4/8] add length.ll, address all of Farzon's feedback --- clang/docs/LanguageExtensions.rst | 1 - clang/docs/ReleaseNotes.rst | 1 - clang/lib/CodeGen/CGBuiltin.cpp | 8 +- clang/lib/Sema/SemaHLSL.cpp | 5 +- .../test/SemaHLSL/BuiltIns/length-errors.hlsl | 4 +- llvm/include/llvm/IR/IntrinsicsSPIRV.td | 1 + .../Target/DirectX/DXILIntrinsicExpansion.cpp | 31 ++--- llvm/test/CodeGen/DirectX/length.ll | 107 ++++++++++++++++++ 8 files changed, 127 insertions(+), 31 deletions(-) create mode 100644 llvm/test/CodeGen/DirectX/length.ll diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 45f081081a371..a747464582e77 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -664,7 +664,6 @@ Unless specified otherwise operation(±0) = ±0 and operation(±infinity) = ±in T __builtin_elementwise_cosh(T x) return the hyperbolic cosine of angle x in radians floating point types T __builtin_elementwise_tanh(T x) return the hyperbolic tangent of angle x in radians floating point types T __builtin_elementwise_floor(T x) return the largest integral value less than or equal to x floating point types - T __builtin_elementwise_length(T x) 
return the length of the specified floating-point vector floating point types T __builtin_elementwise_log(T x) return the natural logarithm of x floating point types T __builtin_elementwise_log2(T x) return the base 2 logarithm of x floating point types T __builtin_elementwise_log10(T x) return the base 10 logarithm of x floating point types diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 34dbb26a66b82..3c2e0282d1c72 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -249,7 +249,6 @@ DWARF Support in Clang Floating Point Support in Clang ------------------------------- -- Add ``__builtin_elementwise_length``builtin for floating point types only. Fixed Point Support in Clang ---------------------------- diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 9b0ab3d361f1b..1b67eaf9ab3ca 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18466,11 +18466,9 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, if (!E->getArg(0)->getType()->hasFloatingRepresentation()) llvm_unreachable("length operand must have a float representation"); // if the operand is a scalar, we can use the fabs llvm intrinsic directly - if (!E->getArg(0)->getType()->isVectorType()) { - llvm::Type *ResultType = ConvertType(E->getType()); - Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); - return Builder.CreateCall(F, X); - } + if (!E->getArg(0)->getType()->isVectorType()) + return EmitFAbs(*this, X); + return Builder.CreateIntrinsic( /*ReturnType=*/X->getType()->getScalarType(), CGM.getHLSLRuntime().getLengthIntrinsic(), ArrayRef<Value *>{X}, diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 183ff40c04642..ee307a7da2136 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -1080,6 +1080,8 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { break; } case 
Builtin::BI__builtin_hlsl_elementwise_length: { + if (CheckFloatOrHalfRepresentations(&SemaRef, TheCall)) + return true; if (SemaRef.checkArgCount(TheCall, 1)) return true; if (SemaRef.PrepareBuiltinElementwiseMathOneArgCall(TheCall)) @@ -1095,9 +1097,6 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { RetTy = TheCall->getArg(0)->getType(); TheCall->setType(RetTy); - - if (CheckFloatOrHalfRepresentations(&SemaRef, TheCall)) - return true; break; } case Builtin::BI__builtin_hlsl_mad: { diff --git a/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl index 85c317a1ee0ec..b1153a7782d35 100644 --- a/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl @@ -1,12 +1,12 @@ // RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm -disable-llvm-passes -verify -verify-ignore-unexpected -bool test_too_few_arg() +void test_too_few_arg() { return __builtin_hlsl_elementwise_length(); // expected-error@-1 {{too few arguments to function call, expected 1, have 0}} } -bool2 test_too_many_arg(float2 p0) +void test_too_many_arg(float2 p0) { return __builtin_hlsl_elementwise_length(p0, p0); // expected-error@-1 {{too many arguments to function call, expected 1, have 2}} diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index ef6ddf12c32f6..3f77ef6bfcdbe 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -63,5 +63,6 @@ let TargetPrefix = "spv" in { def int_spv_frac : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]>; def int_spv_lerp : Intrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>,LLVMMatchType<0>], [IntrNoMem, IntrWillReturn] >; + def int_spv_length : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], [llvm_anyfloat_ty]>; def int_spv_rsqrt : 
DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]>; } diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp index 8c3dd54b9af18..f7a1c91a10803 100644 --- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp +++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp @@ -168,30 +168,23 @@ static bool expandLengthIntrinsic(CallInst *Orig) { // Though dx.length does work on scalar type, we can optimize it to just emit // fabs, in CGBuiltin.cpp. We shouldn't see a scalar type here because // CGBuiltin.cpp should have emitted a fabs call. - assert(Ty->isVectorTy() && "dx.length only works on vector type"); Value *Elt = Builder.CreateExtractElement(X, (uint64_t)0); auto *XVec = dyn_cast<FixedVectorType>(Ty); unsigned size = XVec->getNumElements(); - if (size > 1) { - Value *Sum = Builder.CreateFMul(Elt, Elt); - for (unsigned i = 1; i < size; i++) { - Elt = Builder.CreateExtractElement(X, i); - Value *Mul = Builder.CreateFMul(Elt, Elt); - Sum = Builder.CreateFAdd(Sum, Mul); - } - Value *Result = Builder.CreateIntrinsic( - EltTy, Intrinsic::sqrt, ArrayRef<Value *>{Sum}, nullptr, "elt.sqrt"); + assert(Ty->isVectorTy() && size > 1 && "dx.length only works on vector type"); - Orig->replaceAllUsesWith(Result); - Orig->eraseFromParent(); - return true; - } else { - Value *Result = Builder.CreateIntrinsic( - EltTy, Intrinsic::fabs, ArrayRef<Value *>{Elt}, nullptr, "elt.abs"); - Orig->replaceAllUsesWith(Result); - Orig->eraseFromParent(); - return true; + Value *Sum = Builder.CreateFMul(Elt, Elt); + for (unsigned i = 1; i < size; i++) { + Elt = Builder.CreateExtractElement(X, i); + Value *Mul = Builder.CreateFMul(Elt, Elt); + Sum = Builder.CreateFAdd(Sum, Mul); } + Value *Result = Builder.CreateIntrinsic( + EltTy, Intrinsic::sqrt, ArrayRef<Value *>{Sum}, nullptr, "elt.sqrt"); + + Orig->replaceAllUsesWith(Result); + Orig->eraseFromParent(); + return true; } static bool expandLerpIntrinsic(CallInst *Orig) { diff 
--git a/llvm/test/CodeGen/DirectX/length.ll b/llvm/test/CodeGen/DirectX/length.ll new file mode 100644 index 0000000000000..2047ba18c1bc9 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/length.ll @@ -0,0 +1,107 @@ +; RUN: opt -S -dxil-intrinsic-expansion < %s | FileCheck %s --check-prefixes=CHECK,EXPCHECK +; RUN: opt -S -dxil-op-lower < %s | FileCheck %s --check-prefixes=CHECK,DOPCHECK + +; ModuleID = 'D:\llvm-project\clang\test\CodeGenHLSL\builtins\length.hlsl' +source_filename = "D:\\llvm-project\\clang\\test\\CodeGenHLSL\\builtins\\length.hlsl" +target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" +target triple = "dxilv1.3-pc-shadermodel6.3-library" + +; Function Attrs: convergent noinline nounwind optnone +define noundef float @"?test_length_half@@YA$halff@$halff@@Z"(float noundef %p0) #0 { +entry: + %p0.addr = alloca float, align 4 + store float %p0, ptr %p0.addr, align 4 + %0 = load float, ptr %p0.addr, align 4 + + ; EXPCHECK: call float @llvm.fabs.f32(float %{{.*}}) + ; DOPCHECK: call float @dx.op.unary.f32(i32 6, float %{{.*}}) + %1 = call float @llvm.fabs.f32(float %0) #3 + ret float %1 +} + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare float @llvm.fabs.f32(float) #1 + +; Function Attrs: convergent noinline nounwind optnone +define noundef float @"?test_length_half2@@YA$halff@T?$__vector@$halff@$01@__clang@@@Z"(<2 x float> noundef %p0) #0 { +entry: + %p0.addr = alloca <2 x float>, align 8 + store <2 x float> %p0, ptr %p0.addr, align 8 + %0 = load <2 x float>, ptr %p0.addr, align 8 + + ; CHECK: extractelement <2 x float> %{{.*}}, i64 0 + ; CHECK: fmul float %{{.*}}, %{{.*}} + ; CHECK: extractelement <2 x float> %{{.*}}, i64 1 + ; CHECK: fmul float %{{.*}}, %{{.*}} + ; CHECK: fadd float %{{.*}}, %{{.*}} + ; EXPCHECK: call float @llvm.sqrt.f32(float %{{.*}}) + ; DOPCHECK: call float @dx.op.unary.f32(i32 24, float %{{.*}}) + + %hlsl.length = call float 
@llvm.dx.length.v2f32(<2 x float> %0) + ret float %hlsl.length +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn +declare float @llvm.dx.length.v2f32(<2 x float>) #2 + +; Function Attrs: convergent noinline nounwind optnone +define noundef float @"?test_length_half3@@YA$halff@T?$__vector@$halff@$02@__clang@@@Z"(<3 x float> noundef %p0) #0 { +entry: + %p0.addr = alloca <3 x float>, align 16 + store <3 x float> %p0, ptr %p0.addr, align 16 + %0 = load <3 x float>, ptr %p0.addr, align 16 + + ; CHECK: extractelement <3 x float> %{{.*}}, i64 0 + ; CHECK: fmul float %{{.*}}, %{{.*}} + ; CHECK: extractelement <3 x float> %{{.*}}, i64 1 + ; CHECK: fmul float %{{.*}}, %{{.*}} + ; CHECK: fadd float %{{.*}}, %{{.*}} + ; CHECK: extractelement <3 x float> %{{.*}}, i64 2 + ; CHECK: fmul float %{{.*}}, %{{.*}} + ; CHECK: fadd float %{{.*}}, %{{.*}} + ; EXPCHECK: call float @llvm.sqrt.f32(float %{{.*}}) + ; DOPCHECK: call float @dx.op.unary.f32(i32 24, float %{{.*}}) + + %hlsl.length = call float @llvm.dx.length.v3f32(<3 x float> %0) + ret float %hlsl.length +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn +declare float @llvm.dx.length.v3f32(<3 x float>) #2 + +; Function Attrs: convergent noinline nounwind optnone +define noundef float @"?test_length_half4@@YA$halff@T?$__vector@$halff@$03@__clang@@@Z"(<4 x float> noundef %p0) #0 { +entry: + %p0.addr = alloca <4 x float>, align 16 + store <4 x float> %p0, ptr %p0.addr, align 16 + %0 = load <4 x float>, ptr %p0.addr, align 16 + + ; CHECK: extractelement <4 x float> %{{.*}}, i64 0 + ; CHECK: fmul float %{{.*}}, %{{.*}} + ; CHECK: extractelement <4 x float> %{{.*}}, i64 1 + ; CHECK: fmul float %{{.*}}, %{{.*}} + ; CHECK: fadd float %{{.*}}, %{{.*}} + ; CHECK: extractelement <4 x float> %{{.*}}, i64 2 + ; CHECK: fmul float %{{.*}}, %{{.*}} + ; CHECK: fadd float %{{.*}}, %{{.*}} + ; CHECK: extractelement <4 x float> %{{.*}}, i64 3 + ; CHECK: fmul float %{{.*}}, %{{.*}} + ; CHECK: fadd float 
%{{.*}}, %{{.*}} + ; EXPCHECK: call float @llvm.sqrt.f32(float %{{.*}}) + ; DOPCHECK: call float @dx.op.unary.f32(i32 24, float %{{.*}}) + + %hlsl.length = call float @llvm.dx.length.v4f32(<4 x float> %0) + ret float %hlsl.length +} + +attributes #0 = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +attributes #2 = { nocallback nofree nosync nounwind willreturn } +attributes #3 = { memory(none) } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 4, !"dx.disable_optimizations", i32 1} +!2 = !{!"clang version 20.0.0git (g...@github.com:bob80905/llvm-project.git 2fa4ffdc63e699e2b0e3c44e5dfb95284dbc5f6b)"} \ No newline at end of file >From 46dec9d70e306b13aa6385a484d95d46cf35913d Mon Sep 17 00:00:00 2001 From: Joshua Batista <jbati...@microsoft.com> Date: Wed, 31 Jul 2024 11:26:01 -0700 Subject: [PATCH 5/8] address all of farzon's feedback, add length_error.ll --- clang/test/CodeGenHLSL/builtins/length.hlsl | 16 +-- .../test/SemaHLSL/BuiltIns/length-errors.hlsl | 10 +- llvm/test/CodeGen/DirectX/length.ll | 132 +++++++++++------- llvm/test/CodeGen/DirectX/length_error.ll | 10 ++ 4 files changed, 102 insertions(+), 66 deletions(-) create mode 100644 llvm/test/CodeGen/DirectX/length_error.ll diff --git a/clang/test/CodeGenHLSL/builtins/length.hlsl b/clang/test/CodeGenHLSL/builtins/length.hlsl index ff9238d2d82e3..1c23b0df04df9 100644 --- a/clang/test/CodeGenHLSL/builtins/length.hlsl +++ b/clang/test/CodeGenHLSL/builtins/length.hlsl @@ -13,7 +13,7 @@ // NO_HALF: ret float half test_length_half(half p0) { - return length(p0); + return length(p0); } // NATIVE_HALF: define noundef half @ // NATIVE_HALF: %hlsl.length = call half @llvm.dx.length.v2f16 @@ -22,7 +22,7 @@ half test_length_half(half p0) // NO_HALF: ret float %hlsl.length half test_length_half2(half2 p0) { - return 
length(p0); + return length(p0); } // NATIVE_HALF: define noundef half @ // NATIVE_HALF: %hlsl.length = call half @llvm.dx.length.v3f16 @@ -31,7 +31,7 @@ half test_length_half2(half2 p0) // NO_HALF: ret float %hlsl.length half test_length_half3(half3 p0) { - return length(p0); + return length(p0); } // NATIVE_HALF: define noundef half @ // NATIVE_HALF: %hlsl.length = call half @llvm.dx.length.v4f16 @@ -40,7 +40,7 @@ half test_length_half3(half3 p0) // NO_HALF: ret float %hlsl.length half test_length_half4(half4 p0) { - return length(p0); + return length(p0); } // CHECK: define noundef float @ @@ -48,26 +48,26 @@ half test_length_half4(half4 p0) // CHECK: ret float float test_length_float(float p0) { - return length(p0); + return length(p0); } // CHECK: define noundef float @ // CHECK: %hlsl.length = call float @llvm.dx.length.v2f32( // CHECK: ret float %hlsl.length float test_length_float2(float2 p0) { - return length(p0); + return length(p0); } // CHECK: define noundef float @ // CHECK: %hlsl.length = call float @llvm.dx.length.v3f32( // CHECK: ret float %hlsl.length float test_length_float3(float3 p0) { - return length(p0); + return length(p0); } // CHECK: define noundef float @ // CHECK: %hlsl.length = call float @llvm.dx.length.v4f32( // CHECK: ret float %hlsl.length float test_length_float4(float4 p0) { - return length(p0); + return length(p0); } diff --git a/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl index b1153a7782d35..0f7d8b2d65eaf 100644 --- a/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl @@ -2,30 +2,30 @@ void test_too_few_arg() { - return __builtin_hlsl_elementwise_length(); + return __builtin_hlsl_elementwise_length(); // expected-error@-1 {{too few arguments to function call, expected 1, have 0}} } void test_too_many_arg(float2 p0) { - return __builtin_hlsl_elementwise_length(p0, p0); + return __builtin_hlsl_elementwise_length(p0, p0); // 
expected-error@-1 {{too many arguments to function call, expected 1, have 2}} } bool builtin_bool_to_float_type_promotion(bool p1) { - return __builtin_hlsl_elementwise_length(p1); + return __builtin_hlsl_elementwise_length(p1); // expected-error@-1 {passing 'bool' to parameter of incompatible type 'float'}} } bool builtin_length_int_to_float_promotion(int p1) { - return __builtin_hlsl_elementwise_length(p1); + return __builtin_hlsl_elementwise_length(p1); // expected-error@-1 {{passing 'int' to parameter of incompatible type 'float'}} } bool2 builtin_length_int2_to_float2_promotion(int2 p1) { - return __builtin_hlsl_elementwise_length(p1); + return __builtin_hlsl_elementwise_length(p1); // expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(float)))) float' (vector of 2 'float' values)}} } diff --git a/llvm/test/CodeGen/DirectX/length.ll b/llvm/test/CodeGen/DirectX/length.ll index 2047ba18c1bc9..b5ca4b0dc5cbf 100644 --- a/llvm/test/CodeGen/DirectX/length.ll +++ b/llvm/test/CodeGen/DirectX/length.ll @@ -1,34 +1,88 @@ ; RUN: opt -S -dxil-intrinsic-expansion < %s | FileCheck %s --check-prefixes=CHECK,EXPCHECK ; RUN: opt -S -dxil-op-lower < %s | FileCheck %s --check-prefixes=CHECK,DOPCHECK -; ModuleID = 'D:\llvm-project\clang\test\CodeGenHLSL\builtins\length.hlsl' -source_filename = "D:\\llvm-project\\clang\\test\\CodeGenHLSL\\builtins\\length.hlsl" -target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" target triple = "dxilv1.3-pc-shadermodel6.3-library" -; Function Attrs: convergent noinline nounwind optnone -define noundef float @"?test_length_half@@YA$halff@$halff@@Z"(float noundef %p0) #0 { -entry: - %p0.addr = alloca float, align 4 - store float %p0, ptr %p0.addr, align 4 - %0 = load float, ptr %p0.addr, align 4 +declare half @llvm.fabs.f16(half) +declare half @llvm.dx.length.v2f16(<2 x half>) +declare half @llvm.dx.length.v3f16(<3 x 
half>) +declare half @llvm.dx.length.v4f16(<4 x half>) + +declare float @llvm.fabs.f32(float) +declare float @llvm.dx.length.v2f32(<2 x float>) +declare float @llvm.dx.length.v3f32(<3 x float>) +declare float @llvm.dx.length.v4f32(<4 x float>) + +define noundef half @test_length_half(half noundef %p0) { +entry: + ; EXPCHECK: call half @llvm.fabs.f16(half %{{.*}}) + ; DOPCHECK: call half @dx.op.unary.f16(i32 6, half %{{.*}}) + %0 = call half @llvm.fabs.f16(half %p0) + ret half %0 +} - ; EXPCHECK: call float @llvm.fabs.f32(float %{{.*}}) - ; DOPCHECK: call float @dx.op.unary.f32(i32 6, float %{{.*}}) - %1 = call float @llvm.fabs.f32(float %0) #3 - ret float %1 +define noundef half @test_length_half2(<2 x half> noundef %p0) { +entry: + ; CHECK: extractelement <2 x half> %{{.*}}, i64 0 + ; CHECK: fmul half %{{.*}}, %{{.*}} + ; CHECK: extractelement <2 x half> %{{.*}}, i64 1 + ; CHECK: fmul half %{{.*}}, %{{.*}} + ; CHECK: fadd half %{{.*}}, %{{.*}} + ; EXPCHECK: call half @llvm.sqrt.f16(half %{{.*}}) + ; DOPCHECK: call half @dx.op.unary.f16(i32 24, half %{{.*}}) + + %hlsl.length = call half @llvm.dx.length.v2f16(<2 x half> %p0) + ret half %hlsl.length } -; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) -declare float @llvm.fabs.f32(float) #1 +define noundef half @test_length_half3(<3 x half> noundef %p0) { +entry: + ; CHECK: extractelement <3 x half> %{{.*}}, i64 0 + ; CHECK: fmul half %{{.*}}, %{{.*}} + ; CHECK: extractelement <3 x half> %{{.*}}, i64 1 + ; CHECK: fmul half %{{.*}}, %{{.*}} + ; CHECK: fadd half %{{.*}}, %{{.*}} + ; CHECK: extractelement <3 x half> %{{.*}}, i64 2 + ; CHECK: fmul half %{{.*}}, %{{.*}} + ; CHECK: fadd half %{{.*}}, %{{.*}} + ; EXPCHECK: call half @llvm.sqrt.f16(half %{{.*}}) + ; DOPCHECK: call half @dx.op.unary.f16(i32 24, half %{{.*}}) + + %hlsl.length = call half @llvm.dx.length.v3f16(<3 x half> %p0) + ret half %hlsl.length +} -; Function Attrs: convergent noinline nounwind optnone -define noundef 
float @"?test_length_half2@@YA$halff@T?$__vector@$halff@$01@__clang@@@Z"(<2 x float> noundef %p0) #0 { +define noundef half @test_length_half4(<4 x half> noundef %p0) { entry: - %p0.addr = alloca <2 x float>, align 8 - store <2 x float> %p0, ptr %p0.addr, align 8 - %0 = load <2 x float>, ptr %p0.addr, align 8 + ; CHECK: extractelement <4 x half> %{{.*}}, i64 0 + ; CHECK: fmul half %{{.*}}, %{{.*}} + ; CHECK: extractelement <4 x half> %{{.*}}, i64 1 + ; CHECK: fmul half %{{.*}}, %{{.*}} + ; CHECK: fadd half %{{.*}}, %{{.*}} + ; CHECK: extractelement <4 x half> %{{.*}}, i64 2 + ; CHECK: fmul half %{{.*}}, %{{.*}} + ; CHECK: fadd half %{{.*}}, %{{.*}} + ; CHECK: extractelement <4 x half> %{{.*}}, i64 3 + ; CHECK: fmul half %{{.*}}, %{{.*}} + ; CHECK: fadd half %{{.*}}, %{{.*}} + ; EXPCHECK: call half @llvm.sqrt.f16(half %{{.*}}) + ; DOPCHECK: call half @dx.op.unary.f16(i32 24, half %{{.*}}) + + %hlsl.length = call half @llvm.dx.length.v4f16(<4 x half> %p0) + ret half %hlsl.length +} +define noundef float @test_length_float(float noundef %p0) { +entry: + ; EXPCHECK: call float @llvm.fabs.f32(float %p0) + ; DOPCHECK: call float @dx.op.unary.f32(i32 6, float %{{.*}}) + + %0 = call float @llvm.fabs.f32(float %p0) + ret float %0 +} + +define noundef float @test_length_float2(<2 x float> noundef %p0) { +entry: ; CHECK: extractelement <2 x float> %{{.*}}, i64 0 ; CHECK: fmul float %{{.*}}, %{{.*}} ; CHECK: extractelement <2 x float> %{{.*}}, i64 1 @@ -37,20 +91,12 @@ entry: ; EXPCHECK: call float @llvm.sqrt.f32(float %{{.*}}) ; DOPCHECK: call float @dx.op.unary.f32(i32 24, float %{{.*}}) - %hlsl.length = call float @llvm.dx.length.v2f32(<2 x float> %0) + %hlsl.length = call float @llvm.dx.length.v2f32(<2 x float> %p0) ret float %hlsl.length } -; Function Attrs: nocallback nofree nosync nounwind willreturn -declare float @llvm.dx.length.v2f32(<2 x float>) #2 - -; Function Attrs: convergent noinline nounwind optnone -define noundef float 
@"?test_length_half3@@YA$halff@T?$__vector@$halff@$02@__clang@@@Z"(<3 x float> noundef %p0) #0 { +define noundef float @test_length_float3(<3 x float> noundef %p0) { entry: - %p0.addr = alloca <3 x float>, align 16 - store <3 x float> %p0, ptr %p0.addr, align 16 - %0 = load <3 x float>, ptr %p0.addr, align 16 - ; CHECK: extractelement <3 x float> %{{.*}}, i64 0 ; CHECK: fmul float %{{.*}}, %{{.*}} ; CHECK: extractelement <3 x float> %{{.*}}, i64 1 @@ -62,20 +108,12 @@ entry: ; EXPCHECK: call float @llvm.sqrt.f32(float %{{.*}}) ; DOPCHECK: call float @dx.op.unary.f32(i32 24, float %{{.*}}) - %hlsl.length = call float @llvm.dx.length.v3f32(<3 x float> %0) + %hlsl.length = call float @llvm.dx.length.v3f32(<3 x float> %p0) ret float %hlsl.length } -; Function Attrs: nocallback nofree nosync nounwind willreturn -declare float @llvm.dx.length.v3f32(<3 x float>) #2 - -; Function Attrs: convergent noinline nounwind optnone -define noundef float @"?test_length_half4@@YA$halff@T?$__vector@$halff@$03@__clang@@@Z"(<4 x float> noundef %p0) #0 { +define noundef float @test_length_float4(<4 x float> noundef %p0) { entry: - %p0.addr = alloca <4 x float>, align 16 - store <4 x float> %p0, ptr %p0.addr, align 16 - %0 = load <4 x float>, ptr %p0.addr, align 16 - ; CHECK: extractelement <4 x float> %{{.*}}, i64 0 ; CHECK: fmul float %{{.*}}, %{{.*}} ; CHECK: extractelement <4 x float> %{{.*}}, i64 1 @@ -90,18 +128,6 @@ entry: ; EXPCHECK: call float @llvm.sqrt.f32(float %{{.*}}) ; DOPCHECK: call float @dx.op.unary.f32(i32 24, float %{{.*}}) - %hlsl.length = call float @llvm.dx.length.v4f32(<4 x float> %0) + %hlsl.length = call float @llvm.dx.length.v4f32(<4 x float> %p0) ret float %hlsl.length } - -attributes #0 = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } -attributes #2 = { nocallback nofree nosync nounwind willreturn } -attributes #3 = 
{ memory(none) } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 4, !"dx.disable_optimizations", i32 1} -!2 = !{!"clang version 20.0.0git (g...@github.com:bob80905/llvm-project.git 2fa4ffdc63e699e2b0e3c44e5dfb95284dbc5f6b)"} \ No newline at end of file diff --git a/llvm/test/CodeGen/DirectX/length_error.ll b/llvm/test/CodeGen/DirectX/length_error.ll new file mode 100644 index 0000000000000..9c747c36804a6 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/length_error.ll @@ -0,0 +1,10 @@ +; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s + +; DXIL operation length does not support double overload type +; CHECK: LLVM ERROR: Invalid Overload + +define noundef double @test_length_double2(<2 x double> noundef %p0) { +entry: + %hlsl.length = call double @llvm.dx.length.v2f32(<2 x double> %p0) + ret double %hlsl.length +} \ No newline at end of file >From 0af5ca8847c7927fb66fc075e1c97606f1a35833 Mon Sep 17 00:00:00 2001 From: Joshua Batista <jbati...@microsoft.com> Date: Wed, 31 Jul 2024 11:27:57 -0700 Subject: [PATCH 6/8] remove target triple variable, define target triple in flag on dxil-op-lower run line --- llvm/test/CodeGen/DirectX/length.ll | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/test/CodeGen/DirectX/length.ll b/llvm/test/CodeGen/DirectX/length.ll index b5ca4b0dc5cbf..c1133f973332d 100644 --- a/llvm/test/CodeGen/DirectX/length.ll +++ b/llvm/test/CodeGen/DirectX/length.ll @@ -1,7 +1,5 @@ ; RUN: opt -S -dxil-intrinsic-expansion < %s | FileCheck %s --check-prefixes=CHECK,EXPCHECK -; RUN: opt -S -dxil-op-lower < %s | FileCheck %s --check-prefixes=CHECK,DOPCHECK - -target triple = "dxilv1.3-pc-shadermodel6.3-library" +; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s --check-prefixes=CHECK,DOPCHECK declare half @llvm.fabs.f16(half) declare half @llvm.dx.length.v2f16(<2 x half>) >From 
8857963c5a537b0fc1e5003bac67bebf234c5d57 Mon Sep 17 00:00:00 2001 From: Joshua Batista <jbati...@microsoft.com> Date: Wed, 31 Jul 2024 11:31:23 -0700 Subject: [PATCH 7/8] add new line to length_error.ll --- llvm/test/CodeGen/DirectX/length_error.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/DirectX/length_error.ll b/llvm/test/CodeGen/DirectX/length_error.ll index 9c747c36804a6..952c9155b2599 100644 --- a/llvm/test/CodeGen/DirectX/length_error.ll +++ b/llvm/test/CodeGen/DirectX/length_error.ll @@ -7,4 +7,4 @@ define noundef double @test_length_double2(<2 x double> noundef %p0) { entry: %hlsl.length = call double @llvm.dx.length.v2f32(<2 x double> %p0) ret double %hlsl.length -} \ No newline at end of file +} >From 9ac29685a8f3841b8260f509dd337e38921ea854 Mon Sep 17 00:00:00 2001 From: Joshua Batista <jbati...@microsoft.com> Date: Wed, 31 Jul 2024 11:53:11 -0700 Subject: [PATCH 8/8] add test description on length.ll --- llvm/test/CodeGen/DirectX/length.ll | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/test/CodeGen/DirectX/length.ll b/llvm/test/CodeGen/DirectX/length.ll index c1133f973332d..fc7384267912c 100644 --- a/llvm/test/CodeGen/DirectX/length.ll +++ b/llvm/test/CodeGen/DirectX/length.ll @@ -1,6 +1,8 @@ ; RUN: opt -S -dxil-intrinsic-expansion < %s | FileCheck %s --check-prefixes=CHECK,EXPCHECK ; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s --check-prefixes=CHECK,DOPCHECK +; Make sure dxil operation function calls for length are generated for half/float. + declare half @llvm.fabs.f16(half) declare half @llvm.dx.length.v2f16(<2 x half>) declare half @llvm.dx.length.v3f16(<3 x half>) _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits