Author: Florian Hahn Date: 2021-11-02T15:01:42+01:00 New Revision: 7999355106fb2fcc8de243d2e34b4b73ae4f3d2f
URL: https://github.com/llvm/llvm-project/commit/7999355106fb2fcc8de243d2e34b4b73ae4f3d2f DIFF: https://github.com/llvm/llvm-project/commit/7999355106fb2fcc8de243d2e34b4b73ae4f3d2f.diff LOG: [Clang] Add min/max reduction builtins. This patch implements __builtin_reduce_max and __builtin_reduce_min as specified in D111529. The order of operations does not matter for min or max reductions and they can be directly lowered to the corresponding llvm.vector.reduce.{fmin,fmax,umin,umax,smin,smax} intrinsic calls. Reviewed By: aaron.ballman Differential Revision: https://reviews.llvm.org/D112001 Added: clang/test/CodeGen/builtins-reduction-math.c clang/test/Sema/builtins-reduction-math.c Modified: clang/include/clang/Basic/Builtins.def clang/include/clang/Basic/DiagnosticSemaKinds.td clang/include/clang/Sema/Sema.h clang/lib/CodeGen/CGBuiltin.cpp clang/lib/Sema/SemaChecking.cpp Removed: ################################################################################ diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def index 7d331a86126f1..b05777889e79a 100644 --- a/clang/include/clang/Basic/Builtins.def +++ b/clang/include/clang/Basic/Builtins.def @@ -646,6 +646,8 @@ BUILTIN(__builtin_call_with_static_chain, "v.", "nt") BUILTIN(__builtin_elementwise_abs, "v.", "nct") BUILTIN(__builtin_elementwise_max, "v.", "nct") BUILTIN(__builtin_elementwise_min, "v.", "nct") +BUILTIN(__builtin_reduce_max, "v.", "nct") +BUILTIN(__builtin_reduce_min, "v.", "nct") BUILTIN(__builtin_matrix_transpose, "v.", "nFt") BUILTIN(__builtin_matrix_column_major_load, "v.", "nFt") diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index d37c8e9266e9b..a67ef684f1e5c 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11313,7 +11313,7 @@ def err_builtin_invalid_arg_type: Error < "%ordinal0 argument must be a " "%select{vector, integer 
or floating point type|matrix|" "pointer to a valid matrix element type|" - "signed integer or floating point type}1 (was %2)">; + "signed integer or floating point type|vector type}1 (was %2)">; def err_builtin_matrix_disabled: Error< "matrix types extension is disabled. Pass -fenable-matrix to enable it">; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 11e157bc7d731..909328b164d5c 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -12732,6 +12732,7 @@ class Sema final { bool SemaBuiltinElementwiseMath(CallExpr *TheCall); bool SemaBuiltinElementwiseMathOneArg(CallExpr *TheCall); + bool SemaBuiltinReduceMath(CallExpr *TheCall); // Matrix builtin handling. ExprResult SemaBuiltinMatrixTranspose(CallExpr *TheCall, diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 501b0e3c34436..fab21e5b588a5 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -3145,6 +3145,44 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(Result); } + case Builtin::BI__builtin_reduce_max: { + auto GetIntrinsicID = [](QualType QT, llvm::Type *IrTy) { + if (IrTy->isIntOrIntVectorTy()) { + if (auto *VecTy = QT->getAs<VectorType>()) + QT = VecTy->getElementType(); + if (QT->isSignedIntegerType()) + return llvm::Intrinsic::vector_reduce_smax; + else + return llvm::Intrinsic::vector_reduce_umax; + } + return llvm::Intrinsic::vector_reduce_fmax; + }; + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Result = Builder.CreateUnaryIntrinsic( + GetIntrinsicID(E->getArg(0)->getType(), Op0->getType()), Op0, nullptr, + "rdx.max"); + return RValue::get(Result); + } + + case Builtin::BI__builtin_reduce_min: { + auto GetIntrinsicID = [](QualType QT, llvm::Type *IrTy) { + if (IrTy->isIntOrIntVectorTy()) { + if (auto *VecTy = QT->getAs<VectorType>()) + QT = VecTy->getElementType(); + if (QT->isSignedIntegerType()) + return
llvm::Intrinsic::vector_reduce_smin; + else + return llvm::Intrinsic::vector_reduce_umin; + } + return llvm::Intrinsic::vector_reduce_fmin; + }; + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Result = Builder.CreateUnaryIntrinsic( + GetIntrinsicID(E->getArg(0)->getType(), Op0->getType()), Op0, nullptr, + "rdx.min"); + return RValue::get(Result); + } + case Builtin::BI__builtin_matrix_transpose: { const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>(); Value *MatValue = EmitScalarExpr(E->getArg(0)); diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index bf458f914c111..84cebb03b5f02 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -1985,6 +1985,11 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, if (SemaBuiltinElementwiseMath(TheCall)) return ExprError(); break; + case Builtin::BI__builtin_reduce_max: + case Builtin::BI__builtin_reduce_min: + if (SemaBuiltinReduceMath(TheCall)) + return ExprError(); + break; case Builtin::BI__builtin_matrix_transpose: return SemaBuiltinMatrixTranspose(TheCall, TheCallResult); @@ -16596,6 +16601,26 @@ bool Sema::SemaBuiltinElementwiseMath(CallExpr *TheCall) { return false; } +bool Sema::SemaBuiltinReduceMath(CallExpr *TheCall) { + if (checkArgCount(*this, TheCall, 1)) + return true; + + ExprResult A = UsualUnaryConversions(TheCall->getArg(0)); + if (A.isInvalid()) + return true; + + TheCall->setArg(0, A.get()); + const VectorType *TyA = A.get()->getType()->getAs<VectorType>(); + if (!TyA) { + SourceLocation ArgLoc = TheCall->getArg(0)->getBeginLoc(); + return Diag(ArgLoc, diag::err_builtin_invalid_arg_type) + << 1 << /* vector ty*/ 4 << A.get()->getType(); + } + + TheCall->setType(TyA->getElementType()); + return false; +} + ExprResult Sema::SemaBuiltinMatrixTranspose(CallExpr *TheCall, ExprResult CallResult) { if (checkArgCount(*this, TheCall, 1)) diff --git a/clang/test/CodeGen/builtins-reduction-math.c 
b/clang/test/CodeGen/builtins-reduction-math.c new file mode 100644 index 0000000000000..417caed494d90 --- /dev/null +++ b/clang/test/CodeGen/builtins-reduction-math.c @@ -0,0 +1,59 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s + +typedef float float4 __attribute__((ext_vector_type(4))); +typedef short int si8 __attribute__((ext_vector_type(8))); +typedef unsigned int u4 __attribute__((ext_vector_type(4))); + +__attribute__((address_space(1))) float4 vf1_as_one; + +void test_builtin_reduce_max(float4 vf1, si8 vi1, u4 vu1) { + // CHECK-LABEL: define void @test_builtin_reduce_max( + // CHECK: [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16 + // CHECK-NEXT: call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[VF1]]) + float r1 = __builtin_reduce_max(vf1); + + // CHECK: [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16 + // CHECK-NEXT: call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> [[VI1]]) + short r2 = __builtin_reduce_max(vi1); + + // CHECK: [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16 + // CHECK-NEXT: call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[VU1]]) + unsigned r3 = __builtin_reduce_max(vu1); + + // CHECK: [[VF1_AS1:%.+]] = load <4 x float>, <4 x float> addrspace(1)* @vf1_as_one, align 16 + // CHECK-NEXT: [[RDX1:%.+]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[VF1_AS1]]) + // CHECK-NEXT: fpext float [[RDX1]] to double + const double r4 = __builtin_reduce_max(vf1_as_one); + + // CHECK: [[CVI1:%.+]] = load <8 x i16>, <8 x i16>* %cvi1, align 16 + // CHECK-NEXT: [[RDX2:%.+]] = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> [[CVI1]]) + // CHECK-NEXT: sext i16 [[RDX2]] to i64 + const si8 cvi1 = vi1; + unsigned long long r5 = __builtin_reduce_max(cvi1); +} + +void test_builtin_reduce_min(float4 vf1, si8 vi1, u4 vu1) { + // CHECK-LABEL: define void @test_builtin_reduce_min( + // CHECK: [[VF1:%.+]] = load <4 x float>, <4 x float>* 
%vf1.addr, align 16 + // CHECK-NEXT: call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[VF1]]) + float r1 = __builtin_reduce_min(vf1); + + // CHECK: [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16 + // CHECK-NEXT: call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> [[VI1]]) + short r2 = __builtin_reduce_min(vi1); + + // CHECK: [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16 + // CHECK-NEXT: call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[VU1]]) + unsigned r3 = __builtin_reduce_min(vu1); + + // CHECK: [[VF1_AS1:%.+]] = load <4 x float>, <4 x float> addrspace(1)* @vf1_as_one, align 16 + // CHECK-NEXT: [[RDX1:%.+]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[VF1_AS1]]) + // CHECK-NEXT: fpext float [[RDX1]] to double + const double r4 = __builtin_reduce_min(vf1_as_one); + + // CHECK: [[CVI1:%.+]] = load <8 x i16>, <8 x i16>* %cvi1, align 16 + // CHECK-NEXT: [[RDX2:%.+]] = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> [[CVI1]]) + // CHECK-NEXT: sext i16 [[RDX2]] to i64 + const si8 cvi1 = vi1; + unsigned long long r5 = __builtin_reduce_min(cvi1); +} diff --git a/clang/test/Sema/builtins-reduction-math.c b/clang/test/Sema/builtins-reduction-math.c new file mode 100644 index 0000000000000..0d1aecaa99c30 --- /dev/null +++ b/clang/test/Sema/builtins-reduction-math.c @@ -0,0 +1,37 @@ +// RUN: %clang_cc1 %s -pedantic -verify -triple=x86_64-apple-darwin9 + +typedef float float4 __attribute__((ext_vector_type(4))); +typedef int int3 __attribute__((ext_vector_type(3))); +typedef unsigned unsigned4 __attribute__((ext_vector_type(4))); + +struct Foo { + char *p; +}; + +void test_builtin_reduce_max(int i, float4 v, int3 iv) { + struct Foo s = __builtin_reduce_max(iv); + // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}} + + i = __builtin_reduce_max(v, v); + // expected-error@-1 {{too many arguments to function call, expected 1, have 2}} + + i = __builtin_reduce_max(); + // 
expected-error@-1 {{too few arguments to function call, expected 1, have 0}} + + i = __builtin_reduce_max(i); + // expected-error@-1 {{1st argument must be a vector type (was 'int')}} +} + +void test_builtin_reduce_min(int i, float4 v, int3 iv) { + struct Foo s = __builtin_reduce_min(iv); + // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}} + + i = __builtin_reduce_min(v, v); + // expected-error@-1 {{too many arguments to function call, expected 1, have 2}} + + i = __builtin_reduce_min(); + // expected-error@-1 {{too few arguments to function call, expected 1, have 0}} + + i = __builtin_reduce_min(i); + // expected-error@-1 {{1st argument must be a vector type (was 'int')}} +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits