Author: Lawrence Benson Date: 2024-04-29T16:45:33+02:00 New Revision: bd07c22e5372789c3eb47b9009029d5e99e0ef9f
URL: https://github.com/llvm/llvm-project/commit/bd07c22e5372789c3eb47b9009029d5e99e0ef9f DIFF: https://github.com/llvm/llvm-project/commit/bd07c22e5372789c3eb47b9009029d5e99e0ef9f.diff LOG: [Clang] Add support for scalable vectors in __builtin_reduce_* functions (#87750) Currently, a lot of `__builtin_reduce_*` functions do not support scalable vectors, i.e., ARM SVE and RISC-V V. This PR adds support for them. The main code change is to use a different path to extract the type from the vectors; the rest is the same, and LLVM supports the reduce functions for `vscale` vectors. This PR adds scalable vector support for: - `__builtin_reduce_add` - `__builtin_reduce_mul` - `__builtin_reduce_xor` - `__builtin_reduce_or` - `__builtin_reduce_and` - `__builtin_reduce_min` - `__builtin_reduce_max` Note: For all except `min/max`, the element type must still be an integer value. Adding floating point support for `add` and `mul` is still an open TODO. Added: Modified: clang/docs/LanguageExtensions.rst clang/docs/ReleaseNotes.rst clang/include/clang/AST/Type.h clang/lib/AST/Type.cpp clang/lib/CodeGen/CGBuiltin.cpp clang/lib/Sema/SemaChecking.cpp clang/test/CodeGen/builtins-reduction-math.c Removed: ################################################################################ diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 127d1b6dd48252..87cb743856b07e 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -711,6 +711,8 @@ even-odd element pair with indices ``i * 2`` and ``i * 2 + 1`` with power of 2, the vector is widened with neutral elements for the reduction at the end to the next power of 2. +These reductions support both fixed-sized and scalable vector types. + Example: .. 
code-block:: c++ diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 347c81253d39e6..4cb2462ae64956 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -222,6 +222,7 @@ Non-comprehensive list of changes in this release - ``__typeof_unqual__`` is available in all C modes as an extension, which behaves like ``typeof_unqual`` from C23, similar to ``__typeof__`` and ``typeof``. +- ``__builtin_reduce_{add|mul|xor|or|and|min|max}`` builtins now support scalable vectors. * Shared libraries linked with either the ``-ffast-math``, ``-Ofast``, or ``-funsafe-math-optimizations`` flags will no longer enable flush-to-zero diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index dff02d4861b3db..fa2b47ed3d2350 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -2378,6 +2378,10 @@ class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase { /// 'riscv_rvv_vector_bits' type attribute as VectorType. QualType getRVVEltType(const ASTContext &Ctx) const; + /// Returns the representative type for the element of a sizeless vector + /// builtin type. + QualType getSizelessVectorEltType(const ASTContext &Ctx) const; + /// Types are partitioned into 3 broad categories (C99 6.2.5p1): /// object types, function types, and incomplete types. 
diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index 8aaa6801d85b8b..68e81f45b4c28e 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -2510,6 +2510,18 @@ bool Type::isSveVLSBuiltinType() const { return false; } +QualType Type::getSizelessVectorEltType(const ASTContext &Ctx) const { + assert(isSizelessVectorType() && "Must be sizeless vector type"); + // Currently supports SVE and RVV + if (isSVESizelessBuiltinType()) + return getSveEltType(Ctx); + + if (isRVVSizelessBuiltinType()) + return getRVVEltType(Ctx); + + llvm_unreachable("Unhandled type"); +} + QualType Type::getSveEltType(const ASTContext &Ctx) const { assert(isSveVLSBuiltinType() && "unsupported type!"); diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index d08ab539148914..a370734e00d3e1 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -3885,9 +3885,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } case Builtin::BI__builtin_reduce_max: { - auto GetIntrinsicID = [](QualType QT) { + auto GetIntrinsicID = [this](QualType QT) { if (auto *VecTy = QT->getAs<VectorType>()) QT = VecTy->getElementType(); + else if (QT->isSizelessVectorType()) + QT = QT->getSizelessVectorEltType(CGM.getContext()); + if (QT->isSignedIntegerType()) return llvm::Intrinsic::vector_reduce_smax; if (QT->isUnsignedIntegerType()) @@ -3900,9 +3903,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } case Builtin::BI__builtin_reduce_min: { - auto GetIntrinsicID = [](QualType QT) { + auto GetIntrinsicID = [this](QualType QT) { if (auto *VecTy = QT->getAs<VectorType>()) QT = VecTy->getElementType(); + else if (QT->isSizelessVectorType()) + QT = QT->getSizelessVectorEltType(CGM.getContext()); + if (QT->isSignedIntegerType()) return llvm::Intrinsic::vector_reduce_smin; if (QT->isUnsignedIntegerType()) diff --git a/clang/lib/Sema/SemaChecking.cpp 
b/clang/lib/Sema/SemaChecking.cpp index e33113ab9c4c1d..e26cf20d10363b 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -3164,13 +3164,20 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, const Expr *Arg = TheCall->getArg(0); const auto *TyA = Arg->getType()->getAs<VectorType>(); - if (!TyA) { + + QualType ElTy; + if (TyA) + ElTy = TyA->getElementType(); + else if (Arg->getType()->isSizelessVectorType()) + ElTy = Arg->getType()->getSizelessVectorEltType(Context); + + if (ElTy.isNull()) { Diag(Arg->getBeginLoc(), diag::err_builtin_invalid_arg_type) << 1 << /* vector ty*/ 4 << Arg->getType(); return ExprError(); } - TheCall->setType(TyA->getElementType()); + TheCall->setType(ElTy); break; } @@ -3186,12 +3193,20 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, const Expr *Arg = TheCall->getArg(0); const auto *TyA = Arg->getType()->getAs<VectorType>(); - if (!TyA || !TyA->getElementType()->isIntegerType()) { + + QualType ElTy; + if (TyA) + ElTy = TyA->getElementType(); + else if (Arg->getType()->isSizelessVectorType()) + ElTy = Arg->getType()->getSizelessVectorEltType(Context); + + if (ElTy.isNull() || !ElTy->isIntegerType()) { Diag(Arg->getBeginLoc(), diag::err_builtin_invalid_arg_type) << 1 << /* vector of integers */ 6 << Arg->getType(); return ExprError(); } - TheCall->setType(TyA->getElementType()); + + TheCall->setType(ElTy); break; } diff --git a/clang/test/CodeGen/builtins-reduction-math.c b/clang/test/CodeGen/builtins-reduction-math.c index 34f39cea5265ea..acafe9222d59fd 100644 --- a/clang/test/CodeGen/builtins-reduction-math.c +++ b/clang/test/CodeGen/builtins-reduction-math.c @@ -1,5 +1,8 @@ // RUN: %clang_cc1 -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -O1 -triple aarch64 -target-feature +sve %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=SVE %s + 
typedef float float4 __attribute__((ext_vector_type(4))); typedef short int si8 __attribute__((ext_vector_type(8))); typedef unsigned int u4 __attribute__((ext_vector_type(4))); @@ -134,3 +137,53 @@ void test_builtin_reduce_and(si8 vi1, u4 vu1) { // CHECK-NEXT: call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[VU1]]) unsigned r3 = __builtin_reduce_and(vu1); } + +#if defined(__ARM_FEATURE_SVE) +#include <arm_sve.h> + +void test_builtin_reduce_SVE(int a, unsigned long long b, short c, float d) { + // SVE-LABEL: void @test_builtin_reduce_SVE( + + svint32_t vec_a = svdup_s32(a); + svuint64_t vec_b = svdup_u64(b); + svint16_t vec_c1 = svdup_s16(c); + svuint16_t vec_c2 = svdup_u16(c); + svfloat32_t vec_d = svdup_f32(d); + + // SVE: [[VF1:%.+]] = load <vscale x 4 x i32>, ptr %vec_a + // SVE-NEXT: call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[VF1]]) + int r1 = __builtin_reduce_add(vec_a); + + // SVE: [[VF2:%.+]] = load <vscale x 4 x i32>, ptr %vec_a + // SVE-NEXT: call i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32> [[VF2]]) + int r2 = __builtin_reduce_mul(vec_a); + + // SVE: [[VF3:%.+]] = load <vscale x 2 x i64>, ptr %vec_b + // SVE-NEXT: call i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64> [[VF3]]) + long long r3 = __builtin_reduce_xor(vec_b); + + // SVE: [[VF4:%.+]] = load <vscale x 2 x i64>, ptr %vec_b + // SVE-NEXT: call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> [[VF4]]) + long long r4 = __builtin_reduce_or(vec_b); + + // SVE: [[VF5:%.+]] = load <vscale x 2 x i64>, ptr %vec_b + // SVE-NEXT: call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> [[VF5]]) + long long r5 = __builtin_reduce_and(vec_b); + + // SVE: [[VF6:%.+]] = load <vscale x 8 x i16>, ptr %vec_c1 + // SVE-NEXT: call i16 @llvm.vector.reduce.smax.nxv8i16(<vscale x 8 x i16> [[VF6]]) + short r6 = __builtin_reduce_max(vec_c1); + + // SVE: [[VF7:%.+]] = load <vscale x 8 x i16>, ptr %vec_c2 + // SVE-NEXT: call i16 @llvm.vector.reduce.umin.nxv8i16(<vscale x 8 x 
i16> [[VF7]]) + unsigned short r7 = __builtin_reduce_min(vec_c2); + + // SVE: [[VF8:%.+]] = load <vscale x 4 x float>, ptr %vec_d + // SVE-NEXT: call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> [[VF8]]) + float r8 = __builtin_reduce_max(vec_d); + + // SVE: [[VF9:%.+]] = load <vscale x 4 x float>, ptr %vec_d + // SVE-NEXT: call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> [[VF9]]) + float r9 = __builtin_reduce_min(vec_d); +} +#endif _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits