r324988 - [DebugInfo] Avoid name conflict of generated VLA expression variable.
Author: s.desmalen Date: Mon Feb 12 23:49:34 2018 New Revision: 324988 URL: http://llvm.org/viewvc/llvm-project?rev=324988&view=rev Log: [DebugInfo] Avoid name conflict of generated VLA expression variable. Summary: This patch also adds the 'DW_AT_artificial' flag to the generated variable. Addresses the issues mentioned in http://llvm.org/PR30553. Reviewers: CarlosAlbertoEnciso, probinson, aprantl Reviewed By: aprantl Subscribers: JDevlieghere, cfe-commits Differential Revision: https://reviews.llvm.org/D43189 Modified: cfe/trunk/lib/CodeGen/CGDecl.cpp cfe/trunk/test/CodeGen/debug-info-vla.c cfe/trunk/test/CodeGenCXX/debug-info-vla.cpp cfe/trunk/test/OpenMP/target_codegen.cpp cfe/trunk/test/OpenMP/target_parallel_codegen.cpp cfe/trunk/test/OpenMP/target_parallel_for_codegen.cpp cfe/trunk/test/OpenMP/target_parallel_for_simd_codegen.cpp cfe/trunk/test/OpenMP/target_simd_codegen.cpp cfe/trunk/test/OpenMP/target_teams_codegen.cpp cfe/trunk/test/OpenMP/target_teams_distribute_codegen.cpp cfe/trunk/test/OpenMP/target_teams_distribute_simd_codegen.cpp Modified: cfe/trunk/lib/CodeGen/CGDecl.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGDecl.cpp?rev=324988&r1=324987&r2=324988&view=diff == --- cfe/trunk/lib/CodeGen/CGDecl.cpp (original) +++ cfe/trunk/lib/CodeGen/CGDecl.cpp Mon Feb 12 23:49:34 2018 @@ -969,8 +969,8 @@ void CodeGenFunction::EmitAndRegisterVar if (auto *C = dyn_cast(VlaSize.NumElts)) Dimensions.emplace_back(C, Type1D.getUnqualifiedType()); else { - auto SizeExprAddr = - CreateDefaultAlignTempAlloca(VlaSize.NumElts->getType(), "vla_expr"); + auto SizeExprAddr = CreateDefaultAlignTempAlloca( + VlaSize.NumElts->getType(), "__vla_expr"); Builder.CreateStore(VlaSize.NumElts, SizeExprAddr); Dimensions.emplace_back(SizeExprAddr.getPointer(), Type1D.getUnqualifiedType()); @@ -999,6 +999,7 @@ void CodeGenFunction::EmitAndRegisterVar getContext(), const_cast(D.getDeclContext()), D.getLocation(), D.getLocation(), &NameIdent, QT, 
getContext().CreateTypeSourceInfo(QT), SC_Auto); + ArtificialDecl->setImplicit(); MD = DI->EmitDeclareOfAutoVariable(ArtificialDecl, VlaSize.NumElts, Builder); Modified: cfe/trunk/test/CodeGen/debug-info-vla.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/debug-info-vla.c?rev=324988&r1=324987&r2=324988&view=diff == --- cfe/trunk/test/CodeGen/debug-info-vla.c (original) +++ cfe/trunk/test/CodeGen/debug-info-vla.c Mon Feb 12 23:49:34 2018 @@ -2,9 +2,9 @@ void testVLAwithSize(int s) { -// CHECK-DAG: dbg.declare({{.*}} %vla_expr, metadata ![[VLAEXPR:[0-9]+]] +// CHECK-DAG: dbg.declare({{.*}} %__vla_expr, metadata ![[VLAEXPR:[0-9]+]] // CHECK-DAG: dbg.declare({{.*}} %vla, metadata ![[VAR:[0-9]+]] -// CHECK-DAG: ![[VLAEXPR]] = !DILocalVariable(name: "vla_expr" +// CHECK-DAG: ![[VLAEXPR]] = !DILocalVariable(name: "__vla_expr", {{.*}} flags: DIFlagArtificial // CHECK-DAG: ![[VAR]] = !DILocalVariable(name: "vla",{{.*}} line: [[@LINE+2]] // CHECK-DAG: !DISubrange(count: ![[VLAEXPR]]) int vla[s]; Modified: cfe/trunk/test/CodeGenCXX/debug-info-vla.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/debug-info-vla.cpp?rev=324988&r1=324987&r2=324988&view=diff == --- cfe/trunk/test/CodeGenCXX/debug-info-vla.cpp (original) +++ cfe/trunk/test/CodeGenCXX/debug-info-vla.cpp Mon Feb 12 23:49:34 2018 @@ -13,7 +13,7 @@ int (*fp)(int[][*]) = nullptr; // CHECK: [[ELEM_TYPE]] = !{[[NOCOUNT:.*]]} // CHECK: [[NOCOUNT]] = !DISubrange(count: -1) // -// CHECK: [[VAR:![0-9]+]] = !DILocalVariable(name: "vla_expr" +// CHECK: [[VAR:![0-9]+]] = !DILocalVariable(name: "__vla_expr", {{.*}}flags: DIFlagArtificial // CHECK: !DICompositeType(tag: DW_TAG_array_type, // CHECK-NOT: size: // CHECK-SAME: elements: [[ELEM_TYPE:![0-9]+]] Modified: cfe/trunk/test/OpenMP/target_codegen.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/target_codegen.cpp?rev=324988&r1=324987&r2=324988&view=diff == --- cfe/trunk/test/OpenMP/target_codegen.cpp (original) +++ 
cfe/trunk/test/OpenMP/target_codegen.cpp Mon Feb 12 23:49:34 2018 @@ -511,7 +511,7 @@ int bar(int n){ // CHECK-64: store i32 %{{.+}}, i32* [[B_ADDR]], // CHECK-64: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_CADDR]], -// CHECK-32: store i32 %{{.+}}, i32* %vla_expr +// CHECK-32: store i32 %{{.+}}, i32* %__vla_expr // CHECK-32: store i32 %{{.+}}, i32* [[B_ADDR:%.+]],
r347571 - [AArch64] Add aarch64_vector_pcs function attribute to Clang
Author: s.desmalen Date: Mon Nov 26 08:38:37 2018 New Revision: 347571 URL: http://llvm.org/viewvc/llvm-project?rev=347571&view=rev Log: [AArch64] Add aarch64_vector_pcs function attribute to Clang This is the Clang patch to complement the following LLVM patches: https://reviews.llvm.org/D51477 https://reviews.llvm.org/D51479 More information describing the vector ABI and procedure call standard can be found here: https://developer.arm.com/products/software-development-tools/\ hpc/arm-compiler-for-hpc/vector-function-abi Patch by Kerry McLaughlin. Reviewed By: rjmccall Differential Revision: https://reviews.llvm.org/D54425 Added: cfe/trunk/test/CodeGen/aarch64-vpcs.c cfe/trunk/test/Sema/aarch64-vpcs.c Modified: cfe/trunk/include/clang-c/Index.h cfe/trunk/include/clang/Basic/Attr.td cfe/trunk/include/clang/Basic/AttrDocs.td cfe/trunk/include/clang/Basic/Specifiers.h cfe/trunk/lib/AST/ItaniumMangle.cpp cfe/trunk/lib/AST/Type.cpp cfe/trunk/lib/AST/TypePrinter.cpp cfe/trunk/lib/Basic/Targets/AArch64.cpp cfe/trunk/lib/CodeGen/CGCall.cpp cfe/trunk/lib/CodeGen/CGDebugInfo.cpp cfe/trunk/lib/Sema/SemaDeclAttr.cpp cfe/trunk/lib/Sema/SemaType.cpp cfe/trunk/test/Sema/callingconv.c cfe/trunk/tools/libclang/CXType.cpp Modified: cfe/trunk/include/clang-c/Index.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang-c/Index.h?rev=347571&r1=347570&r2=347571&view=diff == --- cfe/trunk/include/clang-c/Index.h (original) +++ cfe/trunk/include/clang-c/Index.h Mon Nov 26 08:38:37 2018 @@ -3336,6 +3336,7 @@ enum CXCallingConv { CXCallingConv_Swift = 13, CXCallingConv_PreserveMost = 14, CXCallingConv_PreserveAll = 15, + CXCallingConv_AArch64VectorCall = 16, CXCallingConv_Invalid = 100, CXCallingConv_Unexposed = 200 Modified: cfe/trunk/include/clang/Basic/Attr.td URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/Attr.td?rev=347571&r1=347570&r2=347571&view=diff == --- cfe/trunk/include/clang/Basic/Attr.td (original) +++ cfe/trunk/include/clang/Basic/Attr.td 
Mon Nov 26 08:38:37 2018 @@ -1785,6 +1785,11 @@ def Pcs : DeclOrTypeAttr { let Documentation = [PcsDocs]; } +def AArch64VectorPcs: DeclOrTypeAttr { + let Spellings = [Clang<"aarch64_vector_pcs">]; + let Documentation = [AArch64VectorPcsDocs]; +} + def Pure : InheritableAttr { let Spellings = [GCC<"pure">]; let Documentation = [Undocumented]; Modified: cfe/trunk/include/clang/Basic/AttrDocs.td URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/AttrDocs.td?rev=347571&r1=347570&r2=347571&view=diff == --- cfe/trunk/include/clang/Basic/AttrDocs.td (original) +++ cfe/trunk/include/clang/Basic/AttrDocs.td Mon Nov 26 08:38:37 2018 @@ -1742,6 +1742,31 @@ similar to ``stdcall`` on x86. Valid par }]; } +def AArch64VectorPcsDocs : Documentation { + let Category = DocCatCallingConvs; + let Content = [{ +On AArch64 targets, this attribute changes the calling convention of a +function to preserve additional floating-point and Advanced SIMD registers +relative to the default calling convention used for AArch64. + +This means it is more efficient to call such functions from code that performs +extensive floating-point and vector calculations, because fewer live SIMD and FP +registers need to be saved. This property makes it well-suited for e.g. +floating-point or vector math library functions, which are typically leaf +functions that require a small number of registers. + +However, using this attribute also means that it is more expensive to call +a function that adheres to the default calling convention from within such +a function. Therefore, it is recommended that this attribute is only used +for leaf functions. + +For more information, see the documentation for `aarch64_vector_pcs`_ on +the Arm Developer website. + +.. 
_`aarch64_vector_pcs`: https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi + }]; +} + def RegparmDocs : Documentation { let Category = DocCatCallingConvs; let Content = [{ Modified: cfe/trunk/include/clang/Basic/Specifiers.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/Specifiers.h?rev=347571&r1=347570&r2=347571&view=diff == --- cfe/trunk/include/clang/Basic/Specifiers.h (original) +++ cfe/trunk/include/clang/Basic/Specifiers.h Mon Nov 26 08:38:37 2018 @@ -251,6 +251,7 @@ namespace clang { CC_Swift,// __attribute__((swiftcall)) CC_PreserveMost, // __attribute__((preserve_most)) CC_PreserveAll, // __attribute__((preserve_all)) +CC_AArch64VectorCall, // __attribute
r366878 - [SVE][Inline-Asm] Add support to specify SVE registers in the clobber list
Author: s.desmalen Date: Wed Jul 24 01:42:34 2019 New Revision: 366878 URL: http://llvm.org/viewvc/llvm-project?rev=366878&view=rev Log: [SVE][Inline-Asm] Add support to specify SVE registers in the clobber list Adds the SVE vector and predicate registers to the list of known registers. Patch by Kerry McLaughlin. Reviewers: erichkeane, sdesmalen, rengolin Reviewed By: sdesmalen Differential Revision: https://reviews.llvm.org/D64739 Added: cfe/trunk/test/CodeGen/aarch64-sve-inline-asm.c Modified: cfe/trunk/lib/Basic/Targets/AArch64.cpp Modified: cfe/trunk/lib/Basic/Targets/AArch64.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/AArch64.cpp?rev=366878&r1=366877&r2=366878&view=diff == --- cfe/trunk/lib/Basic/Targets/AArch64.cpp (original) +++ cfe/trunk/lib/Basic/Targets/AArch64.cpp Wed Jul 24 01:42:34 2019 @@ -351,10 +351,19 @@ const char *const AArch64TargetInfo::GCC "d12", "d13", "d14", "d15", "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31", -// Vector registers +// Neon vector registers "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", -"v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" +"v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", + +// SVE vector registers +"z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", "z8", "z9", "z10", +"z11", "z12", "z13", "z14", "z15", "z16", "z17", "z18", "z19", "z20", "z21", +"z22", "z23", "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", + +// SVE predicate registers +"p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10", +"p11", "p12", "p13", "p14", "p15" }; ArrayRef AArch64TargetInfo::getGCCRegNames() const { Added: cfe/trunk/test/CodeGen/aarch64-sve-inline-asm.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-sve-inline-asm.c?rev=366878&view=auto == --- 
cfe/trunk/test/CodeGen/aarch64-sve-inline-asm.c (added) +++ cfe/trunk/test/CodeGen/aarch64-sve-inline-asm.c Wed Jul 24 01:42:34 2019 @@ -0,0 +1,13 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK + +void test_sve_asm() { + asm volatile( + "ptrue p0.d\n" + "ptrue p15.d\n" + "add z0.d, p0/m, z0.d, z0.d\n" + "add z31.d, p0/m, z31.d, z31.d\n" + : + : + : "z0", "z31", "p0", "p15"); + // CHECK: "~{z0},~{z31},~{p0},~{p15}" +} ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r367301 - [AArch64] Disable __ARM_FEATURE_SVE without ACLE.
Author: s.desmalen Date: Tue Jul 30 03:14:39 2019 New Revision: 367301 URL: http://llvm.org/viewvc/llvm-project?rev=367301&view=rev Log: [AArch64] Disable __ARM_FEATURE_SVE without ACLE. The Arm C Language Extensions for SVE document specifies that __ARM_FEATURE_SVE should be set when the compiler supports SVE and implements all the extensions described in the document. This is currently not yet the case, so the feature should be disabled until the compiler can provide all the extensions as described. Reviewers: c-rhodes, rengolin, rovka, ktkachov Reviewed By: rengolin Differential Revision: https://reviews.llvm.org/D65404 Modified: cfe/trunk/lib/Basic/Targets/AArch64.cpp cfe/trunk/test/Preprocessor/aarch64-target-features.c Modified: cfe/trunk/lib/Basic/Targets/AArch64.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/AArch64.cpp?rev=367301&r1=367300&r2=367301&view=diff == --- cfe/trunk/lib/Basic/Targets/AArch64.cpp (original) +++ cfe/trunk/lib/Basic/Targets/AArch64.cpp Tue Jul 30 03:14:39 2019 @@ -196,9 +196,6 @@ void AArch64TargetInfo::getTargetDefines Builder.defineMacro("__ARM_NEON_FP", "0xE"); } - if (FPU & SveMode) -Builder.defineMacro("__ARM_FEATURE_SVE", "1"); - if (HasCRC) Builder.defineMacro("__ARM_FEATURE_CRC32", "1"); Modified: cfe/trunk/test/Preprocessor/aarch64-target-features.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Preprocessor/aarch64-target-features.c?rev=367301&r1=367300&r2=367301&view=diff == --- cfe/trunk/test/Preprocessor/aarch64-target-features.c (original) +++ cfe/trunk/test/Preprocessor/aarch64-target-features.c Tue Jul 30 03:14:39 2019 @@ -88,7 +88,7 @@ // RUN: %clang -target aarch64 -mtune=cyclone -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MTUNE-CYCLONE %s // RUN: %clang -target aarch64-none-linux-gnu -march=armv8-a+sve -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SVE %s -// CHECK-SVE: __ARM_FEATURE_SVE 1 +// CHECK-SVE-NOT: __ARM_FEATURE_SVE 1 // RUN: %clang -target 
aarch64-none-linux-gnu -march=armv8.2a+dotprod -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-DOTPROD %s // CHECK-DOTPROD: __ARM_FEATURE_DOTPROD 1 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] f6ea026 - [SveEmitter] Fix encoding/decoding of SVETypeFlags
Author: Sander de Smalen Date: 2020-04-14T15:48:28+01:00 New Revision: f6ea026f179a02cd335d71e4800b4d67c25c006b URL: https://github.com/llvm/llvm-project/commit/f6ea026f179a02cd335d71e4800b4d67c25c006b DIFF: https://github.com/llvm/llvm-project/commit/f6ea026f179a02cd335d71e4800b4d67c25c006b.diff LOG: [SveEmitter] Fix encoding/decoding of SVETypeFlags Summary: This issue was introduced when reworking D75861. The bug isn't actually hit with current unit tests because the contiguous loads/stores infer the EltType and the MemEltType from the pointer and result, rather than using the flags. But it will be needed for other intrinsics, such as gather/scatter. Reviewers: SjoerdMeijer, Andrzej Reviewed By: SjoerdMeijer Subscribers: andwar, tschuett, cfe-commits, llvm-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D76617 Added: Modified: clang/include/clang/Basic/TargetBuiltins.h clang/include/clang/Basic/arm_sve.td clang/utils/TableGen/SveEmitter.cpp Removed: diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h index 99af2412426f..468167957974 100644 --- a/clang/include/clang/Basic/TargetBuiltins.h +++ b/clang/include/clang/Basic/TargetBuiltins.h @@ -17,6 +17,7 @@ #include #include "clang/Basic/Builtins.h" +#include "llvm/Support/MathExtras.h" #undef PPC namespace clang { @@ -163,6 +164,9 @@ namespace clang { /// Flags to identify the types for overloaded SVE builtins. 
class SVETypeFlags { uint64_t Flags; +unsigned EltTypeShift; +unsigned MemEltTypeShift; +unsigned MergeTypeShift; public: #define LLVM_GET_SVE_TYPEFLAGS @@ -181,15 +185,27 @@ namespace clang { #undef LLVM_GET_SVE_MEMELTTYPES }; -SVETypeFlags(uint64_t F) : Flags(F) {} -SVETypeFlags(EltType ET, bool IsUnsigned) : Flags(ET) {} +enum MergeType { +#define LLVM_GET_SVE_MERGETYPES +#include "clang/Basic/arm_sve_typeflags.inc" +#undef LLVM_GET_SVE_MERGETYPES +}; +SVETypeFlags(uint64_t F) : Flags(F) { + EltTypeShift = llvm::countTrailingZeros(EltTypeMask); + MemEltTypeShift = llvm::countTrailingZeros(MemEltTypeMask); + MergeTypeShift = llvm::countTrailingZeros(MergeTypeMask); +} EltType getEltType() const { - return (EltType)((Flags & EltTypeMask) - FirstEltType); + return (EltType)((Flags & EltTypeMask) >> EltTypeShift); } MemEltType getMemEltType() const { - return (MemEltType)((Flags & MemEltTypeMask) - FirstMemEltType); + return (MemEltType)((Flags & MemEltTypeMask) >> MemEltTypeShift); +} + +MergeType getMergeType() const { + return (MergeType)((Flags & MergeTypeMask) >> MergeTypeShift); } bool isLoad() const { return Flags & IsLoad; } diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 4e75d0339b5c..9ed4ae86e1b7 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -84,15 +84,16 @@ // Y: const pointer to uint32_t // Z: const pointer to uint64_t -class MergeType { +class MergeType { int Value = val; + string Suffix = suffix; } def MergeNone: MergeType<0>; -def MergeAny : MergeType<1>; -def MergeOp1 : MergeType<2>; -def MergeZero: MergeType<3>; -def MergeAnyExp : MergeType<4>; // Use merged builtin with explicit -def MergeZeroExp : MergeType<5>; // generation of its inactive argument. 
+def MergeAny : MergeType<1, "_x">; +def MergeOp1 : MergeType<2, "_m">; +def MergeZero: MergeType<3, "_z">; +def MergeAnyExp : MergeType<4, "_x">; // Use merged builtin with explicit +def MergeZeroExp : MergeType<5, "_z">; // generation of its inactive argument. class EltType { int Value = val; @@ -134,13 +135,17 @@ def FirstMemEltType : FlagType<0x0010>; // : : // : : def MemEltTypeMask: FlagType<0x0070>; -def IsLoad: FlagType<0x0080>; -def IsStore : FlagType<0x0100>; -def IsGatherLoad : FlagType<0x0200>; -def IsScatterStore: FlagType<0x0400>; -def IsStructLoad : FlagType<0x0800>; -def IsStructStore : FlagType<0x1000>; -def IsZExtReturn : FlagType<0x2000>; // Return value is sign-extend by default +def FirstMergeTypeMask: FlagType<0x0080>; +// : : +// : : +def MergeTypeMask : FlagType<0x0380>; +def IsLoad: FlagType<0x2000>; +def IsStore : FlagType<0x4000>; +def IsGatherLoad : FlagType<0x8000>; +def IsScatterStore: FlagType<0x0001>; +def IsStructLoad : FlagType<0x0002
[clang] c8a5b30 - [SveEmitter] Add range checks for immediates and predicate patterns.
Author: Sander de Smalen Date: 2020-04-14T16:49:32+01:00 New Revision: c8a5b30bac695e9fbb592cf77364a60ebd6e0dbd URL: https://github.com/llvm/llvm-project/commit/c8a5b30bac695e9fbb592cf77364a60ebd6e0dbd DIFF: https://github.com/llvm/llvm-project/commit/c8a5b30bac695e9fbb592cf77364a60ebd6e0dbd.diff LOG: [SveEmitter] Add range checks for immediates and predicate patterns. Summary: This patch adds a mechanism to easily add range checks for a builtin's immediate operands. This patch is tested with the qdech intrinsic, which takes both an enum for the predicate pattern, as well as an immediate for the multiplier. Reviewers: efriedma, SjoerdMeijer, rovka Reviewed By: efriedma, SjoerdMeijer Subscribers: mgorny, tschuett, mgrang, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D76678 Added: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qdech.c clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_qdech.c Modified: clang/include/clang/Basic/CMakeLists.txt clang/include/clang/Basic/TargetBuiltins.h clang/include/clang/Basic/arm_sve.td clang/include/clang/Sema/Sema.h clang/lib/CodeGen/CGBuiltin.cpp clang/lib/CodeGen/CodeGenFunction.h clang/lib/Sema/SemaChecking.cpp clang/utils/TableGen/SveEmitter.cpp clang/utils/TableGen/TableGen.cpp clang/utils/TableGen/TableGenBackends.h Removed: diff --git a/clang/include/clang/Basic/CMakeLists.txt b/clang/include/clang/Basic/CMakeLists.txt index 5eda48e8f250..47a3198a0e91 100644 --- a/clang/include/clang/Basic/CMakeLists.txt +++ b/clang/include/clang/Basic/CMakeLists.txt @@ -69,6 +69,9 @@ clang_tablegen(arm_sve_builtin_cg.inc -gen-arm-sve-builtin-codegen clang_tablegen(arm_sve_typeflags.inc -gen-arm-sve-typeflags SOURCE arm_sve.td TARGET ClangARMSveTypeFlags) +clang_tablegen(arm_sve_sema_rangechecks.inc -gen-arm-sve-sema-rangechecks + SOURCE arm_sve.td + TARGET ClangARMSveSemaRangeChecks) clang_tablegen(arm_cde_builtins.inc -gen-arm-cde-builtin-def SOURCE arm_cde.td TARGET ClangARMCdeBuiltinsDef) diff 
--git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h index 468167957974..0821926a6881 100644 --- a/clang/include/clang/Basic/TargetBuiltins.h +++ b/clang/include/clang/Basic/TargetBuiltins.h @@ -190,6 +190,13 @@ namespace clang { #include "clang/Basic/arm_sve_typeflags.inc" #undef LLVM_GET_SVE_MERGETYPES }; + +enum ImmCheckType { +#define LLVM_GET_SVE_IMMCHECKTYPES +#include "clang/Basic/arm_sve_typeflags.inc" +#undef LLVM_GET_SVE_IMMCHECKTYPES +}; + SVETypeFlags(uint64_t F) : Flags(F) { EltTypeShift = llvm::countTrailingZeros(EltTypeMask); MemEltTypeShift = llvm::countTrailingZeros(MemEltTypeMask); diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 9ed4ae86e1b7..75fd3ca499d0 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -61,6 +61,10 @@ // d: default // c: const pointer type // P: predicate type +// +// i: constant uint64_t +// +// I: Predicate Pattern (sv_pattern) // l: int64_t @@ -147,9 +151,22 @@ def IsStructLoad : FlagType<0x0002>; def IsStructStore : FlagType<0x0004>; def IsZExtReturn : FlagType<0x0008>; // Return value is sign-extend by default +// These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h +class ImmCheckType { + int Value = val; +} +def ImmCheck0_31: ImmCheckType<0>; // 0..31 (used for e.g. predicate patterns) +def ImmCheck1_16: ImmCheckType<1>; // 1..16 + +class ImmCheck { + int Arg = arg; + int EltSizeArg = eltSizeArg; + ImmCheckType Kind = kind; +} + // Every intrinsic subclasses Inst. 
class Inst ft, MemEltType met> { + list ft, list ch, MemEltType met> { string Name = n; string Prototype = p; string Types = t; @@ -158,13 +175,21 @@ class Inst Flags = ft; + list ImmChecks = ch; int MemEltType = met.Value; } +// SInst: Instruction with signed/unsigned suffix (e.g., "s8", "u8") +class SInst ft = [], list ch = []> +: Inst { +} + // MInst: Instructions which access memory class MInst f, -MemEltType met=MemEltTyDefault, string i=""> - : Inst {} +MemEltType met = MemEltTyDefault, string i = ""> +: Inst { +} // Loads @@ -256,3 +281,8 @@ def SVSTNT1 : MInst<"svstnt1[_{d}]", "vPpd", "csilUcUsUiUlhfd", [IsStore], MemEl // Store one vector, with no truncation, non-temporal (scalar base, VL displacement) def SVSTNT1_VNUM : MInst<"svstnt1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfd", [IsStore], MemEltTyDefault, "aarch64_sve_stnt1">; + +
[clang] a8fbbf8 - [SveEmitter] NFC: Add missing ACLE tests
Author: Sander de Smalen Date: 2020-06-10T08:29:34+01:00 New Revision: a8fbbf8fe251652847bd98a5800a27375e13b1f9 URL: https://github.com/llvm/llvm-project/commit/a8fbbf8fe251652847bd98a5800a27375e13b1f9 DIFF: https://github.com/llvm/llvm-project/commit/a8fbbf8fe251652847bd98a5800a27375e13b1f9.diff LOG: [SveEmitter] NFC: Add missing ACLE tests These ACLE tests were missing in previous patches: - D79357: [SveEmitter] Add builtins for svdup and svindex - D78747: [SveEmitter] Add builtins for compares and ReverseCompare flag. - D76238: [SveEmitter] Implement builtins for contiguous loads/stores Added: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_acge.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_acgt.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_acle.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_aclt.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_index.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1sb.c Modified: Removed: diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_acge.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_acge.c new file mode 100644 index ..c5c307122545 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_acge.c @@ -0,0 +1,63 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svbool_t test_svacge_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2) +{ + // CHECK-LABEL: test_svacge_f16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.facge.nxv8f16( %[[PG]], %op1, %op2) + // CHECK: %[[CAST:.*]] = call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %[[INTRINSIC]]) + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svacge,_f16,,)(pg, op1, op2); +} + +svbool_t test_svacge_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2) +{ + // CHECK-LABEL: test_svacge_f32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.facge.nxv4f32( %[[PG]], %op1, %op2) + // CHECK: %[[CAST:.*]] = call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %[[INTRINSIC]]) + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svacge,_f32,,)(pg, op1, op2); +} + +svbool_t test_svacge_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2) +{ + // CHECK-LABEL: test_svacge_f64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.facge.nxv2f64( %[[PG]], %op1, %op2) + // CHECK: %[[CAST:.*]] = call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %[[INTRINSIC]]) + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svacge,_f64,,)(pg, op1, op2); +} + +svbool_t test_svacge_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2) +{ + // CHECK-LABEL: test_svacge_n_f32 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv4f32(float %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.facge.nxv4f32( %[[PG]], %op1, %[[DUP]]) + // CHECK: %[[CAST:.*]] = call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %[[INTRINSIC]]) + // CHECK: ret %[[CAST]] + return 
SVE_ACLE_FUNC(svacge,_n_f32,,)(pg, op1, op2); +} + +svbool_t test_svacge_n_f64(svbool_t pg, svfloat64_t op1, float64_t op2) +{ + // CHECK-LABEL: test_svacge_n_f64 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv2f64(double %op2) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.facge.nxv2f64( %[[PG]], %op1, %[[DUP]]) + // CHECK: %[[CAST:.*]] = call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %[[INTRINSIC]]) + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svacge,_n_f64,,)(pg, op1, op2); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_acgt.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_acgt.c new file mode 100644 index ..00e408ca4d94 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_acgt.c @@ -0,0 +1,63 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -We
[clang] 9810035 - [SVE] Ensure proper mangling of ACLE tuple types
Author: Sander de Smalen Date: 2020-06-15T07:36:12+01:00 New Revision: 98100353d784e599fca502a34490603942f1930c URL: https://github.com/llvm/llvm-project/commit/98100353d784e599fca502a34490603942f1930c DIFF: https://github.com/llvm/llvm-project/commit/98100353d784e599fca502a34490603942f1930c.diff LOG: [SVE] Ensure proper mangling of ACLE tuple types The AAPCS specifies that the tuple types such as `svint32x2_t` should use their `arm_sve.h` names when mangled instead of their builtin names. This patch also renames the internal types for the tuples to be prefixed with `__clang_`, so they are not misinterpreted as specified internal types like the non-tuple types which *are* defined in the AAPCS. Using a builtin type for the tuples is a purely a choice of the Clang implementation. Reviewers: rsandifo-arm, c-rhodes, efriedma, rengolin Reviewed By: efriedma Tags: #clang Differential Revision: https://reviews.llvm.org/D81721 Added: Modified: clang/include/clang/Basic/AArch64SVEACLETypes.def clang/lib/AST/ASTContext.cpp clang/lib/AST/ItaniumMangle.cpp clang/test/CodeGenCXX/aarch64-mangle-sve-vectors.cpp clang/utils/TableGen/SveEmitter.cpp Removed: diff --git a/clang/include/clang/Basic/AArch64SVEACLETypes.def b/clang/include/clang/Basic/AArch64SVEACLETypes.def index 2daf4c76a1ad..b0950d1058bf 100644 --- a/clang/include/clang/Basic/AArch64SVEACLETypes.def +++ b/clang/include/clang/Basic/AArch64SVEACLETypes.def @@ -38,83 +38,84 @@ //===--===// #ifndef SVE_VECTOR_TYPE -#define SVE_VECTOR_TYPE(Name, Id, SingletonId, NumEls, ElBits, IsSigned, IsFP) \ +#define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId, NumEls, ElBits, \ +IsSigned, IsFP) \ SVE_TYPE(Name, Id, SingletonId) #endif #ifndef SVE_PREDICATE_TYPE -#define SVE_PREDICATE_TYPE(Name, Id, SingletonId, NumEls)\ +#define SVE_PREDICATE_TYPE(Name, MangledName, Id, SingletonId, NumEls) \ SVE_TYPE(Name, Id, SingletonId) #endif //===- Vector point types ---===// -SVE_VECTOR_TYPE("__SVInt8_t", SveInt8, SveInt8Ty, 16, 8, 
true, false) -SVE_VECTOR_TYPE("__SVInt16_t", SveInt16, SveInt16Ty, 8, 16, true, false) -SVE_VECTOR_TYPE("__SVInt32_t", SveInt32, SveInt32Ty, 4, 32, true, false) -SVE_VECTOR_TYPE("__SVInt64_t", SveInt64, SveInt64Ty, 2, 64, true, false) +SVE_VECTOR_TYPE("__SVInt8_t", "__SVInt8_t", SveInt8, SveInt8Ty, 16, 8, true, false) +SVE_VECTOR_TYPE("__SVInt16_t", "__SVInt16_t", SveInt16, SveInt16Ty, 8, 16, true, false) +SVE_VECTOR_TYPE("__SVInt32_t", "__SVInt32_t", SveInt32, SveInt32Ty, 4, 32, true, false) +SVE_VECTOR_TYPE("__SVInt64_t", "__SVInt64_t", SveInt64, SveInt64Ty, 2, 64, true, false) -SVE_VECTOR_TYPE("__SVUint8_t", SveUint8, SveUint8Ty, 16, 8, false, false) -SVE_VECTOR_TYPE("__SVUint16_t", SveUint16, SveUint16Ty, 8, 16, false, false) -SVE_VECTOR_TYPE("__SVUint32_t", SveUint32, SveUint32Ty, 4, 32, false, false) -SVE_VECTOR_TYPE("__SVUint64_t", SveUint64, SveUint64Ty, 2, 64, false, false) +SVE_VECTOR_TYPE("__SVUint8_t", "__SVUint8_t", SveUint8, SveUint8Ty, 16, 8, false, false) +SVE_VECTOR_TYPE("__SVUint16_t", "__SVUint16_t", SveUint16, SveUint16Ty, 8, 16, false, false) +SVE_VECTOR_TYPE("__SVUint32_t", "__SVUint32_t", SveUint32, SveUint32Ty, 4, 32, false, false) +SVE_VECTOR_TYPE("__SVUint64_t", "__SVUint64_t", SveUint64, SveUint64Ty, 2, 64, false, false) -SVE_VECTOR_TYPE("__SVFloat16_t", SveFloat16, SveFloat16Ty, 8, 16, true, true) -SVE_VECTOR_TYPE("__SVFloat32_t", SveFloat32, SveFloat32Ty, 4, 32, true, true) -SVE_VECTOR_TYPE("__SVFloat64_t", SveFloat64, SveFloat64Ty, 2, 64, true, true) +SVE_VECTOR_TYPE("__SVFloat16_t", "__SVFloat16_t", SveFloat16, SveFloat16Ty, 8, 16, true, true) +SVE_VECTOR_TYPE("__SVFloat32_t", "__SVFloat32_t", SveFloat32, SveFloat32Ty, 4, 32, true, true) +SVE_VECTOR_TYPE("__SVFloat64_t", "__SVFloat64_t", SveFloat64, SveFloat64Ty, 2, 64, true, true) // // x2 // -SVE_VECTOR_TYPE("__SVInt8x2_t", SveInt8x2, SveInt8x2Ty, 32, 8, true, false) -SVE_VECTOR_TYPE("__SVInt16x2_t", SveInt16x2, SveInt16x2Ty, 16, 16, true, false) -SVE_VECTOR_TYPE("__SVInt32x2_t", 
SveInt32x2, SveInt32x2Ty, 8, 32, true, false) -SVE_VECTOR_TYPE("__SVInt64x2_t", SveInt64x2, SveInt64x2Ty, 4, 64, true, false) +SVE_VECTOR_TYPE("__clang_svint8x2_t", "svint8x2_t", SveInt8x2, SveInt8x2Ty, 32, 8, true, false) +SVE_VECTOR_TYPE("__clang_svint16x2_t", "svint16x2_t", SveInt16x2, SveInt16x2Ty, 16, 16, true, false) +SVE_VECTOR_TYPE("__clang_svint32x2_t", "svint32x2_t", SveInt32x2, SveInt32x2Ty, 8, 32, true, false) +SVE_VECTOR_TYPE("__clang_svint64x2_t", "svint64x2_t", SveInt64x2, SveInt64x2Ty, 4, 64, true, false) -SVE_VECTOR_TYPE("__SVUint8x2_t", SveUint8x2, SveUint8x2Ty, 32, 8, false, false) -SVE_VECTOR_TYPE("__SV
[clang] 91a4a59 - [SveEmitter] Add SVE tuple types and builtins for svundef.
Author: Sander de Smalen Date: 2020-06-15T07:36:01+01:00 New Revision: 91a4a592edb01ca1968b5e5abe3da21709bb72d6 URL: https://github.com/llvm/llvm-project/commit/91a4a592edb01ca1968b5e5abe3da21709bb72d6 DIFF: https://github.com/llvm/llvm-project/commit/91a4a592edb01ca1968b5e5abe3da21709bb72d6.diff LOG: [SveEmitter] Add SVE tuple types and builtins for svundef. This patch adds new SVE types to Clang that describe tuples of SVE vectors. For example `svint32x2_t` which maps to the twice-as-wide vector ``. Similarly, `svint32x3_t` will map to ``. It also adds builtins to return an `undef` vector for a given SVE type. Reviewers: c-rhodes, david-arm, ctetreau, efriedma, rengolin Reviewed By: c-rhodes Tags: #clang Differential Revision: https://reviews.llvm.org/D81459 Added: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef2.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef3.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef4.c Modified: clang/include/clang/Basic/AArch64SVEACLETypes.def clang/include/clang/Basic/TargetBuiltins.h clang/include/clang/Basic/arm_sve.td clang/lib/CodeGen/CGBuiltin.cpp clang/lib/CodeGen/CodeGenTypes.cpp clang/utils/TableGen/SveEmitter.cpp Removed: diff --git a/clang/include/clang/Basic/AArch64SVEACLETypes.def b/clang/include/clang/Basic/AArch64SVEACLETypes.def index afa651841861..2daf4c76a1ad 100644 --- a/clang/include/clang/Basic/AArch64SVEACLETypes.def +++ b/clang/include/clang/Basic/AArch64SVEACLETypes.def @@ -63,6 +63,57 @@ SVE_VECTOR_TYPE("__SVFloat16_t", SveFloat16, SveFloat16Ty, 8, 16, true, true) SVE_VECTOR_TYPE("__SVFloat32_t", SveFloat32, SveFloat32Ty, 4, 32, true, true) SVE_VECTOR_TYPE("__SVFloat64_t", SveFloat64, SveFloat64Ty, 2, 64, true, true) +// +// x2 +// +SVE_VECTOR_TYPE("__SVInt8x2_t", SveInt8x2, SveInt8x2Ty, 32, 8, true, false) +SVE_VECTOR_TYPE("__SVInt16x2_t", SveInt16x2, SveInt16x2Ty, 16, 16, true, false) +SVE_VECTOR_TYPE("__SVInt32x2_t", 
SveInt32x2, SveInt32x2Ty, 8, 32, true, false) +SVE_VECTOR_TYPE("__SVInt64x2_t", SveInt64x2, SveInt64x2Ty, 4, 64, true, false) + +SVE_VECTOR_TYPE("__SVUint8x2_t", SveUint8x2, SveUint8x2Ty, 32, 8, false, false) +SVE_VECTOR_TYPE("__SVUint16x2_t", SveUint16x2, SveUint16x2Ty, 16, 16, false, false) +SVE_VECTOR_TYPE("__SVUint32x2_t", SveUint32x2, SveUint32x2Ty, 8, 32, false, false) +SVE_VECTOR_TYPE("__SVUint64x2_t", SveUint64x2, SveUint64x2Ty, 4, 64, false, false) + +SVE_VECTOR_TYPE("__SVFloat16x2_t", SveFloat16x2, SveFloat16x2Ty, 16, 16, true, true) +SVE_VECTOR_TYPE("__SVFloat32x2_t", SveFloat32x2, SveFloat32x2Ty, 8, 32, true, true) +SVE_VECTOR_TYPE("__SVFloat64x2_t", SveFloat64x2, SveFloat64x2Ty, 4, 64, true, true) + +// +// x3 +// +SVE_VECTOR_TYPE("__SVInt8x3_t", SveInt8x3, SveInt8x3Ty, 48, 8, true, false) +SVE_VECTOR_TYPE("__SVInt16x3_t", SveInt16x3, SveInt16x3Ty, 24, 16, true, false) +SVE_VECTOR_TYPE("__SVInt32x3_t", SveInt32x3, SveInt32x3Ty, 12, 32, true, false) +SVE_VECTOR_TYPE("__SVInt64x3_t", SveInt64x3, SveInt64x3Ty, 6, 64, true, false) + +SVE_VECTOR_TYPE("__SVUint8x3_t", SveUint8x3, SveUint8x3Ty, 48, 8, false, false) +SVE_VECTOR_TYPE("__SVUint16x3_t", SveUint16x3, SveUint16x3Ty, 24, 16, false, false) +SVE_VECTOR_TYPE("__SVUint32x3_t", SveUint32x3, SveUint32x3Ty, 12, 32, false, false) +SVE_VECTOR_TYPE("__SVUint64x3_t", SveUint64x3, SveUint64x3Ty, 6, 64, false, false) + +SVE_VECTOR_TYPE("__SVFloat16x3_t", SveFloat16x3, SveFloat16x3Ty, 24, 16, true, true) +SVE_VECTOR_TYPE("__SVFloat32x3_t", SveFloat32x3, SveFloat32x3Ty, 12, 32, true, true) +SVE_VECTOR_TYPE("__SVFloat64x3_t", SveFloat64x3, SveFloat64x3Ty, 6, 64, true, true) + +// +// x4 +// +SVE_VECTOR_TYPE("__SVInt8x4_t", SveInt8x4, SveInt8x4Ty, 64, 8, true, false) +SVE_VECTOR_TYPE("__SVInt16x4_t", SveInt16x4, SveInt16x4Ty, 32, 16, true, false) +SVE_VECTOR_TYPE("__SVInt32x4_t", SveInt32x4, SveInt32x4Ty, 16, 32, true, false) +SVE_VECTOR_TYPE("__SVInt64x4_t", SveInt64x4, SveInt64x4Ty, 8, 64, true, false) + 
+SVE_VECTOR_TYPE("__SVUint8x4_t", SveUint8x4, SveUint8x4Ty, 64, 8, false, false) +SVE_VECTOR_TYPE("__SVUint16x4_t", SveUint16x4, SveUint16x4Ty, 32, 16, false, false) +SVE_VECTOR_TYPE("__SVUint32x4_t", SveUint32x4, SveUint32x4Ty, 16, 32, false, false) +SVE_VECTOR_TYPE("__SVUint64x4_t", SveUint64x4, SveUint64x4Ty, 8, 64, false, false) + +SVE_VECTOR_TYPE("__SVFloat16x4_t", SveFloat16x4, SveFloat16x4Ty, 32, 16, true, true) +SVE_VECTOR_TYPE("__SVFloat32x4_t", SveFloat32x4, SveFloat32x4Ty, 16, 32, true, true) +SVE_VECTOR_TYPE("__SVFloat64x4_t", SveFloat64x4, SveFloat64x4Ty, 8, 64, true, true) + SVE_PREDICATE_TYPE("__SVBool_t", SveBool, SveBoolTy, 16) #undef SVE_VECTOR_TYPE diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h index 5fa5f9f0bcef..899f6b5b6f3c 100644 --- a/clan
[clang] a06b2cb - Fix clang/test/Modules/empty.modulemap by increasing limit.
Author: Sander de Smalen Date: 2020-06-15T12:27:25+01:00 New Revision: a06b2cb0f5df68e06f0cbce5f2d5fb9d3dcb57d6 URL: https://github.com/llvm/llvm-project/commit/a06b2cb0f5df68e06f0cbce5f2d5fb9d3dcb57d6 DIFF: https://github.com/llvm/llvm-project/commit/a06b2cb0f5df68e06f0cbce5f2d5fb9d3dcb57d6.diff LOG: Fix clang/test/Modules/empty.modulemap by increasing limit. This fixes a buildbot failure on Builder llvm-clang-win-x-aarch64. The size of the module increased to just over 30kb due to new Decl TypeDefs being added to the module after D81459 that adds new ACLE types. Added: Modified: clang/test/Modules/empty.modulemap Removed: diff --git a/clang/test/Modules/empty.modulemap b/clang/test/Modules/empty.modulemap index aa9eba66a4f8..3225d88829ae 100644 --- a/clang/test/Modules/empty.modulemap +++ b/clang/test/Modules/empty.modulemap @@ -13,8 +13,8 @@ // The module file should be identical each time we produce it. // RUN: diff %t/base.pcm %t/check.pcm // -// We expect an empty module to be less than 30KB (and at least 10K, for now). +// We expect an empty module to be less than 40KB (and at least 10K, for now). // RUN: wc -c %t/base.pcm | FileCheck --check-prefix=CHECK-SIZE %s -// CHECK-SIZE: {{(^|[^0-9])[12][0-9][0-9][0-9][0-9]($|[^0-9])}} +// CHECK-SIZE: {{(^|[^0-9])[123][0-9][0-9][0-9][0-9]($|[^0-9])}} module empty { header "Inputs/empty.h" export * } ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] e51c1d0 - [SveEmitter] Add builtins for svtbl2
Author: Sander de Smalen Date: 2020-06-17T09:41:38+01:00 New Revision: e51c1d06a9922c3b6ce4b8b2e74126870ade1491 URL: https://github.com/llvm/llvm-project/commit/e51c1d06a9922c3b6ce4b8b2e74126870ade1491 DIFF: https://github.com/llvm/llvm-project/commit/e51c1d06a9922c3b6ce4b8b2e74126870ade1491.diff LOG: [SveEmitter] Add builtins for svtbl2 Reviewers: david-arm, efriedma, c-rhodes Reviewed By: c-rhodes Tags: #clang Differential Revision: https://reviews.llvm.org/D81462 Added: clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_tbl2.c Modified: clang/include/clang/Basic/arm_sve.td clang/lib/CodeGen/CGBuiltin.cpp Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 8c6abb1c3f4f..a7223f770455 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1838,6 +1838,7 @@ def SVWHILEWR_D : SInst<"svwhilewr[_{1}]", "Pcc", "lUld", MergeNone, "aarch64_sv // SVE2 - Extended table lookup/permute let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { +def SVTBL2 : SInst<"svtbl2[_{d}]", "d2u", "csilUcUsUiUlhfd", MergeNone>; def SVTBX : SInst<"svtbx[_{d}]", "dddu", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tbx">; } diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 3b3ea5e95705..b81b2a449425 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -8265,6 +8265,29 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(Intrinsic::vscale, Ty); return Builder.CreateMul(NumEls, Builder.CreateCall(F)); } + + case SVE::BI__builtin_sve_svtbl2_u8: + case SVE::BI__builtin_sve_svtbl2_s8: + case SVE::BI__builtin_sve_svtbl2_u16: + case SVE::BI__builtin_sve_svtbl2_s16: + case SVE::BI__builtin_sve_svtbl2_u32: + case SVE::BI__builtin_sve_svtbl2_s32: + case SVE::BI__builtin_sve_svtbl2_u64: + case SVE::BI__builtin_sve_svtbl2_s64: + case SVE::BI__builtin_sve_svtbl2_f16: + case 
SVE::BI__builtin_sve_svtbl2_f32: + case SVE::BI__builtin_sve_svtbl2_f64: { +SVETypeFlags TF(Builtin->TypeModifier); +auto VTy = cast(getSVEType(TF)); +auto TupleTy = llvm::VectorType::get(VTy->getElementType(), + VTy->getElementCount() * 2); +Function *FExtr = +CGM.getIntrinsic(Intrinsic::aarch64_sve_tuple_get, {VTy, TupleTy}); +Value *V0 = Builder.CreateCall(FExtr, {Ops[0], Builder.getInt32(0)}); +Value *V1 = Builder.CreateCall(FExtr, {Ops[0], Builder.getInt32(1)}); +Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy); +return Builder.CreateCall(F, {V0, V1, Ops[1]}); + } } /// Should not happen diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_tbl2.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_tbl2.c new file mode 100644 index ..d74b17f72663 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_tbl2.c @@ -0,0 +1,145 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify=overload -verify-ignore-unexpected=error %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint8_t test_svtbl2_s8(svint8x2_t data, svuint8_t indices) +{ + // CHECK-LABEL: test_svtbl2_s8 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv32i8( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv32i8( %data, i32 1) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.tbl2.nxv16i8( %[[V0]], %[[V1]], %indices) + // CHECK-NEXT: ret %[[INTRINSIC]] + // overload-warning@+2 {{implicit declaration of function 'svtbl2'}} + // expected-warning@+1 {{implicit declaration of function 'svtbl2_s8'}} + return SVE_ACLE_FUNC(svtbl2,_s8,,)(data, indices); +} + +svint16_t test_svtbl2_s16(svint16x2_t data, svuint16_t indices) +{ + /
[clang] 1d7b4a7 - [SveEmitter] Add builtins for tuple creation (svcreate2/svcreate3/etc)
Author: Sander de Smalen Date: 2020-06-18T10:07:09+01:00 New Revision: 1d7b4a7e5e4a25605ec9926da1fb461840a1f216 URL: https://github.com/llvm/llvm-project/commit/1d7b4a7e5e4a25605ec9926da1fb461840a1f216 DIFF: https://github.com/llvm/llvm-project/commit/1d7b4a7e5e4a25605ec9926da1fb461840a1f216.diff LOG: [SveEmitter] Add builtins for tuple creation (svcreate2/svcreate3/etc) The svcreate builtins allow constructing a tuple from individual vectors, e.g. svint32x2_t svcreate2(svint32_t v2, svint32_t v2)` Reviewers: c-rhodes, david-arm, efriedma Reviewed By: c-rhodes, efriedma Tags: #clang Differential Revision: https://reviews.llvm.org/D81463 Added: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4.c Modified: clang/include/clang/Basic/TargetBuiltins.h clang/include/clang/Basic/arm_sve.td clang/lib/CodeGen/CGBuiltin.cpp clang/lib/CodeGen/CodeGenFunction.h Removed: diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h index 899f6b5b6f3c..38d82d1d869f 100644 --- a/clang/include/clang/Basic/TargetBuiltins.h +++ b/clang/include/clang/Basic/TargetBuiltins.h @@ -247,6 +247,7 @@ namespace clang { bool isGatherPrefetch() const { return Flags & IsGatherPrefetch; } bool isReverseUSDOT() const { return Flags & ReverseUSDOT; } bool isUndef() const { return Flags & IsUndef; } +bool isTupleCreate() const { return Flags & IsTupleCreate; } uint64_t getBits() const { return Flags; } bool isFlagSet(uint64_t Flag) const { return Flags & Flag; } diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index a7223f770455..0348a3754e22 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -200,6 +200,7 @@ def IsGatherPrefetch : FlagType<0x1000>; def ReverseCompare: FlagType<0x2000>; // Compare operands must be swapped. 
def ReverseUSDOT : FlagType<0x4000>; // Unsigned/signed operands must be swapped. def IsUndef : FlagType<0x8000>; // Codegen `undef` of given type. +def IsTupleCreate : FlagType<0x1>; // These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h class ImmCheckType { @@ -1279,6 +1280,10 @@ def SVUNDEF_2 : SInst<"svundef2_{d}", "2", "csilUcUsUiUlhfd", MergeNone, "", [Is def SVUNDEF_3 : SInst<"svundef3_{d}", "3", "csilUcUsUiUlhfd", MergeNone, "", [IsUndef]>; def SVUNDEF_4 : SInst<"svundef4_{d}", "4", "csilUcUsUiUlhfd", MergeNone, "", [IsUndef]>; +def SVCREATE_2 : SInst<"svcreate2[_{d}]", "2dd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tuple_create2", [IsTupleCreate]>; +def SVCREATE_3 : SInst<"svcreate3[_{d}]", "3ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tuple_create3", [IsTupleCreate]>; +def SVCREATE_4 : SInst<"svcreate4[_{d}]", "4", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tuple_create4", [IsTupleCreate]>; + // SVE2 WhileGE/GT let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index b81b2a449425..05de88c392aa 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -4646,7 +4646,7 @@ struct ARMVectorIntrinsicInfo { unsigned BuiltinID; unsigned LLVMIntrinsic; unsigned AltLLVMIntrinsic; - unsigned TypeModifier; + uint64_t TypeModifier; bool operator<(unsigned RHSBuiltinID) const { return BuiltinID < RHSBuiltinID; @@ -7998,9 +7998,8 @@ static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty, Ops.insert(Ops.begin(), SplatUndef); } -SmallVector -CodeGenFunction::getSVEOverloadTypes(SVETypeFlags TypeFlags, - ArrayRef Ops) { +SmallVector CodeGenFunction::getSVEOverloadTypes( +SVETypeFlags TypeFlags, llvm::Type *ResultType, ArrayRef Ops) { if (TypeFlags.isOverloadNone()) return {}; @@ -8015,6 +8014,9 @@ CodeGenFunction::getSVEOverloadTypes(SVETypeFlags TypeFlags, if (TypeFlags.isOverloadCvt()) return 
{Ops[0]->getType(), Ops.back()->getType()}; + if (TypeFlags.isTupleCreate()) +return {ResultType, Ops[0]->getType()}; + assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads"); return {DefaultType}; } @@ -8112,7 +8114,7 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, } Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic, - getSVEOverloadTypes(TypeFlags, Ops)); + getSVEOverloadTypes(TypeFlags, Ty, Ops)); Value *Call = Builder.CreateC
[clang] 4ea8e27 - [SveEmitter] Add builtins to insert/extract subvectors from tuples (svget/svset)
Author: Sander de Smalen Date: 2020-06-18T11:06:16+01:00 New Revision: 4ea8e27a642c6f97ca69cd39bbe44f7366870f6c URL: https://github.com/llvm/llvm-project/commit/4ea8e27a642c6f97ca69cd39bbe44f7366870f6c DIFF: https://github.com/llvm/llvm-project/commit/4ea8e27a642c6f97ca69cd39bbe44f7366870f6c.diff LOG: [SveEmitter] Add builtins to insert/extract subvectors from tuples (svget/svset) For example: svint32_t svget4(svint32x4_t tuple, uint64_t imm_index) returns the subvector at `index`, which must be in range `0..3`. svint32x3_t svset3(svint32x3_t tuple, uint64_t index, svint32_t vec) returns a tuple vector with `vec` inserted into `tuple` at `index`, which must be in range `0..2`. Reviewers: c-rhodes, efriedma Reviewed By: c-rhodes Tags: #clang Differential Revision: https://reviews.llvm.org/D81464 Added: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set2.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set3.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set4.c clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_get2.c clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_get3.c clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_get4.c clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_set2.c clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_set3.c clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_set4.c Modified: clang/include/clang/Basic/TargetBuiltins.h clang/include/clang/Basic/arm_sve.td clang/lib/CodeGen/CGBuiltin.cpp clang/lib/Sema/SemaChecking.cpp Removed: diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h index 38d82d1d869f..eba055c302a7 100644 --- a/clang/include/clang/Basic/TargetBuiltins.h +++ b/clang/include/clang/Basic/TargetBuiltins.h @@ -248,6 +248,8 @@ namespace 
clang { bool isReverseUSDOT() const { return Flags & ReverseUSDOT; } bool isUndef() const { return Flags & IsUndef; } bool isTupleCreate() const { return Flags & IsTupleCreate; } +bool isTupleGet() const { return Flags & IsTupleGet; } +bool isTupleSet() const { return Flags & IsTupleSet; } uint64_t getBits() const { return Flags; } bool isFlagSet(uint64_t Flag) const { return Flags & Flag; } diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 0348a3754e22..7c8eb8d38f75 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -201,6 +201,8 @@ def ReverseCompare: FlagType<0x2000>; // Compare operands must b def ReverseUSDOT : FlagType<0x4000>; // Unsigned/signed operands must be swapped. def IsUndef : FlagType<0x8000>; // Codegen `undef` of given type. def IsTupleCreate : FlagType<0x1>; +def IsTupleGet: FlagType<0x2>; +def IsTupleSet: FlagType<0x4>; // These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h class ImmCheckType { @@ -219,6 +221,9 @@ def ImmCheckLaneIndexDot: ImmCheckType<9>; // 0..(128/(4*sizeinbits(elt def ImmCheckComplexRot90_270: ImmCheckType<10>; // [90,270] def ImmCheckComplexRotAll90 : ImmCheckType<11>; // [0, 90, 180,270] def ImmCheck0_13: ImmCheckType<12>; // 0..13 +def ImmCheck0_1 : ImmCheckType<13>; // 0..1 +def ImmCheck0_2 : ImmCheckType<14>; // 0..2 +def ImmCheck0_3 : ImmCheckType<15>; // 0..3 class ImmCheck { int Arg = arg; @@ -1284,6 +1289,17 @@ def SVCREATE_2 : SInst<"svcreate2[_{d}]", "2dd", "csilUcUsUiUlhfd", MergeNone, def SVCREATE_3 : SInst<"svcreate3[_{d}]", "3ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tuple_create3", [IsTupleCreate]>; def SVCREATE_4 : SInst<"svcreate4[_{d}]", "4", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tuple_create4", [IsTupleCreate]>; + + +// Vector insertion and extraction +def SVGET_2 : SInst<"svget2[_{d}]", "d2i", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tuple_get", [IsTupleGet], [ImmCheck<1, 
ImmCheck0_1>]>; +def SVGET_3 : SInst<"svget3[_{d}]", "d3i", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tuple_get", [IsTupleGet], [ImmCheck<1, ImmCheck0_2>]>; +def SVGET_4 : SInst<"svget4[_{d}]", "d4i", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tuple_get", [IsTupleGet], [ImmCheck<1, ImmCheck0_3>]>; + +def SVSET_2 : SInst<"svset2[_{d}]", "22id", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tuple_set", [IsTupleSet], [ImmCheck<1, ImmCheck0_1>]>; +def SVSET_3 : SInst<"
[clang] f255656 - [SVE] ACLE: Fix builtins for svdup_lane_bf16 and svcvtnt_bf16_f32_x
Author: Sander de Smalen Date: 2020-07-02T09:57:34+01:00 New Revision: f255656a97f7c83f7e049fd916278bbf7446651e URL: https://github.com/llvm/llvm-project/commit/f255656a97f7c83f7e049fd916278bbf7446651e DIFF: https://github.com/llvm/llvm-project/commit/f255656a97f7c83f7e049fd916278bbf7446651e.diff LOG: [SVE] ACLE: Fix builtins for svdup_lane_bf16 and svcvtnt_bf16_f32_x bfloat16 variants of svdup_lane were missing, and svcvtnt_bf16_x was implemented incorrectly (it takes an operand for the inactive lanes) Reviewers: fpetrogalli, efriedma Reviewed By: fpetrogalli Tags: #clang Differential Revision: https://reviews.llvm.org/D82908 Added: Modified: clang/include/clang/Basic/arm_sve.td clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvtnt.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dup-bfloat.c clang/utils/TableGen/SveEmitter.cpp Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 0711293c4f8a..19a42e79c36a 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1110,7 +1110,7 @@ defm SVFCVTZS_S64_F32 : SInstCvtMXZ<"svcvt_s64[_f32]", "ddPM", "dPM", "l", "aar let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in { defm SVCVT_BF16_F32 : SInstCvtMXZ<"svcvt_bf16[_f32]", "ddPM", "dPM", "b", "aarch64_sve_fcvt_bf16f32">; - defm SVCVTNT_BF16_F32 : SInstCvtMX<"svcvtnt_bf16[_f32]", "ddPM", "dPM", "b", "aarch64_sve_fcvtnt_bf16f32">; + def SVCVTNT_BF16_F32 : SInst<"svcvtnt_bf16[_f32]", "ddPM", "b", MergeOp1, "aarch64_sve_fcvtnt_bf16f32", [IsOverloadNone]>; } // svcvt_s##_f64 @@ -1204,6 +1204,11 @@ def SVCOMPACT: SInst<"svcompact[_{d}]", "dPd", "ilUiUlfd",MergeNo // instruction such as DUP (indexed) if the lane index fits the range of the // instruction's immediate. 
def SVDUP_LANE : SInst<"svdup_lane[_{d}]", "ddL", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tbl">; +let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in { +def SVDUP_LANE_BF16 : + SInst<"svdup_lane[_{d}]", "ddL", "b", MergeNone, "aarch64_sve_tbl">; +} + def SVDUPQ_LANE : SInst<"svdupq_lane[_{d}]", "ddn", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_dupq_lane">; let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in { def SVDUPQ_LANE_BF16 : SInst<"svdupq_lane[_{d}]", "ddn", "b", MergeNone, "aarch64_sve_dupq_lane">; diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvtnt.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvtnt.c index 50c5fb543a70..b5828227cac4 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvtnt.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvtnt.c @@ -10,18 +10,18 @@ #define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 #endif -svbfloat16_t test_svcvtnt_bf16_f32_x(svbool_t pg, svfloat32_t op) { +svbfloat16_t test_svcvtnt_bf16_f32_x(svbfloat16_t even, svbool_t pg, svfloat32_t op) { // CHECK-LABEL: test_svcvtnt_bf16_f32_x // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) - // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fcvtnt.bf16f32( undef, %[[PG]], %op) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fcvtnt.bf16f32( %even, %[[PG]], %op) // CHECK: ret %[[INTRINSIC]] - return SVE_ACLE_FUNC(svcvtnt_bf16, _f32, _x, )(pg, op); + return SVE_ACLE_FUNC(svcvtnt_bf16, _f32, _x, )(even, pg, op); } -svbfloat16_t test_svcvtnt_bf16_f32_m(svbfloat16_t inactive, svbool_t pg, svfloat32_t op) { +svbfloat16_t test_svcvtnt_bf16_f32_m(svbfloat16_t even, svbool_t pg, svfloat32_t op) { // CHECK-LABEL: test_svcvtnt_bf16_f32_m // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) - // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fcvtnt.bf16f32( %inactive, %[[PG]], %op) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fcvtnt.bf16f32( %even, %[[PG]], %op) 
// CHECK: ret %[[INTRINSIC]] - return SVE_ACLE_FUNC(svcvtnt_bf16, _f32, _m, )(inactive, pg, op); + return SVE_ACLE_FUNC(svcvtnt_bf16, _f32, _m, )(even, pg, op); } diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dup-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dup-bfloat.c index 3830f482aff3..7ef02c3330e9 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dup-bfloat.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dup-bfloat.c @@ -51,3 +51,13 @@ svbfloat16_t test_svdup_n_bf16_x(svbool_t pg, bfloat16_t op) { // expected-warning@+1 {{implicit declaration of function 'svdup_n_bf16_x'}} return SVE_ACLE_FUNC(svdup, _n, _bf16_x, )(pg, op); } + +svbfloat16_t test_svdup_lane_bf16(svbfloat16_t data, uint16_t index) +{ + // CHECK-LABEL: test_svdup_lane_bf16 + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8i16(i16 %index) + // CHECK: %[[INTRINSIC:.*]]
[clang] 121e585 - [AArch64][SVE] ACLE: Add bfloat16 to struct load/stores.
Author: Sander de Smalen Date: 2020-06-23T12:12:35+01:00 New Revision: 121e585ec8aa5f973ea56807ed0a73f13ab4937e URL: https://github.com/llvm/llvm-project/commit/121e585ec8aa5f973ea56807ed0a73f13ab4937e DIFF: https://github.com/llvm/llvm-project/commit/121e585ec8aa5f973ea56807ed0a73f13ab4937e.diff LOG: [AArch64][SVE] ACLE: Add bfloat16 to struct load/stores. This patch contains: - Support in LLVM CodeGen for bfloat16 types for ld2/3/4 and st2/3/4. - New bfloat16 ACLE builtins for svld(2|3|4)[_vnum] and svst(2|3|4)[_vnum] Reviewers: stuij, efriedma, c-rhodes, fpetrogalli Reviewed By: fpetrogalli Tags: #clang, #lldb, #llvm Differential Revision: https://reviews.llvm.org/D82187 Added: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2-bfloat.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3-bfloat.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4-bfloat.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2-bfloat.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st3-bfloat.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st4-bfloat.c Modified: clang/include/clang/Basic/AArch64SVEACLETypes.def clang/include/clang/Basic/arm_sve.td clang/lib/CodeGen/CodeGenTypes.cpp clang/utils/TableGen/SveEmitter.cpp lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll Removed: diff --git a/clang/include/clang/Basic/AArch64SVEACLETypes.def b/clang/include/clang/Basic/AArch64SVEACLETypes.def index 0640da83ebb3..b98a07436e94 100644 --- a/clang/include/clang/Basic/AArch64SVEACLETypes.def +++ b/clang/include/clang/Basic/AArch64SVEACLETypes.def @@ -66,7 +66,7 @@ SVE_VECTOR_TYPE("__SVFloat16_t", "__SVFloat16_t", SveFloat16, SveFloat16Ty, 8, 1 SVE_VECTOR_TYPE("__SVFloat32_t", "__SVFloat32_t", SveFloat32, SveFloat32Ty, 4, 32, true, true, false) SVE_VECTOR_TYPE("__SVFloat64_t", "__SVFloat64_t", SveFloat64, 
SveFloat64Ty, 2, 64, true, true, false) -SVE_VECTOR_TYPE("__SVBFloat16_t", "__SVBFloat16_t", SveBFloat16, SveBFloat16Ty, 8, 16, false, false, true) +SVE_VECTOR_TYPE("__SVBFloat16_t", "__SVBFloat16_t", SveBFloat16, SveBFloat16Ty, 8, 16, true, false, true) // // x2 @@ -85,6 +85,7 @@ SVE_VECTOR_TYPE("__clang_svfloat16x2_t", "svfloat16x2_t", SveFloat16x2, SveFloat SVE_VECTOR_TYPE("__clang_svfloat32x2_t", "svfloat32x2_t", SveFloat32x2, SveFloat32x2Ty, 8, 32, true, true, false) SVE_VECTOR_TYPE("__clang_svfloat64x2_t", "svfloat64x2_t", SveFloat64x2, SveFloat64x2Ty, 4, 64, true, true, false) +SVE_VECTOR_TYPE("__clang_svbfloat16x2_t", "svbfloat16x2_t", SveBFloat16x2, SveBFloat16x2Ty, 16, 16, true, false, true) // // x3 // @@ -102,6 +103,7 @@ SVE_VECTOR_TYPE("__clang_svfloat16x3_t", "svfloat16x3_t", SveFloat16x3, SveFloat SVE_VECTOR_TYPE("__clang_svfloat32x3_t", "svfloat32x3_t", SveFloat32x3, SveFloat32x3Ty, 12, 32, true, true, false) SVE_VECTOR_TYPE("__clang_svfloat64x3_t", "svfloat64x3_t", SveFloat64x3, SveFloat64x3Ty, 6, 64, true, true, false) +SVE_VECTOR_TYPE("__clang_svbfloat16x3_t", "svbfloat16x3_t", SveBFloat16x3, SveBFloat16x3Ty, 24, 16, true, false, true) // // x4 // @@ -119,6 +121,8 @@ SVE_VECTOR_TYPE("__clang_svfloat16x4_t", "svfloat16x4_t", SveFloat16x4, SveFloat SVE_VECTOR_TYPE("__clang_svfloat32x4_t", "svfloat32x4_t", SveFloat32x4, SveFloat32x4Ty, 16, 32, true, true, false) SVE_VECTOR_TYPE("__clang_svfloat64x4_t", "svfloat64x4_t", SveFloat64x4, SveFloat64x4Ty, 8, 64, true, true, false) +SVE_VECTOR_TYPE("__clang_svbfloat16x4_t", "svbfloat16x4_t", SveBFloat16x4, SveBFloat16x4Ty, 32, 16, true, false, true) + SVE_PREDICATE_TYPE("__SVBool_t", "__SVBool_t", SveBool, SveBoolTy, 16) #undef SVE_VECTOR_TYPE diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index ba429433360c..c55af44bc5ad 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -480,15 +480,22 @@ def SVLDNT1_VNUM : 
MInst<"svldnt1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfd", [IsLoa // Load one quadword and replicate (scalar base) def SVLD1RQ : SInst<"svld1rq[_{2}]", "dPc", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ld1rq">; +multiclass StructLoad { + def : SInst; + let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in { +def: SInst; + } +} + // Load N-element structure into N vectors (scalar base) -def SVLD2 : SInst<"svld2[_{2}]", "2Pc", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ld2", [IsStructLoad]>; -def SVLD3 : SInst<"svld3[_{2}]", "3Pc", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ld3", [IsStructLoad]>; -def SVLD4 : SInst<"svld4[_{2}]", "4Pc", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ld4", [IsStructLoad]>; +defm SVLD2 : StructLoad<"svld2[_{2}]", "2Pc", "aarch64_sve_ld2">; +defm SVLD3 : StructLoad<
[clang] 515020c - [SveEmitter] Add more immediate operand checks.
Author: Sander de Smalen Date: 2020-04-20T14:41:58+01:00 New Revision: 515020c091e74723ee0876229890d71a8aa79702 URL: https://github.com/llvm/llvm-project/commit/515020c091e74723ee0876229890d71a8aa79702 DIFF: https://github.com/llvm/llvm-project/commit/515020c091e74723ee0876229890d71a8aa79702.diff LOG: [SveEmitter] Add more immediate operand checks. This patch adds a number of intrinsics that take immediates with varying ranges based on the element size one of the operands. svext: immediate ranging 0 to (2048/sizeinbits(elt) - 1) svasrd: immediate ranging 1..sizeinbits(elt) svqshlu: immediate ranging 1..sizeinbits(elt)/2 ftmad: immediate ranging 0..(sizeinbits(elt) - 1) Reviewers: efriedma, SjoerdMeijer, rovka, rengolin Reviewed By: SjoerdMeijer Tags: #clang Differential Revision: https://reviews.llvm.org/D76679 Added: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_asrd.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ext.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_tmad.c clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_asrd.c clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_ext.c clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_tmad.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_qshlu.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_shrnb.c clang/test/CodeGen/aarch64-sve2-intrinsics/negative/acle_sve2_qshlu.c clang/test/CodeGen/aarch64-sve2-intrinsics/negative/acle_sve2_shrnb.c Modified: clang/include/clang/Basic/arm_sve.td clang/lib/CodeGen/CGBuiltin.cpp clang/lib/Sema/SemaChecking.cpp clang/utils/TableGen/SveEmitter.cpp Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 75fd3ca499d0..9fe4715e4ea1 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -58,9 +58,11 @@ // --- // prototype: return (arg, arg, ...) 
// +// u: vector of unsigned integers // d: default // c: const pointer type // P: predicate type +// h: 1/2 width elements, 2x element count // // i: constant uint64_t // @@ -157,14 +159,18 @@ class ImmCheckType { } def ImmCheck0_31: ImmCheckType<0>; // 0..31 (used for e.g. predicate patterns) def ImmCheck1_16: ImmCheckType<1>; // 1..16 +def ImmCheckExtract : ImmCheckType<2>; // 0..(2048/sizeinbits(elt) - 1) +def ImmCheckShiftRight : ImmCheckType<3>; // 1..sizeinbits(elt) +def ImmCheckShiftRightNarrow: ImmCheckType<4>; // 1..sizeinbits(elt)/2 +def ImmCheckShiftLeft : ImmCheckType<5>; // 0..(sizeinbits(elt) - 1) +def ImmCheck0_7 : ImmCheckType<6>; // 0..7 class ImmCheck { int Arg = arg; - int EltSizeArg = eltSizeArg; + int EltSizeArg = eltSizeArg; ImmCheckType Kind = kind; } -// Every intrinsic subclasses Inst. class Inst ft, list ch, MemEltType met> { string Name = n; @@ -282,6 +288,30 @@ def SVSTNT1 : MInst<"svstnt1[_{d}]", "vPpd", "csilUcUsUiUlhfd", [IsStore], MemEl // Store one vector, with no truncation, non-temporal (scalar base, VL displacement) def SVSTNT1_VNUM : MInst<"svstnt1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfd", [IsStore], MemEltTyDefault, "aarch64_sve_stnt1">; + +// Permutations and selection +def SVEXT: SInst<"svext[_{d}]", "dddi", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ext", [], [ImmCheck<2, ImmCheckExtract, 1>]>; + + +// Shifts +def SVASRD_M : SInst<"svasrd[_n_{d}]", "dPdi", "csil",MergeOp1, "aarch64_sve_asrd", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; + + +// SVE2 - Narrowing DSP operations +let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { +def SVSHRNB : SInst<"svshrnb[_n_{d}]","hdi", "silUsUiUl", MergeNone, "aarch64_sve_shrnb", [], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; +} + + +// SVE2 - Uniform DSP operations +let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { +def SVQSHLU_M : SInst<"svqshlu[_n_{d}]", "uPdi", "csil", MergeOp1, "aarch64_sve_sqshlu", [], [ImmCheck<2, ImmCheckShiftLeft, 1>]>; +} + + +// Floating-point arithmetic +def 
SVTMAD : SInst<"svtmad[_{d}]", "dddi", "hfd", MergeNone, "aarch64_sve_ftmad_x", [], [ImmCheck<2, ImmCheck0_7>]>; + // Saturating scalar arithmetic def SVQDECH_S : SInst<"svqdech_pat[_{d}]", "ddIi", "s", MergeNone, "aarch
[clang] fc64539 - [SveEmitter] Add immediate checks for lanes and complex imms
Author: Sander de Smalen Date: 2020-04-20T15:10:54+01:00 New Revision: fc645397498037ccb7df230a07e9a8762aaf8c8f URL: https://github.com/llvm/llvm-project/commit/fc645397498037ccb7df230a07e9a8762aaf8c8f DIFF: https://github.com/llvm/llvm-project/commit/fc645397498037ccb7df230a07e9a8762aaf8c8f.diff LOG: [SveEmitter] Add immediate checks for lanes and complex imms Adds another bunch of of intrinsics that take immediates with varying ranges based, some being a complex rotation immediate which are a set of allowed immediates rather than a range. svmla_lane: lane immediate ranging 0..(128/(1*sizeinbits(elt)) - 1) svcmla_lane: lane immediate ranging 0..(128/(2*sizeinbits(elt)) - 1) svdot_lane: lane immediate ranging 0..(128/(4*sizeinbits(elt)) - 1) svcadd: complex rotate immediate [90, 270] svcmla: svcmla_lane: complex rotate immediate [0, 90, 180, 270] Reviewers: efriedma, SjoerdMeijer, rovka Reviewed By: efriedma Tags: #clang Differential Revision: https://reviews.llvm.org/D76680 Added: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmla.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dot.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mla.c clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_cadd.c clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_cmla.c clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_dot.c clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_mla.c Modified: clang/include/clang/Basic/DiagnosticSemaKinds.td clang/include/clang/Basic/arm_sve.td clang/lib/Sema/SemaChecking.cpp clang/utils/TableGen/SveEmitter.cpp Removed: diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 97ad1a6c7920..a64e313bf271 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -9244,6 +9244,10 @@ def err_argument_not_shifted_byte : Error< "argument should be an 8-bit value shifted by a multiple of 8 
bits">; def err_argument_not_shifted_byte_or_xxff : Error< "argument should be an 8-bit value shifted by a multiple of 8 bits, or in the form 0x??FF">; +def err_rotation_argument_to_cadd +: Error<"argument should be the value 90 or 270">; +def err_rotation_argument_to_cmla +: Error<"argument should be the value 0, 90, 180 or 270">; def warn_neon_vector_initializer_non_portable : Warning< "vector initializers are not compatible with NEON intrinsics in big endian " "mode">, InGroup>; diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 9fe4715e4ea1..84f03e60b51f 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -62,7 +62,10 @@ // d: default // c: const pointer type // P: predicate type +// e: 1/2 width unsigned elements, 2x element count // h: 1/2 width elements, 2x element count +// q: 1/4 width elements, 4x element count +// o: 4x width elements, 1/4 element count // // i: constant uint64_t // @@ -164,6 +167,11 @@ def ImmCheckShiftRight : ImmCheckType<3>; // 1..sizeinbits(elt) def ImmCheckShiftRightNarrow: ImmCheckType<4>; // 1..sizeinbits(elt)/2 def ImmCheckShiftLeft : ImmCheckType<5>; // 0..(sizeinbits(elt) - 1) def ImmCheck0_7 : ImmCheckType<6>; // 0..7 +def ImmCheckLaneIndex : ImmCheckType<7>; // 0..(128/(1*sizeinbits(elt)) - 1) +def ImmCheckLaneIndexCompRotate : ImmCheckType<8>; // 0..(128/(2*sizeinbits(elt)) - 1) +def ImmCheckLaneIndexDot: ImmCheckType<9>; // 0..(128/(4*sizeinbits(elt)) - 1) +def ImmCheckComplexRot90_270: ImmCheckType<10>; // [90,270] +def ImmCheckComplexRotAll90 : ImmCheckType<11>; // [0, 90, 180,270] class ImmCheck { int Arg = arg; @@ -312,7 +320,19 @@ def SVQSHLU_M : SInst<"svqshlu[_n_{d}]", "uPdi", "csil", MergeOp1, "aa // Floating-point arithmetic def SVTMAD : SInst<"svtmad[_{d}]", "dddi", "hfd", MergeNone, "aarch64_sve_ftmad_x", [], [ImmCheck<2, ImmCheck0_7>]>; +def SVMLA_LANE : SInst<"svmla_lane[_{d}]", "i", "hfd", MergeNone, 
"aarch64_sve_fmla_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVCMLA_LANE : SInst<"svcmla_lane[_{d}]", "ii", "hf", MergeNone, "aarch64_sve_fcmla_lane", [], [ImmCheck<3, ImmCheckLaneIndexCompRotate, 2>, + ImmCheck<4, ImmCheckComplexRotAll90>]>; + +def SVCADD_M : SInst<"svcadd[_{d}]", "dPddi", "hfd", MergeOp1, "aarch64_sve_fcadd", [], [ImmCheck<3, ImmCheckComplexRot90_270>]>; +def SVCMLA_M : SInst<"svcmla[_{d}]", "dPdddi", "hfd", MergeOp1, "aarch64_sve_fcmla", [], [ImmCheck<4, ImmCheckComplexRotAll90>]>; +
[clang] 9986b3d - [SveEmitter] Explicitly merge with zero/undef
Author: Sander de Smalen Date: 2020-04-20T16:26:20+01:00 New Revision: 9986b3de26d31be26d978194333c44e82873f3ff URL: https://github.com/llvm/llvm-project/commit/9986b3de26d31be26d978194333c44e82873f3ff DIFF: https://github.com/llvm/llvm-project/commit/9986b3de26d31be26d978194333c44e82873f3ff.diff LOG: [SveEmitter] Explicitly merge with zero/undef Builtins that have the merge type MergeAnyExp or MergeZeroExp, merge into a 'undef' or 'zero' vector respectively, which enables the _x and _z behaviour for unary operations. This patch also adds builtins for svabs and svneg. Reviewers: SjoerdMeijer, efriedma, rovka Reviewed By: efriedma Tags: #clang Differential Revision: https://reviews.llvm.org/D77591 Added: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_abs.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_neg.c Modified: clang/include/clang/Basic/arm_sve.td clang/lib/CodeGen/CGBuiltin.cpp Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 84f03e60b51f..6f665d0c6716 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -296,6 +296,18 @@ def SVSTNT1 : MInst<"svstnt1[_{d}]", "vPpd", "csilUcUsUiUlhfd", [IsStore], MemEl // Store one vector, with no truncation, non-temporal (scalar base, VL displacement) def SVSTNT1_VNUM : MInst<"svstnt1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfd", [IsStore], MemEltTyDefault, "aarch64_sve_stnt1">; + +// Integer arithmetic + +multiclass SInstZPZ flags=[]> { + def _M : SInst; + def _X : SInst; + def _Z : SInst; +} + +defm SVABS : SInstZPZ<"svabs", "csil", "aarch64_sve_abs">; +defm SVNEG : SInstZPZ<"svneg", "csil", "aarch64_sve_neg">; + // Permutations and selection def SVEXT: SInst<"svext[_{d}]", "dddi", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ext", [], [ImmCheck<2, ImmCheckExtract, 1>]>; @@ -318,6 +330,10 @@ def SVQSHLU_M : SInst<"svqshlu[_n_{d}]", "uPdi", "csil", MergeOp1, "aa // Floating-point arithmetic + +defm SVABS_F : SInstZPZ<"svabs", 
"hfd", "aarch64_sve_fabs">; +defm SVNEG_F : SInstZPZ<"svneg", "hfd", "aarch64_sve_fneg">; + def SVTMAD : SInst<"svtmad[_{d}]", "dddi", "hfd", MergeNone, "aarch64_sve_ftmad_x", [], [ImmCheck<2, ImmCheck0_7>]>; def SVMLA_LANE : SInst<"svmla_lane[_{d}]", "i", "hfd", MergeNone, "aarch64_sve_fmla_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 96c7c9ed2d7b..df45fef9d6c1 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -7591,6 +7591,18 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E, return Builder.CreateCall(F, {Val, Predicate, BasePtr}); } +static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty, + SmallVectorImpl &Ops) { + auto *SplatZero = Constant::getNullValue(Ty); + Ops.insert(Ops.begin(), SplatZero); +} + +static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty, + SmallVectorImpl &Ops) { + auto *SplatUndef = UndefValue::get(Ty); + Ops.insert(Ops.begin(), SplatUndef); +} + Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { // Find out if any arguments are required to be integer constant expressions. @@ -7630,6 +7642,12 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, else if (Builtin->LLVMIntrinsic != 0) { llvm::Type* OverloadedTy = getSVEType(TypeFlags); +if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp) + InsertExplicitZeroOperand(Builder, Ty, Ops); + +if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp) + InsertExplicitUndefOperand(Builder, Ty, Ops); + // Predicates must match the main datatype. 
for (unsigned i = 0, e = Ops.size(); i != e; ++i) { if (auto PredTy = dyn_cast(Ops[i]->getType())) diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_abs.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_abs.c new file mode 100644 index ..2db01ff7d64c --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_abs.c @@ -0,0 +1,197 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-an
[clang] 662cbaf - [SveEmitter] Add IsOverloadNone flag and builtins for svpfalse and svcnt[bhwd]_pat
Author: Sander de Smalen Date: 2020-04-22T16:42:08+01:00 New Revision: 662cbaf6476b7cc58d0d71ff98d95d00ce5b420e URL: https://github.com/llvm/llvm-project/commit/662cbaf6476b7cc58d0d71ff98d95d00ce5b420e DIFF: https://github.com/llvm/llvm-project/commit/662cbaf6476b7cc58d0d71ff98d95d00ce5b420e.diff LOG: [SveEmitter] Add IsOverloadNone flag and builtins for svpfalse and svcnt[bhwd]_pat Add the IsOverloadNone flag to tell CGBuiltin that it does not have an overloaded type. This is used for e.g. svpfalse which does not take any arguments and always returns a svbool_t. This patch also adds builtins for svcntb_pat, svcnth_pat, svcntw_pat and svcntd_pat, as those don't require custom codegen. Reviewers: SjoerdMeijer, efriedma, rovka Reviewed By: efriedma Tags: #clang Differential Revision: https://reviews.llvm.org/D77596 Added: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cntb.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cntd.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnth.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cntw.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_pfalse.c Modified: clang/include/clang/Basic/TargetBuiltins.h clang/include/clang/Basic/arm_sve.td clang/lib/CodeGen/CGBuiltin.cpp clang/lib/CodeGen/CodeGenFunction.h clang/utils/TableGen/SveEmitter.cpp Removed: diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h index d07b2d81988d..042f60739f85 100644 --- a/clang/include/clang/Basic/TargetBuiltins.h +++ b/clang/include/clang/Basic/TargetBuiltins.h @@ -233,6 +233,8 @@ namespace clang { bool isStructStore() const { return Flags & IsStructStore; } bool isZExtReturn() const { return Flags & IsZExtReturn; } bool isByteIndexed() const { return Flags & IsByteIndexed; } +bool isOverloadNone() const { return Flags & IsOverloadNone; } +bool isOverloadDefault() const { return !(Flags & OverloadKindMask); } uint64_t getBits() const { return Flags; } bool isFlagSet(uint64_t Flag) const 
{ return Flags & Flag; } diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index cc579f773669..c2794356d251 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -70,6 +70,10 @@ // o: 4x width elements, 1/4 element count // // i: constant uint64_t +// k: int32_t +// l: int64_t +// m: uint32_t +// n: uint64_t // // I: Predicate Pattern (sv_pattern) @@ -163,6 +167,8 @@ def IsScatterStore: FlagType<0x0001>; def IsStructLoad : FlagType<0x0002>; def IsStructStore : FlagType<0x0004>; def IsZExtReturn : FlagType<0x0008>; // Return value is sign-extend by default +def IsOverloadNone: FlagType<0x0010>; // Intrinsic does not take any overloaded types. +def OverloadKindMask : FlagType<0x00E0>; // When the masked values are all '0', the default type is used as overload type. // : : // : : def IsByteIndexed : FlagType<0x0200>; @@ -542,6 +548,20 @@ def SVCMLA_M : SInst<"svcmla[_{d}]", "dPdddi", "hfd", MergeOp1, "aarch64_sve_fc def SVQDECH_S : SInst<"svqdech_pat[_{d}]", "ddIi", "s", MergeNone, "aarch64_sve_sqdech", [], [ImmCheck<2, ImmCheck1_16>]>; def SVQDECH_U : SInst<"svqdech_pat[_{d}]", "ddIi", "Us", MergeNone, "aarch64_sve_uqdech", [], [ImmCheck<2, ImmCheck1_16>]>; + + +// Predicate creation + +def SVPFALSE : SInst<"svpfalse[_b]", "P", "", MergeNone, "", [IsOverloadNone]>; + + + +// Counting elements + +def SVCNTB_PAT : SInst<"svcntb_pat", "nI", "", MergeNone, "aarch64_sve_cntb", [IsOverloadNone]>; +def SVCNTH_PAT : SInst<"svcnth_pat", "nI", "", MergeNone, "aarch64_sve_cnth", [IsOverloadNone]>; +def SVCNTW_PAT : SInst<"svcntw_pat", "nI", "", MergeNone, "aarch64_sve_cntw", [IsOverloadNone]>; +def SVCNTD_PAT : SInst<"svcntd_pat", "nI", "", MergeNone, "aarch64_sve_cntd", [IsOverloadNone]>; // Integer arithmetic def SVDOT_LANE_S : SInst<"svdot_lane[_{d}]", "ddqqi", "il", MergeNone, "aarch64_sve_sdot_lane", [], [ImmCheck<3, ImmCheckLaneIndexDot, 2>]>; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp 
b/clang/lib/CodeGen/CGBuiltin.cpp index 3e24f0f53ab8..fa461a5dd9bf 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -7773,6 +7773,18 @@ static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty, Ops.insert(Ops.begin(), SplatUndef); } +SmallVector +CodeGenFunction::getSVEOverload
[clang] 1559485 - [SveEmitter] Add builtins for svwhile
Author: Sander de Smalen Date: 2020-04-22T21:47:47+01:00 New Revision: 1559485e600242343cb21c7ffbf345172008cd59 URL: https://github.com/llvm/llvm-project/commit/1559485e600242343cb21c7ffbf345172008cd59 DIFF: https://github.com/llvm/llvm-project/commit/1559485e600242343cb21c7ffbf345172008cd59.diff LOG: [SveEmitter] Add builtins for svwhile This also adds the IsOverloadWhile flag which tells CGBuiltin to use both the default type (predicate) and the type of the second operand (scalar) as the overloaded types for the LLMV IR intrinsic. Reviewers: SjoerdMeijer, efriedma, rovka Reviewed By: efriedma Tags: #clang Differential Revision: https://reviews.llvm.org/D77595 Added: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_whilele.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_whilelt.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_whilege.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_whilegt.c Modified: clang/include/clang/Basic/TargetBuiltins.h clang/include/clang/Basic/arm_sve.td clang/lib/CodeGen/CGBuiltin.cpp Removed: diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h index 042f60739f85..25c738bc3796 100644 --- a/clang/include/clang/Basic/TargetBuiltins.h +++ b/clang/include/clang/Basic/TargetBuiltins.h @@ -234,6 +234,7 @@ namespace clang { bool isZExtReturn() const { return Flags & IsZExtReturn; } bool isByteIndexed() const { return Flags & IsByteIndexed; } bool isOverloadNone() const { return Flags & IsOverloadNone; } +bool isOverloadWhile() const { return Flags & IsOverloadWhile; } bool isOverloadDefault() const { return !(Flags & OverloadKindMask); } uint64_t getBits() const { return Flags; } diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index c2794356d251..fd15c86bec9f 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -168,6 +168,7 @@ def IsStructLoad : FlagType<0x0002>; def IsStructStore : 
FlagType<0x0004>; def IsZExtReturn : FlagType<0x0008>; // Return value is sign-extend by default def IsOverloadNone: FlagType<0x0010>; // Intrinsic does not take any overloaded types. +def IsOverloadWhile : FlagType<0x0020>; // Use {default type, typeof(operand1)} as overloaded types. def OverloadKindMask : FlagType<0x00E0>; // When the masked values are all '0', the default type is used as overload type. // : : // : : @@ -528,6 +529,18 @@ let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { def SVQSHLU_M : SInst<"svqshlu[_n_{d}]", "uPdi", "csil", MergeOp1, "aarch64_sve_sqshlu", [], [ImmCheck<2, ImmCheckShiftLeft, 1>]>; } + +// While comparisons + +def SVWHILELE_S32 : SInst<"svwhilele_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilele", [IsOverloadWhile]>; +def SVWHILELE_S64 : SInst<"svwhilele_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilele", [IsOverloadWhile]>; +def SVWHILELO_U32 : SInst<"svwhilelt_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilelo", [IsOverloadWhile]>; +def SVWHILELO_U64 : SInst<"svwhilelt_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilelo", [IsOverloadWhile]>; +def SVWHILELS_U32 : SInst<"svwhilele_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilels", [IsOverloadWhile]>; +def SVWHILELS_U64 : SInst<"svwhilele_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilels", [IsOverloadWhile]>; +def SVWHILELT_S32 : SInst<"svwhilelt_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilelt", [IsOverloadWhile]>; +def SVWHILELT_S64 : SInst<"svwhilelt_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilelt", [IsOverloadWhile]>; + // Floating-point arithmetic @@ -567,6 +580,19 @@ def SVCNTD_PAT : SInst<"svcntd_pat", "nI", "", MergeNone, "aarch64_sve_cntd", [I def SVDOT_LANE_S : SInst<"svdot_lane[_{d}]", "ddqqi", "il", MergeNone, "aarch64_sve_sdot_lane", [], [ImmCheck<3, ImmCheckLaneIndexDot, 2>]>; def SVDOT_LANE_U : SInst<"svdot_lane[_{d}]", "ddqqi", "UiUl", 
MergeNone, "aarch64_sve_udot_lane", [], [ImmCheck<3, ImmCheckLaneIndexDot, 2>]>; + +// SVE2 WhileGE/GT +let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { +def SVWHILEGE_S32 : SInst<"svwhilege_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilege", [IsOverloadWhile]>; +def SVWHILEGE_S64 : SInst<"svwhilege_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "
[clang] 2d1baf6 - [SveEmitter] Add builtins for svwhilerw/svwhilewr
Author: Sander de Smalen Date: 2020-04-22T21:49:18+01:00 New Revision: 2d1baf606ab46daf9a322e5751d364c55c86deca URL: https://github.com/llvm/llvm-project/commit/2d1baf606ab46daf9a322e5751d364c55c86deca DIFF: https://github.com/llvm/llvm-project/commit/2d1baf606ab46daf9a322e5751d364c55c86deca.diff LOG: [SveEmitter] Add builtins for svwhilerw/svwhilewr This also adds the IsOverloadWhileRW flag which tells CGBuiltin to use the result predicate type and the first pointer type as the overloaded types for the LLVM IR intrinsic. Reviewers: SjoerdMeijer, efriedma Reviewed By: efriedma Tags: #clang Differential Revision: https://reviews.llvm.org/D78238 Added: clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_whilerw.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_whilewr.c Modified: clang/include/clang/Basic/TargetBuiltins.h clang/include/clang/Basic/arm_sve.td clang/lib/CodeGen/CGBuiltin.cpp clang/lib/CodeGen/CodeGenFunction.h Removed: diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h index 25c738bc3796..661691e3d2a5 100644 --- a/clang/include/clang/Basic/TargetBuiltins.h +++ b/clang/include/clang/Basic/TargetBuiltins.h @@ -236,6 +236,7 @@ namespace clang { bool isOverloadNone() const { return Flags & IsOverloadNone; } bool isOverloadWhile() const { return Flags & IsOverloadWhile; } bool isOverloadDefault() const { return !(Flags & OverloadKindMask); } +bool isOverloadWhileRW() const { return Flags & IsOverloadWhileRW; } uint64_t getBits() const { return Flags; } bool isFlagSet(uint64_t Flag) const { return Flags & Flag; } diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index fd15c86bec9f..1feeeba6d780 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -169,6 +169,7 @@ def IsStructStore : FlagType<0x0004>; def IsZExtReturn : FlagType<0x0008>; // Return value is sign-extend by default def IsOverloadNone: FlagType<0x0010>; // 
Intrinsic does not take any overloaded types. def IsOverloadWhile : FlagType<0x0020>; // Use {default type, typeof(operand1)} as overloaded types. +def IsOverloadWhileRW : FlagType<0x0040>; // Use {pred(default type), typeof(operand0)} as overloaded types. def OverloadKindMask : FlagType<0x00E0>; // When the masked values are all '0', the default type is used as overload type. // : : // : : @@ -714,3 +715,17 @@ def SVSTNT1_SCATTER_INDEX_S : MInst<"svstnt1_scatter[_{2}base]_index[_{d}]", " def SVSTNT1H_SCATTER_INDEX_S : MInst<"svstnt1h_scatter[_{2}base]_index[_{d}]", "vPuld", "ilUiUl", [IsScatterStore], MemEltTyInt16, "aarch64_sve_stnt1_scatter_scalar_offset">; def SVSTNT1W_SCATTER_INDEX_S : MInst<"svstnt1w_scatter[_{2}base]_index[_{d}]", "vPuld", "lUl", [IsScatterStore], MemEltTyInt32, "aarch64_sve_stnt1_scatter_scalar_offset">; } + + +// SVE2 - Contiguous conflict detection +let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { +def SVWHILERW_B : SInst<"svwhilerw[_{1}]", "Pcc", "cUc", MergeNone, "aarch64_sve_whilerw_b", [IsOverloadWhileRW]>; +def SVWHILERW_H : SInst<"svwhilerw[_{1}]", "Pcc", "sUsh", MergeNone, "aarch64_sve_whilerw_h", [IsOverloadWhileRW]>; +def SVWHILERW_S : SInst<"svwhilerw[_{1}]", "Pcc", "iUif", MergeNone, "aarch64_sve_whilerw_s", [IsOverloadWhileRW]>; +def SVWHILERW_D : SInst<"svwhilerw[_{1}]", "Pcc", "lUld", MergeNone, "aarch64_sve_whilerw_d", [IsOverloadWhileRW]>; + +def SVWHILEWR_B : SInst<"svwhilewr[_{1}]", "Pcc", "cUc", MergeNone, "aarch64_sve_whilewr_b", [IsOverloadWhileRW]>; +def SVWHILEWR_H : SInst<"svwhilewr[_{1}]", "Pcc", "sUsh", MergeNone, "aarch64_sve_whilewr_h", [IsOverloadWhileRW]>; +def SVWHILEWR_S : SInst<"svwhilewr[_{1}]", "Pcc", "iUif", MergeNone, "aarch64_sve_whilewr_s", [IsOverloadWhileRW]>; +def SVWHILEWR_D : SInst<"svwhilewr[_{1}]", "Pcc", "lUld", MergeNone, "aarch64_sve_whilewr_d", [IsOverloadWhileRW]>; +} diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 6c58cfd81acc..e1dd8f9bfda5 100644 
--- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -7511,6 +7511,30 @@ llvm::Type *CodeGenFunction::getEltType(SVETypeFlags TypeFlags) { } } +// Return the llvm predicate vector type corresponding to the specified element +// TypeFlags. +llvm::VectorType* CodeGenFunction::getSVEPredType(SVETypeFlags TypeFlags) { + switch (TypeFlags.getEltType()) { + default: llvm_unreachable("Unhandled SVETypeFlag!"); + + case SVETypeFlags::EltTyInt8: +return llvm::VectorType::get(Builder.getInt1Ty(), { 16, true }); +
[clang] 0021644 - [SveEmitter] Add builtins for FP conversions
Author: Sander de Smalen Date: 2020-04-23T10:49:06+01:00 New Revision: 002164461b52e0ff13fa677a535991f89da0f633 URL: https://github.com/llvm/llvm-project/commit/002164461b52e0ff13fa677a535991f89da0f633 DIFF: https://github.com/llvm/llvm-project/commit/002164461b52e0ff13fa677a535991f89da0f633.diff LOG: [SveEmitter] Add builtins for FP conversions This adds the flag IsOverloadCvt which tells CGBulitin to use the result type and the type of the last operand as the overloaded types for the LLVM IR intrinsic. This also adds the flag IsFPConvert, which is needed to avoid converting the predicate of the operation from svbool_t to a predicate with fewer lanes, as the LLVM IR intrinsics use the as the predicate. Reviewers: SjoerdMeijer, efriedma Reviewed By: efriedma Tags: #clang Differential Revision: https://reviews.llvm.org/D78239 Added: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvt.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_cvtlt.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_cvtnt.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_cvtx.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_cvtxnt.c Modified: clang/include/clang/Basic/TargetBuiltins.h clang/include/clang/Basic/arm_sve.td clang/lib/CodeGen/CGBuiltin.cpp clang/utils/TableGen/SveEmitter.cpp Removed: diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h index 661691e3d2a5..bfc50535fce4 100644 --- a/clang/include/clang/Basic/TargetBuiltins.h +++ b/clang/include/clang/Basic/TargetBuiltins.h @@ -237,6 +237,8 @@ namespace clang { bool isOverloadWhile() const { return Flags & IsOverloadWhile; } bool isOverloadDefault() const { return !(Flags & OverloadKindMask); } bool isOverloadWhileRW() const { return Flags & IsOverloadWhileRW; } +bool isOverloadCvt() const { return Flags & IsOverloadCvt; } +bool isFPConvert() const { return Flags & IsFPConvert; } uint64_t getBits() const { return Flags; } bool isFlagSet(uint64_t Flag) const { 
return Flags & Flag; } diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 1feeeba6d780..517be0544c94 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -170,10 +170,10 @@ def IsZExtReturn : FlagType<0x0008>; // Return value is sign-ex def IsOverloadNone: FlagType<0x0010>; // Intrinsic does not take any overloaded types. def IsOverloadWhile : FlagType<0x0020>; // Use {default type, typeof(operand1)} as overloaded types. def IsOverloadWhileRW : FlagType<0x0040>; // Use {pred(default type), typeof(operand0)} as overloaded types. +def IsOverloadCvt : FlagType<0x0080>; // Use {typeof(operand0), typeof(last operand)} as overloaded types. def OverloadKindMask : FlagType<0x00E0>; // When the masked values are all '0', the default type is used as overload type. -// : : -// : : -def IsByteIndexed : FlagType<0x0200>; +def IsByteIndexed : FlagType<0x0100>; +def IsFPConvert : FlagType<0x0200>; // These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h class ImmCheckType { @@ -553,6 +553,102 @@ def SVTMAD : SInst<"svtmad[_{d}]", "dddi", "hfd", MergeNone, "aarch64_sve_ftma def SVMLA_LANE : SInst<"svmla_lane[_{d}]", "i", "hfd", MergeNone, "aarch64_sve_fmla_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; def SVCMLA_LANE : SInst<"svcmla_lane[_{d}]", "ii", "hf", MergeNone, "aarch64_sve_fcmla_lane", [], [ImmCheck<3, ImmCheckLaneIndexCompRotate, 2>, ImmCheck<4, ImmCheckComplexRotAll90>]>; + +// Floating-point conversions + +multiclass SInstCvtMXZ< +string name, string m_types, string xz_types, string types, +string intrinsic, list flags = [IsFPConvert, IsOverloadNone]> { + def _M : SInst; + def _X : SInst; + def _Z : SInst; +} + +multiclass SInstCvtMX flags = [IsFPConvert, IsOverloadNone]> { + def _M : SInst; + def _X : SInst; +} + +// svcvt_s##_f16 +defm SVFCVTZS_S16_F16 : SInstCvtMXZ<"svcvt_s16[_f16]", "ddPO", "dPO", "s", "aarch64_sve_fcvtzs", [IsOverloadCvt]>; +defm 
SVFCVTZS_S32_F16 : SInstCvtMXZ<"svcvt_s32[_f16]", "ddPO", "dPO", "i", "aarch64_sve_fcvtzs_i32f16">; +defm SVFCVTZS_S64_F16 : SInstCvtMXZ<"svcvt_s64[_f16]", "ddPO", "dPO", "l", "aarch64_sve_fcvtzs_i64f16">; + +// svcvt_s##_f32 +defm SVFCVTZS_S32_F32 : SInstCvtMXZ<"svcvt_s32[_f32]", "ddPM", "dPM", "i", "aarch64_sve_fcvtzs", [IsOverloadCvt]>; +defm SVFCVTZS_S64_F32
[clang] c84e130 - [SveEmitter] Add builtins for scatter stores
Author: Sander de Smalen Date: 2020-04-24T10:57:43+01:00 New Revision: c84e1305c4faf92bb75cf265e022b39bf96da608 URL: https://github.com/llvm/llvm-project/commit/c84e1305c4faf92bb75cf265e022b39bf96da608 DIFF: https://github.com/llvm/llvm-project/commit/c84e1305c4faf92bb75cf265e022b39bf96da608.diff LOG: [SveEmitter] Add builtins for scatter stores D77735 only added scatters for the non-temporal variants. Reviewers: SjoerdMeijer, efriedma, andwar Reviewed By: efriedma Tags: #clang Differential Revision: https://reviews.llvm.org/D78751 Added: Modified: clang/include/clang/Basic/arm_sve.td clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1b.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1h.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1w.c Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 692e76aff9a3..f3dfe2c8bb1d 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -466,6 +466,75 @@ def SVST1H_VNUM_U : MInst<"svst1h_vnum[_{d}]", "vPFld", "UiUl",[IsSt def SVST1W_VNUM_S : MInst<"svst1w_vnum[_{d}]", "vPCld", "l", [IsStore], MemEltTyInt32, "aarch64_sve_st1">; def SVST1W_VNUM_U : MInst<"svst1w_vnum[_{d}]", "vPGld", "Ul", [IsStore], MemEltTyInt32, "aarch64_sve_st1">; +// Store one vector (vector base) +def SVST1_SCATTER_BASES_U : MInst<"svst1_scatter[_{2}base_{d}]", "vPud", "ilUiUlfd", [IsScatterStore], MemEltTyDefault, "aarch64_sve_st1_scatter_scalar_offset">; +def SVST1B_SCATTER_BASES_U: MInst<"svst1b_scatter[_{2}base_{d}]", "vPud", "ilUiUl", [IsScatterStore], MemEltTyInt8, "aarch64_sve_st1_scatter_scalar_offset">; +def SVST1H_SCATTER_BASES_U: MInst<"svst1h_scatter[_{2}base_{d}]", "vPud", "ilUiUl", [IsScatterStore], MemEltTyInt16, "aarch64_sve_st1_scatter_scalar_offset">; +def SVST1W_SCATTER_BASES_U: MInst<"svst1w_scatter[_{2}base_{d}]", "vPud", "lUl", [IsScatterStore], MemEltTyInt32, 
"aarch64_sve_st1_scatter_scalar_offset">; + +// Store one vector (scalar base, signed vector offset in bytes) +def SVST1_SCATTER_64B_OFFSETS_S : MInst<"svst1_scatter_[{3}]offset[_{d}]", "vPpxd", "lUld", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1_scatter">; +def SVST1B_SCATTER_64B_OFFSETS_SS : MInst<"svst1b_scatter_[{3}]offset[_{d}]", "vPAxd", "l",[IsScatterStore, IsByteIndexed], MemEltTyInt8, "aarch64_sve_st1_scatter">; +def SVST1B_SCATTER_64B_OFFSETS_SU : MInst<"svst1b_scatter_[{3}]offset[_{d}]", "vPExd", "Ul", [IsScatterStore, IsByteIndexed], MemEltTyInt8, "aarch64_sve_st1_scatter">; +def SVST1H_SCATTER_64B_OFFSETS_SS : MInst<"svst1h_scatter_[{3}]offset[_{d}]", "vPBxd", "l",[IsScatterStore, IsByteIndexed], MemEltTyInt16, "aarch64_sve_st1_scatter">; +def SVST1H_SCATTER_64B_OFFSETS_SU : MInst<"svst1h_scatter_[{3}]offset[_{d}]", "vPFxd", "Ul", [IsScatterStore, IsByteIndexed], MemEltTyInt16, "aarch64_sve_st1_scatter">; +def SVST1W_SCATTER_64B_OFFSETS_SS : MInst<"svst1w_scatter_[{3}]offset[_{d}]", "vPCxd", "l",[IsScatterStore, IsByteIndexed], MemEltTyInt32, "aarch64_sve_st1_scatter">; +def SVST1W_SCATTER_64B_OFFSETS_SU : MInst<"svst1w_scatter_[{3}]offset[_{d}]", "vPGxd", "Ul", [IsScatterStore, IsByteIndexed], MemEltTyInt32, "aarch64_sve_st1_scatter">; + +def SVST1_SCATTER_32B_OFFSETS_S : MInst<"svst1_scatter_[{3}]offset[_{d}]", "vPpxd", "iUif", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1_scatter_sxtw">; +def SVST1B_SCATTER_32B_OFFSETS_SS : MInst<"svst1b_scatter_[{3}]offset[_{d}]", "vPAxd", "i",[IsScatterStore, IsByteIndexed], MemEltTyInt8, "aarch64_sve_st1_scatter_sxtw">; +def SVST1B_SCATTER_32B_OFFSETS_SU : MInst<"svst1b_scatter_[{3}]offset[_{d}]", "vPExd", "Ui", [IsScatterStore, IsByteIndexed], MemEltTyInt8, "aarch64_sve_st1_scatter_sxtw">; +def SVST1H_SCATTER_32B_OFFSETS_SS : MInst<"svst1h_scatter_[{3}]offset[_{d}]", "vPBxd", "i",[IsScatterStore, IsByteIndexed], MemEltTyInt16, "aarch64_sve_st1_scatter_sxtw">; +def 
SVST1H_SCATTER_32B_OFFSETS_SU : MInst<"svst1h_scatter_[{3}]offset[_{d}]", "vPFxd", "Ui", [IsScatterStore, IsByteIndexed], MemEltTyInt16, "aarch64_sve_st1_scatter_sxtw">; + +// Store one vector (scalar base, unsigned vector offset in bytes) +def SVST1_SCATTER_64B_OFFSETS_U : MInst<"svst1_scatter_[{3}]offset[_{d}]", "vPpud", "lUld", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1_scatter">; +def SVST1B_SCATTER_64B_OFFSETS_US : MInst<"svst1b_scatter_[{3}]offset[_{d}]", "vPAud", "l",[IsScatterStore, IsByteIndexed], MemEltTyInt8, "aarch64_sve_st1_scatter">; +def SVST1B_SCATTER_64B_OFFSETS_UU : MInst<"svst1b_scatter_[{3}]offset[_{d}]", "vPEud", "Ul", [IsScatterStore, IsBy
[clang] db79974 - [SveEmitter] Add builtins for svld1rq
Author: Sander de Smalen Date: 2020-04-24T11:10:28+01:00 New Revision: db7997472bbd01645f058b23a4ca9e991d5b8144 URL: https://github.com/llvm/llvm-project/commit/db7997472bbd01645f058b23a4ca9e991d5b8144 DIFF: https://github.com/llvm/llvm-project/commit/db7997472bbd01645f058b23a4ca9e991d5b8144.diff LOG: [SveEmitter] Add builtins for svld1rq Reviewed By: efriedma Tags: #clang Differential Revision: https://reviews.llvm.org/D78748 Added: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1rq.c Modified: clang/include/clang/Basic/arm_sve.td Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index f3dfe2c8bb1d..4b69cdcb6468 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -444,6 +444,8 @@ def SVLDNT1 : MInst<"svldnt1[_{2}]", "dPc", "csilUcUsUiUlhfd", [IsLoad], MemEltT // Load one vector, unextended load, non-temporal (scalar base, VL displacement) def SVLDNT1_VNUM : MInst<"svldnt1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfd", [IsLoad], MemEltTyDefault, "aarch64_sve_ldnt1">; +// Load one quadword and replicate (scalar base) +def SVLD1RQ : SInst<"svld1rq[_{2}]", "dPc", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ld1rq">; // Stores diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1rq.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1rq.c new file mode 100644 index ..f0a3d5c1a62a --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1rq.c @@ -0,0 +1,108 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include <arm_sve.h> + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... 
macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint8_t test_svld1rq_s8(svbool_t pg, const int8_t *base) +{ + // CHECK-LABEL: test_svld1rq_s8 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.ld1rq.nxv16i8( %pg, i8* %base) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svld1rq,_s8,,)(pg, base); +} + +svint16_t test_svld1rq_s16(svbool_t pg, const int16_t *base) +{ + // CHECK-LABEL: test_svld1rq_s16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.ld1rq.nxv8i16( %[[PG]], i16* %base) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svld1rq,_s16,,)(pg, base); +} + +svint32_t test_svld1rq_s32(svbool_t pg, const int32_t *base) +{ + // CHECK-LABEL: test_svld1rq_s32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.ld1rq.nxv4i32( %[[PG]], i32* %base) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svld1rq,_s32,,)(pg, base); +} + +svint64_t test_svld1rq_s64(svbool_t pg, const int64_t *base) +{ + // CHECK-LABEL: test_svld1rq_s64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.ld1rq.nxv2i64( %[[PG]], i64* %base) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svld1rq,_s64,,)(pg, base); +} + +svuint8_t test_svld1rq_u8(svbool_t pg, const uint8_t *base) +{ + // CHECK-LABEL: test_svld1rq_u8 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.ld1rq.nxv16i8( %pg, i8* %base) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svld1rq,_u8,,)(pg, base); +} + +svuint16_t test_svld1rq_u16(svbool_t pg, const uint16_t *base) +{ + // CHECK-LABEL: test_svld1rq_u16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = 
call @llvm.aarch64.sve.ld1rq.nxv8i16( %[[PG]], i16* %base) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svld1rq,_u16,,)(pg, base); +} + +svuint32_t test_svld1rq_u32(svbool_t pg, const uint32_t *base) +{ + // CHECK-LABEL: test_svld1rq_u32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.ld1rq.nxv4i32( %[[PG]], i32* %base) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svld1rq,_u32,,)(pg, base); +} + +svuint64_t test_svld1rq_u64(svbool_t pg, const uint64_t *base) +{ + // CHECK-LABEL: test_svld1rq_u64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.
[clang] 823e2a6 - [SveEmitter] Add builtins for contiguous prefetches
Author: Sander de Smalen Date: 2020-04-24T11:35:59+01:00 New Revision: 823e2a670a9da8e5cd8beed108355a168ca1a23b URL: https://github.com/llvm/llvm-project/commit/823e2a670a9da8e5cd8beed108355a168ca1a23b DIFF: https://github.com/llvm/llvm-project/commit/823e2a670a9da8e5cd8beed108355a168ca1a23b.diff LOG: [SveEmitter] Add builtins for contiguous prefetches This patch also adds the enum `sv_prfop` for the prefetch operation specifier and checks to ensure the passed enum values are valid. Reviewers: SjoerdMeijer, efriedma, ctetreau Reviewed By: efriedma Tags: #clang Differential Revision: https://reviews.llvm.org/D78674 Added: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfb.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfd.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfh.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfw.c clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_prfb.c clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_prfd.c clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_prfh.c clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_prfw.c Modified: clang/include/clang/Basic/TargetBuiltins.h clang/include/clang/Basic/arm_sve.td clang/lib/CodeGen/CGBuiltin.cpp clang/lib/CodeGen/CodeGenFunction.h clang/lib/Sema/SemaChecking.cpp clang/utils/TableGen/SveEmitter.cpp Removed: diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h index 5ef1a58fbb0f..1a9cb4cda1a4 100644 --- a/clang/include/clang/Basic/TargetBuiltins.h +++ b/clang/include/clang/Basic/TargetBuiltins.h @@ -238,6 +238,7 @@ namespace clang { bool isOverloadDefault() const { return !(Flags & OverloadKindMask); } bool isOverloadWhileRW() const { return Flags & IsOverloadWhileRW; } bool isOverloadCvt() const { return Flags & IsOverloadCvt; } +bool isPrefetch() const { return Flags & IsPrefetch; } uint64_t getBits() const { return Flags; } bool isFlagSet(uint64_t Flag) const { return Flags & Flag; 
} diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 4b69cdcb6468..5709dc8b8781 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -58,6 +58,7 @@ // --- // prototype: return (arg, arg, ...) // +// v: void // x: vector of signed integers // u: vector of unsigned integers // d: default @@ -82,6 +83,7 @@ // M: svfloat32_t // N: svfloat64_t +// J: Prefetch type (sv_prfop) // A: pointer to int8_t // B: pointer to int16_t // C: pointer to int32_t @@ -176,6 +178,7 @@ def IsOverloadWhileRW : FlagType<0x0040>; // Use {pred(default type) def IsOverloadCvt : FlagType<0x0080>; // Use {typeof(operand0), typeof(last operand)} as overloaded types. def OverloadKindMask : FlagType<0x00E0>; // When the masked values are all '0', the default type is used as overload type. def IsByteIndexed : FlagType<0x0100>; +def IsPrefetch: FlagType<0x0800>; // Contiguous prefetches. // These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h class ImmCheckType { @@ -193,6 +196,7 @@ def ImmCheckLaneIndexCompRotate : ImmCheckType<8>; // 0..(128/(2*sizeinbits(elt def ImmCheckLaneIndexDot: ImmCheckType<9>; // 0..(128/(4*sizeinbits(elt)) - 1) def ImmCheckComplexRot90_270: ImmCheckType<10>; // [90,270] def ImmCheckComplexRotAll90 : ImmCheckType<11>; // [0, 90, 180,270] +def ImmCheck0_13: ImmCheckType<12>; // 0..13 class ImmCheck { int Arg = arg; @@ -543,6 +547,21 @@ def SVSTNT1 : MInst<"svstnt1[_{d}]", "vPpd", "csilUcUsUiUlhfd", [IsStore], MemEl // Store one vector, with no truncation, non-temporal (scalar base, VL displacement) def SVSTNT1_VNUM : MInst<"svstnt1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfd", [IsStore], MemEltTyDefault, "aarch64_sve_stnt1">; + +// Prefetches + +// Prefetch (Scalar base) +def SVPRFB : MInst<"svprfb", "vPcJ", "c", [IsPrefetch], MemEltTyInt8, "aarch64_sve_prf">; +def SVPRFH : MInst<"svprfh", "vPcJ", "s", [IsPrefetch], MemEltTyInt16, "aarch64_sve_prf">; +def SVPRFW : 
MInst<"svprfw", "vPcJ", "i", [IsPrefetch], MemEltTyInt32, "aarch64_sve_prf">; +def SVPRFD : MInst<"svprfd", "vPcJ", "l", [IsPrefetch], MemEltTyInt64, "aarch64_sve_prf">; + +// Prefetch (Scalar base, VL displacement) +def SVPRFB_VNUM : MInst<"svprfb_vnum", "vPclJ", "c", [IsPrefetch], MemEltTyInt8, "aarch64_sve_prf">; +def SVPRFH_VNUM : MInst<"svprfh_vnum", "vPclJ", "s", [IsPrefetch], MemEltTyInt16, "aarch64_sve_prf">; +def SVPRFW_VNUM : MInst<"svprfw_vnum", "vPclJ", "i", [IsPrefetch], MemEltTyInt32, "aarch64_sve_prf">; +def SVPRFD_VNUM : MIns
[clang] 3817ca7 - [SveEmitter] Add IsAppendSVALL and builtins for svptrue and svcnt[bhwd]
Author: Sander de Smalen Date: 2020-04-26T12:44:26+01:00 New Revision: 3817ca7dbf8d360fd6a2ff44722af1405722ad38 URL: https://github.com/llvm/llvm-project/commit/3817ca7dbf8d360fd6a2ff44722af1405722ad38 DIFF: https://github.com/llvm/llvm-project/commit/3817ca7dbf8d360fd6a2ff44722af1405722ad38.diff LOG: [SveEmitter] Add IsAppendSVALL and builtins for svptrue and svcnt[bhwd] Some ACLE builtins leave out the argument to specify the predicate pattern, which is expected to be expanded to an SV_ALL pattern. This patch adds the flag IsAppendSVALL to append SV_ALL as the final operand. Reviewers: SjoerdMeijer, efriedma, rovka, rengolin Reviewed By: efriedma Tags: #clang Differential Revision: https://reviews.llvm.org/D77597 Added: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ptrue.c Modified: clang/include/clang/Basic/TargetBuiltins.h clang/include/clang/Basic/arm_sve.td clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cntb.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cntd.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnth.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cntw.c Removed: diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h index 1c193ea64a28..8d25dedfef7f 100644 --- a/clang/include/clang/Basic/TargetBuiltins.h +++ b/clang/include/clang/Basic/TargetBuiltins.h @@ -240,6 +240,7 @@ namespace clang { bool isOverloadCvt() const { return Flags & IsOverloadCvt; } bool isPrefetch() const { return Flags & IsPrefetch; } bool isReverseCompare() const { return Flags & ReverseCompare; } +bool isAppendSVALL() const { return Flags & IsAppendSVALL; } uint64_t getBits() const { return Flags; } bool isFlagSet(uint64_t Flag) const { return Flags & Flag; } diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 9387f1d711f0..af6c971000f4 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ 
-181,6 +181,7 @@ def IsOverloadWhileRW : FlagType<0x0040>; // Use {pred(default type) def IsOverloadCvt : FlagType<0x0080>; // Use {typeof(operand0), typeof(last operand)} as overloaded types. def OverloadKindMask : FlagType<0x00E0>; // When the masked values are all '0', the default type is used as overload type. def IsByteIndexed : FlagType<0x0100>; +def IsAppendSVALL : FlagType<0x0200>; // Appends SV_ALL as the last operand. def IsPrefetch: FlagType<0x0800>; // Contiguous prefetches. def ReverseCompare: FlagType<0x2000>; // Compare operands must be swapped. @@ -837,6 +838,8 @@ def SVQDECH_U : SInst<"svqdech_pat[_{d}]", "ddIi", "Us", MergeNone, "aarch64_s def SVPFALSE : SInst<"svpfalse[_b]", "P", "", MergeNone, "", [IsOverloadNone]>; +def SVPTRUE_PAT : SInst<"svptrue_pat_{d}", "PI", "PcPsPiPl", MergeNone, "aarch64_sve_ptrue">; +def SVPTRUE : SInst<"svptrue_{d}", "P", "PcPsPiPl", MergeNone, "aarch64_sve_ptrue", [IsAppendSVALL]>; // Counting elements @@ -845,6 +848,11 @@ def SVCNTB_PAT : SInst<"svcntb_pat", "nI", "", MergeNone, "aarch64_sve_cntb", [I def SVCNTH_PAT : SInst<"svcnth_pat", "nI", "", MergeNone, "aarch64_sve_cnth", [IsOverloadNone]>; def SVCNTW_PAT : SInst<"svcntw_pat", "nI", "", MergeNone, "aarch64_sve_cntw", [IsOverloadNone]>; def SVCNTD_PAT : SInst<"svcntd_pat", "nI", "", MergeNone, "aarch64_sve_cntd", [IsOverloadNone]>; + +def SVCNTB : SInst<"svcntb", "n", "", MergeNone, "aarch64_sve_cntb", [IsAppendSVALL, IsOverloadNone]>; +def SVCNTH : SInst<"svcnth", "n", "", MergeNone, "aarch64_sve_cnth", [IsAppendSVALL, IsOverloadNone]>; +def SVCNTW : SInst<"svcntw", "n", "", MergeNone, "aarch64_sve_cntw", [IsAppendSVALL, IsOverloadNone]>; +def SVCNTD : SInst<"svcntd", "n", "", MergeNone, "aarch64_sve_cntd", [IsAppendSVALL, IsOverloadNone]>; // Integer arithmetic def SVDOT_LANE_S : SInst<"svdot_lane[_{d}]", "ddqqi", "il", MergeNone, "aarch64_sve_sdot_lane", [], [ImmCheck<3, ImmCheckLaneIndexDot, 2>]>; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp 
b/clang/lib/CodeGen/CGBuiltin.cpp index 18ad1664aa56..58965efd5c44 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -7908,6 +7908,11 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp) InsertExplicitUndefOperand(Builder, Ty, Ops); +// Some ACLE builtins leave out the argument to specify the predicate +// pattern, which is expected to be expanded to an SV_ALL pattern. +if
[clang] 03f419f - [SveEmitter] IsInsertOp1SVALL and builtins for svqdec[bhwd] and svqinc[bhwd]
Author: Sander de Smalen Date: 2020-04-27T11:45:10+01:00 New Revision: 03f419f3eb0c426a0a555be9abf7255a89b131cd URL: https://github.com/llvm/llvm-project/commit/03f419f3eb0c426a0a555be9abf7255a89b131cd DIFF: https://github.com/llvm/llvm-project/commit/03f419f3eb0c426a0a555be9abf7255a89b131cd.diff LOG: [SveEmitter] IsInsertOp1SVALL and builtins for svqdec[bhwd] and svqinc[bhwd] Some ACLE builtins leave out the argument to specify the predicate pattern, which is expected to be expanded to an SV_ALL pattern. This patch adds the flag IsInsertOp1SVALL to insert SV_ALL as the second operand. Reviewers: efriedma, SjoerdMeijer Reviewed By: SjoerdMeijer Tags: #clang Differential Revision: https://reviews.llvm.org/D78401 Added: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qdecb.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qdecd.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qdecw.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qincb.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qincd.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qinch.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qincw.c clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_qdecb.c clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_qdecd.c clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_qdecw.c clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_qincb.c clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_qincd.c clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_qinch.c clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_qincw.c Modified: clang/include/clang/Basic/TargetBuiltins.h clang/include/clang/Basic/arm_sve.td clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qdech.c clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_qdech.c clang/utils/TableGen/SveEmitter.cpp Removed: diff --git a/clang/include/clang/Basic/TargetBuiltins.h 
b/clang/include/clang/Basic/TargetBuiltins.h index 8d25dedfef7f..0a06ba3e5ecc 100644 --- a/clang/include/clang/Basic/TargetBuiltins.h +++ b/clang/include/clang/Basic/TargetBuiltins.h @@ -241,6 +241,7 @@ namespace clang { bool isPrefetch() const { return Flags & IsPrefetch; } bool isReverseCompare() const { return Flags & ReverseCompare; } bool isAppendSVALL() const { return Flags & IsAppendSVALL; } +bool isInsertOp1SVALL() const { return Flags & IsInsertOp1SVALL; } uint64_t getBits() const { return Flags; } bool isFlagSet(uint64_t Flag) const { return Flags & Flag; } diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index af6c971000f4..a5cacd2103a8 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -64,6 +64,7 @@ // d: default // c: const pointer type // P: predicate type +// s: scalar of element type // a: scalar of element type (splat to vector type) // e: 1/2 width unsigned elements, 2x element count // h: 1/2 width elements, 2x element count @@ -182,6 +183,7 @@ def IsOverloadCvt : FlagType<0x0080>; // Use {typeof(operand0), def OverloadKindMask : FlagType<0x00E0>; // When the masked values are all '0', the default type is used as overload type. def IsByteIndexed : FlagType<0x0100>; def IsAppendSVALL : FlagType<0x0200>; // Appends SV_ALL as the last operand. +def IsInsertOp1SVALL : FlagType<0x0400>; // Inserts SV_ALL as the second operand. def IsPrefetch: FlagType<0x0800>; // Contiguous prefetches. def ReverseCompare: FlagType<0x2000>; // Compare operands must be swapped. 
@@ -827,11 +829,6 @@ def SVCVTXNT_F32: SInst<"svcvtxnt_f32[_f64]", "MMPd", "d", MergeOp1, "aarch6 def SVCADD_M : SInst<"svcadd[_{d}]", "dPddi", "hfd", MergeOp1, "aarch64_sve_fcadd", [], [ImmCheck<3, ImmCheckComplexRot90_270>]>; def SVCMLA_M : SInst<"svcmla[_{d}]", "dPdddi", "hfd", MergeOp1, "aarch64_sve_fcmla", [], [ImmCheck<4, ImmCheckComplexRotAll90>]>; - -// Saturating scalar arithmetic -def SVQDECH_S : SInst<"svqdech_pat[_{d}]", "ddIi", "s", MergeNone, "aarch64_sve_sqdech", [], [ImmCheck<2, ImmCheck1_16>]>; -def SVQDECH_U : SInst<"svqdech_pat[_{d}]", "ddIi", "Us", MergeNone, "aarch64_sve_uqdech", [], [ImmCheck<2, ImmCheck1_16>]>; - // Predicate creation @@ -853,6 +850,55 @@ def SVCNTB : SInst<"svcntb", "n", "", MergeNone, "aarch64_sve_cntb", [IsAppendSV def SVCNTH : SInst<"svcnth", "n", "", MergeNone, "aarch64_sve_cnth", [IsAppendSVALL, IsOverloadNone]>; def SVCNTW : SInst<"svcntw", "n", "",
[clang] e4872d7 - [SveEmitter] Add builtins for svlen
Author: Sander de Smalen Date: 2020-04-27T21:27:32+01:00 New Revision: e4872d7f08a1d6f657aaf3bd83b2e3b65fccebd4 URL: https://github.com/llvm/llvm-project/commit/e4872d7f08a1d6f657aaf3bd83b2e3b65fccebd4 DIFF: https://github.com/llvm/llvm-project/commit/e4872d7f08a1d6f657aaf3bd83b2e3b65fccebd4.diff LOG: [SveEmitter] Add builtins for svlen The svlen builtins return the number of elements in a vector and are implemented using `llvm.vscale`. Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D78755 Added: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_len.c Modified: clang/include/clang/Basic/arm_sve.td clang/lib/CodeGen/CGBuiltin.cpp Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index a5cacd2103a8..e77ec24634e9 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -851,6 +851,8 @@ def SVCNTH : SInst<"svcnth", "n", "", MergeNone, "aarch64_sve_cnth", [IsAppendSV def SVCNTW : SInst<"svcntw", "n", "", MergeNone, "aarch64_sve_cntw", [IsAppendSVALL, IsOverloadNone]>; def SVCNTD : SInst<"svcntd", "n", "", MergeNone, "aarch64_sve_cntd", [IsAppendSVALL, IsOverloadNone]>; +def SVLEN : SInst<"svlen[_{d}]", "nd", "csilUcUsUiUlhfd", MergeNone>; + // Saturating scalar arithmetic diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index e11791a7a7df..b0e5eebe7b88 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -7956,6 +7956,25 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, return nullptr; case SVE::BI__builtin_sve_svpfalse_b: return ConstantInt::getFalse(Ty); + + case SVE::BI__builtin_sve_svlen_f16: + case SVE::BI__builtin_sve_svlen_f32: + case SVE::BI__builtin_sve_svlen_f64: + case SVE::BI__builtin_sve_svlen_s8: + case SVE::BI__builtin_sve_svlen_s16: + case SVE::BI__builtin_sve_svlen_s32: + case SVE::BI__builtin_sve_svlen_s64: + case SVE::BI__builtin_sve_svlen_u8: + case 
SVE::BI__builtin_sve_svlen_u16: + case SVE::BI__builtin_sve_svlen_u32: + case SVE::BI__builtin_sve_svlen_u64: { +SVETypeFlags TF(Builtin->TypeModifier); +auto VTy = cast(getSVEType(TF)); +auto NumEls = llvm::ConstantInt::get(Ty, VTy->getElementCount().Min); + +Function *F = CGM.getIntrinsic(Intrinsic::vscale, Ty); +return Builder.CreateMul(NumEls, Builder.CreateCall(F)); + } } /// Should not happen diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_len.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_len.c new file mode 100644 index ..fc8ebaa18868 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_len.c @@ -0,0 +1,110 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include <arm_sve.h> + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +uint64_t test_svlen_s8(svint8_t op) +{ + // CHECK-LABEL: test_svlen_s8 + // CHECK: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64() + // CHECK: %[[SHL:.*]] = shl i64 %[[VSCALE]], 4 + // CHECK: ret i64 %[[SHL]] + return SVE_ACLE_FUNC(svlen,_s8,,)(op); +} + +uint64_t test_svlen_s16(svint16_t op) +{ + // CHECK-LABEL: test_svlen_s16 + // CHECK: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64() + // CHECK: %[[SHL:.*]] = shl i64 %[[VSCALE]], 3 + // CHECK: ret i64 %[[SHL]] + return SVE_ACLE_FUNC(svlen,_s16,,)(op); +} + +uint64_t test_svlen_s32(svint32_t op) +{ + // CHECK-LABEL: test_svlen_s32 + // CHECK: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64() + // CHECK: %[[SHL:.*]] = shl i64 %[[VSCALE]], 2 + // CHECK: ret i64 %[[SHL]] + return SVE_ACLE_FUNC(svlen,_s32,,)(op); +} + +uint64_t test_svlen_s64(svint64_t op) +{ + // CHECK-LABEL: test_svlen_s64 + // CHECK: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64() + // CHECK: %[[SHL:.*]] = shl i64 %[[VSCALE]], 1 + // CHECK: ret i64 %[[SHL]] + return SVE_ACLE_FUNC(svlen,_s64,,)(op); +} + +uint64_t test_svlen_u8(svuint8_t op) +{ + // CHECK-LABEL: test_svlen_u8 + // CHECK: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64() + // CHECK: %[[SHL:.*]] = shl i64 %[[VSCALE]], 4 + // CHECK: ret i64 %[[SHL]] + return SVE_ACLE_FUNC(svlen,_u8,,)(op); +} + +uint64_t test_svlen_u16(svuint16_t op) +{ + // CHECK-LABEL: test_svlen_u16 + // CHECK: %[[VSCALE:.*]] = call i64 @
[clang] c577201 - [SveEmitter] Add builtins for bitcount operations
Author: Sander de Smalen Date: 2020-04-28T13:53:54+01:00 New Revision: c57720125fa7596be2403e7810957655d04dfece URL: https://github.com/llvm/llvm-project/commit/c57720125fa7596be2403e7810957655d04dfece DIFF: https://github.com/llvm/llvm-project/commit/c57720125fa7596be2403e7810957655d04dfece.diff LOG: [SveEmitter] Add builtins for bitcount operations This patch adds builtins for svcls, svclz and svcnt. For merging (_m), zeroing (_z) and don't-care (_x) predication. Added: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cls.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clz.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnt.c Modified: clang/include/clang/Basic/arm_sve.td Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index eaee860bd3c0..03aef14cc2d7 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -702,6 +702,19 @@ def SVWHILELS_U64 : SInst<"svwhilele_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNon def SVWHILELT_S32 : SInst<"svwhilelt_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilelt", [IsOverloadWhile]>; def SVWHILELT_S64 : SInst<"svwhilelt_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilelt", [IsOverloadWhile]>; + +// Counting bit + +multiclass SInstCLS flags=[]> { + def _M : SInst; + def _X : SInst; + def _Z : SInst; +} + +defm SVCLS : SInstCLS<"svcls", "csil","aarch64_sve_cls">; +defm SVCLZ : SInstCLS<"svclz", "csilUcUsUiUl","aarch64_sve_clz">; +defm SVCNT : SInstCLS<"svcnt", "csilUcUsUiUlhfd", "aarch64_sve_cnt">; + // Floating-point arithmetic diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cls.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cls.c new file mode 100644 index ..e464ac5a2ff6 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cls.c @@ -0,0 +1,116 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns 
-S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svuint8_t test_svcls_s8_z(svbool_t pg, svint8_t op) +{ + // CHECK-LABEL: test_svcls_s8_z + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cls.nxv16i8( zeroinitializer, %pg, %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcls,_s8,_z,)(pg, op); +} + +svuint16_t test_svcls_s16_z(svbool_t pg, svint16_t op) +{ + // CHECK-LABEL: test_svcls_s16_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cls.nxv8i16( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcls,_s16,_z,)(pg, op); +} + +svuint32_t test_svcls_s32_z(svbool_t pg, svint32_t op) +{ + // CHECK-LABEL: test_svcls_s32_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cls.nxv4i32( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcls,_s32,_z,)(pg, op); +} + +svuint64_t test_svcls_s64_z(svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svcls_s64_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cls.nxv2i64( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcls,_s64,_z,)(pg, op); +} + +svuint8_t test_svcls_s8_m(svuint8_t inactive, svbool_t pg, svint8_t op) +{ + // CHECK-LABEL: test_svcls_s8_m + // CHECK: %[[INTRINSIC:.*]] = call 
@llvm.aarch64.sve.cls.nxv16i8( %inactive, %pg, %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcls,_s8,_m,)(inactive, pg, op); +} + +svuint16_t test_svcls_s16_m(svuint16_t inactive, svbool_t pg, svint16_t op) +{ + // CHECK-LABEL: test_svcls_s16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cls.nxv8i16( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svcls,_s16,_m,)(inactive, pg, op); +} + +svuint32_t test_svcls_s32_m(svuint32_t inactive, svbool_t pg, svint32_
[clang] 476ba81 - [SveEmitter] Add builtins for zero/sign extension and bit/byte reversal.
Author: Sander de Smalen Date: 2020-04-28T14:06:51+01:00 New Revision: 476ba8127bfa4553bf5ce1654cd844803e8d6dea URL: https://github.com/llvm/llvm-project/commit/476ba8127bfa4553bf5ce1654cd844803e8d6dea DIFF: https://github.com/llvm/llvm-project/commit/476ba8127bfa4553bf5ce1654cd844803e8d6dea.diff LOG: [SveEmitter] Add builtins for zero/sign extension and bit/byte reversal. This patch adds builtins for predicated unary builtins svext[bhw] and svrev[bhw] and svrbit. Added: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_extb.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_exth.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_extw.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rbit.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_revb.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_revh.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_revw.c Modified: clang/include/clang/Basic/arm_sve.td Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 03aef14cc2d7..e2c01023e55d 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -715,6 +715,24 @@ defm SVCLS : SInstCLS<"svcls", "csil", "aarch64_sve_cls">; defm SVCLZ : SInstCLS<"svclz", "csilUcUsUiUl","aarch64_sve_clz">; defm SVCNT : SInstCLS<"svcnt", "csilUcUsUiUlhfd", "aarch64_sve_cnt">; + +// Conversion + +defm SVEXTB_S : SInstZPZ<"svextb", "sil","aarch64_sve_sxtb">; +defm SVEXTB_U : SInstZPZ<"svextb", "UsUiUl", "aarch64_sve_uxtb">; +defm SVEXTH_S : SInstZPZ<"svexth", "il", "aarch64_sve_sxth">; +defm SVEXTH_U : SInstZPZ<"svexth", "UiUl", "aarch64_sve_uxth">; +defm SVEXTW_S : SInstZPZ<"svextw", "l", "aarch64_sve_sxtw">; +defm SVEXTW_U : SInstZPZ<"svextw", "Ul", "aarch64_sve_uxtw">; + + +// Reversal + +defm SVRBIT : SInstZPZ<"svrbit", "csilUcUsUiUl", "aarch64_sve_rbit">; +defm SVREVB : SInstZPZ<"svrevb", "silUsUiUl","aarch64_sve_revb">; +defm SVREVH : SInstZPZ<"svrevh", "ilUiUl", "aarch64_sve_revh">; +defm 
SVREVW : SInstZPZ<"svrevw", "lUl", "aarch64_sve_revw">; + // Floating-point arithmetic diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_extb.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_extb.c new file mode 100644 index ..720b81a28a18 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_extb.c @@ -0,0 +1,173 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint16_t test_svextb_s16_z(svbool_t pg, svint16_t op) +{ + // CHECK-LABEL: test_svextb_s16_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.sxtb.nxv8i16( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svextb,_s16,_z,)(pg, op); +} + +svint32_t test_svextb_s32_z(svbool_t pg, svint32_t op) +{ + // CHECK-LABEL: test_svextb_s32_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.sxtb.nxv4i32( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svextb,_s32,_z,)(pg, op); +} + +svint64_t test_svextb_s64_z(svbool_t pg, svint64_t op) +{ + // CHECK-LABEL: test_svextb_s64_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.sxtb.nxv2i64( zeroinitializer, %[[PG]], %op) + // CHECK: 
ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svextb,_s64,_z,)(pg, op); +} + +svuint16_t test_svextb_u16_z(svbool_t pg, svuint16_t op) +{ + // CHECK-LABEL: test_svextb_u16_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.uxtb.nxv8i16( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svextb,_u16,_z,)(pg, op); +} + +svuint32_t test_svextb_u32_z(svbool_t pg, svuint32_t op) +{
[clang] 42a56bf - [SveEmitter] Add builtins for gather prefetches
Author: Sander de Smalen Date: 2020-04-29T11:52:49+01:00 New Revision: 42a56bf63f699a620a57c34474510d9937ebf715 URL: https://github.com/llvm/llvm-project/commit/42a56bf63f699a620a57c34474510d9937ebf715 DIFF: https://github.com/llvm/llvm-project/commit/42a56bf63f699a620a57c34474510d9937ebf715.diff LOG: [SveEmitter] Add builtins for gather prefetches Patch by Andrzej Warzynski Reviewed By: efriedma Tags: #clang Differential Revision: https://reviews.llvm.org/D78677 Added: Modified: clang/include/clang/Basic/TargetBuiltins.h clang/include/clang/Basic/arm_sve.td clang/lib/CodeGen/CGBuiltin.cpp clang/lib/CodeGen/CodeGenFunction.h clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfb.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfd.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfh.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfw.c clang/utils/TableGen/SveEmitter.cpp Removed: diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h index 0a06ba3e5ecc..bf07a8950f28 100644 --- a/clang/include/clang/Basic/TargetBuiltins.h +++ b/clang/include/clang/Basic/TargetBuiltins.h @@ -242,6 +242,7 @@ namespace clang { bool isReverseCompare() const { return Flags & ReverseCompare; } bool isAppendSVALL() const { return Flags & IsAppendSVALL; } bool isInsertOp1SVALL() const { return Flags & IsInsertOp1SVALL; } +bool isGatherPrefetch() const { return Flags & IsGatherPrefetch; } uint64_t getBits() const { return Flags; } bool isFlagSet(uint64_t Flag) const { return Flags & Flag; } diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 20a055bc6d38..a028487a96b9 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -98,6 +98,8 @@ // G: pointer to uint32_t // H: pointer to uint64_t +// Q: const pointer to void + // S: const pointer to int8_t // T: const pointer to int16_t // U: const pointer to int32_t @@ -185,6 +187,7 @@ def IsByteIndexed : 
FlagType<0x0100>; def IsAppendSVALL : FlagType<0x0200>; // Appends SV_ALL as the last operand. def IsInsertOp1SVALL : FlagType<0x0400>; // Inserts SV_ALL as the second operand. def IsPrefetch: FlagType<0x0800>; // Contiguous prefetches. +def IsGatherPrefetch : FlagType<0x1000>; def ReverseCompare: FlagType<0x2000>; // Compare operands must be swapped. // These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h @@ -569,6 +572,39 @@ def SVPRFH_VNUM : MInst<"svprfh_vnum", "vPclJ", "s", [IsPrefetch], MemEltTyInt16 def SVPRFW_VNUM : MInst<"svprfw_vnum", "vPclJ", "i", [IsPrefetch], MemEltTyInt32, "aarch64_sve_prf">; def SVPRFD_VNUM : MInst<"svprfd_vnum", "vPclJ", "l", [IsPrefetch], MemEltTyInt64, "aarch64_sve_prf">; +// Prefetch (Vector bases) +def SVPRFB_GATHER_BASES : MInst<"svprfb_gather[_{2}base]", "vPdJ", "UiUl", [IsGatherPrefetch], MemEltTyInt8, "aarch64_sve_prfb_gather_scalar_offset">; +def SVPRFH_GATHER_BASES : MInst<"svprfh_gather[_{2}base]", "vPdJ", "UiUl", [IsGatherPrefetch], MemEltTyInt16, "aarch64_sve_prfh_gather_scalar_offset">; +def SVPRFW_GATHER_BASES : MInst<"svprfw_gather[_{2}base]", "vPdJ", "UiUl", [IsGatherPrefetch], MemEltTyInt32, "aarch64_sve_prfw_gather_scalar_offset">; +def SVPRFD_GATHER_BASES : MInst<"svprfd_gather[_{2}base]", "vPdJ", "UiUl", [IsGatherPrefetch], MemEltTyInt64, "aarch64_sve_prfd_gather_scalar_offset">; + +// Prefetch (Scalar base, Vector offsets) +def SVPRFB_GATHER_32B_OFFSETS_S : MInst<"svprfb_gather_[{3}]offset", "vPQdJ", "i", [IsGatherPrefetch], MemEltTyInt8, "aarch64_sve_prfb_gather_sxtw_index">; +def SVPRFH_GATHER_32B_OFFSETS_S : MInst<"svprfh_gather_[{3}]index", "vPQdJ", "i", [IsGatherPrefetch], MemEltTyInt16, "aarch64_sve_prfh_gather_sxtw_index">; +def SVPRFW_GATHER_32B_OFFSETS_S : MInst<"svprfw_gather_[{3}]index", "vPQdJ", "i", [IsGatherPrefetch], MemEltTyInt32, "aarch64_sve_prfw_gather_sxtw_index">; +def SVPRFD_GATHER_32B_OFFSETS_S : MInst<"svprfd_gather_[{3}]index", "vPQdJ", "i", 
[IsGatherPrefetch], MemEltTyInt64, "aarch64_sve_prfd_gather_sxtw_index">; + +def SVPRFB_GATHER_64B_OFFSETS_S : MInst<"svprfb_gather_[{3}]offset", "vPQdJ", "l", [IsGatherPrefetch], MemEltTyInt8, "aarch64_sve_prfb_gather_index">; +def SVPRFH_GATHER_64B_OFFSETS_S : MInst<"svprfh_gather_[{3}]index", "vPQdJ", "l", [IsGatherPrefetch], MemEltTyInt16, "aarch64_sve_prfh_gather_index">; +def SVPRFW_GATHER_64B_OFFSETS_S : MInst<"svprfw_gather_[{3}]index", "vPQdJ", "l", [IsGatherPrefetch], MemEltTyInt32, "aarch64_sve_prfw_gather_index">; +def SVPRFD_GATHER_64B_OFFSETS_S : MInst<"svprfd_gather_[{3}]index", "vPQdJ", "l", [IsGatherPrefetch], MemEltTyInt64, "aarch64_sve_prfd_gather_index">; + +d
[clang] a4dac6d - [SveEmitter] Add builtins for svmov_b and svnot_b.
Author: Sander de Smalen Date: 2020-04-29T13:33:18+01:00 New Revision: a4dac6d4e0eae47eaa51bac72c048b1e3cd89c8b URL: https://github.com/llvm/llvm-project/commit/a4dac6d4e0eae47eaa51bac72c048b1e3cd89c8b DIFF: https://github.com/llvm/llvm-project/commit/a4dac6d4e0eae47eaa51bac72c048b1e3cd89c8b.diff LOG: [SveEmitter] Add builtins for svmov_b and svnot_b. These are custom expanded in CGBuiltin: svmov_b_z(pg, op) <=> svand_b_z(pg, op, op) svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg) Reviewers: SjoerdMeijer, efriedma, ctetreau, rengolin Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D79039 Added: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mov.c Modified: clang/include/clang/Basic/arm_sve.td clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_not.c Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index a028487a96b9..5eaeadeb06d3 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -984,8 +984,10 @@ def SVPTRUE : SInst<"svptrue_{d}", "P", "PcPsPiPl", MergeNone, "aarch64 def SVAND_B_Z : SInst<"svand[_b]_z", "", "Pc", MergeNone, "aarch64_sve_and_z">; def SVBIC_B_Z : SInst<"svbic[_b]_z", "", "Pc", MergeNone, "aarch64_sve_bic_z">; def SVEOR_B_Z : SInst<"sveor[_b]_z", "", "Pc", MergeNone, "aarch64_sve_eor_z">; +def SVMOV_B_Z : SInst<"svmov[_b]_z", "PPP", "Pc", MergeNone>; // Uses custom expansion def SVNAND_B_Z : SInst<"svnand[_b]_z", "", "Pc", MergeNone, "aarch64_sve_nand_z">; def SVNOR_B_Z : SInst<"svnor[_b]_z", "", "Pc", MergeNone, "aarch64_sve_nor_z">; +def SVNOT_B_Z : SInst<"svnot[_b]_z", "PPP", "Pc", MergeNone>; // Uses custom expansion def SVORN_B_Z : SInst<"svorn[_b]_z", "", "Pc", MergeNone, "aarch64_sve_orn_z">; def SVORR_B_Z : SInst<"svorr[_b]_z", "", "Pc", MergeNone, "aarch64_sve_orr_z">; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index e0b7e8a2dd78..2fa9ed9b2443 100644 --- 
a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -7990,6 +7990,23 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, switch (BuiltinID) { default: return nullptr; + + case SVE::BI__builtin_sve_svmov_b_z: { +// svmov_b_z(pg, op) <=> svand_b_z(pg, op, op) +SVETypeFlags TypeFlags(Builtin->TypeModifier); +llvm::Type* OverloadedTy = getSVEType(TypeFlags); +Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy); +return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]}); + } + + case SVE::BI__builtin_sve_svnot_b_z: { +// svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg) +SVETypeFlags TypeFlags(Builtin->TypeModifier); +llvm::Type* OverloadedTy = getSVEType(TypeFlags); +Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy); +return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]}); + } + case SVE::BI__builtin_sve_svpfalse_b: return ConstantInt::getFalse(Ty); diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mov.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mov.c new file mode 100644 index ..609c3d22b472 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mov.c @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svbool_t test_svmov_b_z(svbool_t pg, svbool_t op) +{ + // CHECK-LABEL: test_svmov_b_z + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.and.z.nxv16i1( %pg, %op, %op) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svmov,_b,_z,)(pg, op); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_not.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_not.c index 8ce0dd47c6cf..45703d895d3c 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_not.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_not.c @@ -219,3 +219,11 @@ svuint64_t test_svnot_u64_x(svbool_t pg, svuint64_t op) // CHECK: ret %[[INTRINSIC]] return SVE_ACLE_FUNC(svnot,_u64,_x,)(pg, op); } + +svbool_t test_svnot_b_z(svbool_t pg, svbool_t op) +{ + // CHECK-LABEL: test_svnot_b_z + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.eor.z.nxv16
[clang] 334931f - [SveEmitter] Add builtins for shifts.
Author: Sander de Smalen Date: 2020-05-01T22:27:24+01:00 New Revision: 334931f54b959c3ef03386770877789d4167d24f URL: https://github.com/llvm/llvm-project/commit/334931f54b959c3ef03386770877789d4167d24f DIFF: https://github.com/llvm/llvm-project/commit/334931f54b959c3ef03386770877789d4167d24f.diff LOG: [SveEmitter] Add builtins for shifts. This patch adds builtins for: - svasrd - svlsl - svlsr Added: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_asr.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lsl.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lsr.c Modified: clang/include/clang/Basic/arm_sve.td clang/utils/TableGen/SveEmitter.cpp Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index c794441dbc34..013357c3de9b 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -73,8 +73,11 @@ // // w: vector of element type promoted to 64bits, vector maintains //signedness of its element type. 
+// f: element type promoted to uint64_t (splat to vector type) // j: element type promoted to 64bits (splat to vector type) // K: element type bitcast to a signed integer (splat to vector type) +// L: element type bitcast to an unsigned integer (splat to vector type) +// // i: constant uint64_t // k: int32_t // l: int64_t @@ -677,6 +680,29 @@ defm SVNOT : SInstZPZ<"svnot", "csilUcUsUiUl", "aarch64_sve_not">; // Shifts + +multiclass SInst_SHIFT { + def _M : SInst; + def _X : SInst; + def _Z : SInst; + + def _N_M : SInst; + def _N_X : SInst; + def _N_Z : SInst; + + def _WIDE_M : SInst; + def _WIDE_X : SInst; + def _WIDE_Z : SInst; + + def _WIDE_N_M : SInst; + def _WIDE_N_X : SInst; + def _WIDE_N_Z : SInst; +} + +defm SVASR : SInst_SHIFT<"svasr", "aarch64_sve_asr", "csil", "csi">; +defm SVLSL : SInst_SHIFT<"svlsl", "aarch64_sve_lsl", "csilUcUsUiUl", "csiUcUsUi">; +defm SVLSR : SInst_SHIFT<"svlsr", "aarch64_sve_lsr", "UcUsUiUl", "UcUsUi">; + def SVASRD_M : SInst<"svasrd[_n_{d}]", "dPdi", "csil",MergeOp1, "aarch64_sve_asrd", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_asr.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_asr.c new file mode 100644 index ..2fb80acc2822 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_asr.c @@ -0,0 +1,412 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint8_t test_svasr_s8_z(svbool_t pg, svint8_t op1, svuint8_t op2) +{ + // CHECK-LABEL: test_svasr_s8_z + // CHECK: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.nxv16i8( %pg, %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr,_s8,_z,)(pg, op1, op2); +} + +svint16_t test_svasr_s16_z(svbool_t pg, svint16_t op1, svuint16_t op2) +{ + // CHECK-LABEL: test_svasr_s16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.nxv8i16( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr,_s16,_z,)(pg, op1, op2); +} + +svint32_t test_svasr_s32_z(svbool_t pg, svint32_t op1, svuint32_t op2) +{ + // CHECK-LABEL: test_svasr_s32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asr.nxv4i32( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasr,_s32,_z,)(pg, op1, op2); +} + +svint64_t test_svasr_s64_z(svbool_t pg, svint64_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svasr_s64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1
[clang] 0863e94 - [AArch64] Add NVIDIA Carmel support
Author: Raul Tambre Date: 2020-05-04T13:52:30+01:00 New Revision: 0863e94ebd87f4dea7a457c8441979ec4151fedb URL: https://github.com/llvm/llvm-project/commit/0863e94ebd87f4dea7a457c8441979ec4151fedb DIFF: https://github.com/llvm/llvm-project/commit/0863e94ebd87f4dea7a457c8441979ec4151fedb.diff LOG: [AArch64] Add NVIDIA Carmel support Summary: NVIDIA's Carmel ARM64 cores are used in Tegra194 chips found in Jetson AGX Xavier, DRIVE AGX Xavier and DRIVE AGX Pegasus. References: * https://devblogs.nvidia.com/nvidia-jetson-agx-xavier-32-teraops-ai-robotics/#h.huq9xtg75a5e * NVIDIA Xavier Series System-on-Chip Technical Reference Manual 1.3 (https://developer.nvidia.com/embedded/downloads#?search=Xavier%20Series%20SoC%20Technical%20Reference%20Manual) Reviewers: sdesmalen, paquette Reviewed By: sdesmalen Subscribers: llvm-commits, ianshmean, kristof.beyls, hiraditya, jfb, danielkiss, cfe-commits, t.p.northover Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D77940 Added: Modified: clang/test/Driver/aarch64-cpus.c clang/test/Preprocessor/aarch64-target-features.c llvm/include/llvm/Support/AArch64TargetParser.def llvm/lib/Support/Host.cpp llvm/lib/Target/AArch64/AArch64.td llvm/lib/Target/AArch64/AArch64Subtarget.cpp llvm/lib/Target/AArch64/AArch64Subtarget.h llvm/test/CodeGen/AArch64/cpus.ll llvm/unittests/Support/Host.cpp llvm/unittests/Support/TargetParserTest.cpp Removed: diff --git a/clang/test/Driver/aarch64-cpus.c b/clang/test/Driver/aarch64-cpus.c index cf12a5155689..d77ab3782838 100644 --- a/clang/test/Driver/aarch64-cpus.c +++ b/clang/test/Driver/aarch64-cpus.c @@ -283,6 +283,20 @@ // ARM64-A64FX: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "a64fx" // ARM64-A64FX-TUNE: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "generic" +// RUN: %clang -target aarch64 -mcpu=carmel -### -c %s 2>&1 | FileCheck -check-prefix=CARMEL %s +// RUN: %clang -target aarch64 -mlittle-endian -mcpu=carmel -### -c %s 2>&1 | FileCheck -check-prefix=CARMEL %s +// 
RUN: %clang -target aarch64 -mtune=carmel -### -c %s 2>&1 | FileCheck -check-prefix=CARMEL-TUNE %s +// RUN: %clang -target aarch64 -mlittle-endian -mtune=carmel -### -c %s 2>&1 | FileCheck -check-prefix=CARMEL-TUNE %s +// CARMEL: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "carmel" +// CARMEL-TUNE: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" + +// RUN: %clang -target arm64 -mcpu=carmel -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CARMEL %s +// RUN: %clang -target arm64 -mlittle-endian -mcpu=carmel -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CARMEL %s +// RUN: %clang -target arm64 -mtune=carmel -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CARMEL-TUNE %s +// RUN: %clang -target arm64 -mlittle-endian -mtune=carmel -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CARMEL-TUNE %s +// ARM64-CARMEL: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "carmel" +// ARM64-CARMEL-TUNE: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "generic" + // RUN: %clang -target aarch64_be -### -c %s 2>&1 | FileCheck -check-prefix=GENERIC-BE %s // RUN: %clang -target aarch64 -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=GENERIC-BE %s // RUN: %clang -target aarch64_be -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=GENERIC-BE %s diff --git a/clang/test/Preprocessor/aarch64-target-features.c b/clang/test/Preprocessor/aarch64-target-features.c index 9cb12f8afb32..8ce6b8a8a45d 100644 --- a/clang/test/Preprocessor/aarch64-target-features.c +++ b/clang/test/Preprocessor/aarch64-target-features.c @@ -163,6 +163,7 @@ // RUN: %clang -target aarch64 -mcpu=kryo -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MCPU-KRYO %s // RUN: %clang -target aarch64 -mcpu=thunderx2t99 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MCPU-THUNDERX2T99 %s // RUN: %clang -target aarch64 -mcpu=a64fx -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MCPU-A64FX %s +// RUN: %clang -target aarch64 -mcpu=carmel -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MCPU-CARMEL 
%s // CHECK-MCPU-APPLE-A7: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "+fp-armv8" "-target-feature" "+neon" "-target-feature" "+crypto" "-target-feature" "+zcm" "-target-feature" "+zcz" "-target-feature" "+sha2" "-target-feature" "+aes" // CHECK-MCPU-APPLE-A10: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "+fp-armv8" "-target-feature" "+neon" "-target-feature" "+crc" "-target-feature" "+crypto" "-target-feature" "+rdm" "-target-feature" "+zcm" "-target-feature" "+zcz" "-target-feature" "+sha2" "-target-feature" "+aes" // CHECK-MCPU-APPLE-A11: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "+v8.2a" "-target-feature" "+fp-armv8" "-target-feature" "+neon" "-target-feature" "+crc" "-target-feature" "+crypto" "-target-feature" "+ras" "-target-feature" "+lse" "
[clang] 54fa46a - [SveEmitter] Add builtins for Int & FP reductions
Author: Sander de Smalen Date: 2020-05-04T19:50:16+01:00 New Revision: 54fa46aa0a82bd281d0ba31fad69a227de4a622c URL: https://github.com/llvm/llvm-project/commit/54fa46aa0a82bd281d0ba31fad69a227de4a622c DIFF: https://github.com/llvm/llvm-project/commit/54fa46aa0a82bd281d0ba31fad69a227de4a622c.diff LOG: [SveEmitter] Add builtins for Int & FP reductions This patch adds integer builtins for: - svaddv, svandv, sveorv, svmaxv, svminv, svorv. And FP builtins for: - svadda, svaddv, svmaxv, svmaxnmv, svminv, svminnmv Added: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_adda.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_addv.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_andv.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_eorv.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_maxnmv.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_maxv.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_minnmv.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_minv.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_orv.c Modified: clang/include/clang/Basic/arm_sve.td Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 013357c3de9b..bde26aed43f6 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -705,6 +705,19 @@ defm SVLSR : SInst_SHIFT<"svlsr", "aarch64_sve_lsr", "UcUsUiUl", "UcUsUi">; def SVASRD_M : SInst<"svasrd[_n_{d}]", "dPdi", "csil",MergeOp1, "aarch64_sve_asrd", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; + +// Integer reductions + +def SVADDV_S : SInst<"svaddv[_{d}]", "lPd", "csil", MergeNone, "aarch64_sve_saddv">; +def SVADDV_U : SInst<"svaddv[_{d}]", "nPd", "UcUsUiUl", MergeNone, "aarch64_sve_uaddv">; +def SVANDV : SInst<"svandv[_{d}]", "sPd", "csilUcUsUiUl", MergeNone, "aarch64_sve_andv">; +def SVEORV : SInst<"sveorv[_{d}]", "sPd", "csilUcUsUiUl", MergeNone, "aarch64_sve_eorv">; +def SVMAXV_S : SInst<"svmaxv[_{d}]", "sPd", "csil", MergeNone, 
"aarch64_sve_smaxv">; +def SVMAXV_U : SInst<"svmaxv[_{d}]", "sPd", "UcUsUiUl", MergeNone, "aarch64_sve_umaxv">; +def SVMINV_S : SInst<"svminv[_{d}]", "sPd", "csil", MergeNone, "aarch64_sve_sminv">; +def SVMINV_U : SInst<"svminv[_{d}]", "sPd", "UcUsUiUl", MergeNone, "aarch64_sve_uminv">; +def SVORV: SInst<"svorv[_{d}]", "sPd", "csilUcUsUiUl", MergeNone, "aarch64_sve_orv">; + // Integer comparisons @@ -876,6 +889,15 @@ def SVRECPS : SInst<"svrecps[_{d}]", "ddd", "hfd", MergeNone, "aarch64_sve_fre def SVRSQRTE : SInst<"svrsqrte[_{d}]", "dd", "hfd", MergeNone, "aarch64_sve_frsqrte_x">; def SVRSQRTS : SInst<"svrsqrts[_{d}]", "ddd", "hfd", MergeNone, "aarch64_sve_frsqrts_x">; + +// Floating-point reductions + +def SVFADDA : SInst<"svadda[_{d}]", "sPsd", "hfd", MergeNone, "aarch64_sve_fadda">; +def SVFADDV : SInst<"svaddv[_{d}]", "sPd", "hfd", MergeNone, "aarch64_sve_faddv">; +def SVFMAXV : SInst<"svmaxv[_{d}]", "sPd", "hfd", MergeNone, "aarch64_sve_fmaxv">; +def SVFMAXNMV : SInst<"svmaxnmv[_{d}]", "sPd", "hfd", MergeNone, "aarch64_sve_fmaxnmv">; +def SVFMINV : SInst<"svminv[_{d}]", "sPd", "hfd", MergeNone, "aarch64_sve_fminv">; +def SVFMINNMV : SInst<"svminnmv[_{d}]", "sPd", "hfd", MergeNone, "aarch64_sve_fminnmv">; // Floating-point comparisons diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_adda.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_adda.c new file mode 100644 index ..6ac6e5d0d618 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_adda.c @@ -0,0 +1,38 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... 
macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +float16_t test_svadda_f16(svbool_t pg, float16_t initial, svfloat16_t op) +{ + // CHECK-LABEL: test_svadda_f16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call half
[clang] 6b90a68 - [SveEmitter] Add builtins for svdupq and svdupq_lane
Author: Sander de Smalen Date: 2020-05-04T20:38:47+01:00 New Revision: 6b90a6887d25e3375bb916a3ed09f7ccec819d0c URL: https://github.com/llvm/llvm-project/commit/6b90a6887d25e3375bb916a3ed09f7ccec819d0c DIFF: https://github.com/llvm/llvm-project/commit/6b90a6887d25e3375bb916a3ed09f7ccec819d0c.diff LOG: [SveEmitter] Add builtins for svdupq and svdupq_lane * svdupq builtins that duplicate scalars to every quadword of a vector are defined using builtins for svld1rq (load and replicate quadword). * svdupq builtins that duplicate boolean values to fill a predicate vector are defined using `svcmpne`. Reviewers: SjoerdMeijer, efriedma, ctetreau Reviewed By: efriedma Tags: #clang Differential Revision: https://reviews.llvm.org/D78750 Added: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq.c Modified: clang/include/clang/Basic/arm_sve.td clang/lib/CodeGen/CGBuiltin.cpp clang/lib/CodeGen/CodeGenFunction.h clang/utils/TableGen/SveEmitter.cpp Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index bde26aed43f6..2d2a09d4524d 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -610,6 +610,13 @@ def SVPRFW_GATHER_BASES_OFFSET : MInst<"svprfw_gather[_{2}base]_index", "vPdlJ" def SVPRFD_GATHER_BASES_OFFSET : MInst<"svprfd_gather[_{2}base]_index", "vPdlJ", "UiUl", [IsGatherPrefetch], MemEltTyInt64, "aarch64_sve_prfd_gather_scalar_offset">; +// Scalar to vector + +def SVDUPQ_8 : SInst<"svdupq[_n]_{d}", "d", "cUc", MergeNone>; +def SVDUPQ_16 : SInst<"svdupq[_n]_{d}", "d", "sUsh", MergeNone>; +def SVDUPQ_32 : SInst<"svdupq[_n]_{d}", "d", "iUif", MergeNone>; +def SVDUPQ_64 : SInst<"svdupq[_n]_{d}", "dss", "lUld", MergeNone>; + // Integer arithmetic multiclass SInstZPZ flags=[]> { @@ -1034,7 +1041,7 @@ def SVCLASTB : SInst<"svclastb[_{d}]","dPdd", "csilUcUsUiUlhfd", MergeNo def SVCLASTB_N : SInst<"svclastb[_n_{d}]", "sPsd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_clastb_n">; def 
SVCOMPACT: SInst<"svcompact[_{d}]", "dPd", "ilUiUlfd", MergeNone, "aarch64_sve_compact">; // SVDUP_LANE(to land in D78750) -// SVDUPQ_LANE (to land in D78750) +def SVDUPQ_LANE : SInst<"svdupq_lane[_{d}]", "ddn", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_dupq_lane">; def SVEXT: SInst<"svext[_{d}]", "dddi", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ext", [], [ImmCheck<2, ImmCheckExtract, 1>]>; def SVLASTA : SInst<"svlasta[_{d}]", "sPd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_lasta">; def SVLASTB : SInst<"svlastb[_{d}]", "sPd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_lastb">; @@ -1072,6 +1079,12 @@ def SVPFALSE : SInst<"svpfalse[_b]", "P", "", MergeNone, "", [IsOverloadNone]>; def SVPTRUE_PAT : SInst<"svptrue_pat_{d}", "PI", "PcPsPiPl", MergeNone, "aarch64_sve_ptrue">; def SVPTRUE : SInst<"svptrue_{d}", "P", "PcPsPiPl", MergeNone, "aarch64_sve_ptrue", [IsAppendSVALL]>; +def SVDUPQ_B8 : SInst<"svdupq[_n]_{d}", "P", "Pc", MergeNone>; +def SVDUPQ_B16 : SInst<"svdupq[_n]_{d}", "P", "Ps", MergeNone>; +def SVDUPQ_B32 : SInst<"svdupq[_n]_{d}", "P", "Pi", MergeNone>; +def SVDUPQ_B64 : SInst<"svdupq[_n]_{d}", "Pss", "Pl", MergeNone>; + + // Predicate operations diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 94c0adfdf4af..797fcc6deea3 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -7562,6 +7562,15 @@ CodeGenFunction::getSVEPredType(SVETypeFlags TypeFlags) { return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4); case SVETypeFlags::EltTyFloat64: return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2); + + case SVETypeFlags::EltTyBool8: +return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16); + case SVETypeFlags::EltTyBool16: +return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8); + case SVETypeFlags::EltTyBool32: +return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4); + case SVETypeFlags::EltTyBool64: +return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2); 
} } @@ -7599,6 +7608,12 @@ CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) { } } +llvm::Value *CodeGenFunction::EmitSVEAllTruePred(SVETypeFlags TypeFlags) { + Function *Ptrue = + CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags)); + return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)}); +} + constexpr unsigned SVEBitsPerBlock = 128; static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *
[clang] 90f3f62 - Revert "[SveEmitter] Add builtins for svdupq and svdupq_lane"
Author: Sander de Smalen Date: 2020-05-04T21:31:55+01:00 New Revision: 90f3f62cb087782fe2608e95d686c29067281b6e URL: https://github.com/llvm/llvm-project/commit/90f3f62cb087782fe2608e95d686c29067281b6e DIFF: https://github.com/llvm/llvm-project/commit/90f3f62cb087782fe2608e95d686c29067281b6e.diff LOG: Revert "[SveEmitter] Add builtins for svdupq and svdupq_lane" It seems this patch broke some buildbots, so reverting until I have had a chance to investigate. This reverts commit 6b90a6887d25e3375bb916a3ed09f7ccec819d0c. Added: Modified: clang/include/clang/Basic/arm_sve.td clang/lib/CodeGen/CGBuiltin.cpp clang/lib/CodeGen/CodeGenFunction.h clang/utils/TableGen/SveEmitter.cpp Removed: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq.c diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 2d2a09d4524d..bde26aed43f6 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -610,13 +610,6 @@ def SVPRFW_GATHER_BASES_OFFSET : MInst<"svprfw_gather[_{2}base]_index", "vPdlJ" def SVPRFD_GATHER_BASES_OFFSET : MInst<"svprfd_gather[_{2}base]_index", "vPdlJ", "UiUl", [IsGatherPrefetch], MemEltTyInt64, "aarch64_sve_prfd_gather_scalar_offset">; -// Scalar to vector - -def SVDUPQ_8 : SInst<"svdupq[_n]_{d}", "d", "cUc", MergeNone>; -def SVDUPQ_16 : SInst<"svdupq[_n]_{d}", "d", "sUsh", MergeNone>; -def SVDUPQ_32 : SInst<"svdupq[_n]_{d}", "d", "iUif", MergeNone>; -def SVDUPQ_64 : SInst<"svdupq[_n]_{d}", "dss", "lUld", MergeNone>; - // Integer arithmetic multiclass SInstZPZ flags=[]> { @@ -1041,7 +1034,7 @@ def SVCLASTB : SInst<"svclastb[_{d}]","dPdd", "csilUcUsUiUlhfd", MergeNo def SVCLASTB_N : SInst<"svclastb[_n_{d}]", "sPsd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_clastb_n">; def SVCOMPACT: SInst<"svcompact[_{d}]", "dPd", "ilUiUlfd", MergeNone, "aarch64_sve_compact">; // SVDUP_LANE(to land in D78750) -def SVDUPQ_LANE : SInst<"svdupq_lane[_{d}]", "ddn", "csilUcUsUiUlhfd", MergeNone, 
"aarch64_sve_dupq_lane">; +// SVDUPQ_LANE (to land in D78750) def SVEXT: SInst<"svext[_{d}]", "dddi", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ext", [], [ImmCheck<2, ImmCheckExtract, 1>]>; def SVLASTA : SInst<"svlasta[_{d}]", "sPd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_lasta">; def SVLASTB : SInst<"svlastb[_{d}]", "sPd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_lastb">; @@ -1079,12 +1072,6 @@ def SVPFALSE : SInst<"svpfalse[_b]", "P", "", MergeNone, "", [IsOverloadNone]>; def SVPTRUE_PAT : SInst<"svptrue_pat_{d}", "PI", "PcPsPiPl", MergeNone, "aarch64_sve_ptrue">; def SVPTRUE : SInst<"svptrue_{d}", "P", "PcPsPiPl", MergeNone, "aarch64_sve_ptrue", [IsAppendSVALL]>; -def SVDUPQ_B8 : SInst<"svdupq[_n]_{d}", "P", "Pc", MergeNone>; -def SVDUPQ_B16 : SInst<"svdupq[_n]_{d}", "P", "Ps", MergeNone>; -def SVDUPQ_B32 : SInst<"svdupq[_n]_{d}", "P", "Pi", MergeNone>; -def SVDUPQ_B64 : SInst<"svdupq[_n]_{d}", "Pss", "Pl", MergeNone>; - - // Predicate operations diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 797fcc6deea3..94c0adfdf4af 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -7562,15 +7562,6 @@ CodeGenFunction::getSVEPredType(SVETypeFlags TypeFlags) { return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4); case SVETypeFlags::EltTyFloat64: return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2); - - case SVETypeFlags::EltTyBool8: -return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16); - case SVETypeFlags::EltTyBool16: -return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8); - case SVETypeFlags::EltTyBool32: -return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4); - case SVETypeFlags::EltTyBool64: -return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2); } } @@ -7608,12 +7599,6 @@ CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) { } } -llvm::Value *CodeGenFunction::EmitSVEAllTruePred(SVETypeFlags TypeFlags) { - Function *Ptrue = - 
CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags)); - return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)}); -} - constexpr unsigned SVEBitsPerBlock = 128; static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) { @@ -8022,64 +8007,6 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]}); } - case SVE::BI__builtin_sve_svdupq_n_b8: - c
[clang] aed6bd6 - Reland D78750: [SveEmitter] Add builtins for svdupq and svdupq_lane
Author: Sander de Smalen Date: 2020-05-05T10:42:11+01:00 New Revision: aed6bd6f4293c07ca4c55bf49943943f4425794b URL: https://github.com/llvm/llvm-project/commit/aed6bd6f4293c07ca4c55bf49943943f4425794b DIFF: https://github.com/llvm/llvm-project/commit/aed6bd6f4293c07ca4c55bf49943943f4425794b.diff LOG: Reland D78750: [SveEmitter] Add builtins for svdupq and svdupq_lane Edit: Changed a few CHECK lines into CHECK-DAG lines. This reverts commit 90f3f62cb087782fe2608e95d686c29067281b6e. Added: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq.c Modified: clang/include/clang/Basic/arm_sve.td clang/lib/CodeGen/CGBuiltin.cpp clang/lib/CodeGen/CodeGenFunction.h clang/utils/TableGen/SveEmitter.cpp Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index bde26aed43f6..2d2a09d4524d 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -610,6 +610,13 @@ def SVPRFW_GATHER_BASES_OFFSET : MInst<"svprfw_gather[_{2}base]_index", "vPdlJ" def SVPRFD_GATHER_BASES_OFFSET : MInst<"svprfd_gather[_{2}base]_index", "vPdlJ", "UiUl", [IsGatherPrefetch], MemEltTyInt64, "aarch64_sve_prfd_gather_scalar_offset">; +// Scalar to vector + +def SVDUPQ_8 : SInst<"svdupq[_n]_{d}", "d", "cUc", MergeNone>; +def SVDUPQ_16 : SInst<"svdupq[_n]_{d}", "d", "sUsh", MergeNone>; +def SVDUPQ_32 : SInst<"svdupq[_n]_{d}", "d", "iUif", MergeNone>; +def SVDUPQ_64 : SInst<"svdupq[_n]_{d}", "dss", "lUld", MergeNone>; + // Integer arithmetic multiclass SInstZPZ flags=[]> { @@ -1034,7 +1041,7 @@ def SVCLASTB : SInst<"svclastb[_{d}]","dPdd", "csilUcUsUiUlhfd", MergeNo def SVCLASTB_N : SInst<"svclastb[_n_{d}]", "sPsd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_clastb_n">; def SVCOMPACT: SInst<"svcompact[_{d}]", "dPd", "ilUiUlfd", MergeNone, "aarch64_sve_compact">; // SVDUP_LANE(to land in D78750) -// SVDUPQ_LANE (to land in D78750) +def SVDUPQ_LANE : SInst<"svdupq_lane[_{d}]", "ddn", "csilUcUsUiUlhfd", MergeNone, 
"aarch64_sve_dupq_lane">; def SVEXT: SInst<"svext[_{d}]", "dddi", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ext", [], [ImmCheck<2, ImmCheckExtract, 1>]>; def SVLASTA : SInst<"svlasta[_{d}]", "sPd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_lasta">; def SVLASTB : SInst<"svlastb[_{d}]", "sPd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_lastb">; @@ -1072,6 +1079,12 @@ def SVPFALSE : SInst<"svpfalse[_b]", "P", "", MergeNone, "", [IsOverloadNone]>; def SVPTRUE_PAT : SInst<"svptrue_pat_{d}", "PI", "PcPsPiPl", MergeNone, "aarch64_sve_ptrue">; def SVPTRUE : SInst<"svptrue_{d}", "P", "PcPsPiPl", MergeNone, "aarch64_sve_ptrue", [IsAppendSVALL]>; +def SVDUPQ_B8 : SInst<"svdupq[_n]_{d}", "P", "Pc", MergeNone>; +def SVDUPQ_B16 : SInst<"svdupq[_n]_{d}", "P", "Ps", MergeNone>; +def SVDUPQ_B32 : SInst<"svdupq[_n]_{d}", "P", "Pi", MergeNone>; +def SVDUPQ_B64 : SInst<"svdupq[_n]_{d}", "Pss", "Pl", MergeNone>; + + // Predicate operations diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 94c0adfdf4af..797fcc6deea3 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -7562,6 +7562,15 @@ CodeGenFunction::getSVEPredType(SVETypeFlags TypeFlags) { return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4); case SVETypeFlags::EltTyFloat64: return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2); + + case SVETypeFlags::EltTyBool8: +return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16); + case SVETypeFlags::EltTyBool16: +return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8); + case SVETypeFlags::EltTyBool32: +return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4); + case SVETypeFlags::EltTyBool64: +return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2); } } @@ -7599,6 +7608,12 @@ CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) { } } +llvm::Value *CodeGenFunction::EmitSVEAllTruePred(SVETypeFlags TypeFlags) { + Function *Ptrue = + CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, 
getSVEPredType(TypeFlags)); + return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)}); +} + constexpr unsigned SVEBitsPerBlock = 128; static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) { @@ -8007,6 +8022,64 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]}); } + case SVE::BI__builtin_sve_svdupq_n_b8: + case SVE::BI__builtin_sve_svdupq_n_b16
[clang] 5ba3290 - [SveEmitter] Add builtins for svreinterpret
Author: Sander de Smalen Date: 2020-05-05T13:04:44+01:00 New Revision: 5ba329059f9c28dcbba912685d00a7982833abcf URL: https://github.com/llvm/llvm-project/commit/5ba329059f9c28dcbba912685d00a7982833abcf DIFF: https://github.com/llvm/llvm-project/commit/5ba329059f9c28dcbba912685d00a7982833abcf.diff LOG: [SveEmitter] Add builtins for svreinterpret The reinterpret builtins are generated separately because they need the cross product of all types, 121 functions in total, which is inconvenient to specify in the arm_sve.td file. Reviewers: SjoerdMeijer, efriedma, ctetreau, rengolin Reviewed By: efriedma Tags: #clang Differential Revision: https://reviews.llvm.org/D78756 Added: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_reinterpret.c clang/test/CodeGen/aarch64-sve-intrinsics/negative/big_endian.c Modified: clang/lib/CodeGen/CGBuiltin.cpp clang/utils/TableGen/SveEmitter.cpp Removed: diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 797fcc6deea3..f5530aac6085 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -7917,6 +7917,19 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); assert(Error == ASTContext::GE_None && "Should not codegen an error"); + llvm::Type *Ty = ConvertType(E->getType()); + if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 && + BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64) { +Value *Val = EmitScalarExpr(E->getArg(0)); +// FIXME: For big endian this needs an additional REV, or needs a separate +// intrinsic that is code-generated as a no-op, because the LLVM bitcast +// instruction is defined as 'bitwise' equivalent from memory point of +// view (when storing/reloading), whereas the svreinterpret builtin +// implements bitwise equivalent cast from register point of view. +// LLVM CodeGen for a bitcast must add an explicit REV for big-endian. 
+return Builder.CreateBitCast(Val, Ty); + } + llvm::SmallVector Ops; for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { if ((ICEArguments & (1 << i)) == 0) @@ -7939,7 +7952,6 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID, AArch64SVEIntrinsicsProvenSorted); SVETypeFlags TypeFlags(Builtin->TypeModifier); - llvm::Type *Ty = ConvertType(E->getType()); if (TypeFlags.isLoad()) return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic, TypeFlags.isZExtReturn()); diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_reinterpret.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_reinterpret.c new file mode 100644 index ..e33a7e0a8504 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_reinterpret.c @@ -0,0 +1,960 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint8_t test_svreinterpret_s8_s8(svint8_t op) +{ + // CHECK-LABEL: test_svreinterpret_s8_s8 + // CHECK: ret %op + return SVE_ACLE_FUNC(svreinterpret_s8,_s8,,)(op); +} + +svint8_t test_svreinterpret_s8_s16(svint16_t op) +{ + // CHECK-LABEL: test_svreinterpret_s8_s16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s8,_s16,,)(op); +} + +svint8_t test_svreinterpret_s8_s32(svint32_t op) +{ + // CHECK-LABEL: test_svreinterpret_s8_s32 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s8,_s32,,)(op); +} + +svint8_t test_svreinterpret_s8_s64(svint64_t op) +{ + // CHECK-LABEL: test_svreinterpret_s8_s64 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s8,_s64,,)(op); +} + +svint8_t test_svreinterpret_s8_u8(svuint8_t op) +{ + // CHECK-LABEL: test_svreinterpret_s8_u8 + // CHECK: ret %op + return SVE_ACLE_FUNC(svreinterpret_s8,_u8,,)(op); +} + +svint8_t test_svreinterpret_s8_u16(svuint16_t op) +{ + // CHECK-LABEL: test_svreinterpret_s8_u16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_
[clang] 3cb8b4c - [SveEmitter] Add builtins for SVE2 Polynomial arithmetic
Author: Sander de Smalen Date: 2020-05-07T11:53:04+01:00 New Revision: 3cb8b4c193c1904543511dfe892475c4e733a778 URL: https://github.com/llvm/llvm-project/commit/3cb8b4c193c1904543511dfe892475c4e733a778 DIFF: https://github.com/llvm/llvm-project/commit/3cb8b4c193c1904543511dfe892475c4e733a778.diff LOG: [SveEmitter] Add builtins for SVE2 Polynomial arithmetic This patch adds builtins for: - sveorbt - sveortb - svpmul - svpmullb, svpmullb_pair - svpmullt, svpmullt_pair The svpmullb and svpmullt builtins are expressed using the svpmullb_pair and svpmullt_pair LLVM IR intrinsics, respectively. Reviewers: SjoerdMeijer, efriedma, rengolin Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D79480 Added: clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_eorbt.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_eortb.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_pmul.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_pmullb.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_pmullt.c Modified: clang/include/clang/Basic/arm_sve.td clang/lib/CodeGen/CGBuiltin.cpp clang/lib/CodeGen/CodeGenFunction.h clang/utils/TableGen/SveEmitter.cpp Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 2d2a09d4524d..32273c3250ae 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -66,6 +66,7 @@ // P: predicate type // s: scalar of element type // a: scalar of element type (splat to vector type) +// R: scalar of 1/2 width element type (splat to vector type) // e: 1/2 width unsigned elements, 2x element count // h: 1/2 width elements, 2x element count // q: 1/4 width elements, 4x element count @@ -1319,6 +1320,26 @@ def SVSTNT1H_SCATTER_INDEX_S : MInst<"svstnt1h_scatter[_{2}base]_index[_{d}]", " def SVSTNT1W_SCATTER_INDEX_S : MInst<"svstnt1w_scatter[_{2}base]_index[_{d}]", "vPuld", "lUl", [IsScatterStore], MemEltTyInt32, 
"aarch64_sve_stnt1_scatter_scalar_offset">; } + +// SVE2 - Polynomial arithmetic + +let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { +def SVEORBT : SInst<"sveorbt[_{d}]", "", "csilUcUsUiUl", MergeNone, "aarch64_sve_eorbt">; +def SVEORBT_N : SInst<"sveorbt[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_eorbt">; +def SVEORTB : SInst<"sveortb[_{d}]", "", "csilUcUsUiUl", MergeNone, "aarch64_sve_eortb">; +def SVEORTB_N : SInst<"sveortb[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_eortb">; +def SVPMUL : SInst<"svpmul[_{d}]", "ddd", "Uc", MergeNone, "aarch64_sve_pmul">; +def SVPMUL_N: SInst<"svpmul[_n_{d}]","dda", "Uc", MergeNone, "aarch64_sve_pmul">; +def SVPMULLB: SInst<"svpmullb[_{d}]","dhh", "UsUl", MergeNone>; +def SVPMULLB_N : SInst<"svpmullb[_n_{d}]", "dhR", "UsUl", MergeNone>; +def SVPMULLB_PAIR : SInst<"svpmullb_pair[_{d}]", "ddd", "UcUi", MergeNone, "aarch64_sve_pmullb_pair">; +def SVPMULLB_PAIR_N : SInst<"svpmullb_pair[_n_{d}]", "dda", "UcUi", MergeNone, "aarch64_sve_pmullb_pair">; +def SVPMULLT: SInst<"svpmullt[_{d}]","dhh", "UsUl", MergeNone>; +def SVPMULLT_N : SInst<"svpmullt[_n_{d}]", "dhR", "UsUl", MergeNone>; +def SVPMULLT_PAIR : SInst<"svpmullt_pair[_{d}]", "ddd", "UcUi", MergeNone, "aarch64_sve_pmullt_pair">; +def SVPMULLT_PAIR_N : SInst<"svpmullt_pair[_n_{d}]", "dda", "UcUi", MergeNone, "aarch64_sve_pmullt_pair">; +} + // SVE2 - Contiguous conflict detection let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 112a0ee7752f..dbe8826454dc 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -7803,6 +7803,27 @@ Value *CodeGenFunction::EmitSVEGatherPrefetch(SVETypeFlags TypeFlags, return Builder.CreateCall(F, Ops); } +// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and +// svpmullt_pair intrinsics, with the exception that their results are bitcast +// to a wider type. 
+Value *CodeGenFunction::EmitSVEPMull(SVETypeFlags TypeFlags, + SmallVectorImpl &Ops, + unsigned BuiltinID) { + // Splat scalar operand to vector (intrinsics with _n infix) + if (TypeFlags.hasSplatOperand()) { +unsigned OpNo = TypeFlags.getSplatOperand(); +Ops[OpNo] = EmitSVEDupX(Ops[OpNo]); + } + + // The pair-wise function has a narrower overloaded type. + Function *F = CGM
[clang] 91cb13f - [SveEmitter] Add builtins for svqadd, svqsub and svdot
Author: Sander de Smalen Date: 2020-05-07T12:28:18+01:00 New Revision: 91cb13f90d0f066a02c2a79db194624f02896fa6 URL: https://github.com/llvm/llvm-project/commit/91cb13f90d0f066a02c2a79db194624f02896fa6 DIFF: https://github.com/llvm/llvm-project/commit/91cb13f90d0f066a02c2a79db194624f02896fa6.diff LOG: [SveEmitter] Add builtins for svqadd, svqsub and svdot This patch adds builtins for saturating add/sub instructions: - svqadd, svqadd_n - svqsub, svqsub_n and builtins for dot product instructions: - svdot, svdot_lane Added: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qadd.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qsub.c Modified: clang/include/clang/Basic/arm_sve.td clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dot.c clang/utils/TableGen/SveEmitter.cpp Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 32273c3250ae..4c73c98bcb52 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -67,6 +67,7 @@ // s: scalar of element type // a: scalar of element type (splat to vector type) // R: scalar of 1/2 width element type (splat to vector type) +// r: scalar of 1/4 width element type (splat to vector type) // e: 1/2 width unsigned elements, 2x element count // h: 1/2 width elements, 2x element count // q: 1/4 width elements, 4x element count @@ -675,6 +676,25 @@ defm SVMLA : SInstZPZZZ<"svmla", "csilUcUsUiUl", "aarch64_sve_mla">; defm SVMLS : SInstZPZZZ<"svmls", "csilUcUsUiUl", "aarch64_sve_mls">; defm SVMSB : SInstZPZZZ<"svmsb", "csilUcUsUiUl", "aarch64_sve_msb">; +//-- + +def SVDOT_S: SInst<"svdot[_{0}]","ddqq", "il", MergeNone, "aarch64_sve_sdot">; +def SVDOT_U: SInst<"svdot[_{0}]","ddqq", "UiUl", MergeNone, "aarch64_sve_udot">; +def SVQADD_S : SInst<"svqadd[_{d}]", "ddd", "csil", MergeNone, "aarch64_sve_sqadd_x">; +def SVQADD_U : SInst<"svqadd[_{d}]", "ddd", "UcUsUiUl", MergeNone, "aarch64_sve_uqadd_x">; +def SVQSUB_S : SInst<"svqsub[_{d}]", "ddd", 
"csil", MergeNone, "aarch64_sve_sqsub_x">; +def SVQSUB_U : SInst<"svqsub[_{d}]", "ddd", "UcUsUiUl", MergeNone, "aarch64_sve_uqsub_x">; + +def SVDOT_N_S : SInst<"svdot[_n_{0}]", "ddqr", "il", MergeNone, "aarch64_sve_sdot">; +def SVDOT_N_U : SInst<"svdot[_n_{0}]", "ddqr", "UiUl", MergeNone, "aarch64_sve_udot">; +def SVQADD_N_S : SInst<"svqadd[_n_{d}]", "dda", "csil", MergeNone, "aarch64_sve_sqadd_x">; +def SVQADD_N_U : SInst<"svqadd[_n_{d}]", "dda", "UcUsUiUl", MergeNone, "aarch64_sve_uqadd_x">; +def SVQSUB_N_S : SInst<"svqsub[_n_{d}]", "dda", "csil", MergeNone, "aarch64_sve_sqsub_x">; +def SVQSUB_N_U : SInst<"svqsub[_n_{d}]", "dda", "UcUsUiUl", MergeNone, "aarch64_sve_uqsub_x">; + +def SVDOT_LANE_S : SInst<"svdot_lane[_{d}]", "ddqqi", "il", MergeNone, "aarch64_sve_sdot_lane", [], [ImmCheck<3, ImmCheckLaneIndexDot, 2>]>; +def SVDOT_LANE_U : SInst<"svdot_lane[_{d}]", "ddqqi", "UiUl", MergeNone, "aarch64_sve_udot_lane", [], [ImmCheck<3, ImmCheckLaneIndexDot, 2>]>; + // Logical operations @@ -1180,11 +1200,6 @@ defm SVQINCW_U : SInst_SAT2<"svqincw", "aarch64_sve_uqincw", UnsignedWord>; defm SVQINCD_S : SInst_SAT2<"svqincd", "aarch64_sve_sqincd", SignedDoubleWord>; defm SVQINCD_U : SInst_SAT2<"svqincd", "aarch64_sve_uqincd", UnsignedDoubleWord>; - -// Integer arithmetic -def SVDOT_LANE_S : SInst<"svdot_lane[_{d}]", "ddqqi", "il", MergeNone, "aarch64_sve_sdot_lane", [], [ImmCheck<3, ImmCheckLaneIndexDot, 2>]>; -def SVDOT_LANE_U : SInst<"svdot_lane[_{d}]", "ddqqi", "UiUl", MergeNone, "aarch64_sve_udot_lane", [], [ImmCheck<3, ImmCheckLaneIndexDot, 2>]>; - // SVE2 WhileGE/GT let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dot.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dot.c index 94e67604fae2..fc12bb672490 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dot.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dot.c @@ -10,6 +10,74 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) 
A1##A2##A3##A4 #endif +svint32_t test_svdot_s32(svint32_t op1, svint8_t op2, svint8_t op3) +{ + // CHECK-LABEL: test_svdot_s32 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.sdot.nxv4i32( %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svdot,_s32,,)(op1, op2, op3); +} + +svint64_t test_svdot_s64(svint64_t op1, svint16_t op2, svint16_t op3) +{ + // CHECK-LABEL: test_svdot_s64 + // CHECK
[clang] ac894a5 - [SveEmitter] Add builtins for FFR manipulation
Author: Sander de Smalen Date: 2020-05-07T12:28:18+01:00 New Revision: ac894a5181ef622139a2c09b66a006e8fd450849 URL: https://github.com/llvm/llvm-project/commit/ac894a5181ef622139a2c09b66a006e8fd450849 DIFF: https://github.com/llvm/llvm-project/commit/ac894a5181ef622139a2c09b66a006e8fd450849.diff LOG: [SveEmitter] Add builtins for FFR manipulation This patch adds builtins for: - svrdffr, svrdffr_z - svsetffr - svwrffr Added: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rdffr.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_setffr.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_wrffr.c Modified: clang/include/clang/Basic/arm_sve.td Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 4c73c98bcb52..3ae2e9e60a61 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1137,6 +1137,14 @@ def SVPTEST_ANY : SInst<"svptest_any", "sPP", "Pc", MergeNone, "aarch64_sve_ def SVPTEST_FIRST : SInst<"svptest_first", "sPP", "Pc", MergeNone, "aarch64_sve_ptest_first">; def SVPTEST_LAST : SInst<"svptest_last", "sPP", "Pc", MergeNone, "aarch64_sve_ptest_last">; + +// FFR manipulation + +def SVRDFFR : SInst<"svrdffr", "P", "Pc", MergeNone, "", [IsOverloadNone]>; +def SVRDFFR_Z : SInst<"svrdffr_z", "PP", "Pc", MergeNone, "", [IsOverloadNone]>; +def SVSETFFR : SInst<"svsetffr", "v", "", MergeNone, "", [IsOverloadNone]>; +def SVWRFFR : SInst<"svwrffr", "vP", "Pc", MergeNone, "", [IsOverloadNone]>; + // Counting elements diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rdffr.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rdffr.c new file mode 100644 index ..44d2c166af10 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rdffr.c @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + 
+svbool_t test_svrdffr() +{ + // CHECK-LABEL: test_svrdffr + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.rdffr() + // CHECK: ret %[[INTRINSIC]] + return svrdffr(); +} + +svbool_t test_svrdffr_z(svbool_t pg) +{ + // CHECK-LABEL: test_svrdffr_z + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.rdffr.z( %pg) + // CHECK: ret %[[INTRINSIC]] + return svrdffr_z(pg); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_setffr.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_setffr.c new file mode 100644 index ..df9a3c647aa0 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_setffr.c @@ -0,0 +1,11 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +void test_svsetffr() +{ + // CHECK-LABEL: test_svsetffr + // CHECK: call void @llvm.aarch64.sve.setffr() + // CHECK: ret void + svsetffr(); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_wrffr.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_wrffr.c new file mode 100644 index ..84b3004e307c --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_wrffr.c @@ -0,0 +1,11 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +void test_svwrffr(svbool_t op) +{ + // CHECK-LABEL: test_svwrffr + // CHECK: call void @llvm.aarch64.sve.wrffr( %op) + // CHECK: ret void + svwrffr(op); +} ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] 827c8b0 - [SveEmitter] Add builtins for svcntp
Author: Sander de Smalen Date: 2020-05-07T12:28:18+01:00 New Revision: 827c8b06d33de9d19db8f0c79fb022b4ae815a08 URL: https://github.com/llvm/llvm-project/commit/827c8b06d33de9d19db8f0c79fb022b4ae815a08 DIFF: https://github.com/llvm/llvm-project/commit/827c8b06d33de9d19db8f0c79fb022b4ae815a08.diff LOG: [SveEmitter] Add builtins for svcntp Added: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cntp.c Modified: clang/include/clang/Basic/arm_sve.td Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 3ae2e9e60a61..fc3d6f300b65 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1158,6 +1158,7 @@ def SVCNTH : SInst<"svcnth", "n", "", MergeNone, "aarch64_sve_cnth", [IsAppendSV def SVCNTW : SInst<"svcntw", "n", "", MergeNone, "aarch64_sve_cntw", [IsAppendSVALL, IsOverloadNone]>; def SVCNTD : SInst<"svcntd", "n", "", MergeNone, "aarch64_sve_cntd", [IsAppendSVALL, IsOverloadNone]>; +def SVCNTP : SInst<"svcntp_{d}", "nPP", "PcPsPiPl",MergeNone, "aarch64_sve_cntp">; def SVLEN : SInst<"svlen[_{d}]", "nd", "csilUcUsUiUlhfd", MergeNone>; diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cntp.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cntp.c new file mode 100644 index ..f0a5f6456033 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cntp.c @@ -0,0 +1,41 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +uint64_t test_svcntp_b8(svbool_t pg, svbool_t op) +{ + // CHECK-LABEL: test_svcntp_b8 + // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.cntp.nxv16i1( %pg, %op) + // CHECK: ret i64 %[[INTRINSIC]] + return svcntp_b8(pg, op); +} + +uint64_t test_svcntp_b16(svbool_t pg, svbool_t op) +{ + // CHECK-LABEL: test_svcntp_b16 + // CHECK-DAG: %[[PG:.*]] = call 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[OP:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %op) + // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.cntp.nxv8i1( %[[PG]], %[[OP]]) + // CHECK: ret i64 %[[INTRINSIC]] + return svcntp_b16(pg, op); +} + +uint64_t test_svcntp_b32(svbool_t pg, svbool_t op) +{ + // CHECK-LABEL: test_svcntp_b32 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[OP:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %op) + // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.cntp.nxv4i1( %[[PG]], %[[OP]]) + // CHECK: ret i64 %[[INTRINSIC]] + return svcntp_b32(pg, op); +} + +uint64_t test_svcntp_b64(svbool_t pg, svbool_t op) +{ + // CHECK-LABEL: test_svcntp_b64 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[OP:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %op) + // CHECK: %[[INTRINSIC:.*]] = call i64 @llvm.aarch64.sve.cntp.nxv2i1( %[[PG]], %[[OP]]) + // CHECK: ret i64 %[[INTRINSIC]] + return svcntp_b64(pg, op); +} ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] dbc6a07 - [SveEmitter] Add builtins for address calculations.
Author: Sander de Smalen Date: 2020-05-07T12:28:18+01:00 New Revision: dbc6a07bcc6a72ffb79bd6cafd5dbfe50db16804 URL: https://github.com/llvm/llvm-project/commit/dbc6a07bcc6a72ffb79bd6cafd5dbfe50db16804 DIFF: https://github.com/llvm/llvm-project/commit/dbc6a07bcc6a72ffb79bd6cafd5dbfe50db16804.diff LOG: [SveEmitter] Add builtins for address calculations. This patch adds builtins for: - svadrb, svadrh, svadrw, svadrd Added: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_adrb.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_adrd.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_adrh.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_adrw.c Modified: clang/include/clang/Basic/arm_sve.td Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index fc3d6f300b65..a70c4d4e2dfc 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -611,6 +611,14 @@ def SVPRFH_GATHER_BASES_OFFSET : MInst<"svprfh_gather[_{2}base]_index", "vPdlJ" def SVPRFW_GATHER_BASES_OFFSET : MInst<"svprfw_gather[_{2}base]_index", "vPdlJ", "UiUl", [IsGatherPrefetch], MemEltTyInt32, "aarch64_sve_prfw_gather_scalar_offset">; def SVPRFD_GATHER_BASES_OFFSET : MInst<"svprfd_gather[_{2}base]_index", "vPdlJ", "UiUl", [IsGatherPrefetch], MemEltTyInt64, "aarch64_sve_prfd_gather_scalar_offset">; + +// Address calculations + +def SVADRB : SInst<"svadrb[_{0}base]_[{2}]offset", "uud", "ilUiUl", MergeNone, "aarch64_sve_adrb">; +def SVADRH : SInst<"svadrh[_{0}base]_[{2}]index", "uud", "ilUiUl", MergeNone, "aarch64_sve_adrh">; +def SVADRW : SInst<"svadrw[_{0}base]_[{2}]index", "uud", "ilUiUl", MergeNone, "aarch64_sve_adrw">; +def SVADRD : SInst<"svadrd[_{0}base]_[{2}]index", "uud", "ilUiUl", MergeNone, "aarch64_sve_adrd">; + // Scalar to vector diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_adrb.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_adrb.c new file mode 100644 index ..9b8011930572 --- /dev/null 
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_adrb.c @@ -0,0 +1,43 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svuint32_t test_svadrb_u32base_s32offset(svuint32_t bases, svint32_t offsets) +{ + // CHECK-LABEL: test_svadrb_u32base_s32offset + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.adrb.nxv4i32( %bases, %offsets) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svadrb_,u32base_s32,offset,)(bases, offsets); +} + +svuint64_t test_svadrb_u64base_s64offset(svuint64_t bases, svint64_t offsets) +{ + // CHECK-LABEL: test_svadrb_u64base_s64offset + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.adrb.nxv2i64( %bases, %offsets) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svadrb_,u64base_s64,offset,)(bases, offsets); +} + +svuint32_t test_svadrb_u32base_u32offset(svuint32_t bases, svuint32_t offsets) +{ + // CHECK-LABEL: test_svadrb_u32base_u32offset + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.adrb.nxv4i32( %bases, %offsets) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svadrb_,u32base_u32,offset,)(bases, offsets); +} + +svuint64_t test_svadrb_u64base_u64offset(svuint64_t bases, svuint64_t offsets) +{ + // CHECK-LABEL: test_svadrb_u64base_u64offset + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.adrb.nxv2i64( %bases, %offsets) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svadrb_,u64base_u64,offset,)(bases, offsets); 
+} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_adrd.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_adrd.c new file mode 100644 index ..6d668d61f3a3 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_adrd.c @@ -0,0 +1,43 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror
[clang] 4f94e1a - [SveEmitter] Add builtins for svasrd (zeroing/undef predication)
Author: Sander de Smalen Date: 2020-05-07T12:28:18+01:00 New Revision: 4f94e1a9f7018418b8a5605b86ac170651726e8b URL: https://github.com/llvm/llvm-project/commit/4f94e1a9f7018418b8a5605b86ac170651726e8b DIFF: https://github.com/llvm/llvm-project/commit/4f94e1a9f7018418b8a5605b86ac170651726e8b.diff LOG: [SveEmitter] Add builtins for svasrd (zeroing/undef predication) This patch adds builtins for arithmetic shift right (round towards zero) instructions for zeroing (_z) and undef (_x) predication. Added: Modified: clang/include/clang/Basic/arm_sve.td clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_asrd.c clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_asrd.c Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index a70c4d4e2dfc..4662b7ffdbf4 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -740,6 +740,8 @@ defm SVLSL : SInst_SHIFT<"svlsl", "aarch64_sve_lsl", "csilUcUsUiUl", "csiUcUsUi" defm SVLSR : SInst_SHIFT<"svlsr", "aarch64_sve_lsr", "UcUsUiUl", "UcUsUi">; def SVASRD_M : SInst<"svasrd[_n_{d}]", "dPdi", "csil",MergeOp1, "aarch64_sve_asrd", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVASRD_X : SInst<"svasrd[_n_{d}]", "dPdi", "csil",MergeAny, "aarch64_sve_asrd", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVASRD_Z : SInst<"svasrd[_n_{d}]", "dPdi", "csil",MergeZero, "aarch64_sve_asrd", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; // Integer reductions diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_asrd.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_asrd.c index bd0261f26189..99b00bba9183 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_asrd.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_asrd.c @@ -10,6 +10,84 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +svint8_t test_svasrd_n_s8_z(svbool_t pg, svint8_t op1) +{ + // CHECK-LABEL: test_svasrd_n_s8_z + // CHECK: %[[SEL:.*]] = call 
@llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asrd.nxv16i8( %pg, %[[SEL]], i32 1) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasrd,_n_s8,_z,)(pg, op1, 1); +} + +svint8_t test_svasrd_n_s8_z_1(svbool_t pg, svint8_t op1) +{ + // CHECK-LABEL: test_svasrd_n_s8_z_1 + // CHECK: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv16i8( %pg, %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asrd.nxv16i8( %pg, %[[SEL]], i32 8) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasrd,_n_s8,_z,)(pg, op1, 8); +} + +svint16_t test_svasrd_n_s16_z(svbool_t pg, svint16_t op1) +{ + // CHECK-LABEL: test_svasrd_n_s16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asrd.nxv8i16( %[[PG]], %[[SEL]], i32 1) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasrd,_n_s16,_z,)(pg, op1, 1); +} + +svint16_t test_svasrd_n_s16_z_1(svbool_t pg, svint16_t op1) +{ + // CHECK-LABEL: test_svasrd_n_s16_z_1 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asrd.nxv8i16( %[[PG]], %[[SEL]], i32 16) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasrd,_n_s16,_z,)(pg, op1, 16); +} + +svint32_t test_svasrd_n_s32_z(svbool_t pg, svint32_t op1) +{ + // CHECK-LABEL: test_svasrd_n_s32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asrd.nxv4i32( %[[PG]], %[[SEL]], i32 1) + // CHECK: ret %[[INTRINSIC]] + return 
SVE_ACLE_FUNC(svasrd,_n_s32,_z,)(pg, op1, 1); +} + +svint32_t test_svasrd_n_s32_z_1(svbool_t pg, svint32_t op1) +{ + // CHECK-LABEL: test_svasrd_n_s32_z_1 + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.asrd.nxv4i32( %[[PG]], %[[SEL]], i32 32) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svasrd,_n_s32,_z,)(pg, op1, 32); +} + +svint64_t test_svasrd_n_s64_z(svbool_t pg, svint64_t op1) +{ + // CHECK-LABEL: test_svasrd_n_s64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.s
[clang] cac0626 - [SveEmitter] Add builtins for svinsr
Author: Sander de Smalen Date: 2020-05-07T13:31:46+01:00 New Revision: cac06263a426c28892090c6abc7ee8c6b724838d URL: https://github.com/llvm/llvm-project/commit/cac06263a426c28892090c6abc7ee8c6b724838d DIFF: https://github.com/llvm/llvm-project/commit/cac06263a426c28892090c6abc7ee8c6b724838d.diff LOG: [SveEmitter] Add builtins for svinsr Added: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_insr.c Modified: clang/include/clang/Basic/arm_sve.td Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 4662b7ffdbf4..c0248a83f4ea 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -742,6 +742,8 @@ defm SVLSR : SInst_SHIFT<"svlsr", "aarch64_sve_lsr", "UcUsUiUl", "UcUsUi">; def SVASRD_M : SInst<"svasrd[_n_{d}]", "dPdi", "csil",MergeOp1, "aarch64_sve_asrd", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; def SVASRD_X : SInst<"svasrd[_n_{d}]", "dPdi", "csil",MergeAny, "aarch64_sve_asrd", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; def SVASRD_Z : SInst<"svasrd[_n_{d}]", "dPdi", "csil",MergeZero, "aarch64_sve_asrd", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVINSR : SInst<"svinsr[_n_{d}]", "dds", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_insr">; + // Integer reductions diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_insr.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_insr.c new file mode 100644 index ..ac5a8e78a795 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_insr.c @@ -0,0 +1,99 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... 
macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint8_t test_svinsr_n_s8(svint8_t op1, int8_t op2) +{ + // CHECK-LABEL: test_svinsr_n_s8 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.insr.nxv16i8( %op1, i8 %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svinsr,_n_s8,,)(op1, op2); +} + +svint16_t test_svinsr_n_s16(svint16_t op1, int16_t op2) +{ + // CHECK-LABEL: test_svinsr_n_s16 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.insr.nxv8i16( %op1, i16 %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svinsr,_n_s16,,)(op1, op2); +} + +svint32_t test_svinsr_n_s32(svint32_t op1, int32_t op2) +{ + // CHECK-LABEL: test_svinsr_n_s32 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.insr.nxv4i32( %op1, i32 %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svinsr,_n_s32,,)(op1, op2); +} + +svint64_t test_svinsr_n_s64(svint64_t op1, int64_t op2) +{ + // CHECK-LABEL: test_svinsr_n_s64 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.insr.nxv2i64( %op1, i64 %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svinsr,_n_s64,,)(op1, op2); +} + +svuint8_t test_svinsr_n_u8(svuint8_t op1, uint8_t op2) +{ + // CHECK-LABEL: test_svinsr_n_u8 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.insr.nxv16i8( %op1, i8 %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svinsr,_n_u8,,)(op1, op2); +} + +svuint16_t test_svinsr_n_u16(svuint16_t op1, uint16_t op2) +{ + // CHECK-LABEL: test_svinsr_n_u16 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.insr.nxv8i16( %op1, i16 %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svinsr,_n_u16,,)(op1, op2); +} + +svuint32_t test_svinsr_n_u32(svuint32_t op1, uint32_t op2) +{ + // CHECK-LABEL: test_svinsr_n_u32 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.insr.nxv4i32( %op1, i32 %op2) + // CHECK: ret %[[INTRINSIC]] + return 
SVE_ACLE_FUNC(svinsr,_n_u32,,)(op1, op2); +} + +svuint64_t test_svinsr_n_u64(svuint64_t op1, uint64_t op2) +{ + // CHECK-LABEL: test_svinsr_n_u64 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.insr.nxv2i64( %op1, i64 %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svinsr,_n_u64,,)(op1, op2); +} + +svfloat16_t test_svinsr_n_f16(svfloat16_t op1, float16_t op2) +{ + // CHECK-LABEL: test_svinsr_n_f16 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.insr.nxv8f16( %op1, half %op2) + // CHECK: ret %[[INTRINSIC]] + return SVE_ACLE_FUNC(svinsr,_n_f16,,)(op1, op2); +} + +svfloat32_t test_svinsr_n_f32(svfloat32_t op1, float32_t op2) +{ + // CHECK-LAB
[clang] 35de496 - [SveEmitter] Add builtins for svqdecp and svqincp
Author: Sander de Smalen Date: 2020-05-07T13:31:46+01:00 New Revision: 35de49655023207a888c0469b24f39fc2e5eaa8a URL: https://github.com/llvm/llvm-project/commit/35de49655023207a888c0469b24f39fc2e5eaa8a DIFF: https://github.com/llvm/llvm-project/commit/35de49655023207a888c0469b24f39fc2e5eaa8a.diff LOG: [SveEmitter] Add builtins for svqdecp and svqincp This patch adds builtins for saturating increment/decrement by svcntp, in scalar and vector forms. Added: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qdecp.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qincp.c Modified: clang/include/clang/Basic/arm_sve.td Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index c0248a83f4ea..4b43e02b3367 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1221,6 +1221,20 @@ defm SVQINCW_U : SInst_SAT2<"svqincw", "aarch64_sve_uqincw", UnsignedWord>; defm SVQINCD_S : SInst_SAT2<"svqincd", "aarch64_sve_sqincd", SignedDoubleWord>; defm SVQINCD_U : SInst_SAT2<"svqincd", "aarch64_sve_uqincd", UnsignedDoubleWord>; +def SVQDECP_S : SInst<"svqdecp[_{d}]", "ddP", "sil",MergeNone, "aarch64_sve_sqdecp">; +def SVQDECP_U : SInst<"svqdecp[_{d}]", "ddP", "UsUiUl", MergeNone, "aarch64_sve_uqdecp">; +def SVQINCP_S : SInst<"svqincp[_{d}]", "ddP", "sil",MergeNone, "aarch64_sve_sqincp">; +def SVQINCP_U : SInst<"svqincp[_{d}]", "ddP", "UsUiUl", MergeNone, "aarch64_sve_uqincp">; + +def SVQDECP_N_S32 : SInst<"svqdecp[_n_s32]_{d}", "kkP", "PcPsPiPl", MergeNone, "aarch64_sve_sqdecp_n32">; +def SVQDECP_N_S64 : SInst<"svqdecp[_n_s64]_{d}", "llP", "PcPsPiPl", MergeNone, "aarch64_sve_sqdecp_n64">; +def SVQDECP_N_U32 : SInst<"svqdecp[_n_u32]_{d}", "mmP", "PcPsPiPl", MergeNone, "aarch64_sve_uqdecp_n32">; +def SVQDECP_N_U64 : SInst<"svqdecp[_n_u64]_{d}", "nnP", "PcPsPiPl", MergeNone, "aarch64_sve_uqdecp_n64">; +def SVQINCP_N_S32 : SInst<"svqincp[_n_s32]_{d}", "kkP", "PcPsPiPl", MergeNone, 
"aarch64_sve_sqincp_n32">; +def SVQINCP_N_S64 : SInst<"svqincp[_n_s64]_{d}", "llP", "PcPsPiPl", MergeNone, "aarch64_sve_sqincp_n64">; +def SVQINCP_N_U32 : SInst<"svqincp[_n_u32]_{d}", "mmP", "PcPsPiPl", MergeNone, "aarch64_sve_uqincp_n32">; +def SVQINCP_N_U64 : SInst<"svqincp[_n_u64]_{d}", "nnP", "PcPsPiPl", MergeNone, "aarch64_sve_uqincp_n64">; + // SVE2 WhileGE/GT let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qdecp.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qdecp.c new file mode 100644 index ..14b1ebd1ff22 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qdecp.c @@ -0,0 +1,205 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +int32_t test_svqdecp_n_s32_b8(int32_t op, svbool_t pg) +{ + // CHECK-LABEL: test_svqdecp_n_s32_b8 + // CHECK: %[[INTRINSIC:.*]] = call i32 @llvm.aarch64.sve.sqdecp.n32.nxv16i1(i32 %op, %pg) + // CHECK: ret i32 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svqdecp,_n_s32,_b8,)(op, pg); +} + +int32_t test_svqdecp_n_s32_b16(int32_t op, svbool_t pg) +{ + // CHECK-LABEL: test_svqdecp_n_s32_b16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i32 @llvm.aarch64.sve.sqdecp.n32.nxv8i1(i32 %op, %[[PG]]) + // CHECK: ret i32 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svqdecp,_n_s32,_b16,)(op, pg); +} + +int32_t test_svqdecp_n_s32_b32(int32_t op, svbool_t pg) +{ + // CHECK-LABEL: test_svqdecp_n_s32_b32 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i32 @llvm.aarch64.sve.sqdecp.n32.nxv4i1(i32 %op, %[[PG]]) + // CHECK: ret i32 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svqdecp,_n_s32,_b32,)(op, pg); +} + +int32_t test_svqdecp_n_s32_b64(int32_t op, svbool_t pg) +{ + // CHECK-LABEL: test_svqdecp_n_s32_b64 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call i32 @llvm.aarch64.sve.sqdecp.n32.nxv2i1(i32 %op, %[[PG]]) + // CHECK: ret i32 %[[INTRINSIC]] + return SVE_ACLE_FUNC(svqdecp,_n_s32,_b64,)(op, pg); +} + +int64_t test_svqdecp_n_s
[clang] 5fa0eee - [SveEmitter] Add more SVE2 builtins for shift operations
Author: Sander de Smalen Date: 2020-05-07T13:31:46+01:00 New Revision: 5fa0eeec6eb1d1f6946d3e6ee2455e95bb79d870 URL: https://github.com/llvm/llvm-project/commit/5fa0eeec6eb1d1f6946d3e6ee2455e95bb79d870 DIFF: https://github.com/llvm/llvm-project/commit/5fa0eeec6eb1d1f6946d3e6ee2455e95bb79d870.diff LOG: [SveEmitter] Add more SVE2 builtins for shift operations This patch adds builtins for: - svqshlu - svrshr - svrsra - svsli - svsra - svsri Added: clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_rshr.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_rsra.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_sli.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_sra.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_sri.c clang/test/CodeGen/aarch64-sve2-intrinsics/negative/acle_sve2_rshr.c clang/test/CodeGen/aarch64-sve2-intrinsics/negative/acle_sve2_rsra.c clang/test/CodeGen/aarch64-sve2-intrinsics/negative/acle_sve2_sli.c clang/test/CodeGen/aarch64-sve2-intrinsics/negative/acle_sve2_sra.c clang/test/CodeGen/aarch64-sve2-intrinsics/negative/acle_sve2_sri.c Modified: clang/include/clang/Basic/arm_sve.td clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_qshlu.c clang/test/CodeGen/aarch64-sve2-intrinsics/negative/acle_sve2_qshlu.c Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 5effceb93464..858b0eecbcb1 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -811,12 +811,6 @@ let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { def SVSHRNB : SInst<"svshrnb[_n_{d}]","hdi", "silUsUiUl", MergeNone, "aarch64_sve_shrnb", [], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; } - -// SVE2 - Uniform DSP operations -let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { -def SVQSHLU_M : SInst<"svqshlu[_n_{d}]", "uPdi", "csil", MergeOp1, "aarch64_sve_sqshlu", [], [ImmCheck<2, ImmCheckShiftLeft, 1>]>; -} - // While comparisons @@ -1295,6 +1289,22 @@ defm SVRSHL_S : 
SInstZPZxZ<"svrshl", "csil", "dPdx", "dPdK", "aarch64_sve_ defm SVRSHL_U : SInstZPZxZ<"svrshl", "UcUsUiUl", "dPdx", "dPdK", "aarch64_sve_urshl">; defm SVSQADD : SInstZPZxZ<"svsqadd", "UcUsUiUl", "dPdx", "dPdK", "aarch64_sve_usqadd">; defm SVUQADD : SInstZPZxZ<"svuqadd", "csil", "dPdu", "dPdL", "aarch64_sve_suqadd">; + +def SVQSHLU_M : SInst<"svqshlu[_n_{d}]", "uPdi", "csil", MergeOp1, "aarch64_sve_sqshlu", [], [ImmCheck<2, ImmCheckShiftLeft, 1>]>; +def SVQSHLU_X : SInst<"svqshlu[_n_{d}]", "uPdi", "csil", MergeAny, "aarch64_sve_sqshlu", [], [ImmCheck<2, ImmCheckShiftLeft, 1>]>; +def SVQSHLU_Z : SInst<"svqshlu[_n_{d}]", "uPdi", "csil", MergeZero, "aarch64_sve_sqshlu", [], [ImmCheck<2, ImmCheckShiftLeft, 1>]>; +def SVRSHR_M_S : SInst<"svrshr[_n_{d}]", "dPdi", "csil", MergeOp1, "aarch64_sve_srshr", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVRSHR_M_U : SInst<"svrshr[_n_{d}]", "dPdi", "UcUsUiUl", MergeOp1, "aarch64_sve_urshr", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVRSHR_X_S : SInst<"svrshr[_n_{d}]", "dPdi", "csil", MergeAny, "aarch64_sve_srshr", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVRSHR_X_U : SInst<"svrshr[_n_{d}]", "dPdi", "UcUsUiUl", MergeAny, "aarch64_sve_urshr", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVRSHR_Z_S : SInst<"svrshr[_n_{d}]", "dPdi", "csil", MergeZero, "aarch64_sve_srshr", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVRSHR_Z_U : SInst<"svrshr[_n_{d}]", "dPdi", "UcUsUiUl", MergeZero, "aarch64_sve_urshr", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVRSRA_S : SInst<"svrsra[_n_{d}]", "dddi", "csil", MergeNone, "aarch64_sve_srsra", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVRSRA_U : SInst<"svrsra[_n_{d}]", "dddi", "UcUsUiUl", MergeNone, "aarch64_sve_ursra", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVSLI : SInst<"svsli[_n_{d}]", "dddi", "csilUcUsUiUl", MergeNone, "aarch64_sve_sli",[], [ImmCheck<2, ImmCheckShiftLeft, 1>]>; +def SVSRA_S: SInst<"svsra[_n_{d}]", "dddi", "csil", MergeNone, 
"aarch64_sve_ssra", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVSRA_U: SInst<"svsra[_n_{d}]", "dddi", "UcUsUiUl", MergeNone, "aarch64_sve_usra", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVSRI : SInst<"svsri[_n_{d}]", "dddi", "csilUcUsUiUl", MergeNone, "aarch64_sve_sri",[], [ImmCheck<2, ImmCheckShiftRight, 1>]>; } // SVE2 - Non-temporal gather/scatter diff --git a/clang/test/CodeGe
[clang] 0d22076 - [SveEmitter] Add builtins for SVE2 uniform DSP operations
Author: Sander de Smalen Date: 2020-05-07T13:31:46+01:00 New Revision: 0d22076531ce9f2757ae4c69e647f02e99394e05 URL: https://github.com/llvm/llvm-project/commit/0d22076531ce9f2757ae4c69e647f02e99394e05 DIFF: https://github.com/llvm/llvm-project/commit/0d22076531ce9f2757ae4c69e647f02e99394e05.diff LOG: [SveEmitter] Add builtins for SVE2 uniform DSP operations This patch adds builtins for: - svqdmulh, svqdmulh_lane - svqrdmlah, svqrdmlah_lane - svqrdmlsh, svqrdmlsh_lane - svqrdmulh, svqrdmulh_lane Added: clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_aba.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_qdmulh.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_qrdmlah.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_qrdmlsh.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_qrdmulh.c clang/test/CodeGen/aarch64-sve2-intrinsics/negative/acle_sve2_qdmulh.c clang/test/CodeGen/aarch64-sve2-intrinsics/negative/acle_sve2_qrdmlah.c clang/test/CodeGen/aarch64-sve2-intrinsics/negative/acle_sve2_qrdmlsh.c clang/test/CodeGen/aarch64-sve2-intrinsics/negative/acle_sve2_qrdmulh.c Modified: clang/include/clang/Basic/arm_sve.td Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 858b0eecbcb1..594efc507221 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1290,6 +1290,25 @@ defm SVRSHL_U : SInstZPZxZ<"svrshl", "UcUsUiUl", "dPdx", "dPdK", "aarch64_sve_ defm SVSQADD : SInstZPZxZ<"svsqadd", "UcUsUiUl", "dPdx", "dPdK", "aarch64_sve_usqadd">; defm SVUQADD : SInstZPZxZ<"svuqadd", "csil", "dPdu", "dPdL", "aarch64_sve_suqadd">; +def SVABA_S: SInst<"svaba[_{d}]", "", "csil", MergeNone, "aarch64_sve_saba">; +def SVABA_U: SInst<"svaba[_{d}]", "", "UcUsUiUl", MergeNone, "aarch64_sve_uaba">; +def SVQDMULH : SInst<"svqdmulh[_{d}]", "ddd", "csil", MergeNone, "aarch64_sve_sqdmulh">; +def SVQRDMULH : SInst<"svqrdmulh[_{d}]", "ddd", "csil", MergeNone, "aarch64_sve_sqrdmulh">; +def 
SVQRDMLAH : SInst<"svqrdmlah[_{d}]", "", "csil", MergeNone, "aarch64_sve_sqrdmlah">; +def SVQRDMLSH : SInst<"svqrdmlsh[_{d}]", "", "csil", MergeNone, "aarch64_sve_sqrdmlsh">; + +def SVABA_S_N : SInst<"svaba[_n_{d}]", "ddda", "csil", MergeNone, "aarch64_sve_saba">; +def SVABA_U_N : SInst<"svaba[_n_{d}]", "ddda", "UcUsUiUl", MergeNone, "aarch64_sve_uaba">; +def SVQDMULH_N : SInst<"svqdmulh[_n_{d}]", "dda", "csil", MergeNone, "aarch64_sve_sqdmulh">; +def SVQRDMULH_N: SInst<"svqrdmulh[_n_{d}]", "dda", "csil", MergeNone, "aarch64_sve_sqrdmulh">; +def SVQRDMLAH_N: SInst<"svqrdmlah[_n_{d}]", "ddda", "csil", MergeNone, "aarch64_sve_sqrdmlah">; +def SVQRDMLSH_N: SInst<"svqrdmlsh[_n_{d}]", "ddda", "csil", MergeNone, "aarch64_sve_sqrdmlsh">; + +def SVQDMULH_LANE : SInst<"svqdmulh_lane[_{d}]", "dddi", "sil", MergeNone, "aarch64_sve_sqdmulh_lane", [], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; +def SVQRDMULH_LANE : SInst<"svqrdmulh_lane[_{d}]", "dddi", "sil", MergeNone, "aarch64_sve_sqrdmulh_lane", [], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; +def SVQRDMLAH_LANE : SInst<"svqrdmlah_lane[_{d}]", "i", "sil", MergeNone, "aarch64_sve_sqrdmlah_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVQRDMLSH_LANE : SInst<"svqrdmlsh_lane[_{d}]", "i", "sil", MergeNone, "aarch64_sve_sqrdmlsh_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; + def SVQSHLU_M : SInst<"svqshlu[_n_{d}]", "uPdi", "csil", MergeOp1, "aarch64_sve_sqshlu", [], [ImmCheck<2, ImmCheckShiftLeft, 1>]>; def SVQSHLU_X : SInst<"svqshlu[_n_{d}]", "uPdi", "csil", MergeAny, "aarch64_sve_sqshlu", [], [ImmCheck<2, ImmCheckShiftLeft, 1>]>; def SVQSHLU_Z : SInst<"svqshlu[_n_{d}]", "uPdi", "csil", MergeZero, "aarch64_sve_sqshlu", [], [ImmCheck<2, ImmCheckShiftLeft, 1>]>; diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_aba.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_aba.c new file mode 100644 index ..5ba165faf48f --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_aba.c @@ -0,0 +1,181 @@ 
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error
[clang] 36aab0c - [SveEmitter] Add builtins for SVE2 Bitwise ternary logical instructions
Author: Sander de Smalen Date: 2020-05-07T15:21:37+01:00 New Revision: 36aab0c055a93d95e84606368e088bd1559e4ccb URL: https://github.com/llvm/llvm-project/commit/36aab0c055a93d95e84606368e088bd1559e4ccb DIFF: https://github.com/llvm/llvm-project/commit/36aab0c055a93d95e84606368e088bd1559e4ccb.diff LOG: [SveEmitter] Add builtins for SVE2 Bitwise ternary logical instructions This patch adds builtins for: - svbcax - svbsl - svbsl1n - svbsl2n - sveor3 - svnbsl - svxar Added: clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_bcax.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_bsl.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_bsl1n.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_bsl2n.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_eor3.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_nbsl.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_xar.c clang/test/CodeGen/aarch64-sve2-intrinsics/negative/acle_sve2_xar.c Modified: clang/include/clang/Basic/arm_sve.td Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index a187591cd4d1..efdc892eb66a 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1360,6 +1360,27 @@ def SVADALP_U_X : SInst<"svadalp[_{d}]", "dPdh", "UsUiUl", MergeAny, "aarch64_s def SVADALP_U_Z : SInst<"svadalp[_{d}]", "dPdh", "UsUiUl", MergeZero, "aarch64_sve_uadalp">; } + +// SVE2 - Bitwise ternary logical instructions +// + +let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { +def SVBCAX : SInst<"svbcax[_{d}]", "", "csilUcUsUiUl", MergeNone, "aarch64_sve_bcax">; +def SVBSL : SInst<"svbsl[_{d}]", "", "csilUcUsUiUl", MergeNone, "aarch64_sve_bsl">; +def SVBSL1N : SInst<"svbsl1n[_{d}]", "", "csilUcUsUiUl", MergeNone, "aarch64_sve_bsl1n">; +def SVBSL2N : SInst<"svbsl2n[_{d}]", "", "csilUcUsUiUl", MergeNone, "aarch64_sve_bsl2n">; +def SVEOR3 : SInst<"sveor3[_{d}]", "", "csilUcUsUiUl", MergeNone, "aarch64_sve_eor3">; +def SVNBSL : 
SInst<"svnbsl[_{d}]", "", "csilUcUsUiUl", MergeNone, "aarch64_sve_nbsl">; + +def SVBCAX_N : SInst<"svbcax[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_bcax">; +def SVBSL_N : SInst<"svbsl[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_bsl">; +def SVBSL1N_N : SInst<"svbsl1n[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_bsl1n">; +def SVBSL2N_N : SInst<"svbsl2n[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_bsl2n">; +def SVEOR3_N : SInst<"sveor3[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_eor3">; +def SVNBSL_N : SInst<"svnbsl[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_nbsl">; +def SVXAR_N : SInst<"svxar[_n_{d}]", "dddi", "csilUcUsUiUl", MergeNone, "aarch64_sve_xar", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +} + // SVE2 - Non-temporal gather/scatter let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_bcax.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_bcax.c new file mode 100644 index ..3cde14ad65df --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_bcax.c @@ -0,0 +1,181 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify=overload -verify-ignore-unexpected=error %s + 
+#include <arm_sve.h> + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint8_t test_svbcax_s8(svint8_t op1, svint8_t op2, svint8_t op3) { + // CHECK-LABEL: test_svbcax_s8 + // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.bcax.nxv16i8(<vscale x 16 x i8> %op1, <vscale x 16 x i8> %op2, <vscale x 16 x i8> %op3) + // CHECK: ret <vscale x 16 x i8> %[[INTRINSIC]] + // overload-warning@+2 {{implicit declaration of function 'svbcax'}} + // expected-warning@+1 {{implicit declaration of function 'svbcax_
[clang] 5e9bc21 - [SveEmitter] Add builtins for SVE2 Multiplication by indexed elements
Author: Sander de Smalen Date: 2020-05-07T15:21:37+01:00 New Revision: 5e9bc21eea111df698cf45048b8b0e4c3c245dd5 URL: https://github.com/llvm/llvm-project/commit/5e9bc21eea111df698cf45048b8b0e4c3c245dd5 DIFF: https://github.com/llvm/llvm-project/commit/5e9bc21eea111df698cf45048b8b0e4c3c245dd5.diff LOG: [SveEmitter] Add builtins for SVE2 Multiplication by indexed elements This patch adds builtins for: - svmla_lane - svmls_lane - svmul_lane Added: clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_mla.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_mls.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_mul.c clang/test/CodeGen/aarch64-sve2-intrinsics/negative/acle_sve2_mla.c clang/test/CodeGen/aarch64-sve2-intrinsics/negative/acle_sve2_mls.c clang/test/CodeGen/aarch64-sve2-intrinsics/negative/acle_sve2_mul.c Modified: clang/include/clang/Basic/arm_sve.td Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index db4153b28c19..a271ad9cfb1b 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1396,6 +1396,15 @@ def SVSBCLB_N : SInst<"svsbclb[_n_{d}]", "ddda", "UiUl", MergeNone, "aarch64_sve def SVSBCLT_N : SInst<"svsbclt[_n_{d}]", "ddda", "UiUl", MergeNone, "aarch64_sve_sbclt">; } + +// SVE2 - Multiplication by indexed elements + +let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { +def SVMLA_LANE_2 : SInst<"svmla_lane[_{d}]", "i", "silUsUiUl", MergeNone, "aarch64_sve_mla_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVMLS_LANE_2 : SInst<"svmls_lane[_{d}]", "i", "silUsUiUl", MergeNone, "aarch64_sve_mls_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVMUL_LANE_2 : SInst<"svmul_lane[_{d}]", "dddi", "silUsUiUl", MergeNone, "aarch64_sve_mul_lane", [], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; +} + // SVE2 - Non-temporal gather/scatter let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_mla.c 
b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_mla.c new file mode 100644 index ..b130780648c4 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_mla.c @@ -0,0 +1,111 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint16_t test_svmla_lane_s16(svint16_t op1, svint16_t op2, svint16_t op3) +{ + // CHECK-LABEL: test_svmla_lane_s16 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.lane.nxv8i16( %op1, %op2, %op3, i32 0) + // CHECK: ret %[[INTRINSIC]] + // expected-warning@+1 {{implicit declaration of function 'svmla_lane_s16'}} + return SVE_ACLE_FUNC(svmla_lane,_s16,,)(op1, op2, op3, 0); +} + +svint16_t test_svmla_lane_s16_1(svint16_t op1, svint16_t op2, svint16_t op3) +{ + // CHECK-LABEL: test_svmla_lane_s16_1 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.lane.nxv8i16( %op1, %op2, %op3, i32 7) + // CHECK: ret %[[INTRINSIC]] + // expected-warning@+1 {{implicit declaration of function 'svmla_lane_s16'}} + return SVE_ACLE_FUNC(svmla_lane,_s16,,)(op1, op2, op3, 7); +} + +svint32_t test_svmla_lane_s32(svint32_t op1, svint32_t op2, svint32_t op3) +{ + // CHECK-LABEL: test_svmla_lane_s32 + // 
CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.lane.nxv4i32( %op1, %op2, %op3, i32 0) + // CHECK: ret %[[INTRINSIC]] + // expected-warning@+1 {{implicit declaration of function 'svmla_lane_s32'}} + return SVE_ACLE_FUNC(svmla_lane,_s32,,)(op1, op2, op3, 0); +} + +svint32_t test_svmla_lane_s32_1(svint32_t op1, svint32_t op2, svint32_t op3) +{ + // CHECK-LABEL: test_svmla_lane_s32_1 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.mla.lane.nxv4i32( %op1, %op2, %op3, i32 3) + // CHECK: ret %[[INTRINSIC]] + // expected-warning@+1 {{implicit declaration of function 'svmla_lane_s32'}} + return SVE_ACLE_FUNC(svmla_la
[clang] 60615cf - [SveEmitter] Add builtins for SVE2 Large integer arithmetic
Author: Sander de Smalen Date: 2020-05-07T15:21:37+01:00 New Revision: 60615cfb43f540745fd38fd2ff6b35c9d5af16ee URL: https://github.com/llvm/llvm-project/commit/60615cfb43f540745fd38fd2ff6b35c9d5af16ee DIFF: https://github.com/llvm/llvm-project/commit/60615cfb43f540745fd38fd2ff6b35c9d5af16ee.diff LOG: [SveEmitter] Add builtins for SVE2 Large integer arithmetic This patch adds builtins for: - svadclb - svadclt - svsbclb - svsbclt Added: clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_adclb.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_adclt.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_sbclb.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_sbclt.c Modified: clang/include/clang/Basic/arm_sve.td Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index efdc892eb66a..db4153b28c19 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1381,6 +1381,21 @@ def SVNBSL_N : SInst<"svnbsl[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aar def SVXAR_N : SInst<"svxar[_n_{d}]", "dddi", "csilUcUsUiUl", MergeNone, "aarch64_sve_xar", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; } + +// SVE2 - Large integer arithmetic + +let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { +def SVADCLB : SInst<"svadclb[_{d}]", "", "UiUl", MergeNone, "aarch64_sve_adclb">; +def SVADCLT : SInst<"svadclt[_{d}]", "", "UiUl", MergeNone, "aarch64_sve_adclt">; +def SVSBCLB : SInst<"svsbclb[_{d}]", "", "UiUl", MergeNone, "aarch64_sve_sbclb">; +def SVSBCLT : SInst<"svsbclt[_{d}]", "", "UiUl", MergeNone, "aarch64_sve_sbclt">; + +def SVADCLB_N : SInst<"svadclb[_n_{d}]", "ddda", "UiUl", MergeNone, "aarch64_sve_adclb">; +def SVADCLT_N : SInst<"svadclt[_n_{d}]", "ddda", "UiUl", MergeNone, "aarch64_sve_adclt">; +def SVSBCLB_N : SInst<"svsbclb[_n_{d}]", "ddda", "UiUl", MergeNone, "aarch64_sve_sbclb">; +def SVSBCLT_N : SInst<"svsbclt[_n_{d}]", "ddda", "UiUl", MergeNone, "aarch64_sve_sbclt">; +} + // SVE2 - 
Non-temporal gather/scatter let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_adclb.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_adclb.c new file mode 100644 index ..a02ecaaad5af --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_adclb.c @@ -0,0 +1,55 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify=overload -verify-ignore-unexpected=error %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svuint32_t test_svadclb_u32(svuint32_t op1, svuint32_t op2, svuint32_t op3) { + // CHECK-LABEL: test_svadclb_u32 + // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.adclb.nxv4i32(<vscale x 4 x i32> %op1, <vscale x 4 x i32> %op2, <vscale x 4 x i32> %op3) + // CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]] + // overload-warning@+2 {{implicit declaration of function 'svadclb'}} + // expected-warning@+1 {{implicit declaration of function 'svadclb_u32'}} + return SVE_ACLE_FUNC(svadclb,_u32,,)(op1, op2, op3); +} + +svuint64_t test_svadclb_u64(svuint64_t op1, svuint64_t op2, svuint64_t op3) { + // CHECK-LABEL: test_svadclb_u64 + // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.adclb.nxv2i64(<vscale x 2 x i64> %op1, <vscale x 2 x i64> %op2, <vscale x 2 x i64> %op3) + // CHECK: ret <vscale x 2 x i64> %[[INTRINSIC]] + // overload-warning@+2 {{implicit declaration of function 'svadclb'}} + // expected-warning@+1 {{implicit declaration of function 'svadclb_u64'}} + return SVE_ACLE_FUNC(svadclb,_u64,,)(op1, op2, op3); +} + +svuint32_t test_svadclb_n_u32(svuint32_t op1, svuint32_t op2, uint32_t op3) { + // CHECK-LABEL: test_svadclb_n_u32 + // CHECK: %[[DUP:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op3) + // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.adclb.nxv4i32(<vscale x 4 x i32> %op1, <vscale x 4 x i32> %op2, <vscale x 4 x i32> %[[DUP]]) + // CHECK: ret <vscale x 4 x i32> %[[
[clang] b0348af - [SveEmitter] Add builtins for SVE2 widening pairwise arithmetic
Author: Sander de Smalen Date: 2020-05-07T15:21:37+01:00 New Revision: b0348af1082eb3070c34fed99496cdcd511f63e5 URL: https://github.com/llvm/llvm-project/commit/b0348af1082eb3070c34fed99496cdcd511f63e5 DIFF: https://github.com/llvm/llvm-project/commit/b0348af1082eb3070c34fed99496cdcd511f63e5.diff LOG: [SveEmitter] Add builtins for SVE2 widening pairwise arithmetic This patch adds builtins for: - svadalp Added: clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_adalp.c Modified: clang/include/clang/Basic/arm_sve.td Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index ba1161f2e552..a187591cd4d1 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1347,6 +1347,19 @@ defm SVMINP_S : SInstPairwise<"svminp", "csli", "aarch64_sve_sminp">; defm SVMINP_U : SInstPairwise<"svminp", "UcUsUiUl", "aarch64_sve_uminp">; } + +// SVE2 - Widening pairwise arithmetic + +let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { +def SVADALP_S_M : SInst<"svadalp[_{d}]", "dPdh", "sil",MergeOp1, "aarch64_sve_sadalp">; +def SVADALP_S_X : SInst<"svadalp[_{d}]", "dPdh", "sil",MergeAny, "aarch64_sve_sadalp">; +def SVADALP_S_Z : SInst<"svadalp[_{d}]", "dPdh", "sil",MergeZero, "aarch64_sve_sadalp">; + +def SVADALP_U_M : SInst<"svadalp[_{d}]", "dPdh", "UsUiUl", MergeOp1, "aarch64_sve_uadalp">; +def SVADALP_U_X : SInst<"svadalp[_{d}]", "dPdh", "UsUiUl", MergeAny, "aarch64_sve_uadalp">; +def SVADALP_U_Z : SInst<"svadalp[_{d}]", "dPdh", "UsUiUl", MergeZero, "aarch64_sve_uadalp">; +} + // SVE2 - Non-temporal gather/scatter let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_adalp.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_adalp.c new file mode 100644 index ..5d68f8109986 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_adalp.c @@ -0,0 +1,217 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 
-triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify=overload -verify-ignore-unexpected=error %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint16_t test_svadalp_s16_z(svbool_t pg, svint16_t op1, svint8_t op2) +{ + // CHECK-LABEL: test_svadalp_s16_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv8i16( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.sadalp.nxv8i16( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + // overload-warning@+2 {{implicit declaration of function 'svadalp_z'}} + // expected-warning@+1 {{implicit declaration of function 'svadalp_s16_z'}} + return SVE_ACLE_FUNC(svadalp,_s16,_z,)(pg, op1, op2); +} + +svint32_t test_svadalp_s32_z(svbool_t pg, svint32_t op1, svint16_t op2) +{ + // CHECK-LABEL: test_svadalp_s32_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv4i32( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*]] = call 
@llvm.aarch64.sve.sadalp.nxv4i32( %[[PG]], %[[SEL]], %op2) + // CHECK: ret %[[INTRINSIC]] + // overload-warning@+2 {{implicit declaration of function 'svadalp_z'}} + // expected-warning@+1 {{implicit declaration of function 'svadalp_s32_z'}} + return SVE_ACLE_FUNC(svadalp,_s32,_z,)(pg, op1, op2); +} + +svint64_t test_svadalp_s64_z(svbool_t pg, svint64_t op1, svint32_t op2) +{ + // CHECK-LABEL: test_svadalp_s64_z + // CHECK-DAG: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK-DAG: %[[SEL:.*]] = call @llvm.aarch64.sve.sel.nxv2i64( %[[PG]], %op1, zeroinitializer) + // CHECK: %[[INTRINSIC:.*
[clang] 7ff0500 - [SveEmitter] Add builtins for SVE2 Non-widening pairwise arithmetic
Author: Sander de Smalen Date: 2020-05-07T15:21:37+01:00 New Revision: 7ff05002d0b5b73cb1d155e27cb15f5d459e86e8 URL: https://github.com/llvm/llvm-project/commit/7ff05002d0b5b73cb1d155e27cb15f5d459e86e8 DIFF: https://github.com/llvm/llvm-project/commit/7ff05002d0b5b73cb1d155e27cb15f5d459e86e8.diff LOG: [SveEmitter] Add builtins for SVE2 Non-widening pairwise arithmetic This patch adds builtins for: - svaddp - svmaxnmp - svmaxp - svminnmp - svminp Added: clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_addp.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_maxnmp.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_maxp.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_minnmp.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_minp.c Modified: clang/include/clang/Basic/arm_sve.td Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 594efc507221..ba1161f2e552 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1325,6 +1325,28 @@ def SVSRA_S: SInst<"svsra[_n_{d}]", "dddi", "csil", MergeNone, "aa def SVSRA_U: SInst<"svsra[_n_{d}]", "dddi", "UcUsUiUl", MergeNone, "aarch64_sve_usra", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; def SVSRI : SInst<"svsri[_n_{d}]", "dddi", "csilUcUsUiUl", MergeNone, "aarch64_sve_sri",[], [ImmCheck<2, ImmCheckShiftRight, 1>]>; } + + +// SVE2 - Non-widening pairwise arithmetic + +multiclass SInstPairwise flags=[]> { + def _M : SInst; + def _X : SInst; +} + +let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { +defm SVADDP : SInstPairwise<"svaddp", "csliUcUsUiUl", "aarch64_sve_addp">; +defm SVADDP_F : SInstPairwise<"svaddp", "hfd", "aarch64_sve_faddp">; +defm SVMAXNMP : SInstPairwise<"svmaxnmp", "hfd", "aarch64_sve_fmaxnmp">; +defm SVMAXP_F : SInstPairwise<"svmaxp", "hfd", "aarch64_sve_fmaxp">; +defm SVMAXP_S : SInstPairwise<"svmaxp", "csli", "aarch64_sve_smaxp">; +defm SVMAXP_U : SInstPairwise<"svmaxp", "UcUsUiUl", 
"aarch64_sve_umaxp">; +defm SVMINNMP : SInstPairwise<"svminnmp", "hfd", "aarch64_sve_fminnmp">; +defm SVMINP_F : SInstPairwise<"svminp", "hfd", "aarch64_sve_fminp">; +defm SVMINP_S : SInstPairwise<"svminp", "csli", "aarch64_sve_sminp">; +defm SVMINP_U : SInstPairwise<"svminp", "UcUsUiUl", "aarch64_sve_uminp">; +} + // SVE2 - Non-temporal gather/scatter let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_addp.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_addp.c new file mode 100644 index ..0fd97cedb201 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_addp.c @@ -0,0 +1,251 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify=overload -verify-ignore-unexpected=error %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint8_t test_svaddp_s8_m(svbool_t pg, svint8_t op1, svint8_t op2) +{ + // CHECK-LABEL: test_svaddp_s8_m + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.addp.nxv16i8( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + // overload-warning@+2 {{implicit declaration of function 'svaddp_m'}} + // expected-warning@+1 {{implicit declaration of function 'svaddp_s8_m'}} + return SVE_ACLE_FUNC(svaddp,_s8,_m,)(pg, op1, op2); +} + +svint16_t test_svaddp_s16_m(svbool_t pg, svint16_t op1, svint16_t op2) +{ + // CHECK-LABEL: test_svaddp_s16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.addp.nxv8i16( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + // overload-warning@+2 {{implicit dec
[clang] ce7f50c - [SveEmitter] Add builtins for SVE2 Uniform complex integer arithmetic
Author: Sander de Smalen Date: 2020-05-07T16:09:31+01:00 New Revision: ce7f50c2ce186ade48a3aba9c28a981978ec527a URL: https://github.com/llvm/llvm-project/commit/ce7f50c2ce186ade48a3aba9c28a981978ec527a DIFF: https://github.com/llvm/llvm-project/commit/ce7f50c2ce186ade48a3aba9c28a981978ec527a.diff LOG: [SveEmitter] Add builtins for SVE2 Uniform complex integer arithmetic This patch adds builtins for: - svcadd - svqcadd - svcmla - svcmla_lane - svqrdcmlah - svqrdcmlah_lane Added: clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_cadd.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_cmla.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_qcadd.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_qrdcmlah.c clang/test/CodeGen/aarch64-sve2-intrinsics/negative/acle_sve2_cadd.c clang/test/CodeGen/aarch64-sve2-intrinsics/negative/acle_sve2_cmla.c clang/test/CodeGen/aarch64-sve2-intrinsics/negative/acle_sve2_qcadd.c clang/test/CodeGen/aarch64-sve2-intrinsics/negative/acle_sve2_qrdcmlah.c Modified: clang/include/clang/Basic/arm_sve.td Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index a271ad9cfb1b..4094c0b5f9c3 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1405,6 +1405,19 @@ def SVMLS_LANE_2 : SInst<"svmls_lane[_{d}]", "i", "silUsUiUl", MergeNone, "a def SVMUL_LANE_2 : SInst<"svmul_lane[_{d}]", "dddi", "silUsUiUl", MergeNone, "aarch64_sve_mul_lane", [], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; } + +// SVE2 - Uniform complex integer arithmetic +let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { +def SVCADD : SInst<"svcadd[_{d}]", "dddi", "csilUcUsUiUl", MergeNone, "aarch64_sve_cadd_x", [], [ImmCheck<2, ImmCheckComplexRot90_270>]>; +def SVSQCADD : SInst<"svqcadd[_{d}]", "dddi", "csil", MergeNone, "aarch64_sve_sqcadd_x", [], [ImmCheck<2, ImmCheckComplexRot90_270>]>; +def SVCMLA : SInst<"svcmla[_{d}]", "i", "csilUcUsUiUl", MergeNone, "aarch64_sve_cmla_x", [], 
[ImmCheck<3, ImmCheckComplexRotAll90>]>; +def SVCMLA_LANE_X : SInst<"svcmla_lane[_{d}]", "ii", "siUsUi", MergeNone, "aarch64_sve_cmla_lane_x", [], [ImmCheck<3, ImmCheckLaneIndexCompRotate, 2>, + ImmCheck<4, ImmCheckComplexRotAll90>]>; +def SVSQRDCMLAH_X : SInst<"svqrdcmlah[_{d}]", "i", "csil", MergeNone, "aarch64_sve_sqrdcmlah_x", [], [ImmCheck<3, ImmCheckComplexRotAll90>]>; +def SVSQRDCMLAH_LANE_X : SInst<"svqrdcmlah_lane[_{d}]", "ii", "si", MergeNone, "aarch64_sve_sqrdcmlah_lane_x", [], [ImmCheck<3, ImmCheckLaneIndexCompRotate, 2>, + ImmCheck<4, ImmCheckComplexRotAll90>]>; +} + // SVE2 - Non-temporal gather/scatter let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_cadd.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_cadd.c new file mode 100644 index ..01c14f863666 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_cadd.c @@ -0,0 +1,173 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify=overload -verify-ignore-unexpected=error %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint8_t test_svcadd_s8(svint8_t op1, svint8_t op2) +{ + // CHECK-LABEL: test_svcadd_s8 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cadd.x.nxv16i8( %op1, %op2, i32 90) + // CH
[clang] 867bfae - [SveEmitter] Add builtins for SVE2 Widening complex integer arithmetic
Author: Sander de Smalen Date: 2020-05-07T16:09:31+01:00 New Revision: 867bfae93fd7a6731be91024c71a95a79d552411 URL: https://github.com/llvm/llvm-project/commit/867bfae93fd7a6731be91024c71a95a79d552411 DIFF: https://github.com/llvm/llvm-project/commit/867bfae93fd7a6731be91024c71a95a79d552411.diff LOG: [SveEmitter] Add builtins for SVE2 Widening complex integer arithmetic This patch adds builtins for: - svaddlbt - svqdmlalbt - svqdmlslbt - svsublbt - svsubltb Added: clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_addlbt.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_qdmlalbt.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_qdmlslbt.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_sublbt.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_subltb.c Modified: clang/include/clang/Basic/arm_sve.td Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 77d64a579056..b12bd92d867b 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1554,6 +1554,17 @@ def SVQXTNT_U : SInst<"svqxtnt[_{d}]", "hhd", "UsUiUl", MergeNone, "aarch64_s def SVQXTUNT_S : SInst<"svqxtunt[_{d}]", "eed", "sil", MergeNone, "aarch64_sve_sqxtunt">; } + +// SVE2 - Widening complex integer arithmetic + +let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { +defm SVADDLBT : SInstWideDSPLong<"svaddlbt", "sil", "aarch64_sve_saddlbt">; +defm SVSUBLBT : SInstWideDSPLong<"svsublbt", "sil", "aarch64_sve_ssublbt">; +defm SVSUBLTB : SInstWideDSPLong<"svsubltb", "sil", "aarch64_sve_ssubltb">; + +defm SVQDMLALBT : SInstWideDSPAcc<"svqdmlalbt", "sil", "aarch64_sve_sqdmlalbt">; +defm SVQDMLSLBT : SInstWideDSPAcc<"svqdmlslbt", "sil", "aarch64_sve_sqdmlslbt">; +} // SVE2 - Non-temporal gather/scatter diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_addlbt.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_addlbt.c new file mode 100644 index ..3d47de842187 --- /dev/null +++ 
b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_addlbt.c @@ -0,0 +1,76 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify=overload -verify-ignore-unexpected=error %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint16_t test_svaddlbt_s16(svint8_t op1, svint8_t op2) { + // CHECK-LABEL: test_svaddlbt_s16 + // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.saddlbt.nxv8i16(<vscale x 16 x i8> %op1, <vscale x 16 x i8> %op2) + // CHECK: ret <vscale x 8 x i16> %[[INTRINSIC]] + // overload-warning@+2 {{implicit declaration of function 'svaddlbt'}} + // expected-warning@+1 {{implicit declaration of function 'svaddlbt_s16'}} + return SVE_ACLE_FUNC(svaddlbt,_s16,,)(op1, op2); +} + +svint32_t test_svaddlbt_s32(svint16_t op1, svint16_t op2) { + // CHECK-LABEL: test_svaddlbt_s32 + // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.saddlbt.nxv4i32(<vscale x 8 x i16> %op1, <vscale x 8 x i16> %op2) + // CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]] + // overload-warning@+2 {{implicit declaration of function 'svaddlbt'}} + // expected-warning@+1 {{implicit declaration of function 'svaddlbt_s32'}} + return SVE_ACLE_FUNC(svaddlbt,_s32,,)(op1, op2); +} + +svint64_t test_svaddlbt_s64(svint32_t op1, svint32_t op2) { + // CHECK-LABEL: test_svaddlbt_s64 + // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.saddlbt.nxv2i64(<vscale x 4 x i32> %op1, <vscale x 4 x i32> %op2) + // CHECK: ret <vscale x 2 x i64> %[[INTRINSIC]] + // overload-warning@+2 {{implicit declaration of function 'svaddlbt'}} + // expected-warning@+1 {{implicit declaration of function 'svaddlbt_s64'}} + return SVE_ACLE_FUNC(svaddlbt,_s64,,)(op1, op2); +} + +svint16_t test_svaddlbt_n_s16(svint8_t op1, int8_t op2) { + // CHECK-LABEL: test_svaddlbt_n_s16 + // CHECK: %[[DUP:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op2) + // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.sa
[clang] e76256e - [SveEmitter] Add builtins for SVE2 Complex integer dot product
Author: Sander de Smalen Date: 2020-05-07T16:09:31+01:00 New Revision: e76256e7c1b27087288e8fceb3b6c4aec8359389 URL: https://github.com/llvm/llvm-project/commit/e76256e7c1b27087288e8fceb3b6c4aec8359389 DIFF: https://github.com/llvm/llvm-project/commit/e76256e7c1b27087288e8fceb3b6c4aec8359389.diff LOG: [SveEmitter] Add builtins for SVE2 Complex integer dot product This patch adds builtins for: - svcdot, svcdot_lane Added: clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_cdot.c clang/test/CodeGen/aarch64-sve2-intrinsics/negative/acle_sve2_cdot.c Modified: clang/include/clang/Basic/arm_sve.td Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index b12bd92d867b..880ca4e19f3d 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1708,6 +1708,15 @@ def SVPMULLT_PAIR : SInst<"svpmullt_pair[_{d}]", "ddd", "UcUi", Mer def SVPMULLT_PAIR_N : SInst<"svpmullt_pair[_n_{d}]", "dda", "UcUi", MergeNone, "aarch64_sve_pmullt_pair">; } + +// SVE2 - Complex integer dot product + +let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { +def SVCDOT : SInst<"svcdot[_{d}]", "ddqqi", "il", MergeNone, "aarch64_sve_cdot", [], [ImmCheck<3, ImmCheckComplexRotAll90>]>; +def SVCDOT_LANE : SInst<"svcdot_lane[_{d}]", "ddqqii", "il", MergeNone, "aarch64_sve_cdot_lane", [], [ImmCheck<4, ImmCheckComplexRotAll90>, + ImmCheck<3, ImmCheckLaneIndexDot, 2>]>; +} + // SVE2 - Contiguous conflict detection let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_cdot.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_cdot.c new file mode 100644 index ..c8094c894892 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_cdot.c @@ -0,0 +1,123 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | 
FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify=overload -verify-ignore-unexpected=error %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint32_t test_svcdot_s32(svint32_t op1, svint8_t op2, svint8_t op3) +{ + // CHECK-LABEL: test_svcdot_s32 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cdot.nxv4i32( %op1, %op2, %op3, i32 0) + // CHECK: ret %[[INTRINSIC]] + // overload-warning@+2 {{implicit declaration of function 'svcdot'}} + // expected-warning@+1 {{implicit declaration of function 'svcdot_s32'}} + return SVE_ACLE_FUNC(svcdot,_s32,,)(op1, op2, op3, 0); +} + +svint32_t test_svcdot_s32_1(svint32_t op1, svint8_t op2, svint8_t op3) +{ + // CHECK-LABEL: test_svcdot_s32_1 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cdot.nxv4i32( %op1, %op2, %op3, i32 90) + // CHECK: ret %[[INTRINSIC]] + // overload-warning@+2 {{implicit declaration of function 'svcdot'}} + // expected-warning@+1 {{implicit declaration of function 'svcdot_s32'}} + return SVE_ACLE_FUNC(svcdot,_s32,,)(op1, op2, op3, 90); +} + +svint32_t test_svcdot_s32_2(svint32_t op1, svint8_t op2, svint8_t op3) +{ + // CHECK-LABEL: test_svcdot_s32_2 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cdot.nxv4i32( %op1, %op2, %op3, i32 180) + // CHECK: ret 
%[[INTRINSIC]] + // overload-warning@+2 {{implicit declaration of function 'svcdot'}} + // expected-warning@+1 {{implicit declaration of function 'svcdot_s32'}} + return SVE_ACLE_FUNC(svcdot,_s32,,)(op1, op2, op3, 180); +} + +svint32_t test_svcdot_s32_3(svint32_t op1, svint8_t op2, svint8_t op3) +{ + // CHECK-LABEL: test_svcdot_s32_3 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cdot.nxv4i32( %op1, %op2, %op3, i32 270) + // CHECK: ret %[[INTRINSIC]] + // overload-warning@+2 {{implicit declaratio
[clang] fa0371f - [SveEmitter] Add builtins for SVE2 Floating-point integer binary logarithm instructions
Author: Sander de Smalen Date: 2020-05-07T16:15:57+01:00 New Revision: fa0371f4fd97fe9f601991bb2341a1e0426173c6 URL: https://github.com/llvm/llvm-project/commit/fa0371f4fd97fe9f601991bb2341a1e0426173c6 DIFF: https://github.com/llvm/llvm-project/commit/fa0371f4fd97fe9f601991bb2341a1e0426173c6.diff LOG: [SveEmitter] Add builtins for SVE2 Floating-point integer binary logarithm instructions This patch adds builtins for: - svlogb Added: clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_logb.c Modified: clang/include/clang/Basic/arm_sve.td Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 6f436f7f9ff4..f1689d35582c 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1735,6 +1735,15 @@ def SVMLSLT_F_N: SInst<"svmlslt[_n_{d}]","ddhR", "f", MergeNone, "aar def SVMLSLT_F_LANE : SInst<"svmlslt_lane[_{d}]", "ddhhi", "f", MergeNone, "aarch64_sve_fmlslt_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; } + +// SVE2 - Floating-point integer binary logarithm + +let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { +def SVLOGB_M : SInst<"svlogb[_{d}]", "xxPd", "hfd", MergeOp1, "aarch64_sve_flogb">; +def SVLOGB_X : SInst<"svlogb[_{d}]", "xPd", "hfd", MergeAnyExp, "aarch64_sve_flogb">; +def SVLOGB_Z : SInst<"svlogb[_{d}]", "xPd", "hfd", MergeZeroExp, "aarch64_sve_flogb">; +} + // SVE2 - Contiguous conflict detection let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_logb.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_logb.c new file mode 100644 index ..ab0a508b5a27 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_logb.c @@ -0,0 +1,112 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE 
-D__ARM_FEATURE_SVE2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify=overload -verify-ignore-unexpected=error %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint16_t test_svlogb_f16_z(svbool_t pg, svfloat16_t op) +{ + // CHECK-LABEL: test_svlogb_f16_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.flogb.nxv8f16( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + // overload-warning@+2 {{implicit declaration of function 'svlogb_z'}} + // expected-warning@+1 {{implicit declaration of function 'svlogb_f16_z'}} + return SVE_ACLE_FUNC(svlogb,_f16,_z,)(pg, op); +} + +svint32_t test_svlogb_f32_z(svbool_t pg, svfloat32_t op) +{ + // CHECK-LABEL: test_svlogb_f32_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.flogb.nxv4f32( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + // overload-warning@+2 {{implicit declaration of function 'svlogb_z'}} + // expected-warning@+1 {{implicit declaration of function 'svlogb_f32_z'}} + return SVE_ACLE_FUNC(svlogb,_f32,_z,)(pg, op); +} + +svint64_t test_svlogb_f64_z(svbool_t pg, svfloat64_t op) +{ + // CHECK-LABEL: test_svlogb_f64_z + // CHECK: %[[PG:.*]] = call 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.flogb.nxv2f64( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + // overload-warning@+2 {{implicit declaration of function 'svlogb_z'}} + // expected-warning@+1 {{implicit declaration of function 'svlogb_f64_z'}} + return SVE_ACLE_FUNC(svlogb,_f64,_z,)(pg, op); +} + +svint16_t test_svlogb_f16_m(svint16_t inactive, svbool_t pg, svfloat16_t op) +{ + // CHECK-LABEL: test_svlogb_f16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fl
[clang] ae65224 - [SveEmitter] Add builtins for SVE2 Vector histogram count instructions
Author: Sander de Smalen Date: 2020-05-07T16:15:57+01:00 New Revision: ae652241bd5b492332fb80e127e31964d3ba83f1 URL: https://github.com/llvm/llvm-project/commit/ae652241bd5b492332fb80e127e31964d3ba83f1 DIFF: https://github.com/llvm/llvm-project/commit/ae652241bd5b492332fb80e127e31964d3ba83f1.diff LOG: [SveEmitter] Add builtins for SVE2 Vector histogram count instructions This patch adds builtins for: - svhistcnt - svhistseg Added: clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_histcnt.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_histseg.c Modified: clang/include/clang/Basic/arm_sve.td Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index f1689d35582c..ce7a8209fd36 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1744,6 +1744,14 @@ def SVLOGB_X : SInst<"svlogb[_{d}]", "xPd", "hfd", MergeAnyExp, "aarch64_sve_ def SVLOGB_Z : SInst<"svlogb[_{d}]", "xPd", "hfd", MergeZeroExp, "aarch64_sve_flogb">; } + +// SVE2 - Vector Histogram count + +let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { +def SVHISTCNT : SInst<"svhistcnt[_{d}]_z", "uPdd", "ilUiUl", MergeNone, "aarch64_sve_histcnt">; +def SVHISTSEG : SInst<"svhistseg[_{d}]", "udd", "cUc",MergeNone, "aarch64_sve_histseg">; +} + // SVE2 - Contiguous conflict detection let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_histcnt.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_histcnt.c new file mode 100644 index ..3f2138bc48c4 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_histcnt.c @@ -0,0 +1,57 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify=overload -verify-ignore-unexpected=error %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svuint32_t test_svhistcnt_s32_z(svbool_t pg, svint32_t op1, svint32_t op2) +{ + // CHECK-LABEL: test_svhistcnt_s32_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.histcnt.nxv4i32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + // overload-warning@+2 {{implicit declaration of function 'svhistcnt_z'}} + // expected-warning@+1 {{implicit declaration of function 'svhistcnt_s32_z'}} + return SVE_ACLE_FUNC(svhistcnt,_s32,_z,)(pg, op1, op2); +} + +svuint64_t test_svhistcnt_s64_z(svbool_t pg, svint64_t op1, svint64_t op2) +{ + // CHECK-LABEL: test_svhistcnt_s64_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.histcnt.nxv2i64( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + // overload-warning@+2 {{implicit declaration of function 'svhistcnt_z'}} + // expected-warning@+1 {{implicit declaration of function 'svhistcnt_s64_z'}} + return SVE_ACLE_FUNC(svhistcnt,_s64,_z,)(pg, op1, op2); +} + +svuint32_t test_svhistcnt_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2) +{ + // CHECK-LABEL: test_svhistcnt_u32_z + // CHECK: %[[PG:.*]] = call 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.histcnt.nxv4i32( %[[PG]], %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + // overload-warning@+2 {{implicit declaration of function 'svhistcnt_z'}} + // expected-warning@+1 {{implicit declaration of function 'svhistcnt_u32_z'}} + return SVE_ACLE_FUNC(svhistcnt,_u32,_z,)(pg, op1, op2); +} + +svuint64_t test_svhistcnt_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2) +{ + // CHECK-LABEL: test_svhistcnt_u64_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @
[clang] f22cdc3 - [SveEmitter] Add builtins for SVE2 Character match instructions
Author: Sander de Smalen Date: 2020-05-07T16:15:57+01:00 New Revision: f22cdc3cc32c60dfe068e7dab366009b5c68c9aa URL: https://github.com/llvm/llvm-project/commit/f22cdc3cc32c60dfe068e7dab366009b5c68c9aa DIFF: https://github.com/llvm/llvm-project/commit/f22cdc3cc32c60dfe068e7dab366009b5c68c9aa.diff LOG: [SveEmitter] Add builtins for SVE2 Character match instructions This patch adds builtins for: - svmatch - svnmatch Added: clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_match.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_nmatch.c Modified: clang/include/clang/Basic/arm_sve.td Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index ce7a8209fd36..12ad086390a5 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1752,6 +1752,14 @@ def SVHISTCNT : SInst<"svhistcnt[_{d}]_z", "uPdd", "ilUiUl", MergeNone, "aarch6 def SVHISTSEG : SInst<"svhistseg[_{d}]", "udd", "cUc",MergeNone, "aarch64_sve_histseg">; } + +// SVE2 - Character match + +let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { +def SVMATCH : SInst<"svmatch[_{d}]", "PPdd", "csUcUs", MergeNone, "aarch64_sve_match">; +def SVNMATCH : SInst<"svnmatch[_{d}]", "PPdd", "csUcUs", MergeNone, "aarch64_sve_nmatch">; +} + // SVE2 - Contiguous conflict detection let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_match.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_match.c new file mode 100644 index ..79591ad7b736 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_match.c @@ -0,0 +1,57 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 
-fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify=overload -verify-ignore-unexpected=error %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svbool_t test_svmatch_s8(svbool_t pg, svint8_t op1, svint8_t op2) +{ + // CHECK-LABEL: test_svmatch_s8 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.match.nxv16i8( %pg, %op1, %op2) + // CHECK: ret %[[INTRINSIC]] + // overload-warning@+2 {{implicit declaration of function 'svmatch'}} + // expected-warning@+1 {{implicit declaration of function 'svmatch_s8'}} + return SVE_ACLE_FUNC(svmatch,_s8,,)(pg, op1, op2); +} + +svbool_t test_svmatch_s16(svbool_t pg, svint16_t op1, svint16_t op2) +{ + // CHECK-LABEL: test_svmatch_s16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.match.nxv8i16( %[[PG]], %op1, %op2) + // CHECK: %[[RET:.*]] = call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %[[INTRINSIC]]) + // CHECK: ret %[[RET]] + // overload-warning@+2 {{implicit declaration of function 'svmatch'}} + // expected-warning@+1 {{implicit declaration of function 'svmatch_s16'}} + return SVE_ACLE_FUNC(svmatch,_s16,,)(pg, op1, op2); +} + +svbool_t test_svmatch_u8(svbool_t pg, svuint8_t op1, svuint8_t op2) +{ + // CHECK-LABEL: test_svmatch_u8 + // CHECK: %[[intrinsic:.*]] = call @llvm.aarch64.sve.match.nxv16i8( %pg, %op1, %op2) + // CHECK: ret %[[intrinsic]] + // 
overload-warning@+2 {{implicit declaration of function 'svmatch'}} + // expected-warning@+1 {{implicit declaration of function 'svmatch_u8'}} + return SVE_ACLE_FUNC(svmatch,_u8,,)(pg, op1, op2); +} + +svbool_t test_svmatch_u16(svbool_t pg, svuint16_t op1, svuint16_t op2) +{ + // CHECK-LABEL: test_svmatch_u16 + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.match.nxv8i16( %[[PG]], %op1, %op2) + // CHECK: %[[RET:.*]] = call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %[[INTRINSIC]]) + // CHECK: ret %[[RET]] + // overload-warning@+2 {{implicit
[clang] 086722c - [SveEmitter] Add builtins for SVE2 Floating-point widening multiply-accumulate
Author: Sander de Smalen Date: 2020-05-07T16:15:57+01:00 New Revision: 086722c18e4748f8d250806abbf8d7fa5c05e51f URL: https://github.com/llvm/llvm-project/commit/086722c18e4748f8d250806abbf8d7fa5c05e51f DIFF: https://github.com/llvm/llvm-project/commit/086722c18e4748f8d250806abbf8d7fa5c05e51f.diff LOG: [SveEmitter] Add builtins for SVE2 Floating-point widening multiply-accumulate This patch adds builtins for: - svmlalb, svmlalb_lane - svmlalt, svmlalt_lane - svmlslb, svmlslb_lane - svmlslt, svmlslt_lane Added: Modified: clang/include/clang/Basic/arm_sve.td clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_mlalb.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_mlalt.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_mlslb.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_mlslt.c clang/test/CodeGen/aarch64-sve2-intrinsics/negative/acle_sve2_mlalb.c clang/test/CodeGen/aarch64-sve2-intrinsics/negative/acle_sve2_mlalt.c clang/test/CodeGen/aarch64-sve2-intrinsics/negative/acle_sve2_mlslb.c clang/test/CodeGen/aarch64-sve2-intrinsics/negative/acle_sve2_mlslt.c Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 880ca4e19f3d..6f436f7f9ff4 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1717,6 +1717,24 @@ def SVCDOT_LANE : SInst<"svcdot_lane[_{d}]", "ddqqii", "il", MergeNone, "aarch ImmCheck<3, ImmCheckLaneIndexDot, 2>]>; } + +// SVE2 - Floating-point widening multiply-accumulate + +let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { +def SVMLALB_F : SInst<"svmlalb[_{d}]", "ddhh", "f", MergeNone, "aarch64_sve_fmlalb">; +def SVMLALB_F_N: SInst<"svmlalb[_n_{d}]","ddhR", "f", MergeNone, "aarch64_sve_fmlalb">; +def SVMLALB_F_LANE : SInst<"svmlalb_lane[_{d}]", "ddhhi", "f", MergeNone, "aarch64_sve_fmlalb_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVMLALT_F : SInst<"svmlalt[_{d}]", "ddhh", "f", MergeNone, "aarch64_sve_fmlalt">; +def SVMLALT_F_N: 
SInst<"svmlalt[_n_{d}]","ddhR", "f", MergeNone, "aarch64_sve_fmlalt">; +def SVMLALT_F_LANE : SInst<"svmlalt_lane[_{d}]", "ddhhi", "f", MergeNone, "aarch64_sve_fmlalt_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVMLSLB_F : SInst<"svmlslb[_{d}]", "ddhh", "f", MergeNone, "aarch64_sve_fmlslb">; +def SVMLSLB_F_N: SInst<"svmlslb[_n_{d}]","ddhR", "f", MergeNone, "aarch64_sve_fmlslb">; +def SVMLSLB_F_LANE : SInst<"svmlslb_lane[_{d}]", "ddhhi", "f", MergeNone, "aarch64_sve_fmlslb_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVMLSLT_F : SInst<"svmlslt[_{d}]", "ddhh", "f", MergeNone, "aarch64_sve_fmlslt">; +def SVMLSLT_F_N: SInst<"svmlslt[_n_{d}]","ddhR", "f", MergeNone, "aarch64_sve_fmlslt">; +def SVMLSLT_F_LANE : SInst<"svmlslt_lane[_{d}]", "ddhhi", "f", MergeNone, "aarch64_sve_fmlslt_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +} + // SVE2 - Contiguous conflict detection let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_mlalb.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_mlalb.c index 497d7eba5849..e4f80eb639de 100644 --- a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_mlalb.c +++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_mlalb.c @@ -217,3 +217,44 @@ svuint64_t test_svmlalb_lane_u64_1(svuint64_t op1, svuint32_t op2, svuint32_t op // expected-warning@+1 {{implicit declaration of function 'svmlalb_lane_u64'}} return SVE_ACLE_FUNC(svmlalb_lane,_u64,,)(op1, op2, op3, 3); } + +svfloat32_t test_svmlalb_f32(svfloat32_t op1, svfloat16_t op2, svfloat16_t op3) +{ + // CHECK-LABEL: test_svmlalb_f32 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmlalb.nxv4f32( %op1, %op2, %op3) + // CHECK: ret %[[INTRINSIC]] + // overload-warning@+2 {{implicit declaration of function 'svmlalb'}} + // expected-warning@+1 {{implicit declaration of function 'svmlalb_f32'}} + return SVE_ACLE_FUNC(svmlalb,_f32,,)(op1, op2, op3); +} + +svfloat32_t test_svmlalb_n_f32(svfloat32_t 
op1, svfloat16_t op2, float16_t op3) +{ + // CHECK-LABEL: test_svmlalb_n_f32 + // CHECK: %[[DUP:.*]] = call @llvm.aarch64.sve.dup.x.nxv8f16(half %op3) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.fmlalb.nxv4f32( %op1, %op2, %[[DUP]]) + // CHECK: ret %[[INTRINSIC]] + // overload-warning@+2 {{implicit declaration of function 'svmlalb'}} + // expected-warning@+1 {{implicit declaration of function 'svmlalb_n_f32'}} + return
[clang] e46043b - [SveEmitter] Add builtins for SVE2 Optional extensions (AES, SHA3, SM4, BITPERM)
Author: Sander de Smalen Date: 2020-05-07T16:15:57+01:00 New Revision: e46043bba7acbf7738801f199b6fb850b1cf70ad URL: https://github.com/llvm/llvm-project/commit/e46043bba7acbf7738801f199b6fb850b1cf70ad DIFF: https://github.com/llvm/llvm-project/commit/e46043bba7acbf7738801f199b6fb850b1cf70ad.diff LOG: [SveEmitter] Add builtins for SVE2 Optional extensions (AES, SHA3, SM4, BITPERM) This patch adds various builtins under their corresponding feature macros: Defined under __ARM_FEATURE_SVE2_AES: - svaesd - svaese - svaesimc - svaesmc - svpmullb_pair - svpmullt_pair Defined under __ARM_FEATURE_SVE2_SHA3: - svrax1 Defined under __ARM_FEATURE_SVE2_SM4: - svsm4e - svsm4ekey Defined under __ARM_FEATURE_SVE2_BITPERM: - svbdep - svbext - svbgrp Added: clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_aesd.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_aese.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_aesimc.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_aesmc.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_bdep.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_bext.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_bgrp.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_pmullb_128.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_pmullt_128.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_rax1.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_sm4e.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_sm4ekey.c Modified: clang/include/clang/Basic/arm_sve.td Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 12ad086390a5..6615ef677127 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1773,3 +1773,37 @@ def SVWHILEWR_H : SInst<"svwhilewr[_{1}]", "Pcc", "sUsh", MergeNone, "aarch64_sv def SVWHILEWR_S : SInst<"svwhilewr[_{1}]", "Pcc", "iUif", MergeNone, "aarch64_sve_whilewr_s", [IsOverloadWhileRW]>; def SVWHILEWR_D : 
SInst<"svwhilewr[_{1}]", "Pcc", "lUld", MergeNone, "aarch64_sve_whilewr_d", [IsOverloadWhileRW]>; } + + +// SVE2 - Optional + +let ArchGuard = "defined(__ARM_FEATURE_SVE2_AES)" in { +def SVAESD : SInst<"svaesd[_{d}]", "ddd", "Uc", MergeNone, "aarch64_sve_aesd", [IsOverloadNone]>; +def SVAESIMC : SInst<"svaesimc[_{d}]", "dd", "Uc", MergeNone, "aarch64_sve_aesimc", [IsOverloadNone]>; +def SVAESE : SInst<"svaese[_{d}]", "ddd", "Uc", MergeNone, "aarch64_sve_aese", [IsOverloadNone]>; +def SVAESMC : SInst<"svaesmc[_{d}]", "dd", "Uc", MergeNone, "aarch64_sve_aesmc", [IsOverloadNone]>; + +def SVPMULLB_PAIR_U64 : SInst<"svpmullb_pair[_{d}]", "ddd", "Ul", MergeNone, "aarch64_sve_pmullb_pair">; +def SVPMULLB_PAIR_N_U64 : SInst<"svpmullb_pair[_n_{d}]", "dda", "Ul", MergeNone, "aarch64_sve_pmullb_pair">; + +def SVPMULLT_PAIR_U64 : SInst<"svpmullt_pair[_{d}]", "ddd", "Ul", MergeNone, "aarch64_sve_pmullt_pair">; +def SVPMULLT_PAIR_N_U64 : SInst<"svpmullt_pair[_n_{d}]", "dda", "Ul", MergeNone, "aarch64_sve_pmullt_pair">; +} + +let ArchGuard = "defined(__ARM_FEATURE_SVE2_SHA3)" in { +def SVRAX1 : SInst<"svrax1[_{d}]", "ddd", "lUl", MergeNone, "aarch64_sve_rax1", [IsOverloadNone]>; +} + +let ArchGuard = "defined(__ARM_FEATURE_SVE2_SM4)" in { +def SVSM4E: SInst<"svsm4e[_{d}]","ddd", "Ui", MergeNone, "aarch64_sve_sm4e", [IsOverloadNone]>; +def SVSM4EKEY : SInst<"svsm4ekey[_{d}]", "ddd", "Ui", MergeNone, "aarch64_sve_sm4ekey", [IsOverloadNone]>; +} + +let ArchGuard = "__ARM_FEATURE_SVE2_BITPERM" in { +def SVBDEP : SInst<"svbdep[_{d}]", "ddd", "UcUsUiUl", MergeNone, "aarch64_sve_bdep_x">; +def SVBDEP_N : SInst<"svbdep[_n_{d}]", "dda", "UcUsUiUl", MergeNone, "aarch64_sve_bdep_x">; +def SVBEXT : SInst<"svbext[_{d}]", "ddd", "UcUsUiUl", MergeNone, "aarch64_sve_bext_x">; +def SVBEXT_N : SInst<"svbext[_n_{d}]", "dda", "UcUsUiUl", MergeNone, "aarch64_sve_bext_x">; +def SVBGRP : SInst<"svbgrp[_{d}]", "ddd", "UcUsUiUl", MergeNone, "aarch64_sve_bgrp_x">; +def SVBGRP_N : SInst<"svbgrp[_n_{d}]", 
"dda", "UcUsUiUl", MergeNone, "aarch64_sve_bgrp_x">; +} diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_aesd.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_aesd.c new file mode 100644 index ..d037f452ce76 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_aesd.c @@ -0,0 +1,23 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2_AES -triple aarch64-none-linux-gnu -target-feature +sve2-aes -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2_AES -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +
[clang] 96a581d - [SveEmitter] Add builtins for SVE2 svtbx (extended table lookup)
Author: Sander de Smalen Date: 2020-05-07T16:15:57+01:00 New Revision: 96a581d0f02441320b68d71fac47d51a68c8b7d0 URL: https://github.com/llvm/llvm-project/commit/96a581d0f02441320b68d71fac47d51a68c8b7d0 DIFF: https://github.com/llvm/llvm-project/commit/96a581d0f02441320b68d71fac47d51a68c8b7d0.diff LOG: [SveEmitter] Add builtins for SVE2 svtbx (extended table lookup) This patch adds builtins for: - svtbx Added: clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_tbx.c Modified: clang/include/clang/Basic/arm_sve.td Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 6615ef677127..97668dfa162d 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1774,6 +1774,12 @@ def SVWHILEWR_S : SInst<"svwhilewr[_{1}]", "Pcc", "iUif", MergeNone, "aarch64_sv def SVWHILEWR_D : SInst<"svwhilewr[_{1}]", "Pcc", "lUld", MergeNone, "aarch64_sve_whilewr_d", [IsOverloadWhileRW]>; } + +// SVE2 - Extended table lookup/permute +let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { +def SVTBX : SInst<"svtbx[_{d}]", "dddu", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tbx">; +} + // SVE2 - Optional diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_tbx.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_tbx.c new file mode 100644 index ..78cc1b016417 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_tbx.c @@ -0,0 +1,123 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve 
-fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify=overload -verify-ignore-unexpected=error %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +svint8_t test_svtbx_s8(svint8_t fallback, svint8_t data, svuint8_t indices) +{ + // CHECK-LABEL: test_svtbx_s8 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.tbx.nxv16i8( %fallback, %data, %indices) + // CHECK: ret %[[INTRINSIC]] + // overload-warning@+2 {{implicit declaration of function 'svtbx'}} + // expected-warning@+1 {{implicit declaration of function 'svtbx_s8'}} + return SVE_ACLE_FUNC(svtbx,_s8,,)(fallback, data, indices); +} + +svint16_t test_svtbx_s16(svint16_t fallback, svint16_t data, svuint16_t indices) +{ + // CHECK-LABEL: test_svtbx_s16 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.tbx.nxv8i16( %fallback, %data, %indices) + // CHECK: ret %[[INTRINSIC]] + // overload-warning@+2 {{implicit declaration of function 'svtbx'}} + // expected-warning@+1 {{implicit declaration of function 'svtbx_s16'}} + return SVE_ACLE_FUNC(svtbx,_s16,,)(fallback, data, indices); +} + +svint32_t test_svtbx_s32(svint32_t fallback, svint32_t data, svuint32_t indices) +{ + // CHECK-LABEL: test_svtbx_s32 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.tbx.nxv4i32( %fallback, %data, %indices) + // CHECK: ret %[[INTRINSIC]] + // overload-warning@+2 {{implicit declaration of function 'svtbx'}} + // expected-warning@+1 {{implicit declaration of function 'svtbx_s32'}} + return SVE_ACLE_FUNC(svtbx,_s32,,)(fallback, data, indices); +} + +svint64_t test_svtbx_s64(svint64_t fallback, svint64_t data, svuint64_t indices) 
+{ + // CHECK-LABEL: test_svtbx_s64 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.tbx.nxv2i64( %fallback, %data, %indices) + // CHECK: ret %[[INTRINSIC]] + // overload-warning@+2 {{implicit declaration of function 'svtbx'}} + // expected-warning@+1 {{implicit declaration of function 'svtbx_s64'}} + return SVE_ACLE_FUNC(svtbx,_s64,,)(fallback, data, indices); +} + +svuint8_t test_svtbx_u8(svuint8_t fallback, svuint8_t data, svuint8_t indices) +{ + // CHECK-LABEL: test_svtbx_u8 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.tbx.nxv16i8( %fallback, %data, %indices) + // CHECK: ret %[[INTRINSIC]] + // overload-warning@+2 {{implicit declar
[clang] 4cad975 - [SveEmitter] Add builtins for svmovlb and svmovlt
Author: Sander de Smalen Date: 2020-05-11T09:41:58+01:00 New Revision: 4cad97595f40f7a5bda25f4aa107cbbce05bd394 URL: https://github.com/llvm/llvm-project/commit/4cad97595f40f7a5bda25f4aa107cbbce05bd394 DIFF: https://github.com/llvm/llvm-project/commit/4cad97595f40f7a5bda25f4aa107cbbce05bd394.diff LOG: [SveEmitter] Add builtins for svmovlb and svmovlt These builtins are expanded in CGBuiltin to use intrinsics for (signed/unsigned) shift left long top/bottom. Reviewers: efriedma, SjoerdMeijer Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D79579 Added: clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_movlb.c clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_movlt.c Modified: clang/include/clang/Basic/arm_sve.td clang/lib/CodeGen/CGBuiltin.cpp clang/lib/CodeGen/CodeGenFunction.h Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 97668dfa162d..b827601d56c1 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1481,6 +1481,11 @@ def SVSHLLB_U_N : SInst<"svshllb[_n_{d}]", "dhi", "UsUiUl", MergeNone, "aarch64_ def SVSHLLT_S_N : SInst<"svshllt[_n_{d}]", "dhi", "sil",MergeNone, "aarch64_sve_sshllt", [], [ImmCheck<1, ImmCheckShiftLeft, 0>]>; def SVSHLLT_U_N : SInst<"svshllt[_n_{d}]", "dhi", "UsUiUl", MergeNone, "aarch64_sve_ushllt", [], [ImmCheck<1, ImmCheckShiftLeft, 0>]>; +def SVMOVLB_S_N : SInst<"svmovlb[_{d}]", "dh", "sil",MergeNone>; +def SVMOVLB_U_N : SInst<"svmovlb[_{d}]", "dh", "UsUiUl", MergeNone>; +def SVMOVLT_S_N : SInst<"svmovlt[_{d}]", "dh", "sil",MergeNone>; +def SVMOVLT_U_N : SInst<"svmovlt[_{d}]", "dh", "UsUiUl", MergeNone>; + def SVMLALB_S_LANE : SInst<"svmlalb_lane[_{d}]", "ddhhi", "il", MergeNone, "aarch64_sve_smlalb_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; def SVMLALB_U_LANE : SInst<"svmlalb_lane[_{d}]", "ddhhi", "UiUl", MergeNone, "aarch64_sve_umlalb_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; def SVMLALT_S_LANE : 
SInst<"svmlalt_lane[_{d}]", "ddhhi", "il", MergeNone, "aarch64_sve_smlalt_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index dbe8826454dc..c64fde719445 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -7824,6 +7824,13 @@ Value *CodeGenFunction::EmitSVEPMull(SVETypeFlags TypeFlags, return EmitSVEReinterpret(Call, Ty); } +Value *CodeGenFunction::EmitSVEMovl(SVETypeFlags TypeFlags, +ArrayRef Ops, unsigned BuiltinID) { + llvm::Type *OverloadedTy = getSVEType(TypeFlags); + Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy); + return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)}); +} + Value *CodeGenFunction::EmitSVEPrefetchLoad(SVETypeFlags TypeFlags, SmallVectorImpl &Ops, unsigned BuiltinID) { @@ -8070,6 +8077,26 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]}); } + case SVE::BI__builtin_sve_svmovlb_u16: + case SVE::BI__builtin_sve_svmovlb_u32: + case SVE::BI__builtin_sve_svmovlb_u64: +return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb); + + case SVE::BI__builtin_sve_svmovlb_s16: + case SVE::BI__builtin_sve_svmovlb_s32: + case SVE::BI__builtin_sve_svmovlb_s64: +return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb); + + case SVE::BI__builtin_sve_svmovlt_u16: + case SVE::BI__builtin_sve_svmovlt_u32: + case SVE::BI__builtin_sve_svmovlt_u64: +return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt); + + case SVE::BI__builtin_sve_svmovlt_s16: + case SVE::BI__builtin_sve_svmovlt_s32: + case SVE::BI__builtin_sve_svmovlt_s64: +return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt); + case SVE::BI__builtin_sve_svpmullt_u16: case SVE::BI__builtin_sve_svpmullt_u64: case SVE::BI__builtin_sve_svpmullt_n_u16: diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 06898f3232f4..61b51118212c 
100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -3924,6 +3924,9 @@ class CodeGenFunction : public CodeGenTypeCache { llvm::Value *EmitSVEPMull(SVETypeFlags TypeFlags, llvm::SmallVectorImpl &Ops, unsigned BuiltinID); + llvm::Value *EmitSVEMovl(SVETypeFlags TypeFlags, + llvm::ArrayRef Ops, + unsigned BuiltinID); llvm::Value *EmitSVEPredicateCast(llvm::Value *Pred, llvm::ScalableVectorType *VTy); llvm::Value *EmitSVEGatherLoad(SVETypeFlags T
[clang] d6936be - [SveEmitter] Add builtins for svdup and svindex
Author: Sander de Smalen Date: 2020-05-12T11:02:32+01:00 New Revision: d6936be2ef8ce5d5d85b8a6cdd1477cd79688c3a URL: https://github.com/llvm/llvm-project/commit/d6936be2ef8ce5d5d85b8a6cdd1477cd79688c3a DIFF: https://github.com/llvm/llvm-project/commit/d6936be2ef8ce5d5d85b8a6cdd1477cd79688c3a.diff LOG: [SveEmitter] Add builtins for svdup and svindex Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D79357 Added: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dup.c Modified: clang/include/clang/Basic/arm_sve.td clang/lib/CodeGen/CGBuiltin.cpp clang/lib/CodeGen/CodeGenFunction.h Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index b827601d56c1..e8e05902102a 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -627,6 +627,13 @@ def SVDUPQ_16 : SInst<"svdupq[_n]_{d}", "d", "sUsh", MergeNone>; def SVDUPQ_32 : SInst<"svdupq[_n]_{d}", "d", "iUif", MergeNone>; def SVDUPQ_64 : SInst<"svdupq[_n]_{d}", "dss", "lUld", MergeNone>; +def SVDUP : SInst<"svdup[_n]_{d}", "ds", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_dup_x">; +def SVDUP_M : SInst<"svdup[_n]_{d}", "ddPs", "csilUcUsUiUlhfd", MergeOp1, "aarch64_sve_dup">; +def SVDUP_X : SInst<"svdup[_n]_{d}", "dPs", "csilUcUsUiUlhfd", MergeAnyExp, "aarch64_sve_dup">; +def SVDUP_Z : SInst<"svdup[_n]_{d}", "dPs", "csilUcUsUiUlhfd", MergeZeroExp, "aarch64_sve_dup">; + +def SVINDEX : SInst<"svindex_{d}", "dss", "csilUcUsUiUl",MergeNone, "aarch64_sve_index">; + // Integer arithmetic multiclass SInstZPZ flags=[]> { @@ -1061,7 +1068,11 @@ def SVCLASTA_N : SInst<"svclasta[_n_{d}]", "sPsd", "csilUcUsUiUlhfd", MergeNo def SVCLASTB : SInst<"svclastb[_{d}]","dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_clastb">; def SVCLASTB_N : SInst<"svclastb[_n_{d}]", "sPsd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_clastb_n">; def SVCOMPACT: SInst<"svcompact[_{d}]", "dPd", "ilUiUlfd", MergeNone, "aarch64_sve_compact">; -// SVDUP_LANE(to 
land in D78750) +// Note: svdup_lane is implemented using the intrinsic for TBL to represent a +// splat of any possible lane. It is up to LLVM to pick a more efficient +// instruction such as DUP (indexed) if the lane index fits the range of the +// instruction's immediate. +def SVDUP_LANE : SInst<"svdup_lane[_{d}]", "ddL", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tbl">; def SVDUPQ_LANE : SInst<"svdupq_lane[_{d}]", "ddn", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_dupq_lane">; def SVEXT: SInst<"svext[_{d}]", "dddi", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ext", [], [ImmCheck<2, ImmCheckExtract, 1>]>; def SVLASTA : SInst<"svlasta[_{d}]", "sPd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_lasta">; @@ -1104,6 +1115,7 @@ def SVDUPQ_B8 : SInst<"svdupq[_n]_{d}", "P", "Pc", MergeNone>; def SVDUPQ_B16 : SInst<"svdupq[_n]_{d}", "P", "Ps", MergeNone>; def SVDUPQ_B32 : SInst<"svdupq[_n]_{d}", "P", "Pi", MergeNone>; def SVDUPQ_B64 : SInst<"svdupq[_n]_{d}", "Pss", "Pl", MergeNone>; +def SVDUP_N_B : SInst<"svdup[_n]_{d}", "Ps", "PcPsPiPl", MergeNone>; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 541dac7b7580..47620c13aed6 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -7909,12 +7909,15 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E, // Limit the usage of scalable llvm IR generated by the ACLE by using the // sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat. 
-Value *CodeGenFunction::EmitSVEDupX(Value* Scalar) { - auto F = CGM.getIntrinsic(Intrinsic::aarch64_sve_dup_x, -getSVEVectorForElementType(Scalar->getType())); +Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) { + auto F = CGM.getIntrinsic(Intrinsic::aarch64_sve_dup_x, Ty); return Builder.CreateCall(F, Scalar); } +Value *CodeGenFunction::EmitSVEDupX(Value* Scalar) { + return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType())); +} + Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) { // FIXME: For big endian this needs an additional REV, or needs a separate // intrinsic that is code-generated as a no-op, because the LLVM bitcast @@ -8109,6 +8112,17 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, case SVE::BI__builtin_sve_svpmullb_n_u64: return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair); + case SVE::BI__builtin_sve_svdup_n_b8: + case SVE::BI__builtin_sve_svdup_n_b16: + case SVE::BI__builtin_sve_svdup_n_b32: + case SVE::BI__builtin_sve_svdup_n_b64: { +
[llvm] [compiler-rt] [clang] [AArch64][SME] Add support for sme-fa64 (PR #70809)
https://github.com/sdesmalen-arm edited https://github.com/llvm/llvm-project/pull/70809 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [compiler-rt] [llvm] [AArch64][SME] Add support for sme-fa64 (PR #70809)
https://github.com/sdesmalen-arm approved this pull request. LGTM (with nit addressed), but please check with @ilinpv on the FMV priorities before merging the patch. https://github.com/llvm/llvm-project/pull/70809 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[compiler-rt] [llvm] [clang] [AArch64][SME] Add support for sme-fa64 (PR #70809)
@@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mattr=+sve -mattr=+sme-fa64 -force-streaming-compatible-sve < %s | FileCheck %s -check-prefix=FA64 +; RUN: llc -mattr=+sve -force-streaming-compatible-sve < %s | FileCheck %s -check-prefix=NO-FA64 + +target triple = "aarch64-unknown-linux-gnu" + +define <8 x i8> @mla8xi8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C) { +; FA64-LABEL: mla8xi8: +; FA64: // %bb.0: +; FA64-NEXT:mla v2.8b, v0.8b, v1.8b +; FA64-NEXT:fmov d0, d2 +; FA64-NEXT:ret +; +; NO-FA64-LABEL: mla8xi8: +; NO-FA64: // %bb.0: +; NO-FA64-NEXT:ptrue p0.b, vl8 +; NO-FA64-NEXT:// kill: def $d0 killed $d0 def $z0 +; NO-FA64-NEXT:// kill: def $d2 killed $d2 def $z2 +; NO-FA64-NEXT:// kill: def $d1 killed $d1 def $z1 +; NO-FA64-NEXT:mad z0.b, p0/m, z1.b, z2.b +; NO-FA64-NEXT:// kill: def $d0 killed $d0 killed $z0 +; NO-FA64-NEXT:ret + %tmp1 = mul <8 x i8> %A, %B; sdesmalen-arm wrote: nit: odd indentation? https://github.com/llvm/llvm-project/pull/70809 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Add multi-vector add/sub builtins (PR #69725)
@@ -1980,3 +1980,12 @@ def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sv defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUl", "aarch64_sve_revd">; } + + +// SME2 + +let TargetGuard = "sme2" in { sdesmalen-arm wrote: Can you add a comment here describing why we add SME2 intrinsics to the arm_sve.td file? https://github.com/llvm/llvm-project/pull/69725 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Add multi-vector add/sub builtins (PR #69725)
@@ -1180,6 +1194,32 @@ void SVEEmitter::createIntrinsic( } } +void SVEEmitter::createCoreHeaderIntrinsics(raw_ostream &OS, +SVEEmitter &Emitter, +ACLEKind Kind) { + SmallVector, 128> Defs; + std::vector RV = Records.getAllDerivedDefinitions("Inst"); + for (auto *R : RV) +createIntrinsic(R, Defs); + + // Sort intrinsics in header file by following order/priority: + // - Architectural guard (i.e. does it require SVE2 or SVE2_AES) + // - Class (is intrinsic overloaded or not) + // - Intrinsic name + std::stable_sort( + Defs.begin(), Defs.end(), [](const std::unique_ptr &A, + const std::unique_ptr &B) { +auto ToTuple = [](const std::unique_ptr &I) { + return std::make_tuple(I->getGuard(), (unsigned)I->getClassKind(), I->getName()); sdesmalen-arm wrote: 80char formatting. https://github.com/llvm/llvm-project/pull/69725 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Add multi-vector add/sub builtins (PR #69725)
@@ -0,0 +1,1170 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED,A5) A1##A3##A5 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4,A5) A1##A2##A3##A4##A5 +#endif + +// +// Single-Multi +// + +// x2 +// CHECK-LABEL: @test_svadd_write_single2_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) +// CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) +// CHECK-NEXT:tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT:ret void +// +// CPP-CHECK-LABEL: @_Z28test_svadd_write_single2_s32j11svint32x2_tu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) +// CPP-CHECK-NEXT:tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT:ret void +// +void test_svadd_write_single2_s32(uint32_t slice_base, svint32x2_t zn, svint32_t zm) { + SVE_ACLE_FUNC(svadd_write,_single,_za32,_s32,_vg1x2)(slice_base, zn, zm); +} + +// CHECK-LABEL: @test_svadd_write_single2_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) +// CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) +// CHECK-NEXT:tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT:ret void +// +// CPP-CHECK-LABEL: @_Z28test_svadd_write_single2_u32j12svuint32x2_tu12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) +// 
CPP-CHECK-NEXT:tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT:ret void +// +void test_svadd_write_single2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32_t zm) { + SVE_ACLE_FUNC(svadd_write,_single,_za32,_u32,_vg1x2)(slice_base, zn, zm); +} + +// CHECK-LABEL: @test_svadd_write_single2_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) +// CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) +// CHECK-NEXT:tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT:ret void +// +// CPP-CHECK-LABEL: @_Z28test_svadd_write_single2_s64j11svint64x2_tu11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) +// CPP-CHECK-NEXT:tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 [[SLICE
[clang] [Clang][SME2] Add multi-vector add/sub builtins (PR #69725)
@@ -354,6 +356,9 @@ class SVEEmitter { /// Emit arm_sve.h. void createHeader(raw_ostream &o); + // Emits core intrinsics in both arm_sme.h and arm_sve.h + void createCoreHeaderIntrinsics(raw_ostream &o, SVEEmitter &Emitter, ACLEKind Kind); sdesmalen-arm wrote: 80char limit. Can you clang-format this patch? https://github.com/llvm/llvm-project/pull/69725 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Add multi-vector add/sub builtins (PR #69725)
https://github.com/sdesmalen-arm approved this pull request. https://github.com/llvm/llvm-project/pull/69725 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[mlir] [clang] [llvm] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)
@@ -4825,6 +4827,113 @@ SDValue AArch64TargetLowering::getPStateSM(SelectionDAG &DAG, SDValue Chain, Mask); } +// Lower an SME LDR/STR ZA intrinsic to LDR_ZA_PSEUDO or STR_ZA. +// Case 1: If the vector number (vecnum) is an immediate in range, it gets +// folded into the instruction +//ldr(%tileslice, %ptr, 11) -> ldr [%tileslice, 11], [%ptr, 11] +// Case 2: If the vecnum is not an immediate, then it is used to modify the base +// and tile slice registers +//ldr(%tileslice, %ptr, %vecnum) +//-> +//%svl = rdsvl +//%ptr2 = %ptr + %svl * %vecnum +//%tileslice2 = %tileslice + %vecnum +//ldr [%tileslice2, 0], [%ptr2, 0] +// Case 3: If the vecnum is an immediate out of range, then the same is done as +// case 2, but the base and slice registers are modified by the greatest +// multiple of 15 lower than the vecnum and the remainder is folded into the +// instruction. This means that successive loads and stores that are offset from +// each other can share the same base and slice register updates. +//ldr(%tileslice, %ptr, 22) +//ldr(%tileslice, %ptr, 23) +//-> +//%svl = rdsvl +//%ptr2 = %ptr + %svl * 15 +//%tileslice2 = %tileslice + 15 +//ldr [%tileslice2, 7], [%ptr2, 7] +//ldr [%tileslice2, 8], [%ptr2, 8] +// Case 4: If the vecnum is an add of an immediate, then the non-immediate +// operand and the immediate can be folded into the instruction, like case 2. +//ldr(%tileslice, %ptr, %vecnum + 7) +//ldr(%tileslice, %ptr, %vecnum + 8) +//-> +//%svl = rdsvl +//%ptr2 = %ptr + %svl * %vecnum +//%tileslice2 = %tileslice + %vecnum +//ldr [%tileslice2, 7], [%ptr2, 7] +//ldr [%tileslice2, 8], [%ptr2, 8] +// Case 5: The vecnum being an add of an immediate out of range is also handled, +// in which case the same remainder logic as case 3 is used. 
+SDValue LowerSMELdrStr(SDValue N, SelectionDAG &DAG, bool IsLoad) { + SDLoc DL(N); + + SDValue TileSlice = N->getOperand(2); + SDValue Base = N->getOperand(3); + SDValue VecNum = N->getOperand(4); + int Addend = 0; + + // If the vnum is an add, we can fold that add into the instruction if the + // operand is an immediate. The range check is performed below. + if (VecNum.getOpcode() == ISD::ADD) { +if (auto ImmNode = dyn_cast(VecNum.getOperand(1))) { + Addend = ImmNode->getSExtValue(); + VecNum = VecNum.getOperand(0); +} + } + + SDValue Remainder = DAG.getTargetConstant(Addend, DL, MVT::i32); + + // true if the base and slice registers need to be modified + bool NeedsAdd = true; + auto ImmNode = dyn_cast(VecNum); + if (ImmNode || Addend != 0) { +int Imm = ImmNode ? ImmNode->getSExtValue() + Addend : Addend; +Remainder = DAG.getTargetConstant(Imm % 16, DL, MVT::i32); +if (Imm >= 0 && Imm <= 15) { + // If vnum is an immediate in range then we don't need to modify the tile + // slice and base register. We could also get here because Addend != 0 but + // vecnum is not an immediate, in which case we still want the base and + // slice register to be modified + NeedsAdd = !ImmNode; sdesmalen-arm wrote: Maybe it's me, but I find this logic a little tricky to follow. Specifically here that the value for NeedsAdd depends on previous control flow, which depends on whether ImmNode is defined. It might be a bit simpler to follow if you progressively break down VecNum in two subsequent steps. First break it down into: * A variable part (e.g. for `i + 17` that would be `i`) * A constant (e.g. for `i + 17` that would be `17`) Second to break down `17` into: * A base constant (for `17` that would be `15`) * An immediate (for `17` that would be `2`) When you then fold the base constant into the variable part, you can avoid the need for `NeedsAdd` because it can be inferred from whether there is a variable part, e.g.: ``` // First split VecNum into a "Variable" and "Constant" part. 
int32_t ConstAddend = 0; SDValue VariableAddend = VecNum; if (VecNum.getOpcode() == ISD::ADD && isa(VecNum.getOperand(1))) { ConstAddend = cast(VecNum.getOperand(1))->getSExtValue(); VariableAddend = VecNum.getOperand(0); } else if (auto ImmNode = dyn_cast(VecNum)) { ConstAddend = ImmNode->getSExtValue(); VariableAddend = SDValue(); } // Further try to split the constant into an immediate. int32_t ImmAddend = ConstAddend % 16; if (int32_t C = (ConstAddend - ImmAddend)) { SDValue CVal = DAG.getConstant(C, DL, MVT::i32); VariableAddend = VariableAddend ? DAG.getNode(ISD::ADD, DL, MVT::i32, {VariableAddend, CVal}) : CVal; } if (VariableAddend) { // Get the vector length that will be multiplied by VariableAddend auto SVL = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64, ... } ``` https://github.com/llvm/llvm-project/pull/68565 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/
[llvm] [clang] [mlir] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)
@@ -4825,6 +4827,113 @@ SDValue AArch64TargetLowering::getPStateSM(SelectionDAG &DAG, SDValue Chain, Mask); } +// Lower an SME LDR/STR ZA intrinsic to LDR_ZA_PSEUDO or STR_ZA. +// Case 1: If the vector number (vecnum) is an immediate in range, it gets +// folded into the instruction +//ldr(%tileslice, %ptr, 11) -> ldr [%tileslice, 11], [%ptr, 11] +// Case 2: If the vecnum is not an immediate, then it is used to modify the base +// and tile slice registers +//ldr(%tileslice, %ptr, %vecnum) +//-> +//%svl = rdsvl +//%ptr2 = %ptr + %svl * %vecnum +//%tileslice2 = %tileslice + %vecnum +//ldr [%tileslice2, 0], [%ptr2, 0] +// Case 3: If the vecnum is an immediate out of range, then the same is done as +// case 2, but the base and slice registers are modified by the greatest +// multiple of 15 lower than the vecnum and the remainder is folded into the +// instruction. This means that successive loads and stores that are offset from +// each other can share the same base and slice register updates. +//ldr(%tileslice, %ptr, 22) +//ldr(%tileslice, %ptr, 23) +//-> +//%svl = rdsvl +//%ptr2 = %ptr + %svl * 15 +//%tileslice2 = %tileslice + 15 +//ldr [%tileslice2, 7], [%ptr2, 7] +//ldr [%tileslice2, 8], [%ptr2, 8] +// Case 4: If the vecnum is an add of an immediate, then the non-immediate +// operand and the immediate can be folded into the instruction, like case 2. +//ldr(%tileslice, %ptr, %vecnum + 7) +//ldr(%tileslice, %ptr, %vecnum + 8) +//-> +//%svl = rdsvl +//%ptr2 = %ptr + %svl * %vecnum +//%tileslice2 = %tileslice + %vecnum +//ldr [%tileslice2, 7], [%ptr2, 7] +//ldr [%tileslice2, 8], [%ptr2, 8] +// Case 5: The vecnum being an add of an immediate out of range is also handled, +// in which case the same remainder logic as case 3 is used. 
+SDValue LowerSMELdrStr(SDValue N, SelectionDAG &DAG, bool IsLoad) { + SDLoc DL(N); + + SDValue TileSlice = N->getOperand(2); + SDValue Base = N->getOperand(3); + SDValue VecNum = N->getOperand(4); + int Addend = 0; + + // If the vnum is an add, we can fold that add into the instruction if the + // operand is an immediate. The range check is performed below. + if (VecNum.getOpcode() == ISD::ADD) { +if (auto ImmNode = dyn_cast(VecNum.getOperand(1))) { + Addend = ImmNode->getSExtValue(); + VecNum = VecNum.getOperand(0); +} + } + + SDValue Remainder = DAG.getTargetConstant(Addend, DL, MVT::i32); + + // true if the base and slice registers need to be modified + bool NeedsAdd = true; + auto ImmNode = dyn_cast(VecNum); + if (ImmNode || Addend != 0) { +int Imm = ImmNode ? ImmNode->getSExtValue() + Addend : Addend; sdesmalen-arm wrote: In this expression either (Addend is 0 and ImmNode is non-zero) or (ImmNode is zero and Addend is non-zero). I don't think there is an instance where they can both be non-zero. https://github.com/llvm/llvm-project/pull/68565 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME2] Add _x2/_x4 svqrshr builtins. (PR #74100)
https://github.com/sdesmalen-arm approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/74100 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME2] Remove IsPreservesZA from ldr_zt builtin (PR #74303)
https://github.com/sdesmalen-arm approved this pull request. https://github.com/llvm/llvm-project/pull/74303 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [SME2] Add LUTI2 and LUTI4 quad Builtins and Intrinsics (PR #73317)
@@ -0,0 +1,233 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +// CHECK-LABEL: define dso_local @test_svluti4_lane_zt_u16 +// CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8i16(i32 0, [[ZN]], i32 0) +// CHECK-NEXT:[[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT:[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT:[[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT:[[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT:[[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT:[[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT:[[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT:[[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT:ret [[TMP8]] +// +// CPP-CHECK-LABEL: define dso_local @_Z24test_svluti4_lane_zt_u16u11__SVUint8_t +// CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0:[0-9]+]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8i16(i32 
0, [[ZN]], i32 0) +// CPP-CHECK-NEXT:[[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT:[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT:[[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT:[[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT:[[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT:[[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT:[[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT:[[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT:ret [[TMP8]] +// +svuint16x4_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { + return svluti4_lane_zt_u16_x4(0, zn, 0); sdesmalen-arm wrote: ```suggestion return svluti4_lane_zt_u16_x4(0, zn, 1); ``` Rather than using `0` as the immediate, can you use the maximum range value in this and other tests? https://github.com/llvm/llvm-project/pull/73317 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [SME2] Add LUTI2 and LUTI4 quad Builtins and Intrinsics (PR #73317)
https://github.com/sdesmalen-arm edited https://github.com/llvm/llvm-project/pull/73317 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [SME2] Add LUTI2 and LUTI4 quad Builtins and Intrinsics (PR #73317)
https://github.com/sdesmalen-arm commented: Looks mostly fine, just have a few nits. https://github.com/llvm/llvm-project/pull/73317 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [SME2] Add LUTI2 and LUTI4 quad Builtins and Intrinsics (PR #73317)
@@ -321,9 +321,18 @@ let TargetGuard = "sme2" in { let TargetGuard = "sme2" in { def SVLDR_ZT : Inst<"svldr_zt", "viQ", "", MergeNone, "aarch64_sme_ldr_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; def SVSTR_ZT : Inst<"svstr_zt", "vi%", "", MergeNone, "aarch64_sme_str_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; +} // // Zero ZT0 // +let TargetGuard = "sme2" in { sdesmalen-arm wrote: nit: I'm not really sure why this is formatted like: ``` let TargetGuard = "sme2" in { A } let TargetGuard = "sme2" in { B } let TargetGuard = "sme2" in { C } ``` Rather than: ``` let TargetGuard = "sme2" in { A B C } ``` https://github.com/llvm/llvm-project/pull/73317 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [SME2] Add LUTI2 and LUTI4 quad Builtins and Intrinsics (PR #73317)
@@ -0,0 +1,280 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s sdesmalen-arm wrote: Do we actually need `-target-feature +sve` here? https://github.com/llvm/llvm-project/pull/73317 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [SME2] Add LUTI2 and LUTI4 quad Builtins and Intrinsics (PR #73317)
@@ -1864,6 +1866,35 @@ void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs, SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode); } +void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node, +unsigned NumOutVecs, +unsigned Opc, +uint32_t MaxImm) { + if (ConstantSDNode *Imm = dyn_cast(Node->getOperand(4))) +if (Imm->getZExtValue() > MaxImm) + return; + + SDValue ZtValue; + if (!ImmToReg(Node->getOperand(2), ZtValue)) +return; + SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4)}; + SDLoc DL(Node); + EVT VT = Node->getValueType(0); + + SDNode *Instruction = + CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops); + SDValue SuperReg = SDValue(Instruction, 0); + + for (unsigned i = 0; i < NumOutVecs; ++i) sdesmalen-arm wrote: nit: variable names should start with upper-case ```suggestion for (unsigned I = 0; I < NumOutVecs; ++I) ``` https://github.com/llvm/llvm-project/pull/73317 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)
@@ -6,20 +6,20 @@ #include __attribute__((target("sme"))) -void test_sme(svbool_t pg, void *ptr) { +void test_sme(svbool_t pg, void *ptr) __arm_streaming { svld1_hor_za8(0, 0, pg, ptr); } __attribute__((target("arch=armv8-a+sme"))) -void test_arch_sme(svbool_t pg, void *ptr) { +void test_arch_sme(svbool_t pg, void *ptr) __arm_streaming { svld1_hor_vnum_za32(0, 0, pg, ptr, 0); } __attribute__((target("+sme"))) -void test_plus_sme(svbool_t pg, void *ptr) { +void test_plus_sme(svbool_t pg, void *ptr) __arm_streaming { svst1_ver_za16(0, 0, pg, ptr); } -void undefined(svbool_t pg, void *ptr) { - svst1_ver_vnum_za64(0, 0, pg, ptr, 0); // expected-error {{'svst1_ver_vnum_za64' needs target feature sme}} +void undefined(svbool_t pg, void *ptr) __arm_streaming { // expected-error {{function executed in streaming-SVE mode requires 'sme'}} sdesmalen-arm wrote: Ah, I see that the actual message changed too. It's now complaining about `__arm_streaming` on a non-SME function, whereas before it was testing that the builtin was predicated with the correct attribute. This is already tested in `aarch64-sme-func-attrs-without-target-feature.cpp`. I think instead you want to compile this test with `__attribute__((target("+sme")))` but without the `__arm_streaming` to ensure you get a diagnostic on the builtin call that the behaviour is undefined when the (parent) function is not a streaming function. https://github.com/llvm/llvm-project/pull/74064 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [SME2] Add LUTI2 and LUTI4 quad Builtins and Intrinsics (PR #73317)
@@ -321,9 +321,18 @@ let TargetGuard = "sme2" in { let TargetGuard = "sme2" in { def SVLDR_ZT : Inst<"svldr_zt", "viQ", "", MergeNone, "aarch64_sme_ldr_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; def SVSTR_ZT : Inst<"svstr_zt", "vi%", "", MergeNone, "aarch64_sme_str_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; +} // // Zero ZT0 // +let TargetGuard = "sme2" in { sdesmalen-arm wrote: I personally don't see much value in having these separate blocks, but I'm not going to hold up the patch over it. We may want to do a cleanup after all builtins are implemented to remove the unnecessary target guards. https://github.com/llvm/llvm-project/pull/73317 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [SME2] Add LUTI2 and LUTI4 quad Builtins and Intrinsics (PR #73317)
https://github.com/sdesmalen-arm approved this pull request. https://github.com/llvm/llvm-project/pull/73317 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Add SME2 builtins for pfalse and ptrue (PR #71953)
@@ -0,0 +1,34 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +#include + +// CHECK-LABEL: @test_svptrue_c8_attr( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() +// CHECK-NEXT:ret target("aarch64.svcount") [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z20test_svptrue_c8_attrv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() +// CPP-CHECK-NEXT:ret target("aarch64.svcount") [[TMP0]] +// +svcount_t test_svptrue_c8_attr(void) __arm_streaming { sdesmalen-arm wrote: If you make the intrinsic `IsStreamingCompatible`, you should be able to add the RUN lines I think? https://github.com/llvm/llvm-project/pull/71953 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Add SME2 builtins for pfalse and ptrue (PR #71953)
@@ -1981,6 +1979,11 @@ def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sv defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUl", "aarch64_sve_revd">; } +let TargetGuard = "sve2p1|sme2" in { + def SVPTRUE_COUNT : SInst<"svptrue_{d}", "}v", "QcQsQiQl", MergeNone, "aarch64_sve_ptrue_{d}", [IsOverloadNone, IsStreaming], []>; sdesmalen-arm wrote: ```suggestion def SVPTRUE_COUNT : SInst<"svptrue_{d}", "}v", "QcQsQiQl", MergeNone, "aarch64_sve_ptrue_{d}", [IsOverloadNone, IsStreamingCompatible], []>; ``` As pointed out [here](https://github.com/llvm/llvm-project/pull/71927/files#r1395522522), this will need an attribute like `IsStreamingOrHasSVE2p1`, for which the compiler will give a diagnostic when compiling for +sme2 and the function is not in streaming mode. To move this patch forward without that diagnostic, you could make this `IsStreamingCompatible` for now. Can you also add a `FIXME` to say we need to change this later? https://github.com/llvm/llvm-project/pull/71953 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)
@@ -3183,6 +3140,114 @@ bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { return HasError; } +static ArmStreamingType getArmStreamingFnType(const FunctionDecl *FD) { + if (FD->hasAttr()) +return ArmStreaming; + if (const auto *T = FD->getType()->getAs()) { +if (T->getAArch64SMEAttributes() & FunctionType::SME_PStateSMEnabledMask) + return ArmStreaming; +if (T->getAArch64SMEAttributes() & FunctionType::SME_PStateSMCompatibleMask) + return ArmStreamingCompatible; + } + return ArmNonStreaming; +} + +static void checkArmStreamingBuiltin(Sema &S, CallExpr *TheCall, + const FunctionDecl *FD, + ArmStreamingType BuiltinType) { + ArmStreamingType FnType = getArmStreamingFnType(FD); + + if (FnType == ArmStreaming && BuiltinType == ArmNonStreaming) { +S.Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_sm_incompat_builtin) +<< TheCall->getSourceRange() << "streaming"; + } + + if (FnType == ArmStreamingCompatible && + BuiltinType != ArmStreamingCompatible) { +S.Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_sm_incompat_builtin) +<< TheCall->getSourceRange() << "streaming compatible"; +return; + } + + if (FnType == ArmNonStreaming && BuiltinType == ArmStreaming) { +S.Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_sm_incompat_builtin) +<< TheCall->getSourceRange() << "non-streaming"; + } +} + +static bool hasSMEZAState(const FunctionDecl *FD) { + if (FD->hasAttr()) +return true; + if (const auto *T = FD->getType()->getAs()) +if (T->getAArch64SMEAttributes() & FunctionType::SME_PStateZASharedMask) + return true; + return false; +} + +static bool hasSMEZAState(unsigned BuiltinID) { + switch (BuiltinID) { + default: +return false; +#define GET_SME_BUILTIN_HAS_ZA_STATE +#include "clang/Basic/arm_sme_builtins_za_state.inc" +#undef GET_SME_BUILTIN_HAS_ZA_STATE + } +} + +bool Sema::CheckSMEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { + if (const FunctionDecl *FD = getCurFunctionDecl()) { +std::optional BuiltinType; + 
+switch (BuiltinID) { +default: + break; +#define GET_SME_STREAMING_ATTRS +#include "clang/Basic/arm_sme_streaming_attrs.inc" +#undef GET_SME_STREAMING_ATTRS +} + +if (BuiltinType) + checkArmStreamingBuiltin(*this, TheCall, FD, *BuiltinType); + +if (hasSMEZAState(BuiltinID) && !hasSMEZAState(FD)) + Diag(TheCall->getBeginLoc(), + diag::warn_attribute_arm_za_builtin_no_za_state) + << TheCall->getSourceRange(); + } + + // Range check SME intrinsics that take immediate values. + SmallVector, 3> ImmChecks; + + switch (BuiltinID) { + default: +return false; +#define GET_SME_IMMEDIATE_CHECK +#include "clang/Basic/arm_sme_sema_rangechecks.inc" +#undef GET_SME_IMMEDIATE_CHECK + } + + return ParseSVEImmChecks(TheCall, ImmChecks); +} + +bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { sdesmalen-arm wrote: Can you commit the change to move the immediate checks to `ParseSVEImmChecks` as an NFC patch and rebase this one? https://github.com/llvm/llvm-project/pull/74064 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)
@@ -3172,6 +3117,18 @@ bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 255)) HasError = true; break; +case SVETypeFlags::ImmCheck1_1: sdesmalen-arm wrote: Why have these case statements moved? https://github.com/llvm/llvm-project/pull/74064 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [SME2] Add LUTI2 and LUTI4 double Builtins and Intrinsics (PR #73305)
https://github.com/sdesmalen-arm approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/73305 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [SME2] Add LUTI2 and LUTI4 single Builtins and Intrinsics (PR #73304)
https://github.com/sdesmalen-arm approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/73304 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Add SME2 builtins for pfalse and ptrue (PR #71953)
@@ -15,7 +17,7 @@ // CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() // CPP-CHECK-NEXT:ret target("aarch64.svcount") [[TMP0]] // -svcount_t test_svptrue_c8(void) { +svcount_t test_svptrue_c8(void) __arm_streaming_compatible { sdesmalen-arm wrote: Can you rewrite the tests in such a way that we do test for the two different configurations: * For SVE2p1: no attribute * For SME2: `__arm_streaming` You can do something with macros, so that you replace `__arm_streaming_compatible` here with `ATTR` and then do something like: ``` #ifndef TEST_SME #define ATTR #else #define ATTR __arm_streaming #endif ``` and then for the `+sme2` RUN line you'd add `-DTEST_SME`. That way, we're sure to test for the correct mode when we update the builtin to use a different attribute than `__arm_streaming_compatible`. https://github.com/llvm/llvm-project/pull/71953 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Add SME2 builtins for pfalse and ptrue (PR #71953)
https://github.com/sdesmalen-arm approved this pull request. LGTM, thanks! https://github.com/llvm/llvm-project/pull/71953 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)
@@ -3168,11 +3168,70 @@ static void checkArmStreamingBuiltin(Sema &S, CallExpr *TheCall, << TheCall->getSourceRange() << "streaming compatible"; return; } + + if (FnType == ArmNonStreaming && BuiltinType == ArmStreaming) { +S.Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_sm_incompat_builtin) +<< TheCall->getSourceRange() << "non-streaming"; + } +} sdesmalen-arm wrote: I think this functionality is big enough to warrant its own PR, such that we have: * One PR for testing the compatibility of streaming-mode for both SVE and SME. * One PR for testing that the calling function of a ZA-using builtin has ZA state. https://github.com/llvm/llvm-project/pull/74064 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)
@@ -1375,6 +1381,12 @@ void SVEEmitter::createHeader(raw_ostream &OS) { OS << "#define __aio static __inline__ __attribute__((__always_inline__, " "__nodebug__, __overloadable__))\n\n"; + OS << "#ifdef __ARM_FEATURE_SME\n"; + OS << "#define __asc __attribute__((arm_streaming_compatible))\n"; sdesmalen-arm wrote: A few things: * The `__attribute__((arm_streaming_compatible))` syntax is not supported. * `__asc` is not used/emitted anywhere * I'm not sure adding the attribute to the function prototype in the header file adds much value anyway. Better to just omit it. https://github.com/llvm/llvm-project/pull/74064 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)
@@ -20,3 +21,23 @@ int16x8_t incompat_neon_smc(int16x8_t splat) __arm_streaming_compatible { // expected-warning@+1 {{builtin call has undefined behaviour when called from a streaming compatible function}} return (int16x8_t)__builtin_neon_vqaddq_v((int8x16_t)splat, (int8x16_t)splat, 33); } + +void incompat_sme_norm(svbool_t pg, void const *ptr) __arm_shared_za { + // expected-warning@+1 {{builtin call has undefined behaviour when called from a non-streaming function}} + return __builtin_sme_svld1_hor_za128(0, 0, pg, ptr); sdesmalen-arm wrote: nit: returning a `void` value from a `void` function doesn't seem right. Also, is `incompat_sme_norm` testing anything that `incompat_sme_sm` isn't testing? Or should this be a test where we'd call a non-streaming SVE/SME builtin from a streaming-function? https://github.com/llvm/llvm-project/pull/74064 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)
@@ -500,6 +506,12 @@ bool ClangTableGenMain(raw_ostream &OS, RecordKeeper &Records) { case GenArmSmeRangeChecks: EmitSmeRangeChecks(Records, OS); break; + case GenArmSmeStreamingAttrs: sdesmalen-arm wrote: We also need to do this for SVE (you seem to have added the logic for it in SveEmitter.cpp, but are not using it otherwise). https://github.com/llvm/llvm-project/pull/74064 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)
@@ -3058,6 +3058,11 @@ bool Sema::ParseSVEImmChecks( if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 7)) HasError = true; break; +case SVETypeFlags::ImmCheck2_4_Mul2: sdesmalen-arm wrote: This shouldn't have moved. https://github.com/llvm/llvm-project/pull/74064 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)
@@ -6,20 +6,21 @@ #include __attribute__((target("sme"))) -void test_sme(svbool_t pg, void *ptr) { +void test_sme(svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { svld1_hor_za8(0, 0, pg, ptr); } __attribute__((target("arch=armv8-a+sme"))) -void test_arch_sme(svbool_t pg, void *ptr) { +void test_arch_sme(svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { svld1_hor_vnum_za32(0, 0, pg, ptr, 0); } __attribute__((target("+sme"))) -void test_plus_sme(svbool_t pg, void *ptr) { +void test_plus_sme(svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { svst1_ver_za16(0, 0, pg, ptr); } -void undefined(svbool_t pg, void *ptr) { - svst1_ver_vnum_za64(0, 0, pg, ptr, 0); // expected-error {{'svst1_ver_vnum_za64' needs target feature sme}} +__attribute__((target("+sme"))) +void undefined(svbool_t pg, void *ptr) __arm_shared_za { + svst1_ver_vnum_za64(0, 0, pg, ptr, 0); // expected-warning {{builtin call has undefined behaviour when called from a non-streaming function}} sdesmalen-arm wrote: This test should not have changed. https://github.com/llvm/llvm-project/pull/74064 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits