[clang] [compiler-rt] [llvm] [AArch64][SME] Add support for sme-fa64 (PR #70809)
@@ -1133,6 +1133,9 @@ typedef struct __ifunc_arg_t { #ifndef HWCAP2_SME_F64F64 #define HWCAP2_SME_F64F64 (1 << 25) #endif +#ifndef HWCAP2_SME_FA64 +#define HWCAP2_SME_FA64 (1 << 26) MDevereau wrote: This value comes from the Linux kernel UAPI header at https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h, so the bit position is in fact 30, i.e. (1 << 30) rather than (1 << 26). https://github.com/llvm/llvm-project/pull/70809 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [compiler-rt] [AArch64][SME] Add support for sme-fa64 (PR #70809)
@@ -491,13 +491,15 @@ bool AArch64Subtarget::isStreamingCompatible() const { } bool AArch64Subtarget::isNeonAvailable() const { - return hasNEON() && !isStreaming() && !isStreamingCompatible(); + if (hasSMEFA64()) +return true; + return (hasNEON() && !isStreaming() && !isStreamingCompatible()); MDevereau wrote: Done https://github.com/llvm/llvm-project/pull/70809 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [compiler-rt] [llvm] [AArch64][SME] Add support for sme-fa64 (PR #70809)
@@ -491,13 +491,15 @@ bool AArch64Subtarget::isStreamingCompatible() const { } bool AArch64Subtarget::isNeonAvailable() const { - return hasNEON() && !isStreaming() && !isStreamingCompatible(); + if (hasSMEFA64()) +return true; + return (hasNEON() && !isStreaming() && !isStreamingCompatible()); } -bool AArch64Subtarget::isSVEAvailable() const{ - // FIXME: Also return false if FEAT_FA64 is set, but we can't do this yet - // as we don't yet support the feature in LLVM. - return hasSVE() && !isStreaming() && !isStreamingCompatible(); +bool AArch64Subtarget::isSVEAvailable() const { + if (hasSMEFA64()) MDevereau wrote: Done https://github.com/llvm/llvm-project/pull/70809 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [compiler-rt] [AArch64][SME] Add support for sme-fa64 (PR #70809)
@@ -508,6 +508,9 @@ def FeatureSMEI16I64 : SubtargetFeature<"sme-i16i64", "HasSMEI16I64", "true", def FeatureSMEF16F16 : SubtargetFeature<"sme-f16f16", "HasSMEF16F16", "true", "Enable SME2.1 non-widening Float16 instructions (FEAT_SME_F16F16)", []>; +def FeatureSMEFA64 : SubtargetFeature<"sme-fa64", "HasSMEFA64", "true", + "Enable the full A64 instruction set in SVE streaming mode (FEAT_SME_FA64)", []>; MDevereau wrote: Done https://github.com/llvm/llvm-project/pull/70809 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [compiler-rt] [AArch64][SME] Add support for sme-fa64 (PR #70809)
@@ -162,6 +163,7 @@ enum ArchExtKind : unsigned { AEK_FPMR = 58, // FEAT_FPMR AEK_FP8 = 59, // FEAT_FP8 AEK_FAMINMAX = 60, // FEAT_FAMINMAX + AEK_SMEFA64 = 61, // FEAT_SMEFA64 MDevereau wrote: I've moved it to be AEK_SME = 38, // FEAT_SME AEK_SMEF64F64 = 39, // FEAT_SME_F64F64 AEK_SMEI16I64 = 40, // FEAT_SME_I16I64 AEK_SMEFA64 = 41, // FEAT_SME_FA64 https://github.com/llvm/llvm-project/pull/70809 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [compiler-rt] [AArch64][SME] Add support for sme-fa64 (PR #70809)
https://github.com/MDevereau edited https://github.com/llvm/llvm-project/pull/70809 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [compiler-rt] [AArch64][SME] Add support for sme-fa64 (PR #70809)
@@ -0,0 +1,33 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mattr=+sve -mattr=+sme-fa64 -force-streaming-compatible-sve < %s | FileCheck %s -check-prefix=FA64 +; RUN: llc -mattr=+sve -force-streaming-compatible-sve < %s | FileCheck %s -check-prefix=NO-FA64 + + +target triple = "aarch64-unknown-linux-gnu" + +define half @fadda_v4f16(half %start, <4 x half> %a) { MDevereau wrote: Added test `llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mla-neon-fa64.ll` https://github.com/llvm/llvm-project/pull/70809 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [AArch64][SME2] Add ldr_zt, str_zt builtins and intrinsics (PR #71795)
https://github.com/MDevereau created https://github.com/llvm/llvm-project/pull/71795 Adds the builtins: void svldr_zt(uint64_t zt, const void *rn) void svstr_zt(uint64_t zt, void *rn) And the intrinsics: call void @llvm.aarch64.sme.ldr.zt(i32, ptr) tail call void @llvm.aarch64.sme.str.zt(i32, ptr) Patch by: Kerry McLaughlin >From 9846bc9efd79e6e3c2662ea42367c102df88799d Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Thu, 9 Nov 2023 10:50:05 + Subject: [PATCH] [AArch64][SME2] Add ldr_zt, str_zt builtins and intrinsics Adds the builtins: void svldr_zt(uint64_t zt, const void *rn) void svstr_zt(uint64_t zt, void *rn) And the intrinsics: call void @llvm.aarch64.sme.ldr.zt(i32, ptr) tail call void @llvm.aarch64.sme.str.zt(i32, ptr) --- clang/include/clang/Basic/arm_sme.td | 5 ++ clang/include/clang/Basic/arm_sve.td | 9 .../acle_sme2_ldr_str_zt.c| 51 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 11 ++-- .../Target/AArch64/AArch64ISelDAGToDAG.cpp| 7 ++- .../Target/AArch64/AArch64ISelLowering.cpp| 21 llvm/lib/Target/AArch64/AArch64ISelLowering.h | 2 + .../Target/AArch64/AArch64RegisterInfo.cpp| 6 +++ .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 4 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 23 +++-- .../CodeGen/AArch64/sme2-intrinsics-zt0.ll| 27 ++ 11 files changed, 153 insertions(+), 13 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-zt0.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index b5655afdf419ecf..fe3de56ce3298c5 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -298,3 +298,8 @@ multiclass ZAAddSub { defm SVADD : ZAAddSub<"add">; defm SVSUB : ZAAddSub<"sub">; + +let TargetGuard = "sme2" in { + def SVLDR_ZT : Inst<"svldr_zt", "viQ", "", MergeNone, "aarch64_sme_ldr_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, 
ImmCheck0_0>]>; + def SVSTR_ZT : Inst<"svstr_zt", "vi%", "", MergeNone, "aarch64_sme_str_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; +} diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 3d4c2129565903d..f0b3747898d4145 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1813,6 +1813,15 @@ def SVWHILERW_H_BF16 : SInst<"svwhilerw[_{1}]", "Pcc", "b", MergeNone, "aarch64_ def SVWHILEWR_H_BF16 : SInst<"svwhilewr[_{1}]", "Pcc", "b", MergeNone, "aarch64_sve_whilewr_h", [IsOverloadWhileRW]>; } +// // +// // Spill and fill of ZT0 +// // + +// let TargetGuard = "sme2" in { +// def SVLDR_ZT : Inst<"svldr_zt", "viQ", "", "aarch64_sme_ldr_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; +// def SVSTR_ZT : Inst<"svstr_zt", "vi%", "", "aarch64_sme_str_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; +// } + // SVE2 - Extended table lookup/permute let TargetGuard = "sve2" in { diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c new file mode 100644 index 000..3d70ded6b469ba1 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c @@ -0,0 +1,51 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 
-DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1 +#else +#define SVE_ACLE_FUNC(A
[llvm] [clang] [AArch64][SME2] Add ldr_zt, str_zt builtins and intrinsics (PR #71795)
https://github.com/MDevereau updated https://github.com/llvm/llvm-project/pull/71795 >From 9846bc9efd79e6e3c2662ea42367c102df88799d Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Thu, 9 Nov 2023 10:50:05 + Subject: [PATCH 1/2] [AArch64][SME2] Add ldr_zt, str_zt builtins and intrinsics Adds the builtins: void svldr_zt(uint64_t zt, const void *rn) void svstr_zt(uint64_t zt, void *rn) And the intrinsics: call void @llvm.aarch64.sme.ldr.zt(i32, ptr) tail call void @llvm.aarch64.sme.str.zt(i32, ptr) --- clang/include/clang/Basic/arm_sme.td | 5 ++ clang/include/clang/Basic/arm_sve.td | 9 .../acle_sme2_ldr_str_zt.c| 51 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 11 ++-- .../Target/AArch64/AArch64ISelDAGToDAG.cpp| 7 ++- .../Target/AArch64/AArch64ISelLowering.cpp| 21 llvm/lib/Target/AArch64/AArch64ISelLowering.h | 2 + .../Target/AArch64/AArch64RegisterInfo.cpp| 6 +++ .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 4 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 23 +++-- .../CodeGen/AArch64/sme2-intrinsics-zt0.ll| 27 ++ 11 files changed, 153 insertions(+), 13 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-zt0.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index b5655afdf419ecf..fe3de56ce3298c5 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -298,3 +298,8 @@ multiclass ZAAddSub { defm SVADD : ZAAddSub<"add">; defm SVSUB : ZAAddSub<"sub">; + +let TargetGuard = "sme2" in { + def SVLDR_ZT : Inst<"svldr_zt", "viQ", "", MergeNone, "aarch64_sme_ldr_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; + def SVSTR_ZT : Inst<"svstr_zt", "vi%", "", MergeNone, "aarch64_sme_str_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; +} diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td index 3d4c2129565903d..f0b3747898d4145 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1813,6 +1813,15 @@ def SVWHILERW_H_BF16 : SInst<"svwhilerw[_{1}]", "Pcc", "b", MergeNone, "aarch64_ def SVWHILEWR_H_BF16 : SInst<"svwhilewr[_{1}]", "Pcc", "b", MergeNone, "aarch64_sve_whilewr_h", [IsOverloadWhileRW]>; } +// // +// // Spill and fill of ZT0 +// // + +// let TargetGuard = "sme2" in { +// def SVLDR_ZT : Inst<"svldr_zt", "viQ", "", "aarch64_sme_ldr_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; +// def SVSTR_ZT : Inst<"svstr_zt", "vi%", "", "aarch64_sme_str_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; +// } + // SVE2 - Extended table lookup/permute let TargetGuard = "sve2" in { diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c new file mode 100644 index 000..3d70ded6b469ba1 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c @@ -0,0 +1,51 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1 +#else +#define SVE_ACLE_FUNC(A1,A2) A1##A2 +#endif + +// LDR ZT0 + +// CHECK-LABEL: @test_svldr_zt( +// CHECK-NEXT: entry: +// CHECK-NEXT:tail call void @llvm.aarch64.sme.ldr.zt(i32 0, ptr [[BASE:%.*]]) +// CHECK-NEXT:ret void +// +// CPP-CHECK-LABEL: @_Z13tes
[llvm] [clang] [AArch64][SME2] Add ldr_zt, str_zt builtins and intrinsics (PR #71795)
https://github.com/MDevereau updated https://github.com/llvm/llvm-project/pull/71795 >From 9846bc9efd79e6e3c2662ea42367c102df88799d Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Thu, 9 Nov 2023 10:50:05 + Subject: [PATCH 1/3] [AArch64][SME2] Add ldr_zt, str_zt builtins and intrinsics Adds the builtins: void svldr_zt(uint64_t zt, const void *rn) void svstr_zt(uint64_t zt, void *rn) And the intrinsics: call void @llvm.aarch64.sme.ldr.zt(i32, ptr) tail call void @llvm.aarch64.sme.str.zt(i32, ptr) --- clang/include/clang/Basic/arm_sme.td | 5 ++ clang/include/clang/Basic/arm_sve.td | 9 .../acle_sme2_ldr_str_zt.c| 51 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 11 ++-- .../Target/AArch64/AArch64ISelDAGToDAG.cpp| 7 ++- .../Target/AArch64/AArch64ISelLowering.cpp| 21 llvm/lib/Target/AArch64/AArch64ISelLowering.h | 2 + .../Target/AArch64/AArch64RegisterInfo.cpp| 6 +++ .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 4 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 23 +++-- .../CodeGen/AArch64/sme2-intrinsics-zt0.ll| 27 ++ 11 files changed, 153 insertions(+), 13 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-zt0.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index b5655afdf419ecf..fe3de56ce3298c5 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -298,3 +298,8 @@ multiclass ZAAddSub { defm SVADD : ZAAddSub<"add">; defm SVSUB : ZAAddSub<"sub">; + +let TargetGuard = "sme2" in { + def SVLDR_ZT : Inst<"svldr_zt", "viQ", "", MergeNone, "aarch64_sme_ldr_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; + def SVSTR_ZT : Inst<"svstr_zt", "vi%", "", MergeNone, "aarch64_sme_str_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; +} diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td index 3d4c2129565903d..f0b3747898d4145 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1813,6 +1813,15 @@ def SVWHILERW_H_BF16 : SInst<"svwhilerw[_{1}]", "Pcc", "b", MergeNone, "aarch64_ def SVWHILEWR_H_BF16 : SInst<"svwhilewr[_{1}]", "Pcc", "b", MergeNone, "aarch64_sve_whilewr_h", [IsOverloadWhileRW]>; } +// // +// // Spill and fill of ZT0 +// // + +// let TargetGuard = "sme2" in { +// def SVLDR_ZT : Inst<"svldr_zt", "viQ", "", "aarch64_sme_ldr_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; +// def SVSTR_ZT : Inst<"svstr_zt", "vi%", "", "aarch64_sme_str_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; +// } + // SVE2 - Extended table lookup/permute let TargetGuard = "sve2" in { diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c new file mode 100644 index 000..3d70ded6b469ba1 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c @@ -0,0 +1,51 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1 +#else +#define SVE_ACLE_FUNC(A1,A2) A1##A2 +#endif + +// LDR ZT0 + +// CHECK-LABEL: @test_svldr_zt( +// CHECK-NEXT: entry: +// CHECK-NEXT:tail call void @llvm.aarch64.sme.ldr.zt(i32 0, ptr [[BASE:%.*]]) +// CHECK-NEXT:ret void +// +// CPP-CHECK-LABEL: @_Z13tes
[llvm] [clang] [AArch64][SME2] Add ldr_zt, str_zt builtins and intrinsics (PR #71795)
https://github.com/MDevereau updated https://github.com/llvm/llvm-project/pull/71795 >From 9846bc9efd79e6e3c2662ea42367c102df88799d Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Thu, 9 Nov 2023 10:50:05 + Subject: [PATCH 1/4] [AArch64][SME2] Add ldr_zt, str_zt builtins and intrinsics Adds the builtins: void svldr_zt(uint64_t zt, const void *rn) void svstr_zt(uint64_t zt, void *rn) And the intrinsics: call void @llvm.aarch64.sme.ldr.zt(i32, ptr) tail call void @llvm.aarch64.sme.str.zt(i32, ptr) --- clang/include/clang/Basic/arm_sme.td | 5 ++ clang/include/clang/Basic/arm_sve.td | 9 .../acle_sme2_ldr_str_zt.c| 51 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 11 ++-- .../Target/AArch64/AArch64ISelDAGToDAG.cpp| 7 ++- .../Target/AArch64/AArch64ISelLowering.cpp| 21 llvm/lib/Target/AArch64/AArch64ISelLowering.h | 2 + .../Target/AArch64/AArch64RegisterInfo.cpp| 6 +++ .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 4 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 23 +++-- .../CodeGen/AArch64/sme2-intrinsics-zt0.ll| 27 ++ 11 files changed, 153 insertions(+), 13 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-zt0.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index b5655afdf419ecf..fe3de56ce3298c5 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -298,3 +298,8 @@ multiclass ZAAddSub { defm SVADD : ZAAddSub<"add">; defm SVSUB : ZAAddSub<"sub">; + +let TargetGuard = "sme2" in { + def SVLDR_ZT : Inst<"svldr_zt", "viQ", "", MergeNone, "aarch64_sme_ldr_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; + def SVSTR_ZT : Inst<"svstr_zt", "vi%", "", MergeNone, "aarch64_sme_str_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; +} diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td index 3d4c2129565903d..f0b3747898d4145 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1813,6 +1813,15 @@ def SVWHILERW_H_BF16 : SInst<"svwhilerw[_{1}]", "Pcc", "b", MergeNone, "aarch64_ def SVWHILEWR_H_BF16 : SInst<"svwhilewr[_{1}]", "Pcc", "b", MergeNone, "aarch64_sve_whilewr_h", [IsOverloadWhileRW]>; } +// // +// // Spill and fill of ZT0 +// // + +// let TargetGuard = "sme2" in { +// def SVLDR_ZT : Inst<"svldr_zt", "viQ", "", "aarch64_sme_ldr_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; +// def SVSTR_ZT : Inst<"svstr_zt", "vi%", "", "aarch64_sme_str_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; +// } + // SVE2 - Extended table lookup/permute let TargetGuard = "sve2" in { diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c new file mode 100644 index 000..3d70ded6b469ba1 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c @@ -0,0 +1,51 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1 +#else +#define SVE_ACLE_FUNC(A1,A2) A1##A2 +#endif + +// LDR ZT0 + +// CHECK-LABEL: @test_svldr_zt( +// CHECK-NEXT: entry: +// CHECK-NEXT:tail call void @llvm.aarch64.sme.ldr.zt(i32 0, ptr [[BASE:%.*]]) +// CHECK-NEXT:ret void +// +// CPP-CHECK-LABEL: @_Z13tes
[llvm] [clang] [AArch64][SME2] Add ldr_zt, str_zt builtins and intrinsics (PR #71795)
https://github.com/MDevereau updated https://github.com/llvm/llvm-project/pull/71795 >From 9846bc9efd79e6e3c2662ea42367c102df88799d Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Thu, 9 Nov 2023 10:50:05 + Subject: [PATCH 1/4] [AArch64][SME2] Add ldr_zt, str_zt builtins and intrinsics Adds the builtins: void svldr_zt(uint64_t zt, const void *rn) void svstr_zt(uint64_t zt, void *rn) And the intrinsics: call void @llvm.aarch64.sme.ldr.zt(i32, ptr) tail call void @llvm.aarch64.sme.str.zt(i32, ptr) --- clang/include/clang/Basic/arm_sme.td | 5 ++ clang/include/clang/Basic/arm_sve.td | 9 .../acle_sme2_ldr_str_zt.c| 51 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 11 ++-- .../Target/AArch64/AArch64ISelDAGToDAG.cpp| 7 ++- .../Target/AArch64/AArch64ISelLowering.cpp| 21 llvm/lib/Target/AArch64/AArch64ISelLowering.h | 2 + .../Target/AArch64/AArch64RegisterInfo.cpp| 6 +++ .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 4 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 23 +++-- .../CodeGen/AArch64/sme2-intrinsics-zt0.ll| 27 ++ 11 files changed, 153 insertions(+), 13 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-zt0.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index b5655afdf419ecf..fe3de56ce3298c5 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -298,3 +298,8 @@ multiclass ZAAddSub { defm SVADD : ZAAddSub<"add">; defm SVSUB : ZAAddSub<"sub">; + +let TargetGuard = "sme2" in { + def SVLDR_ZT : Inst<"svldr_zt", "viQ", "", MergeNone, "aarch64_sme_ldr_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; + def SVSTR_ZT : Inst<"svstr_zt", "vi%", "", MergeNone, "aarch64_sme_str_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; +} diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td index 3d4c2129565903d..f0b3747898d4145 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1813,6 +1813,15 @@ def SVWHILERW_H_BF16 : SInst<"svwhilerw[_{1}]", "Pcc", "b", MergeNone, "aarch64_ def SVWHILEWR_H_BF16 : SInst<"svwhilewr[_{1}]", "Pcc", "b", MergeNone, "aarch64_sve_whilewr_h", [IsOverloadWhileRW]>; } +// // +// // Spill and fill of ZT0 +// // + +// let TargetGuard = "sme2" in { +// def SVLDR_ZT : Inst<"svldr_zt", "viQ", "", "aarch64_sme_ldr_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; +// def SVSTR_ZT : Inst<"svstr_zt", "vi%", "", "aarch64_sme_str_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; +// } + // SVE2 - Extended table lookup/permute let TargetGuard = "sve2" in { diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c new file mode 100644 index 000..3d70ded6b469ba1 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c @@ -0,0 +1,51 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1 +#else +#define SVE_ACLE_FUNC(A1,A2) A1##A2 +#endif + +// LDR ZT0 + +// CHECK-LABEL: @test_svldr_zt( +// CHECK-NEXT: entry: +// CHECK-NEXT:tail call void @llvm.aarch64.sme.ldr.zt(i32 0, ptr [[BASE:%.*]]) +// CHECK-NEXT:ret void +// +// CPP-CHECK-LABEL: @_Z13tes
[llvm] [clang] [AArch64][SME2] Add ldr_zt, str_zt builtins and intrinsics (PR #71795)
MDevereau wrote: Well spotted - I forgot to delete these. They are now removed. https://github.com/llvm/llvm-project/pull/71795 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Add SME2 builtins for pfalse and ptrue (PR #71953)
https://github.com/MDevereau created https://github.com/llvm/llvm-project/pull/71953 Extend pfalse and ptrue builtins with svcount_t return types to be enabled for sve2p1 and sme2 >From 79c05d1f8554d5a4469b9501b61b47863f086ffa Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Fri, 10 Nov 2023 13:40:31 + Subject: [PATCH] Add SME2 builtins for pfalse and ptrue Extend pfalse and ptrue builtins with svcount_t return types to be enabled for sve2p1 and sme2 --- clang/include/clang/Basic/arm_sve.td | 7 ++-- clang/lib/Sema/Sema.cpp | 3 +- .../acle_sme2_ptrue_pfalse_attr.c | 34 +++ 3 files changed, 41 insertions(+), 3 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ptrue_pfalse_attr.c diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 3d4c2129565903d..eccf87ac94abb3f 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1861,8 +1861,6 @@ def SVBGRP_N : SInst<"svbgrp[_n_{d}]", "dda", "UcUsUiUl", MergeNone, "aarch64_sv let TargetGuard = "sve2p1" in { def SVFCLAMP : SInst<"svclamp[_{d}]", "", "hfd", MergeNone, "aarch64_sve_fclamp", [], []>; -def SVPTRUE_COUNT : SInst<"svptrue_{d}", "}v", "QcQsQiQl", MergeNone, "aarch64_sve_ptrue_{d}", [IsOverloadNone], []>; -def SVPFALSE_COUNT_ALIAS : SInst<"svpfalse_c", "}v", "", MergeNone, "", [IsOverloadNone]>; def SVPEXT_SINGLE : SInst<"svpext_lane_{d}", "P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext", [], [ImmCheck<1, ImmCheck0_3>]>; def SVPEXT_X2 : SInst<"svpext_lane_{d}_x2", "2.P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext_x2", [], [ImmCheck<1, ImmCheck0_1>]>; @@ -1981,6 +1979,11 @@ def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sv defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUl", "aarch64_sve_revd">; } +let TargetGuard = "sve2p1|sme2" in { + def SVPTRUE_COUNT : SInst<"svptrue_{d}", "}v", "QcQsQiQl", MergeNone, "aarch64_sve_ptrue_{d}", [IsOverloadNone], []>; + def 
SVPFALSE_COUNT_ALIAS : SInst<"svpfalse_c", "}v", "", MergeNone, "", [IsOverloadNone]>; +} + // SME2 diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index d7d8d2eaa37e1d6..e4f6a291a869c27 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -2084,7 +2084,8 @@ void Sema::checkTypeSupport(QualType Ty, SourceLocation Loc, ValueDecl *D) { llvm::StringMap CallerFeatureMap; Context.getFunctionFeatureMap(CallerFeatureMap, FD); if (!Builtin::evaluateRequiredTargetFeatures( - "sve", CallerFeatureMap)) + "sve", CallerFeatureMap) && !Builtin::evaluateRequiredTargetFeatures( + "sme", CallerFeatureMap)) Diag(D->getLocation(), diag::err_sve_vector_in_non_sve_target) << Ty; } }; diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ptrue_pfalse_attr.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ptrue_pfalse_attr.c new file mode 100644 index 000..90d9434d87cf4a0 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ptrue_pfalse_attr.c @@ -0,0 +1,34 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +#include + +// CHECK-LABEL: @test_svptrue_c8_attr( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() +// CHECK-NEXT:ret target("aarch64.svcount") [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z20test_svptrue_c8_attrv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() +// 
CPP-CHECK-NEXT:ret target("aarch64.svcount") [[TMP0]] +// +svcount_t test_svptrue_c8_attr(void) __arm_streaming { + return svptrue_c8(); +} + +// CHECK-LABEL: @test_svptrue_c( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt( zeroinitializer) +// CHECK-NEXT:ret target("aarch64.svcount") [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z14test_svptrue_cv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt( zeroinitializer) +// CPP-CHECK-NEXT:ret target("aarch64.svcount") [[TMP0]] +// +svcount_t test_svptrue_c(void) __arm_stre
[clang] [AArch64][SME2] Enable bfm builtins for sme2 (PR #71927)
@@ -1992,3 +1986,12 @@ let TargetGuard = "sme2" in { def SVADD_SINGLE_X2 : SInst<"svadd[_single_{d}_x2]", "22d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x2", [IsStreaming], []>; def SVADD_SINGLE_X4 : SInst<"svadd[_single_{d}_x4]", "44d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x4", [IsStreaming], []>; } + +let TargetGuard = "sve2p1|sme" in { MDevereau wrote: ```suggestion let TargetGuard = "sve2p1|sme2" in { ``` https://github.com/llvm/llvm-project/pull/71927 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Add SME2 builtins for pfalse and ptrue (PR #71953)
https://github.com/MDevereau edited https://github.com/llvm/llvm-project/pull/71953 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64][SME2] Add ldr_zt, str_zt builtins and intrinsics (PR #71795)
https://github.com/MDevereau closed https://github.com/llvm/llvm-project/pull/71795 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Add SME2 builtins for zero { zt0 } (PR #72274)
https://github.com/MDevereau created https://github.com/llvm/llvm-project/pull/72274 None >From e03469125d082973451da609ad5edfd3065d94a5 Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Thu, 9 Nov 2023 16:08:57 + Subject: [PATCH] Add SME2 builtins for zero { zt0 } --- clang/include/clang/Basic/arm_sme.td | 5 +++ .../acle_sme2_zero_zt.c | 32 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 4 +++ .../Target/AArch64/AArch64ISelLowering.cpp| 32 --- llvm/lib/Target/AArch64/AArch64ISelLowering.h | 4 +-- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 2 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 11 +++ .../AArch64/sme2-intrinsics-zero-zt.ll| 13 8 files changed, 88 insertions(+), 15 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-zero-zt.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index fb3f54ecff95080..189a0cafdc3370e 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -305,4 +305,9 @@ defm SVSUB : ZAAddSub<"sub">; let TargetGuard = "sme2" in { def SVLDR_ZT : Inst<"svldr_zt", "viQ", "", MergeNone, "aarch64_sme_ldr_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; def SVSTR_ZT : Inst<"svstr_zt", "vi%", "", MergeNone, "aarch64_sme_str_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; + +// +// Zero ZT0 +// + def SVZERO_ZT : Inst<"svzero_zt", "vi", "", MergeNone, "aarch64_sme_zero_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; } diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c new file mode 100644 index 000..4ea26119301cab2 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c @@ -0,0 +1,32 @@ +// 
NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1) A1 +#else +#define SVE_ACLE_FUNC(A1) A1 +#endif + +// CHECK-LABEL: @test_svzero_zt( +// CHECK-NEXT: entry: +// CHECK-NEXT:tail call void @llvm.aarch64.sme.zero.zt(i32 0) +// CHECK-NEXT:ret void +// +// CPP-CHECK-LABEL: @_Z14test_svzero_ztv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:tail call void @llvm.aarch64.sme.zero.zt(i32 0) +// CPP-CHECK-NEXT:ret void +// +void test_svzero_zt(void) __arm_streaming_compatible __arm_shared_za __arm_preserves_za { + svzero_zt(0); +} diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 9164604f7d78cbc..2d6065edbd3554e 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3457,4 +3457,8 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sme_ldr_zt : SME_LDR_STR_Intrinsic; def int_aarch64_sme_str_zt : SME_LDR_STR_Intrinsic; + // + // Zero ZT0 + // + def int_aarch64_sme_zero_zt : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg>, IntrWriteMem]>; } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 289c2f4409f9532..7f477a524f04709 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2746,19 +2746,25 @@ AArch64TargetLowering::EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const { return BB; } -MachineBasicBlock *AArch64TargetLowering::EmitZTSpillFill(MachineInstr &MI, - MachineBasicBlo
[clang] [llvm] Add SME2 builtins for zero { zt0 } (PR #72274)
https://github.com/MDevereau updated https://github.com/llvm/llvm-project/pull/72274 >From e6240fa09aca504a9340ff7bfd88ad449d5775da Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Thu, 9 Nov 2023 16:08:57 + Subject: [PATCH] Add SME2 builtins for zero { zt0 } Patch by: Kerry McLaughlin kerry.mclaugh...@arm.com --- clang/include/clang/Basic/arm_sme.td | 5 +++ .../acle_sme2_zero_zt.c | 32 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 4 +++ .../Target/AArch64/AArch64ISelLowering.cpp| 32 --- llvm/lib/Target/AArch64/AArch64ISelLowering.h | 4 +-- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 2 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 11 +++ .../AArch64/sme2-intrinsics-zero-zt.ll| 13 8 files changed, 88 insertions(+), 15 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-zero-zt.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index fb3f54ecff95080..189a0cafdc3370e 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -305,4 +305,9 @@ defm SVSUB : ZAAddSub<"sub">; let TargetGuard = "sme2" in { def SVLDR_ZT : Inst<"svldr_zt", "viQ", "", MergeNone, "aarch64_sme_ldr_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; def SVSTR_ZT : Inst<"svstr_zt", "vi%", "", MergeNone, "aarch64_sme_str_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; + +// +// Zero ZT0 +// + def SVZERO_ZT : Inst<"svzero_zt", "vi", "", MergeNone, "aarch64_sme_zero_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; } diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c new file mode 100644 index 000..4ea26119301cab2 --- /dev/null +++ 
b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c @@ -0,0 +1,32 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1) A1 +#else +#define SVE_ACLE_FUNC(A1) A1 +#endif + +// CHECK-LABEL: @test_svzero_zt( +// CHECK-NEXT: entry: +// CHECK-NEXT:tail call void @llvm.aarch64.sme.zero.zt(i32 0) +// CHECK-NEXT:ret void +// +// CPP-CHECK-LABEL: @_Z14test_svzero_ztv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:tail call void @llvm.aarch64.sme.zero.zt(i32 0) +// CPP-CHECK-NEXT:ret void +// +void test_svzero_zt(void) __arm_streaming_compatible __arm_shared_za __arm_preserves_za { + svzero_zt(0); +} diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 9164604f7d78cbc..2d6065edbd3554e 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3457,4 +3457,8 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sme_ldr_zt : SME_LDR_STR_Intrinsic; def int_aarch64_sme_str_zt : SME_LDR_STR_Intrinsic; + // + // Zero ZT0 + // + def int_aarch64_sme_zero_zt : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg>, IntrWriteMem]>; } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 289c2f4409f9532..7f477a524f04709 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2746,19 +2746,25 @@ AArch64TargetLowering::EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const { return BB; } -MachineBasicBlock *AArch64TargetLowering::EmitZTSpillFill(MachineInstr &MI, -
[clang] [llvm] Add SME2 builtins for zero { zt0 } (PR #72274)
https://github.com/MDevereau edited https://github.com/llvm/llvm-project/pull/72274 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [SME2] Add LUTI2 and LUTI4 quad Builtins and Intrinsics (PR #73317)
@@ -321,9 +321,18 @@ let TargetGuard = "sme2" in { let TargetGuard = "sme2" in { def SVLDR_ZT : Inst<"svldr_zt", "viQ", "", MergeNone, "aarch64_sme_ldr_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; def SVSTR_ZT : Inst<"svstr_zt", "vi%", "", MergeNone, "aarch64_sme_str_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; +} // // Zero ZT0 // +let TargetGuard = "sme2" in { MDevereau wrote: I think these were added individually which might explain it. The blocks of instructions are separated by the comments, and maybe it makes it easier to see what individual instructions need once the list of defs grows in size? I've no objections to your suggestion, but the rest of this file doesn't really follow it I think? https://github.com/llvm/llvm-project/pull/73317 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [SME2] Add LUTI2 and LUTI4 quad Builtins and Intrinsics (PR #73317)
@@ -0,0 +1,280 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s MDevereau wrote: I'm not sure if this is a diagnostic problem but I get errors about requiring sve when using the C tuple types. https://github.com/llvm/llvm-project/pull/73317 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME2] Remove IsPreservesZA from ldr_zt builtin (PR #74303)
https://github.com/MDevereau closed https://github.com/llvm/llvm-project/pull/74303 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME2] Add multi-vector builtins for cvt (PR #74450)
https://github.com/MDevereau created https://github.com/llvm/llvm-project/pull/74450 Adds builtins for: - FCVT - BFCVT - FCVTZS - FCVTZU - SCVTF - UCVTF - BFCVTN - FCVTN - SQCVT - SQCVTU - UQCVT - SQCVTN - SQCVTUN - UQCVTN See https://github.com/ARM-software/acle/pull/217 >From 07244773d0733906fd485b1cf997f941009ecbec Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Thu, 30 Nov 2023 15:10:10 + Subject: [PATCH] [AArch64][SME2] Add multi-vector builtins for cvt Adds builtins for: - FCVT - BFCVT - FCVTZS - FCVTZU - SCVTF - UCVTF - BFCVTN - FCVTN - SQCVT - SQCVTU - UQCVT - SQCVTN - SQCVTUN - UQCVTN See https://github.com/ARM-software/acle/pull/217 --- clang/include/clang/Basic/arm_sve.td | 60 +++ .../aarch64-sme2-intrinsics/acle_sme2_cvt.c | 499 ++ .../aarch64-sme2-intrinsics/acle_sme2_cvtn.c | 242 + 3 files changed, 801 insertions(+) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtn.c diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 4fcc9327f22fe..a73ac75e3bbc0 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2149,3 +2149,63 @@ let TargetGuard = "sve2p1" in { defm SVPMOV_TO_VEC_LANE_S : PMOV_TO_VEC<"svpmov", "iUi", "aarch64_sve_pmov_to_vector_lane", [], ImmCheck1_3>; defm SVPMOV_TO_VEC_LANE_D : PMOV_TO_VEC<"svpmov", "lUl", "aarch64_sve_pmov_to_vector_lane" ,[], ImmCheck1_7>; } + +// +// Multi-vector convert to/from floating-point. 
+// +let TargetGuard = "sme2" in { + def SVCVT_F16_X2 : SInst<"svcvt_f16[_f32_x2]", "e2", "f", MergeNone, "aarch64_sve_fcvt_x2", [IsStreaming],[]>; + def SVCVT_BF16_X2 : SInst<"svcvt_bf16[_f32_x2]", "$2", "f", MergeNone, "aarch64_sve_bfcvt_x2", [IsOverloadNone, IsStreaming],[]>; + + def SVCVT_F32_U32_X2 : SInst<"svcvt_{d}[_u32_x2]", "2.d2.u", "f", MergeNone, "aarch64_sve_fcvtu_x2", [IsStreaming], []>; + def SVCVT_U32_F32_X2 : SInst<"svcvt_u32[_{d}_x2]", "2.u2.d", "f", MergeNone, "aarch64_sve_ucvtf_x2", [IsStreaming], []>; + def SVCVT_F32_S32_X2 : SInst<"svcvt_{d}[_s32_x2]", "2.d2.x", "f", MergeNone, "aarch64_sve_fcvts_x2", [IsStreaming], []>; + def SVCVT_S32_F32_X2 : SInst<"svcvt_s32[_{d}_x2]", "2.x2.d", "f", MergeNone, "aarch64_sve_scvtf_x2", [IsStreaming], []>; + + def SVCVT_F32_U32_X4 : SInst<"svcvt_{d}[_u32_x4]", "4.d4.u", "f", MergeNone, "aarch64_sve_fcvtu_x4", [IsStreaming], []>; + def SVCVT_U32_F32_X4 : SInst<"svcvt_u32[_{d}_x4]", "4.u4.d", "f", MergeNone, "aarch64_sve_ucvtf_x4", [IsStreaming], []>; + def SVCVT_F32_S32_X4 : SInst<"svcvt_{d}[_s32_x4]", "4.d4.x", "f", MergeNone, "aarch64_sve_fcvts_x4", [IsStreaming], []>; + def SVCVT_S32_F32_X4 : SInst<"svcvt_s32[_{d}_x4]", "4.x4.d", "f", MergeNone, "aarch64_sve_scvtf_x4", [IsStreaming], []>; +} + +// +// Multi-vector floating-point convert from single-precision to interleaved half-precision/BFloat16 +// +let TargetGuard = "sme2" in { + def SVCVTN_F16_X2 : SInst<"svcvtn_f16[_f32_x2]", "e2", "f", MergeNone, "aarch64_sve_fcvtn_x2", [IsStreaming],[]>; + def SVCVTN_BF16_X2 : SInst<"svcvtn_bf16[_f32_x2]", "$2", "f", MergeNone, "aarch64_sve_bfcvtn_x2", [IsOverloadNone, IsStreaming],[]>; +} + +// +// Multi-vector saturating extract narrow +// +let TargetGuard = "sme2" in { + def SVQCVT_S16_S32_X2 : SInst<"svqcvt_s16[_{d}_x2]", "h2.d", "i", MergeNone, "aarch64_sve_sqcvt_x2", [IsStreaming], []>; + def SVQCVT_U16_U32_X2 : SInst<"svqcvt_u16[_{d}_x2]", "e2.d", "Ui", MergeNone, "aarch64_sve_uqcvt_x2", [IsStreaming], []>; + 
def SVQCVT_U16_S32_X2 : SInst<"svqcvt_u16[_{d}_x2]", "e2.d", "i", MergeNone, "aarch64_sve_sqcvtu_x2", [IsStreaming], []>; + + def SVQCVT_S8_S32_X4 : SInst<"svqcvt_s8[_{d}_x4]", "q4.d", "i", MergeNone, "aarch64_sve_sqcvt_x4", [IsStreaming], []>; + def SVQCVT_U8_U32_X4 : SInst<"svqcvt_u8[_{d}_x4]", "b4.d", "Ui", MergeNone, "aarch64_sve_uqcvt_x4", [IsStreaming], []>; + def SVQCVT_U8_S32_X4 : SInst<"svqcvt_u8[_{d}_x4]", "b4.d", "i", MergeNone, "aarch64_sve_sqcvtu_x4", [IsStreaming], []>; + + def SVQCVT_S16_S64_X4 : SInst<"svqcvt_s16[_{d}_x4]", "q4.d", "l", MergeNone, "aarch64_sve_sqcvt_x4", [IsStreaming], []>; + def SVQCVT_U16_U64_X4 : SInst<"svqcvt_u16[_{d}_x4]", "b4.d", "Ul", MergeNone, "aarch64_sve_uqcvt_x4", [IsStreaming], []>; + def SVQCVT_U16_S64_X4 : SInst<"svqcvt_u16[_{d}_x4]", "b4.d", "l", MergeNone, "aarch64_sve_sqcvtu_x4", [IsStreaming], []>; +} + +// +// Multi-vector saturating extract narrow and interleave +// +let TargetGuard = "sme2" in { + def SVQCVTN_S16_S32_X2 : SInst<"svqcvtn_s16[_{d}_x2]", "h2.d", "i", MergeNone, "aarch64_sve_sqcvtn_x2", [IsStreamingCompatible], []>; + def SVQCVTN_U16_U32_X2 : SInst<"svqcvtn_u16[_{d}_x2]", "e2.d", "Ui", MergeNone, "aarch64_sve_uqcvtn_x2", [IsStreamingCompatible], []>; + def SVQCVTN_U16_S32_X2 : SInst<"svqcvtn_u16[_{d}_x2]", "e2.d", "i", MergeNone, "aarch64_sve_sqc
[clang] [llvm] [SME2] Add LUTI2 and LUTI4 quad Builtins and Intrinsics (PR #73317)
@@ -0,0 +1,233 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +// CHECK-LABEL: define dso_local @test_svluti4_lane_zt_u16 +// CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8i16(i32 0, [[ZN]], i32 0) +// CHECK-NEXT:[[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT:[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT:[[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT:[[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT:[[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT:[[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT:[[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT:[[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT:ret [[TMP8]] +// +// CPP-CHECK-LABEL: define dso_local @_Z24test_svluti4_lane_zt_u16u11__SVUint8_t +// CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0:[0-9]+]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8i16(i32 
0, [[ZN]], i32 0) +// CPP-CHECK-NEXT:[[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT:[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT:[[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT:[[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT:[[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT:[[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT:[[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT:[[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT:ret [[TMP8]] +// +svuint16x4_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { + return svluti4_lane_zt_u16_x4(0, zn, 0); MDevereau wrote: Done https://github.com/llvm/llvm-project/pull/73317 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [SME2] Add LUTI2 and LUTI4 quad Builtins and Intrinsics (PR #73317)
@@ -1864,6 +1866,35 @@ void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs, SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode); } +void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node, +unsigned NumOutVecs, +unsigned Opc, +uint32_t MaxImm) { + if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4))) +if (Imm->getZExtValue() > MaxImm) + return; + + SDValue ZtValue; + if (!ImmToReg(Node->getOperand(2), ZtValue)) +return; + SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4)}; + SDLoc DL(Node); + EVT VT = Node->getValueType(0); + + SDNode *Instruction = + CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops); + SDValue SuperReg = SDValue(Instruction, 0); + + for (unsigned i = 0; i < NumOutVecs; ++i) MDevereau wrote: Done https://github.com/llvm/llvm-project/pull/73317 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [Clang][SVE2] Add builtins for moving multi-vectors to/from ZA (PR #71191)
@@ -0,0 +1,1889 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif MDevereau wrote: This is unused. rm? https://github.com/llvm/llvm-project/pull/71191 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [Clang][SVE2] Add builtins for moving multi-vectors to/from ZA (PR #71191)
@@ -0,0 +1,1889 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK MDevereau wrote: I think we can get rid of the `-DSVE_OVERLOADED_FORMS` RUN lines now? https://github.com/llvm/llvm-project/pull/71191 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang-tools-extra] [clang] [libc] [libcxx] [compiler-rt] [flang] [SME2] Add LUTI2 and LUTI4 quad Builtins and Intrinsics (PR #73317)
https://github.com/MDevereau closed https://github.com/llvm/llvm-project/pull/73317 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [SME2] Add LUTI2 and LUTI4 single Builtins and Intrinsics (PR #73304)
https://github.com/MDevereau updated https://github.com/llvm/llvm-project/pull/73304 >From aaf7270f50c7deaebb34acab636cd1e40e872477 Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Mon, 20 Nov 2023 15:50:28 + Subject: [PATCH 1/4] [SME2] Add LUTI2 and LUTI4 single Builtins and Intrinsics See https://github.com/ARM-software/acle/pull/217 Patch by: Hassnaa Hamdi --- clang/include/clang/Basic/arm_sme.td | 8 ++ .../acle_sme2_luti2_lane_zt.c | 96 +++ .../acle_sme2_luti4_lane_zt.c | 95 ++ .../aarch64-sme2-intrinsics/acle_sme2_imm.cpp | 30 ++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 9 ++ .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 4 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 22 - .../AArch64/sme2-intrinsics-luti2-lane.ll | 35 +++ .../AArch64/sme2-intrinsics-luti4-lane.ll | 35 +++ 9 files changed, 328 insertions(+), 6 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt.c create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-luti2-lane.ll create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-luti4-lane.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 08d484acb3c10..0d2dd9e95a749 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -336,3 +336,11 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X4 : Inst<"svluti2_lane_zt_{d}_x4", "4.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x4", [IsStreaming, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; def SVLUTI4_LANE_ZT_X4 : Inst<"svluti4_lane_zt_{d}_x4", "4.di[i", "sUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x4", [IsStreaming, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_1>]>; } + +// +// lookup table expand one register +// +let TargetGuard = "sme2" in { + def SVLUTI2_LANE_ZT : 
Inst<"svluti2_lane_zt[_{d}]", "didi", "cUcsUsiUi", MergeNone, "aarch64_sme_luti2_lane_zt", [IsStreaming, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_15>]>; + def SVLUTI4_LANE_ZT : Inst<"svluti4_lane_zt[_{d}]", "didi", "cUcsUsiUi", MergeNone, "aarch64_sme_luti4_lane_zt", [IsStreaming, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt.c new file mode 100644 index 0..ebabbfc815c1d --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt.c @@ -0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + + +// CHECK-LABEL: @test_svluti2_lane_zt_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti2.lane.zt.nxv16i8(i32 0, [[ZN:%.*]], i32 2) +// CHECK-NEXT:ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z23test_svluti2_lane_zt_u8u11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti2.lane.zt.nxv16i8(i32 0, [[ZN:%.*]], i32 2) +// CPP-CHECK-NEXT:ret [[TMP0]] +// +svuint8_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { + return 
svluti2_lane_zt_u8(0, zn, 2); +} + + +// CHECK-LABEL: @test_svluti2_lane_zt_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti2.lane.zt.nxv16i8(i32 0, [[ZN:%.*]], i32 2) +// CHECK-NEXT:ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z23test_svluti2_lane_zt_s8u10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti2.lane.zt.nxv16i8(i32 0, [[ZN:%.*]], i32 2) +// CPP-CHECK-NEXT:ret [[TMP0]] +// +svint8_t test_svluti2_lane_zt_s8(svint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { + return svluti2_lane_zt_s8(0, zn, 2); +} + +// CHECK-LABEL: @test_svluti2_lane_zt_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti2.lane.zt.nxv8i16(i32
[llvm] [clang] [SME2] Add LUTI2 and LUTI4 double Builtins and Intrinsics (PR #73305)
https://github.com/MDevereau closed https://github.com/llvm/llvm-project/pull/73305 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Add SME2 builtins for pfalse and ptrue (PR #71953)
https://github.com/MDevereau updated https://github.com/llvm/llvm-project/pull/71953 >From cb6e6b5c6fe76a2b4a3bd1d78f4f7cec82d84067 Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Fri, 10 Nov 2023 13:40:31 + Subject: [PATCH 1/3] Add SME2 builtins for pfalse and ptrue Extend pfalse and ptrue builtins with svcount_t return types to be enabled for sve2p1 and sme2 --- clang/include/clang/Basic/arm_sve.td | 23 +++-- clang/lib/Sema/Sema.cpp | 3 +- .../acle_sme2_ptrue_pfalse_attr.c | 34 +++ 3 files changed, 41 insertions(+), 19 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ptrue_pfalse_attr.c diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 3f69a3df9e616..44a8a2328081b 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1925,8 +1925,6 @@ def SVBGRP_N : SInst<"svbgrp[_n_{d}]", "dda", "UcUsUiUl", MergeNone, "aarch64_sv let TargetGuard = "sve2p1" in { def SVFCLAMP : SInst<"svclamp[_{d}]", "", "hfd", MergeNone, "aarch64_sve_fclamp", [], []>; -def SVPTRUE_COUNT : SInst<"svptrue_{d}", "}v", "QcQsQiQl", MergeNone, "aarch64_sve_ptrue_{d}", [IsOverloadNone], []>; -def SVPFALSE_COUNT_ALIAS : SInst<"svpfalse_c", "}v", "", MergeNone, "", [IsOverloadNone]>; def SVPEXT_SINGLE : SInst<"svpext_lane_{d}", "P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext", [], [ImmCheck<1, ImmCheck0_3>]>; def SVPEXT_X2 : SInst<"svpext_lane_{d}_x2", "2.P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext_x2", [], [ImmCheck<1, ImmCheck0_1>]>; @@ -2045,23 +2043,12 @@ def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sv defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUl", "aarch64_sve_revd">; } +let TargetGuard = "sve2p1|sme2" in { + def SVPTRUE_COUNT : SInst<"svptrue_{d}", "}v", "QcQsQiQl", MergeNone, "aarch64_sve_ptrue_{d}", [IsOverloadNone], []>; + def SVPFALSE_COUNT_ALIAS : SInst<"svpfalse_c", "}v", "", MergeNone, "", [IsOverloadNone]>; +} -let TargetGuard = 
"sve2p1,b16b16" in { -defm SVMUL_BF : SInstZPZZ<"svmul", "b", "aarch64_sve_fmul", "aarch64_sve_fmul_u">; -defm SVADD_BF : SInstZPZZ<"svadd", "b", "aarch64_sve_fadd", "aarch64_sve_fadd_u">; -defm SVSUB_BF : SInstZPZZ<"svsub", "b", "aarch64_sve_fsub", "aarch64_sve_fsub_u">; -defm SVMAXNM_BF : SInstZPZZ<"svmaxnm","b", "aarch64_sve_fmaxnm", "aarch64_sve_fmaxnm_u">; -defm SVMINNM_BF : SInstZPZZ<"svminnm","b", "aarch64_sve_fminnm", "aarch64_sve_fminnm_u">; -defm SVMAX_BF: SInstZPZZ<"svmax", "b", "aarch64_sve_fmax", "aarch64_sve_fmax_u">; -defm SVMIN_BF: SInstZPZZ<"svmin", "b", "aarch64_sve_fmin", "aarch64_sve_fmin_u">; -defm SVMLA_BF : SInstZPZZZ<"svmla", "b", "aarch64_sve_fmla", "aarch64_sve_fmla_u", []>; -defm SVMLS_BF : SInstZPZZZ<"svmls", "b", "aarch64_sve_fmls", "aarch64_sve_fmls_u", []>; -def SVMLA_LANE_BF : SInst<"svmla_lane[_{d}]", "i", "b", MergeNone, "aarch64_sve_fmla_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVMLS_LANE_BF : SInst<"svmls_lane[_{d}]", "i", "b", MergeNone, "aarch64_sve_fmls_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVMUL_LANE_BF : SInst<"svmul_lane[_{d}]", "dddi", "b", MergeNone, "aarch64_sve_fmul_lane", [], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; -def SVFCLAMP_BF : SInst<"svclamp[_{d}]", "", "b", MergeNone, "aarch64_sve_fclamp", [], []>; -} //sve2p1,b16b16 - + // SME2 // SME intrinsics which operate only on vectors and do not require ZA should be added here, diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index 9771aaa2f3b03..b9187cf86ebf7 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -2085,7 +2085,8 @@ void Sema::checkTypeSupport(QualType Ty, SourceLocation Loc, ValueDecl *D) { llvm::StringMap CallerFeatureMap; Context.getFunctionFeatureMap(CallerFeatureMap, FD); if (!Builtin::evaluateRequiredTargetFeatures( - "sve", CallerFeatureMap)) + "sve", CallerFeatureMap) && !Builtin::evaluateRequiredTargetFeatures( + "sme", CallerFeatureMap)) Diag(D->getLocation(), 
diag::err_sve_vector_in_non_sve_target) << Ty; } }; diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ptrue_pfalse_attr.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ptrue_pfalse_attr.c new file mode 100644 index 0..90d9434d87cf4 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ptrue_pfalse_attr.c @@ -0,0 +1,34 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-li
[clang] Add SME2 builtins for pfalse and ptrue (PR #71953)
@@ -1981,6 +1979,11 @@ def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sv defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUl", "aarch64_sve_revd">; } +let TargetGuard = "sve2p1|sme2" in { + def SVPTRUE_COUNT : SInst<"svptrue_{d}", "}v", "QcQsQiQl", MergeNone, "aarch64_sve_ptrue_{d}", [IsOverloadNone, IsStreaming], []>; MDevereau wrote: Done https://github.com/llvm/llvm-project/pull/71953 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Add SME2 builtins for pfalse and ptrue (PR #71953)
@@ -0,0 +1,34 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +#include + +// CHECK-LABEL: @test_svptrue_c8_attr( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() +// CHECK-NEXT:ret target("aarch64.svcount") [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z20test_svptrue_c8_attrv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() +// CPP-CHECK-NEXT:ret target("aarch64.svcount") [[TMP0]] +// +svcount_t test_svptrue_c8_attr(void) __arm_streaming { MDevereau wrote: Done https://github.com/llvm/llvm-project/pull/71953 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME2] Add multi-vector builtins for cvt (PR #74450)
https://github.com/MDevereau closed https://github.com/llvm/llvm-project/pull/74450 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Add SME2 builtins for pfalse and ptrue (PR #71953)
https://github.com/MDevereau updated https://github.com/llvm/llvm-project/pull/71953 >From cb6e6b5c6fe76a2b4a3bd1d78f4f7cec82d84067 Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Fri, 10 Nov 2023 13:40:31 + Subject: [PATCH 1/4] Add SME2 builtins for pfalse and ptrue Extend pfalse and ptrue builtins with svcount_t return types to be enabled for sve2p1 and sme2 --- clang/include/clang/Basic/arm_sve.td | 23 +++-- clang/lib/Sema/Sema.cpp | 3 +- .../acle_sme2_ptrue_pfalse_attr.c | 34 +++ 3 files changed, 41 insertions(+), 19 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ptrue_pfalse_attr.c diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 3f69a3df9e616..44a8a2328081b 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1925,8 +1925,6 @@ def SVBGRP_N : SInst<"svbgrp[_n_{d}]", "dda", "UcUsUiUl", MergeNone, "aarch64_sv let TargetGuard = "sve2p1" in { def SVFCLAMP : SInst<"svclamp[_{d}]", "", "hfd", MergeNone, "aarch64_sve_fclamp", [], []>; -def SVPTRUE_COUNT : SInst<"svptrue_{d}", "}v", "QcQsQiQl", MergeNone, "aarch64_sve_ptrue_{d}", [IsOverloadNone], []>; -def SVPFALSE_COUNT_ALIAS : SInst<"svpfalse_c", "}v", "", MergeNone, "", [IsOverloadNone]>; def SVPEXT_SINGLE : SInst<"svpext_lane_{d}", "P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext", [], [ImmCheck<1, ImmCheck0_3>]>; def SVPEXT_X2 : SInst<"svpext_lane_{d}_x2", "2.P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext_x2", [], [ImmCheck<1, ImmCheck0_1>]>; @@ -2045,23 +2043,12 @@ def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sv defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUl", "aarch64_sve_revd">; } +let TargetGuard = "sve2p1|sme2" in { + def SVPTRUE_COUNT : SInst<"svptrue_{d}", "}v", "QcQsQiQl", MergeNone, "aarch64_sve_ptrue_{d}", [IsOverloadNone], []>; + def SVPFALSE_COUNT_ALIAS : SInst<"svpfalse_c", "}v", "", MergeNone, "", [IsOverloadNone]>; +} -let TargetGuard = 
"sve2p1,b16b16" in { -defm SVMUL_BF : SInstZPZZ<"svmul", "b", "aarch64_sve_fmul", "aarch64_sve_fmul_u">; -defm SVADD_BF : SInstZPZZ<"svadd", "b", "aarch64_sve_fadd", "aarch64_sve_fadd_u">; -defm SVSUB_BF : SInstZPZZ<"svsub", "b", "aarch64_sve_fsub", "aarch64_sve_fsub_u">; -defm SVMAXNM_BF : SInstZPZZ<"svmaxnm","b", "aarch64_sve_fmaxnm", "aarch64_sve_fmaxnm_u">; -defm SVMINNM_BF : SInstZPZZ<"svminnm","b", "aarch64_sve_fminnm", "aarch64_sve_fminnm_u">; -defm SVMAX_BF: SInstZPZZ<"svmax", "b", "aarch64_sve_fmax", "aarch64_sve_fmax_u">; -defm SVMIN_BF: SInstZPZZ<"svmin", "b", "aarch64_sve_fmin", "aarch64_sve_fmin_u">; -defm SVMLA_BF : SInstZPZZZ<"svmla", "b", "aarch64_sve_fmla", "aarch64_sve_fmla_u", []>; -defm SVMLS_BF : SInstZPZZZ<"svmls", "b", "aarch64_sve_fmls", "aarch64_sve_fmls_u", []>; -def SVMLA_LANE_BF : SInst<"svmla_lane[_{d}]", "i", "b", MergeNone, "aarch64_sve_fmla_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVMLS_LANE_BF : SInst<"svmls_lane[_{d}]", "i", "b", MergeNone, "aarch64_sve_fmls_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVMUL_LANE_BF : SInst<"svmul_lane[_{d}]", "dddi", "b", MergeNone, "aarch64_sve_fmul_lane", [], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; -def SVFCLAMP_BF : SInst<"svclamp[_{d}]", "", "b", MergeNone, "aarch64_sve_fclamp", [], []>; -} //sve2p1,b16b16 - + // SME2 // SME intrinsics which operate only on vectors and do not require ZA should be added here, diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index 9771aaa2f3b03..b9187cf86ebf7 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -2085,7 +2085,8 @@ void Sema::checkTypeSupport(QualType Ty, SourceLocation Loc, ValueDecl *D) { llvm::StringMap CallerFeatureMap; Context.getFunctionFeatureMap(CallerFeatureMap, FD); if (!Builtin::evaluateRequiredTargetFeatures( - "sve", CallerFeatureMap)) + "sve", CallerFeatureMap) && !Builtin::evaluateRequiredTargetFeatures( + "sme", CallerFeatureMap)) Diag(D->getLocation(), 
diag::err_sve_vector_in_non_sve_target) << Ty; } }; diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ptrue_pfalse_attr.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ptrue_pfalse_attr.c new file mode 100644 index 0..90d9434d87cf4 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ptrue_pfalse_attr.c @@ -0,0 +1,34 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-li
[clang] Add SME2 builtins for pfalse and ptrue (PR #71953)
@@ -15,7 +17,7 @@ // CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() // CPP-CHECK-NEXT:ret target("aarch64.svcount") [[TMP0]] // -svcount_t test_svptrue_c8(void) { +svcount_t test_svptrue_c8(void) __arm_streaming_compatible { MDevereau wrote: I've called the `#if` guard `TEST_SME2`, but done https://github.com/llvm/llvm-project/pull/71953 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[flang] [llvm] [mlir] [libcxx] [clang] [openmp] [SME2] Add LUTI2 and LUTI4 single Builtins and Intrinsics (PR #73304)
https://github.com/MDevereau updated https://github.com/llvm/llvm-project/pull/73304 >From aaf7270f50c7deaebb34acab636cd1e40e872477 Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Mon, 20 Nov 2023 15:50:28 + Subject: [PATCH 1/4] [SME2] Add LUTI2 and LUTI4 single Builtins and Intrinsics See https://github.com/ARM-software/acle/pull/217 Patch by: Hassnaa Hamdi --- clang/include/clang/Basic/arm_sme.td | 8 ++ .../acle_sme2_luti2_lane_zt.c | 96 +++ .../acle_sme2_luti4_lane_zt.c | 95 ++ .../aarch64-sme2-intrinsics/acle_sme2_imm.cpp | 30 ++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 9 ++ .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 4 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 22 - .../AArch64/sme2-intrinsics-luti2-lane.ll | 35 +++ .../AArch64/sme2-intrinsics-luti4-lane.ll | 35 +++ 9 files changed, 328 insertions(+), 6 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt.c create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-luti2-lane.ll create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-luti4-lane.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 08d484acb3c10..0d2dd9e95a749 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -336,3 +336,11 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X4 : Inst<"svluti2_lane_zt_{d}_x4", "4.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x4", [IsStreaming, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; def SVLUTI4_LANE_ZT_X4 : Inst<"svluti4_lane_zt_{d}_x4", "4.di[i", "sUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x4", [IsStreaming, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_1>]>; } + +// +// lookup table expand one register +// +let TargetGuard = "sme2" in { + def SVLUTI2_LANE_ZT : 
Inst<"svluti2_lane_zt[_{d}]", "didi", "cUcsUsiUi", MergeNone, "aarch64_sme_luti2_lane_zt", [IsStreaming, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_15>]>; + def SVLUTI4_LANE_ZT : Inst<"svluti4_lane_zt[_{d}]", "didi", "cUcsUsiUi", MergeNone, "aarch64_sme_luti4_lane_zt", [IsStreaming, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt.c new file mode 100644 index 0..ebabbfc815c1d --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt.c @@ -0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + + +// CHECK-LABEL: @test_svluti2_lane_zt_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti2.lane.zt.nxv16i8(i32 0, [[ZN:%.*]], i32 2) +// CHECK-NEXT:ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z23test_svluti2_lane_zt_u8u11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti2.lane.zt.nxv16i8(i32 0, [[ZN:%.*]], i32 2) +// CPP-CHECK-NEXT:ret [[TMP0]] +// +svuint8_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { + return 
svluti2_lane_zt_u8(0, zn, 2); +} + + +// CHECK-LABEL: @test_svluti2_lane_zt_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti2.lane.zt.nxv16i8(i32 0, [[ZN:%.*]], i32 2) +// CHECK-NEXT:ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z23test_svluti2_lane_zt_s8u10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti2.lane.zt.nxv16i8(i32 0, [[ZN:%.*]], i32 2) +// CPP-CHECK-NEXT:ret [[TMP0]] +// +svint8_t test_svluti2_lane_zt_s8(svint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { + return svluti2_lane_zt_s8(0, zn, 2); +} + +// CHECK-LABEL: @test_svluti2_lane_zt_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti2.lane.zt.nxv8i16(i32
[llvm] [flang] [libcxx] [clang] [openmp] [mlir] [SME2] Add LUTI2 and LUTI4 single Builtins and Intrinsics (PR #73304)
https://github.com/MDevereau closed https://github.com/llvm/llvm-project/pull/73304 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Add SME2 builtins for pfalse and ptrue (PR #71953)
https://github.com/MDevereau closed https://github.com/llvm/llvm-project/pull/71953 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Add SME2 builtins for pfalse and ptrue (PR #71953)
https://github.com/MDevereau updated https://github.com/llvm/llvm-project/pull/71953 >From 79c05d1f8554d5a4469b9501b61b47863f086ffa Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Fri, 10 Nov 2023 13:40:31 + Subject: [PATCH 1/2] Add SME2 builtins for pfalse and ptrue Extend pfalse and ptrue builtins with svcount_t return types to be enabled for sve2p1 and sme2 --- clang/include/clang/Basic/arm_sve.td | 7 ++-- clang/lib/Sema/Sema.cpp | 3 +- .../acle_sme2_ptrue_pfalse_attr.c | 34 +++ 3 files changed, 41 insertions(+), 3 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ptrue_pfalse_attr.c diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 3d4c2129565903d..eccf87ac94abb3f 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1861,8 +1861,6 @@ def SVBGRP_N : SInst<"svbgrp[_n_{d}]", "dda", "UcUsUiUl", MergeNone, "aarch64_sv let TargetGuard = "sve2p1" in { def SVFCLAMP : SInst<"svclamp[_{d}]", "", "hfd", MergeNone, "aarch64_sve_fclamp", [], []>; -def SVPTRUE_COUNT : SInst<"svptrue_{d}", "}v", "QcQsQiQl", MergeNone, "aarch64_sve_ptrue_{d}", [IsOverloadNone], []>; -def SVPFALSE_COUNT_ALIAS : SInst<"svpfalse_c", "}v", "", MergeNone, "", [IsOverloadNone]>; def SVPEXT_SINGLE : SInst<"svpext_lane_{d}", "P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext", [], [ImmCheck<1, ImmCheck0_3>]>; def SVPEXT_X2 : SInst<"svpext_lane_{d}_x2", "2.P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext_x2", [], [ImmCheck<1, ImmCheck0_1>]>; @@ -1981,6 +1979,11 @@ def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sv defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUl", "aarch64_sve_revd">; } +let TargetGuard = "sve2p1|sme2" in { + def SVPTRUE_COUNT : SInst<"svptrue_{d}", "}v", "QcQsQiQl", MergeNone, "aarch64_sve_ptrue_{d}", [IsOverloadNone], []>; + def SVPFALSE_COUNT_ALIAS : SInst<"svpfalse_c", "}v", "", MergeNone, "", [IsOverloadNone]>; +} + // SME2 diff 
--git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index d7d8d2eaa37e1d6..e4f6a291a869c27 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -2084,7 +2084,8 @@ void Sema::checkTypeSupport(QualType Ty, SourceLocation Loc, ValueDecl *D) { llvm::StringMap CallerFeatureMap; Context.getFunctionFeatureMap(CallerFeatureMap, FD); if (!Builtin::evaluateRequiredTargetFeatures( - "sve", CallerFeatureMap)) + "sve", CallerFeatureMap) && !Builtin::evaluateRequiredTargetFeatures( + "sme", CallerFeatureMap)) Diag(D->getLocation(), diag::err_sve_vector_in_non_sve_target) << Ty; } }; diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ptrue_pfalse_attr.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ptrue_pfalse_attr.c new file mode 100644 index 000..90d9434d87cf4a0 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ptrue_pfalse_attr.c @@ -0,0 +1,34 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +#include + +// CHECK-LABEL: @test_svptrue_c8_attr( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() +// CHECK-NEXT:ret target("aarch64.svcount") [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z20test_svptrue_c8_attrv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() +// CPP-CHECK-NEXT:ret target("aarch64.svcount") [[TMP0]] +// +svcount_t test_svptrue_c8_attr(void) __arm_streaming { + 
return svptrue_c8(); +} + +// CHECK-LABEL: @test_svptrue_c( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt( zeroinitializer) +// CHECK-NEXT:ret target("aarch64.svcount") [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z14test_svptrue_cv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt( zeroinitializer) +// CPP-CHECK-NEXT:ret target("aarch64.svcount") [[TMP0]] +// +svcount_t test_svptrue_c(void) __arm_streaming { + return svpfalse_c(); +} >From 00cbdbc4fcc3f523238d5082b3636fab2d1224f6 Mon Sep 17
[clang] Add SME2 builtins for pfalse and ptrue (PR #71953)
@@ -0,0 +1,34 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +#include + +// CHECK-LABEL: @test_svptrue_c8_attr( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() +// CHECK-NEXT:ret target("aarch64.svcount") [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z20test_svptrue_c8_attrv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() +// CPP-CHECK-NEXT:ret target("aarch64.svcount") [[TMP0]] +// +svcount_t test_svptrue_c8_attr(void) __arm_streaming { MDevereau wrote: As discussed, we might be missing a HasSVE2p1OrSME2 target guard, which is currently stopping me from doing this in this patch. https://github.com/llvm/llvm-project/pull/71953 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Add SME2 builtins for pfalse and ptrue (PR #71953)
@@ -1981,6 +1979,11 @@ def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sv defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUl", "aarch64_sve_revd">; } +let TargetGuard = "sve2p1|sme2" in { + def SVPTRUE_COUNT : SInst<"svptrue_{d}", "}v", "QcQsQiQl", MergeNone, "aarch64_sve_ptrue_{d}", [IsOverloadNone], []>; + def SVPFALSE_COUNT_ALIAS : SInst<"svpfalse_c", "}v", "", MergeNone, "", [IsOverloadNone]>; MDevereau wrote: Done https://github.com/llvm/llvm-project/pull/71953 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Add SME2 builtins for pfalse and ptrue (PR #71953)
https://github.com/MDevereau edited https://github.com/llvm/llvm-project/pull/71953 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [compiler-rt] [AArch64][SME] Add support for sme-fa64 (PR #70809)
@@ -1363,6 +1367,8 @@ static void __init_cpu_features_constructor(unsigned long hwcap, setCPUFeature(FEAT_SME_I64); if (hwcap2 & HWCAP2_SME_F64F64) setCPUFeature(FEAT_SME_F64); + if (hwcap2 & HWCAP2_SME_FA64) +setCPUFeature(FEAT_SME_FA64); MDevereau wrote: Do you want me to delete the definition of `HWCAP2_SME_FA64` or are you fine with this being left in for later use? https://github.com/llvm/llvm-project/pull/70809 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SVE2] Add builtins for moving multi-vectors to/from ZA (PR #71191)
@@ -298,3 +298,48 @@ multiclass ZAAddSub { defm SVADD : ZAAddSub<"add">; defm SVSUB : ZAAddSub<"sub">; + +// SME2 - MOVA + +// +// 2 and 4 vector-group read/write intrinsics. MDevereau wrote: Opinion: Given you've defined write and read separately, you should have separate comments for read and write. https://github.com/llvm/llvm-project/pull/71191 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SVE2] Add builtins for moving multi-vectors to/from ZA (PR #71191)
@@ -0,0 +1,57 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \ +// RUN:-target-feature +sve2 -target-feature +sme2 -target-feature +sve -fsyntax-only -verify %s MDevereau wrote: Can `-target-feature +sve2` be deleted? https://github.com/llvm/llvm-project/pull/71191 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SVE2] Add builtins for moving multi-vectors to/from ZA (PR #71191)
@@ -298,3 +298,48 @@ multiclass ZAAddSub { defm SVADD : ZAAddSub<"add">; defm SVSUB : ZAAddSub<"sub">; + +// SME2 - MOVA + +// +// 2 and 4 vector-group read/write intrinsics. +// + +multiclass WriteHV_VG checks> { + let TargetGuard = "sme2" in { +def NAME # _VG2_H : Inst<"svwrite_hor_" # n # "_vg2", "vim2", t, MergeNone, i # "_hor_vg2", [IsSharedZA, IsStreaming], checks>; +def NAME # _VG2_V : Inst<"svwrite_ver_" # n # "_vg2", "vim2", t, MergeNone, i # "_ver_vg2", [IsSharedZA, IsStreaming], checks>; +def NAME # _VG4_H : Inst<"svwrite_hor_" # n # "_vg4", "vim4", t, MergeNone, i # "_hor_vg4", [IsSharedZA, IsStreaming], checks>; +def NAME # _VG4_V : Inst<"svwrite_ver_" # n # "_vg4", "vim4", t, MergeNone, i # "_ver_vg4", [IsSharedZA, IsStreaming], checks>; + } +} + +defm SVWRITE_ZA8 : WriteHV_VG<"za8[_{d}]", "cUc", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_0>]>; +defm SVWRITE_ZA16 : WriteHV_VG<"za16[_{d}]", "sUshb", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_1>]>; +defm SVWRITE_ZA32 : WriteHV_VG<"za32[_{d}]", "iUif", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_3>]>; +defm SVWRITE_ZA64 : WriteHV_VG<"za64[_{d}]", "lUld", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_7>]>; + +multiclass ReadHV_VG checks> { + let TargetGuard = "sme2" in { +def NAME # _VG2_H : Inst<"svread_hor_" # n # "_vg2", "2im", t, MergeNone, i # "_hor_vg2", [IsSharedZA, IsPreservesZA, IsStreaming], checks>; +def NAME # _VG2_V : Inst<"svread_ver_" # n # "_vg2", "2im", t, MergeNone, i # "_ver_vg2", [IsSharedZA, IsPreservesZA, IsStreaming], checks>; +def NAME # _VG4_H : Inst<"svread_hor_" # n # "_vg4", "4im", t, MergeNone, i # "_hor_vg4", [IsSharedZA, IsPreservesZA, IsStreaming], checks>; +def NAME # _VG4_V : Inst<"svread_ver_" # n # "_vg4", "4im", t, MergeNone, i # "_ver_vg4", [IsSharedZA, IsPreservesZA, IsStreaming], checks>; + } +} + +defm SVREAD_ZA8 : ReadHV_VG<"za8_{d}", "cUc", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_0>]>; +defm SVREAD_ZA16 : ReadHV_VG<"za16_{d}", "sUshb", "aarch64_sme_read", 
[ImmCheck<0, ImmCheck0_1>]>; +defm SVREAD_ZA32 : ReadHV_VG<"za32_{d}", "iUif", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_3>]>; +defm SVREAD_ZA64 : ReadHV_VG<"za64_{d}", "lUld", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_7>]>; + +// +// Single vector-group read/write intrinsics. +// + +let TargetGuard = "sme2" in { + def SVWRITE_ZA64_VG1x2 : Inst<"svwrite_za64[_{d}]_vg1x2", "vm2", "lUld", MergeNone, "aarch64_sme_write_vg1x2", [IsSharedZA, IsStreaming], []>; + def SVWRITE_ZA64_VG1x4 : Inst<"svwrite_za64[_{d}]_vg1x4", "vm4", "lUld", MergeNone, "aarch64_sme_write_vg1x4", [IsSharedZA, IsStreaming], []>; + def SVREAD_ZA64_VG1x2 : Inst<"svread_za64_{d}_vg1x2","2m", "lUld", MergeNone, "aarch64_sme_read_vg1x2", [IsSharedZA, IsPreservesZA, IsStreaming], []>; + def SVREAD_ZA64_VG1x4 : Inst<"svread_za64_{d}_vg1x4","4m", "lUld", MergeNone, "aarch64_sme_read_vg1x4", [IsSharedZA, IsPreservesZA, IsStreaming], []>; +} MDevereau wrote: Nit: Since you defined write and read separately for the normal `_ZA` builtins, it might make sense to do the same here even if it's just 4 lines. https://github.com/llvm/llvm-project/pull/71191 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[compiler-rt] [clang] [llvm] [AArch64][SME] Add support for sme-fa64 (PR #70809)
@@ -508,6 +508,9 @@ def FeatureSMEI16I64 : SubtargetFeature<"sme-i16i64", "HasSMEI16I64", "true", def FeatureSMEF16F16 : SubtargetFeature<"sme-f16f16", "HasSMEF16F16", "true", "Enable SME2.1 non-widening Float16 instructions (FEAT_SME_F16F16)", []>; +def FeatureSMEFA64 : SubtargetFeature<"sme-fa64", "HasSMEFA64", "true", + "Enable the full A64 instruction set in streaming SVE mode (FEAT_SME_FA64)", [FeatureSME, FeatureSVE2, FeatureNEON]>; MDevereau wrote: Removed. https://github.com/llvm/llvm-project/pull/70809 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [compiler-rt] [clang] [AArch64][SME] Add support for sme-fa64 (PR #70809)
@@ -1363,6 +1367,8 @@ static void __init_cpu_features_constructor(unsigned long hwcap, setCPUFeature(FEAT_SME_I64); if (hwcap2 & HWCAP2_SME_F64F64) setCPUFeature(FEAT_SME_F64); + if (hwcap2 & HWCAP2_SME_FA64) +setCPUFeature(FEAT_SME_FA64); MDevereau wrote: Removed both this and the definition. https://github.com/llvm/llvm-project/pull/70809 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [AArch64][SME2] Add ldr_zt, str_zt builtins and intrinsics (PR #71795)
MDevereau wrote: @mikaelholmen Thanks. I've reverted the patch https://github.com/llvm/llvm-project/pull/71795 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[compiler-rt] [llvm] [clang] [AArch64][SME] Add support for sme-fa64 (PR #70809)
https://github.com/MDevereau closed https://github.com/llvm/llvm-project/pull/70809 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64][SME2] Add ldr_zt, str_zt builtins and intrinsics (PR #72849)
https://github.com/MDevereau created https://github.com/llvm/llvm-project/pull/72849 Use ZTR instead of MatrixOP to prevent expensive test check and machine verifier failures. >From e1685cc0dea9501a993cbe3f8185a40e1f285591 Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Mon, 20 Nov 2023 10:49:27 + Subject: [PATCH] [AArch64][SME2] Add ldr_zt, str_zt builtins and intrinsics --- clang/include/clang/Basic/arm_sme.td | 8 +++ .../acle_sme2_ldr_str_zt.c| 51 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 11 ++-- .../Target/AArch64/AArch64ISelDAGToDAG.cpp| 8 ++- .../Target/AArch64/AArch64ISelLowering.cpp| 20 llvm/lib/Target/AArch64/AArch64ISelLowering.h | 2 + .../Target/AArch64/AArch64RegisterInfo.cpp| 6 +++ .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 4 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 23 +++-- .../CodeGen/AArch64/sme2-intrinsics-zt0.ll| 27 ++ 10 files changed, 147 insertions(+), 13 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-zt0.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index b5655afdf419ecf..fb3f54ecff95080 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -298,3 +298,11 @@ multiclass ZAAddSub { defm SVADD : ZAAddSub<"add">; defm SVSUB : ZAAddSub<"sub">; + +// +// Spill and fill of ZT0 +// +let TargetGuard = "sme2" in { + def SVLDR_ZT : Inst<"svldr_zt", "viQ", "", MergeNone, "aarch64_sme_ldr_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; + def SVSTR_ZT : Inst<"svstr_zt", "vi%", "", MergeNone, "aarch64_sme_str_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c new file mode 100644 index 
000..3d70ded6b469ba1 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c @@ -0,0 +1,51 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1 +#else +#define SVE_ACLE_FUNC(A1,A2) A1##A2 +#endif + +// LDR ZT0 + +// CHECK-LABEL: @test_svldr_zt( +// CHECK-NEXT: entry: +// CHECK-NEXT:tail call void @llvm.aarch64.sme.ldr.zt(i32 0, ptr [[BASE:%.*]]) +// CHECK-NEXT:ret void +// +// CPP-CHECK-LABEL: @_Z13test_svldr_ztPKv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:tail call void @llvm.aarch64.sme.ldr.zt(i32 0, ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT:ret void +// +void test_svldr_zt(const void *base) __arm_streaming_compatible __arm_shared_za __arm_preserves_za { + svldr_zt(0, base); +} ; + + +// STR ZT0 + +// CHECK-LABEL: @test_svstr_zt( +// CHECK-NEXT: entry: +// CHECK-NEXT:tail call void @llvm.aarch64.sme.str.zt(i32 0, ptr [[BASE:%.*]]) +// CHECK-NEXT:ret void +// +// CPP-CHECK-LABEL: @_Z13test_svstr_ztPv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:tail call void @llvm.aarch64.sme.str.zt(i32 0, ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT:ret void +// +void test_svstr_zt(void *base) __arm_streaming_compatible __arm_shared_za __arm_preserves_za { + svstr_zt(0, base); +} diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index a42e2c49cb477ba..9164604f7d78cbc 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -2679,10 +2679,10 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sme_st1q_vert : SME_Load_Store_Intrinsic; // Spil
[llvm] [clang] [AArch64][SME2] Add ldr_zt, str_zt builtins and intrinsics (PR #72849)
https://github.com/MDevereau edited https://github.com/llvm/llvm-project/pull/72849 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64][SME2] Add ldr_zt, str_zt builtins and intrinsics (PR #72849)
https://github.com/MDevereau updated https://github.com/llvm/llvm-project/pull/72849 >From 3b1d34afb1ae365f48716ae5eb9202a474adf234 Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Mon, 20 Nov 2023 10:49:27 + Subject: [PATCH] [AArch64][SME2] Add ldr_zt, str_zt builtins and intrinsics --- clang/include/clang/Basic/arm_sme.td | 8 +++ .../acle_sme2_ldr_str_zt.c| 51 +++ .../Target/AArch64/AArch64ISelDAGToDAG.cpp| 8 ++- .../Target/AArch64/AArch64ISelLowering.cpp| 20 llvm/lib/Target/AArch64/AArch64ISelLowering.h | 2 + .../Target/AArch64/AArch64RegisterInfo.cpp| 6 +++ .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 4 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 23 +++-- .../CodeGen/AArch64/sme2-intrinsics-zt0.ll| 27 ++ 9 files changed, 140 insertions(+), 9 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-zt0.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index b5655afdf419ecf2..fb3f54ecff95080d 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -298,3 +298,11 @@ multiclass ZAAddSub { defm SVADD : ZAAddSub<"add">; defm SVSUB : ZAAddSub<"sub">; + +// +// Spill and fill of ZT0 +// +let TargetGuard = "sme2" in { + def SVLDR_ZT : Inst<"svldr_zt", "viQ", "", MergeNone, "aarch64_sme_ldr_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; + def SVSTR_ZT : Inst<"svstr_zt", "vi%", "", MergeNone, "aarch64_sme_str_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c new file mode 100644 index ..3d70ded6b469ba15 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c @@ -0,0 +1,51 @@ +// NOTE: Assertions have 
been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1 +#else +#define SVE_ACLE_FUNC(A1,A2) A1##A2 +#endif + +// LDR ZT0 + +// CHECK-LABEL: @test_svldr_zt( +// CHECK-NEXT: entry: +// CHECK-NEXT:tail call void @llvm.aarch64.sme.ldr.zt(i32 0, ptr [[BASE:%.*]]) +// CHECK-NEXT:ret void +// +// CPP-CHECK-LABEL: @_Z13test_svldr_ztPKv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:tail call void @llvm.aarch64.sme.ldr.zt(i32 0, ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT:ret void +// +void test_svldr_zt(const void *base) __arm_streaming_compatible __arm_shared_za __arm_preserves_za { + svldr_zt(0, base); +} ; + + +// STR ZT0 + +// CHECK-LABEL: @test_svstr_zt( +// CHECK-NEXT: entry: +// CHECK-NEXT:tail call void @llvm.aarch64.sme.str.zt(i32 0, ptr [[BASE:%.*]]) +// CHECK-NEXT:ret void +// +// CPP-CHECK-LABEL: @_Z13test_svstr_ztPv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:tail call void @llvm.aarch64.sme.str.zt(i32 0, ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT:ret void +// +void test_svstr_zt(void *base) __arm_streaming_compatible __arm_shared_za __arm_preserves_za { + svstr_zt(0, base); +} diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 7617dccdeee397f7..abfe14e52509d58a 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -326,15 +326,19 @@ class AArch64DAGToDAGISel : public SelectionDAGISel { return false; } - template bool ImmToTile(SDValue N, SDValue &Imm) { + template + bool ImmToTile(SDValue N, SDValue &Imm) { if (auto *CI = dyn_cast(N)) {
[clang] [llvm] [AArch64][SME2] Add ldr_zt, str_zt builtins and intrinsics (PR #72849)
https://github.com/MDevereau edited https://github.com/llvm/llvm-project/pull/72849 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [AArch64][SME2] Add ldr_zt, str_zt builtins and intrinsics (PR #72849)
https://github.com/MDevereau updated https://github.com/llvm/llvm-project/pull/72849 >From 3b1d34afb1ae365f48716ae5eb9202a474adf234 Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Mon, 20 Nov 2023 10:49:27 + Subject: [PATCH 1/2] [AArch64][SME2] Add ldr_zt, str_zt builtins and intrinsics --- clang/include/clang/Basic/arm_sme.td | 8 +++ .../acle_sme2_ldr_str_zt.c| 51 +++ .../Target/AArch64/AArch64ISelDAGToDAG.cpp| 8 ++- .../Target/AArch64/AArch64ISelLowering.cpp| 20 llvm/lib/Target/AArch64/AArch64ISelLowering.h | 2 + .../Target/AArch64/AArch64RegisterInfo.cpp| 6 +++ .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 4 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 23 +++-- .../CodeGen/AArch64/sme2-intrinsics-zt0.ll| 27 ++ 9 files changed, 140 insertions(+), 9 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-zt0.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index b5655afdf419ecf..fb3f54ecff95080 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -298,3 +298,11 @@ multiclass ZAAddSub { defm SVADD : ZAAddSub<"add">; defm SVSUB : ZAAddSub<"sub">; + +// +// Spill and fill of ZT0 +// +let TargetGuard = "sme2" in { + def SVLDR_ZT : Inst<"svldr_zt", "viQ", "", MergeNone, "aarch64_sme_ldr_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; + def SVSTR_ZT : Inst<"svstr_zt", "vi%", "", MergeNone, "aarch64_sme_str_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c new file mode 100644 index 000..3d70ded6b469ba1 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c @@ -0,0 +1,51 @@ +// NOTE: Assertions 
have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1 +#else +#define SVE_ACLE_FUNC(A1,A2) A1##A2 +#endif + +// LDR ZT0 + +// CHECK-LABEL: @test_svldr_zt( +// CHECK-NEXT: entry: +// CHECK-NEXT:tail call void @llvm.aarch64.sme.ldr.zt(i32 0, ptr [[BASE:%.*]]) +// CHECK-NEXT:ret void +// +// CPP-CHECK-LABEL: @_Z13test_svldr_ztPKv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:tail call void @llvm.aarch64.sme.ldr.zt(i32 0, ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT:ret void +// +void test_svldr_zt(const void *base) __arm_streaming_compatible __arm_shared_za __arm_preserves_za { + svldr_zt(0, base); +} ; + + +// STR ZT0 + +// CHECK-LABEL: @test_svstr_zt( +// CHECK-NEXT: entry: +// CHECK-NEXT:tail call void @llvm.aarch64.sme.str.zt(i32 0, ptr [[BASE:%.*]]) +// CHECK-NEXT:ret void +// +// CPP-CHECK-LABEL: @_Z13test_svstr_ztPv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:tail call void @llvm.aarch64.sme.str.zt(i32 0, ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT:ret void +// +void test_svstr_zt(void *base) __arm_streaming_compatible __arm_shared_za __arm_preserves_za { + svstr_zt(0, base); +} diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 7617dccdeee397f..abfe14e52509d58 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -326,15 +326,19 @@ class AArch64DAGToDAGISel : public SelectionDAGISel { return false; } - template bool ImmToTile(SDValue N, SDValue &Imm) { + template + bool ImmToTile(SDValue N, SDValue &Imm) { if (auto *CI = dyn_cast(N)) {
[compiler-rt] [llvm] [clang] [AArch64][SME] Add support for sme-fa64 (PR #70809)
@@ -2,6 +2,8 @@ // RUN:| FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme < %s \ // RUN:| FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-fa64 < %s \ MDevereau wrote: I've removed them all (though some whitespace somehow survived in a few of them :facepalm:) https://github.com/llvm/llvm-project/pull/70809 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[compiler-rt] [llvm] [clang] [AArch64][SME] Add support for sme-fa64 (PR #70809)
@@ -491,13 +491,13 @@ bool AArch64Subtarget::isStreamingCompatible() const { } bool AArch64Subtarget::isNeonAvailable() const { - return hasNEON() && !isStreaming() && !isStreamingCompatible(); + return (hasNEON() || hasSMEFA64()) && !isStreaming() && MDevereau wrote: I've added the file [sve-streaming-mode-fixed-length-fp-reduce-fa64.ll](https://github.com/llvm/llvm-project/pull/70809/commits/9d2078b903a9ae8d54cdf5e34b4d88a10c0d372e#diff-cdcb53699f7848dacb5e12e4c40f2418be9ef30428b5308016f91b3bfe224d1a) https://github.com/llvm/llvm-project/pull/70809 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[compiler-rt] [llvm] [clang] [AArch64][SME] Add support for sme-fa64 (PR #70809)
@@ -508,6 +508,9 @@ def FeatureSMEI16I64 : SubtargetFeature<"sme-i16i64", "HasSMEI16I64", "true", def FeatureSMEF16F16 : SubtargetFeature<"sme-f16f16", "HasSMEF16F16", "true", "Enable SME2.1 non-widening Float16 instructions (FEAT_SME_F16F16)", []>; +def FeatureSMEFA64 : SubtargetFeature<"sme-fa64", "HasSMEFA64", "true", + "Enable the full A64 instruction set in SVE streaming mode (FEAT_SME_FA64)", [FeatureSVE2, FeatureNEON]>; MDevereau wrote: Removed the implied features. https://github.com/llvm/llvm-project/pull/70809 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[compiler-rt] [llvm] [clang] [AArch64][SME] Add support for sme-fa64 (PR #70809)
@@ -1363,6 +1364,8 @@ static void __init_cpu_features_constructor(unsigned long hwcap, setCPUFeature(FEAT_SME_I64); if (hwcap2 & HWCAP2_SME_F64F64) setCPUFeature(FEAT_SME_F64); + if (hwcap2 & HWCAP2_SME_FA64) MDevereau wrote: I've defined it now. https://github.com/llvm/llvm-project/pull/70809 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[compiler-rt] [llvm] [clang] [AArch64][SME] Add support for sme-fa64 (PR #70809)
@@ -248,8 +250,9 @@ inline constexpr ExtensionInfo Extensions[] = { {"simd", AArch64::AEK_SIMD, "+neon", "-neon", FEAT_SIMD, "+fp-armv8,+neon", 100}, {"sm4", AArch64::AEK_SM4, "+sm4", "-sm4", FEAT_SM4, "+sm4,+fp-armv8,+neon", 60}, {"sme-f16f16", AArch64::AEK_SMEF16F16, "+sme-f16f16", "-sme-f16f16", FEAT_INIT, "", 0}, -{"sme-f64f64", AArch64::AEK_SMEF64F64, "+sme-f64f64", "-sme-f64f64", FEAT_SME_F64, "+sme,+sme-f64f64,+bf16", 560}, -{"sme-i16i64", AArch64::AEK_SMEI16I64, "+sme-i16i64", "-sme-i16i64", FEAT_SME_I64, "+sme,+sme-i16i64,+bf16", 570}, +{"sme-f64f64", AArch64::AEK_SMEF64F64, "+sme-f64f64", "-sme-f64f64", FEAT_SME_F64, "+sme,+sme-f64f64,+bf16", 560}, +{"sme-i16i64", AArch64::AEK_SMEI16I64, "+sme-i16i64", "-sme-i16i64", FEAT_SME_I64, "+sme,+sme-i16i64,+bf16", 570}, +{"sme-fa64", AArch64::AEK_SMEFA64, "+sme-fa64", "-sme-fa64", FEAT_SME_FA64, "+sve2", 580}, MDevereau wrote: Apparently it's the Function Multi Versioning priority? I'm not sure how it works, but it seems to be tested by `attr-target-version.c`, which does not appear to test the other SME flags such as F16F16. https://github.com/llvm/llvm-project/pull/70809 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[compiler-rt] [clang] [llvm] [AArch64][SME] Add support for sme-fa64 (PR #70809)
@@ -1133,6 +1133,9 @@ typedef struct __ifunc_arg_t { #ifndef HWCAP2_SME_F64F64 #define HWCAP2_SME_F64F64 (1 << 25) #endif +#ifndef HWCAP2_SME_FA64 +#define HWCAP2_SME_FA64 (1 << 26) MDevereau wrote: Where did you get 30 from? https://github.com/llvm/llvm-project/pull/70809 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[compiler-rt] [clang] [llvm] [AArch64][SME] Add support for sme-fa64 (PR #70809)
https://github.com/MDevereau edited https://github.com/llvm/llvm-project/pull/70809 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Add outer product and accumulate/subtract builtins (PR #71176)
@@ -0,0 +1,18 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \ +// RUN:-target-feature +sve2 -target-feature +sme2 -target-feature +sve -fsyntax-only -verify %s + +// REQUIRES: aarch64-registered-target + +#include + +void test_outer_product(svbool_t pred, svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32) __arm_streaming __arm_shared_za { + // Test Tile Range + svmopa_za32_u16_m(4, pred, pred, u16, u16); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + svmopa_za32_s16_m(4, pred, pred, s16, s16); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + + svmops_za32_u16_m(4, pred, pred, u16, u16); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + svmops_za32_s16_m(4, pred, pred, s16, s16); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + + svbmopa_za32_u32_m(4, pred, pred, u32, u32); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + svbmops_za32_s32_m(4, pred, pred, s32, s32); // expected-error {{argument value 4 is outside the valid range [0, 3]}} MDevereau wrote: It looks like tests `svbmopa_za32_s32_m` and `svbmops_za32_u32_m` are missing. https://github.com/llvm/llvm-project/pull/71176 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Add outer product and accumulate/subtract builtins (PR #71176)
@@ -298,3 +298,19 @@ multiclass ZAAddSub { defm SVADD : ZAAddSub<"add">; defm SVSUB : ZAAddSub<"sub">; + +// +// Outer produce and accumulate/subtract +// + +let TargetGuard = "sme2" in { + def SVSMOPA : Inst<"svmopa_za32[_{d}]_m", "viPPdd", "s", MergeNone, "aarch64_sme_smopa_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; + def SVUSMOPA : Inst<"svmopa_za32[_{d}]_m", "viPPdd", "Us", MergeNone, "aarch64_sme_umopa_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; + + def SVSMOPS : Inst<"svmops_za32[_{d}]_m", "viPPdd", "s", MergeNone, "aarch64_sme_smops_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; + def SVUSMOPS : Inst<"svmops_za32[_{d}]_m", "viPPdd", "Us", MergeNone, "aarch64_sme_umops_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; + MDevereau wrote: ```suggestion // // Bitwise exclusive NOR population count outer product and accumulate/subtract // ``` https://github.com/llvm/llvm-project/pull/71176 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Add outer product and accumulate/subtract builtins (PR #71176)
@@ -0,0 +1,170 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +// MOPA + +// CHECK-LABEL: @test_svmopa_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CHECK-NEXT:tail call void @llvm.aarch64.sme.smopa.za32.nxv8i16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT:ret void +// +// CPP-CHECK-LABEL: @_Z15test_svmopa_s16u10__SVBool_tS_u11__SVInt16_tS0_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CPP-CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CPP-CHECK-NEXT:tail call void @llvm.aarch64.sme.smopa.za32.nxv8i16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT:ret void +// +void test_svmopa_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) __arm_streaming __arm_shared_za { + SVE_ACLE_FUNC(svmopa_za32,_s16,_m,)(3, pn, pm, zn, zm); +} + +// CHECK-LABEL: @test_svmopa_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CHECK-NEXT:tail call void @llvm.aarch64.sme.umopa.za32.nxv8i16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT:ret void +// +// CPP-CHECK-LABEL: @_Z15test_svmopa_u16u10__SVBool_tS_u12__SVUint16_tS0_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CPP-CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CPP-CHECK-NEXT:tail call void @llvm.aarch64.sme.umopa.za32.nxv8i16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) 
+// CPP-CHECK-NEXT:ret void +// +void test_svmopa_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_shared_za { + SVE_ACLE_FUNC(svmopa_za32,_u16,_m,)(3, pn, pm, zn, zm); +} + +// MOPS + +// CHECK-LABEL: @test_svmops_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CHECK-NEXT:tail call void @llvm.aarch64.sme.smops.za32.nxv8i16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT:ret void +// +// CPP-CHECK-LABEL: @_Z15test_svmops_s16u10__SVBool_tS_u11__SVInt16_tS0_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CPP-CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CPP-CHECK-NEXT:tail call void @llvm.aarch64.sme.smops.za32.nxv8i16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT:ret void +// +void test_svmops_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) __arm_streaming __arm_shared_za { + SVE_ACLE_FUNC(svmops_za32,_s16,_m,)(3, pn, pm, zn, zm); +} + +// CHECK-LABEL: @test_svmops_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CHECK-NEXT:[[TMP1:%.*]] =
[clang] [AArch64][SME2] Enable CLAMP multi-vector builtins for SME2 (PR #72272)
MDevereau wrote: Hi Dinar, I can see you've added FCLAMP to #72487. Given these builtins are all defined in the same section of the ACLE [here](https://github.com/ARM-software/acle/pull/217/files#diff-516526d4a18101dc85300bc2033d0f86dc46c505b7510a7694baabea851aedfaR11371-R11394), I think it would make sense to introduce the single-vector variants in this pull request instead. https://github.com/llvm/llvm-project/pull/72272 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [SME2] Add LUTI2 and LUTI4 single Builtins and Intrinsics (PR #73304)
https://github.com/MDevereau created https://github.com/llvm/llvm-project/pull/73304 See https://github.com/ARM-software/acle/pull/217 Patch by: Hassnaa Hamdi >From bca5297fe897edf6faf51ffde3e8fe1baa26b148 Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Mon, 20 Nov 2023 15:50:28 + Subject: [PATCH] [SME2] Add LUTI2 and LUTI4 single Builtins and Intrinsics See https://github.com/ARM-software/acle/pull/217 Patch by: Hassnaa Hamdi --- clang/include/clang/Basic/arm_sme.td | 8 ++ .../acle_sme2_luti2_lane_zt.c | 96 +++ .../acle_sme2_luti4_lane_zt.c | 95 ++ .../aarch64-sme2-intrinsics/acle_sme2_imm.cpp | 33 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 9 ++ .../Target/AArch64/AArch64ISelDAGToDAG.cpp| 7 +- .../Target/AArch64/AArch64RegisterInfo.cpp| 6 ++ .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 4 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 33 +-- .../AArch64/sme2-intrinsics-luti2-lane.ll | 35 +++ .../AArch64/sme2-intrinsics-luti4-lane.ll | 35 +++ 11 files changed, 349 insertions(+), 12 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt.c create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-luti2-lane.ll create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-luti4-lane.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index b5655afdf419ecf..1174e30cb0885e1 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -298,3 +298,11 @@ multiclass ZAAddSub { defm SVADD : ZAAddSub<"add">; defm SVSUB : ZAAddSub<"sub">; + +// +// lookup table expand one register +// +let TargetGuard = "sme2" in { + def SVLUTI2_LANE_ZT : Inst<"svluti2_lane_zt[_{d}]", "didi", "cUcsUsiUi", MergeNone, "aarch64_sme_luti2_lane_zt", [IsStreaming, IsSharedZA, IsPreservesZA], [ImmCheck<0, 
ImmCheck0_0>, ImmCheck<2, ImmCheck0_15>]>; + def SVLUTI4_LANE_ZT : Inst<"svluti4_lane_zt[_{d}]", "didi", "cUcsUsiUi", MergeNone, "aarch64_sme_luti4_lane_zt", [IsStreaming, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt.c new file mode 100644 index 000..ebabbfc815c1dfe --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt.c @@ -0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + + +// CHECK-LABEL: @test_svluti2_lane_zt_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti2.lane.zt.nxv16i8(i32 0, [[ZN:%.*]], i32 2) +// CHECK-NEXT:ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z23test_svluti2_lane_zt_u8u11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti2.lane.zt.nxv16i8(i32 0, [[ZN:%.*]], i32 2) +// CPP-CHECK-NEXT:ret [[TMP0]] +// +svuint8_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { + return svluti2_lane_zt_u8(0, zn, 2); +} + + +// CHECK-LABEL: @test_svluti2_lane_zt_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call 
@llvm.aarch64.sme.luti2.lane.zt.nxv16i8(i32 0, [[ZN:%.*]], i32 2) +// CHECK-NEXT:ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z23test_svluti2_lane_zt_s8u10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti2.lane.zt.nxv16i8(i32 0, [[ZN:%.*]], i32 2) +// CPP-CHECK-NEXT:ret [[TMP0]] +// +svint8_t test_svluti2_lane_zt_s8(svint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { + return svluti2_lane_zt_s8(0, zn, 2); +} + +// CHECK-LABEL: @test_svluti2_lane_zt_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti2.lane.zt.nxv8i16(i32 0, [[ZN:%.*]], i32 2) +// CHECK-NEXT:ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_u16u12__SVUint16_
[clang] [llvm] [SME2] Add LUTI2 and LUTI4 double Builtins and Intrinsics (PR #73305)
https://github.com/MDevereau created https://github.com/llvm/llvm-project/pull/73305 See https://github.com/ARM-software/acle/pull/217 Patch by: Hassnaa Hamdi >From 5aba2f1d2fe34f721a8e85eef6eecc25cb60851f Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Mon, 20 Nov 2023 15:50:28 + Subject: [PATCH] [SME2] Add LUTI2 and LUTI4 double Builtins and Intrinsics See https://github.com/ARM-software/acle/pull/217 Patch by: Hassnaa Hamdi --- clang/include/clang/Basic/arm_sme.td | 8 + .../acle_sme2_luti2_lane_zt_x2.c | 153 ++ .../acle_sme2_luti4_lane_zt_x2.c | 153 ++ .../aarch64-sme2-intrinsics/acle_sme2_imm.cpp | 34 llvm/include/llvm/IR/IntrinsicsAArch64.td | 10 ++ .../Target/AArch64/AArch64ISelDAGToDAG.cpp| 63 .../Target/AArch64/AArch64RegisterInfo.cpp| 6 + llvm/lib/Target/AArch64/SMEInstrFormats.td| 1 + .../AArch64/sme2-intrinsics-luti2-lane-x2.ll | 35 .../AArch64/sme2-intrinsics-luti4-lane-x2.ll | 35 10 files changed, 498 insertions(+) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-luti2-lane-x2.ll create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-luti4-lane-x2.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index b5655afdf419ecf..c7b9dbacf071c43 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -298,3 +298,11 @@ multiclass ZAAddSub { defm SVADD : ZAAddSub<"add">; defm SVSUB : ZAAddSub<"sub">; + +// +// lookup table expand two contiguous registers +// +let TargetGuard = "sme2" in { + def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt[_{d}]_x2", "2.dmdm", "cUcsUsiUi", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; + 
def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt[_{d}]_x2", "2.dmdm", "cUcsUsiUi", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c new file mode 100644 index 000..51adc31c6968e7e --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c @@ -0,0 +1,153 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 +#endif + + +// CHECK-LABEL: @test_svluti2_lane_zt_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 0) +// CHECK-NEXT:[[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT:[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT:[[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT:[[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT:ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z23test_svluti2_lane_zt_u8u11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 0) +
[clang] [llvm] [SME2] Add LUTI2 and LUTI4 quad Builtins and Intrinsics (PR #73317)
https://github.com/MDevereau created https://github.com/llvm/llvm-project/pull/73317 See https://github.com/ARM-software/acle/pull/217 Patch by: Hassnaa Hamdi >From f5b909e24e3cea49d98b40797880e4329a7a1e4f Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Mon, 20 Nov 2023 15:50:28 + Subject: [PATCH] [SME2] Add LUTI2 and LUTI4 quad Builtins and Intrinsics See https://github.com/ARM-software/acle/pull/217 Patch by: Hassnaa Hamdi --- clang/include/clang/Basic/arm_sme.td | 8 + .../acle_sme2_luti2_lane_zt_x4.c | 201 ++ .../acle_sme2_luti4_lane_zt_x4.c | 148 + .../aarch64-sme2-intrinsics/acle_sme2_imm.cpp | 31 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 11 + .../Target/AArch64/AArch64ISelDAGToDAG.cpp| 55 - .../Target/AArch64/AArch64RegisterInfo.cpp| 6 + llvm/lib/Target/AArch64/SMEInstrFormats.td| 11 +- .../AArch64/sme2-intrinsics-luti2-lane-x4.ll | 35 +++ .../AArch64/sme2-intrinsics-luti4-lane-x4.ll | 25 +++ 10 files changed, 525 insertions(+), 6 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x4.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-luti2-lane-x4.ll create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-luti4-lane-x4.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index b5655afdf419ecf..53319a57d73fdd2 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -298,3 +298,11 @@ multiclass ZAAddSub { defm SVADD : ZAAddSub<"add">; defm SVSUB : ZAAddSub<"sub">; + +// +// lookup table expand four contiguous registers +// +let TargetGuard = "sme2" in { + def SVLUTI2_LANE_ZT_X4 : Inst<"svluti2_lane_zt[_{d}]_x4", "4.didi", "cUcsUsiUi", MergeNone, "aarch64_sme_luti2_lane_zt_x4", [IsStreaming, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>, 
ImmCheck<2, ImmCheck0_3>]>; + def SVLUTI4_LANE_ZT_X4 : Inst<"svluti4_lane_zt[_{d}]_x4", "4.didi", "sUsiUi", MergeNone, "aarch64_sme_luti4_lane_zt_x4", [IsStreaming, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_1>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c new file mode 100644 index 000..5479fa109e839c5 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c @@ -0,0 +1,201 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 +#endif + + + +// CHECK-LABEL: @test_svluti2_lane_zt_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv16i8(i32 0, [[ZN:%.*]], i32 0) +// CHECK-NEXT:[[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT:[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT:[[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT:[[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT:[[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT:[[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT:[[TMP7:%.*]] = extractvalue { , , , } [[
[clang] [llvm] [SME2] Add LUTI2 and LUTI4 double Builtins and Intrinsics (PR #73305)
https://github.com/MDevereau updated https://github.com/llvm/llvm-project/pull/73305 >From 5aba2f1d2fe34f721a8e85eef6eecc25cb60851f Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Mon, 20 Nov 2023 15:50:28 + Subject: [PATCH 1/2] [SME2] Add LUTI2 and LUTI4 double Builtins and Intrinsics See https://github.com/ARM-software/acle/pull/217 Patch by: Hassnaa Hamdi --- clang/include/clang/Basic/arm_sme.td | 8 + .../acle_sme2_luti2_lane_zt_x2.c | 153 ++ .../acle_sme2_luti4_lane_zt_x2.c | 153 ++ .../aarch64-sme2-intrinsics/acle_sme2_imm.cpp | 34 llvm/include/llvm/IR/IntrinsicsAArch64.td | 10 ++ .../Target/AArch64/AArch64ISelDAGToDAG.cpp| 63 .../Target/AArch64/AArch64RegisterInfo.cpp| 6 + llvm/lib/Target/AArch64/SMEInstrFormats.td| 1 + .../AArch64/sme2-intrinsics-luti2-lane-x2.ll | 35 .../AArch64/sme2-intrinsics-luti4-lane-x2.ll | 35 10 files changed, 498 insertions(+) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-luti2-lane-x2.ll create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-luti4-lane-x2.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index b5655afdf419ecf..c7b9dbacf071c43 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -298,3 +298,11 @@ multiclass ZAAddSub { defm SVADD : ZAAddSub<"add">; defm SVSUB : ZAAddSub<"sub">; + +// +// lookup table expand two contiguous registers +// +let TargetGuard = "sme2" in { + def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt[_{d}]_x2", "2.dmdm", "cUcsUsiUi", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; + def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt[_{d}]_x2", "2.dmdm", 
"cUcsUsiUi", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c new file mode 100644 index 000..51adc31c6968e7e --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c @@ -0,0 +1,153 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 +#endif + + +// CHECK-LABEL: @test_svluti2_lane_zt_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 0) +// CHECK-NEXT:[[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT:[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT:[[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT:[[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT:ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z23test_svluti2_lane_zt_u8u11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 0) +// CPP-CHECK-NEXT:[[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +//
[clang] [AArch64][SME2] Add PEXT, PSEL builtins for SME2 (PR #72827)
@@ -1,10 +1,17 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -target-feature +sme2 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s MDevereau wrote: ```suggestion // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s ``` I don't think we need `-target-feature +sme` in this file? https://github.com/llvm/llvm-project/pull/72827 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME2] Add PEXT, PSEL builtins for SME2 (PR #72827)
@@ -1859,19 +1859,28 @@ def SVBGRP : SInst<"svbgrp[_{d}]", "ddd", "UcUsUiUl", MergeNone, "aarch64_sv def SVBGRP_N : SInst<"svbgrp[_n_{d}]", "dda", "UcUsUiUl", MergeNone, "aarch64_sve_bgrp_x">; } +let TargetGuard = "sve2p1|sme" in { MDevereau wrote: ```suggestion let TargetGuard = "sve2p1|sme2" in { ``` Should this be sme2? https://github.com/llvm/llvm-project/pull/72827 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME2] Add PEXT, PSEL builtins for SME2 (PR #72827)
@@ -5,6 +5,11 @@ // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu \ // RUN: -target-feature +sve2p1 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu \ MDevereau wrote: As per https://github.com/ARM-software/acle/pull/217/files#diff-516526d4a18101dc85300bc2033d0f86dc46c505b7510a7694baabea851aedfaR11352-R11358 should this be `__arm_streaming_compatible` instead of `__arm_streaming` for psel? https://github.com/llvm/llvm-project/pull/72827 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Add outer product and accumulate/subtract builtins (PR #71176)
https://github.com/MDevereau approved this pull request. https://github.com/llvm/llvm-project/pull/71176 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [SME2] Add LUTI2 and LUTI4 quad Builtins and Intrinsics (PR #73317)
https://github.com/MDevereau updated https://github.com/llvm/llvm-project/pull/73317 >From f5b909e24e3cea49d98b40797880e4329a7a1e4f Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Mon, 20 Nov 2023 15:50:28 + Subject: [PATCH 1/3] [SME2] Add LUTI2 and LUTI4 quad Builtins and Intrinsics See https://github.com/ARM-software/acle/pull/217 Patch by: Hassnaa Hamdi --- clang/include/clang/Basic/arm_sme.td | 8 + .../acle_sme2_luti2_lane_zt_x4.c | 201 ++ .../acle_sme2_luti4_lane_zt_x4.c | 148 + .../aarch64-sme2-intrinsics/acle_sme2_imm.cpp | 31 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 11 + .../Target/AArch64/AArch64ISelDAGToDAG.cpp| 55 - .../Target/AArch64/AArch64RegisterInfo.cpp| 6 + llvm/lib/Target/AArch64/SMEInstrFormats.td| 11 +- .../AArch64/sme2-intrinsics-luti2-lane-x4.ll | 35 +++ .../AArch64/sme2-intrinsics-luti4-lane-x4.ll | 25 +++ 10 files changed, 525 insertions(+), 6 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x4.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-luti2-lane-x4.ll create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-luti4-lane-x4.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index b5655afdf419ecf..53319a57d73fdd2 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -298,3 +298,11 @@ multiclass ZAAddSub { defm SVADD : ZAAddSub<"add">; defm SVSUB : ZAAddSub<"sub">; + +// +// lookup table expand four contiguous registers +// +let TargetGuard = "sme2" in { + def SVLUTI2_LANE_ZT_X4 : Inst<"svluti2_lane_zt[_{d}]_x4", "4.didi", "cUcsUsiUi", MergeNone, "aarch64_sme_luti2_lane_zt_x4", [IsStreaming, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; + def SVLUTI4_LANE_ZT_X4 : 
Inst<"svluti4_lane_zt[_{d}]_x4", "4.didi", "sUsiUi", MergeNone, "aarch64_sme_luti4_lane_zt_x4", [IsStreaming, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_1>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c new file mode 100644 index 000..5479fa109e839c5 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c @@ -0,0 +1,201 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 +#endif + + + +// CHECK-LABEL: @test_svluti2_lane_zt_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv16i8(i32 0, [[ZN:%.*]], i32 0) +// CHECK-NEXT:[[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT:[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT:[[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT:[[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT:[[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT:[[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT:[[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT:[[TMP8:%.*]] = tail call @llvm.vector.inse
[llvm] [clang] [SME2] Add LUTI2 and LUTI4 quad Builtins and Intrinsics (PR #73317)
https://github.com/MDevereau updated https://github.com/llvm/llvm-project/pull/73317 >From f5b909e24e3cea49d98b40797880e4329a7a1e4f Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Mon, 20 Nov 2023 15:50:28 + Subject: [PATCH 1/4] [SME2] Add LUTI2 and LUTI4 quad Builtins and Intrinsics See https://github.com/ARM-software/acle/pull/217 Patch by: Hassnaa Hamdi --- clang/include/clang/Basic/arm_sme.td | 8 + .../acle_sme2_luti2_lane_zt_x4.c | 201 ++ .../acle_sme2_luti4_lane_zt_x4.c | 148 + .../aarch64-sme2-intrinsics/acle_sme2_imm.cpp | 31 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 11 + .../Target/AArch64/AArch64ISelDAGToDAG.cpp| 55 - .../Target/AArch64/AArch64RegisterInfo.cpp| 6 + llvm/lib/Target/AArch64/SMEInstrFormats.td| 11 +- .../AArch64/sme2-intrinsics-luti2-lane-x4.ll | 35 +++ .../AArch64/sme2-intrinsics-luti4-lane-x4.ll | 25 +++ 10 files changed, 525 insertions(+), 6 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x4.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-luti2-lane-x4.ll create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-luti4-lane-x4.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index b5655afdf419ecf..53319a57d73fdd2 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -298,3 +298,11 @@ multiclass ZAAddSub { defm SVADD : ZAAddSub<"add">; defm SVSUB : ZAAddSub<"sub">; + +// +// lookup table expand four contiguous registers +// +let TargetGuard = "sme2" in { + def SVLUTI2_LANE_ZT_X4 : Inst<"svluti2_lane_zt[_{d}]_x4", "4.didi", "cUcsUsiUi", MergeNone, "aarch64_sme_luti2_lane_zt_x4", [IsStreaming, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; + def SVLUTI4_LANE_ZT_X4 : 
Inst<"svluti4_lane_zt[_{d}]_x4", "4.didi", "sUsiUi", MergeNone, "aarch64_sme_luti4_lane_zt_x4", [IsStreaming, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_1>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c new file mode 100644 index 000..5479fa109e839c5 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c @@ -0,0 +1,201 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 +#endif + + + +// CHECK-LABEL: @test_svluti2_lane_zt_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv16i8(i32 0, [[ZN:%.*]], i32 0) +// CHECK-NEXT:[[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT:[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT:[[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT:[[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT:[[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT:[[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT:[[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT:[[TMP8:%.*]] = tail call @llvm.vector.inse
[llvm] [clang] [SME2] Add LUTI2 and LUTI4 quad Builtins and Intrinsics (PR #73317)
@@ -5098,6 +5099,12 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { AArch64::LUTI2_4ZTZI_S})) // Second Immediate must be <= 3: SelectMultiVectorLuti<3>(Node, 4, Opc); + else if (auto Opc = SelectOpcodeFromVT( MDevereau wrote: Yeah, that does seem better. I'm not sure what the implications of allowing i1, i64 and f64 are, though. https://github.com/llvm/llvm-project/pull/73317 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [SME2] Add LUTI2 and LUTI4 quad Builtins and Intrinsics (PR #73317)
https://github.com/MDevereau edited https://github.com/llvm/llvm-project/pull/73317 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [SME2] Add LUTI2 and LUTI4 double Builtins and Intrinsics (PR #73305)
https://github.com/MDevereau updated https://github.com/llvm/llvm-project/pull/73305 >From 5aba2f1d2fe34f721a8e85eef6eecc25cb60851f Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Mon, 20 Nov 2023 15:50:28 + Subject: [PATCH 1/3] [SME2] Add LUTI2 and LUTI4 double Builtins and Intrinsics See https://github.com/ARM-software/acle/pull/217 Patch by: Hassnaa Hamdi --- clang/include/clang/Basic/arm_sme.td | 8 + .../acle_sme2_luti2_lane_zt_x2.c | 153 ++ .../acle_sme2_luti4_lane_zt_x2.c | 153 ++ .../aarch64-sme2-intrinsics/acle_sme2_imm.cpp | 34 llvm/include/llvm/IR/IntrinsicsAArch64.td | 10 ++ .../Target/AArch64/AArch64ISelDAGToDAG.cpp| 63 .../Target/AArch64/AArch64RegisterInfo.cpp| 6 + llvm/lib/Target/AArch64/SMEInstrFormats.td| 1 + .../AArch64/sme2-intrinsics-luti2-lane-x2.ll | 35 .../AArch64/sme2-intrinsics-luti4-lane-x2.ll | 35 10 files changed, 498 insertions(+) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-luti2-lane-x2.ll create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-luti4-lane-x2.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index b5655afdf419ecf..c7b9dbacf071c43 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -298,3 +298,11 @@ multiclass ZAAddSub { defm SVADD : ZAAddSub<"add">; defm SVSUB : ZAAddSub<"sub">; + +// +// lookup table expand two contiguous registers +// +let TargetGuard = "sme2" in { + def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt[_{d}]_x2", "2.dmdm", "cUcsUsiUi", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; + def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt[_{d}]_x2", "2.dmdm", 
"cUcsUsiUi", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c new file mode 100644 index 000..51adc31c6968e7e --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c @@ -0,0 +1,153 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 +#endif + + +// CHECK-LABEL: @test_svluti2_lane_zt_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 0) +// CHECK-NEXT:[[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT:[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT:[[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT:[[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT:ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z23test_svluti2_lane_zt_u8u11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 0) +// CPP-CHECK-NEXT:[[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +//
[llvm] [clang] [SME2] Add LUTI2 and LUTI4 quad Builtins and Intrinsics (PR #73317)
@@ -1666,7 +1674,8 @@ static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef Opcodes) { return 0; break; case SelectTypeKind::FP: -if (EltVT != MVT::f16 && EltVT != MVT::f32 && EltVT != MVT::f64) +if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 && +EltVT != MVT::f64) MDevereau wrote: No, it's not. I'll remove it. https://github.com/llvm/llvm-project/pull/73317 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [SME2] Add LUTI2 and LUTI4 quad Builtins and Intrinsics (PR #73317)
https://github.com/MDevereau updated https://github.com/llvm/llvm-project/pull/73317 >From f5b909e24e3cea49d98b40797880e4329a7a1e4f Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Mon, 20 Nov 2023 15:50:28 + Subject: [PATCH 1/5] [SME2] Add LUTI2 and LUTI4 quad Builtins and Intrinsics See https://github.com/ARM-software/acle/pull/217 Patch by: Hassnaa Hamdi --- clang/include/clang/Basic/arm_sme.td | 8 + .../acle_sme2_luti2_lane_zt_x4.c | 201 ++ .../acle_sme2_luti4_lane_zt_x4.c | 148 + .../aarch64-sme2-intrinsics/acle_sme2_imm.cpp | 31 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 11 + .../Target/AArch64/AArch64ISelDAGToDAG.cpp| 55 - .../Target/AArch64/AArch64RegisterInfo.cpp| 6 + llvm/lib/Target/AArch64/SMEInstrFormats.td| 11 +- .../AArch64/sme2-intrinsics-luti2-lane-x4.ll | 35 +++ .../AArch64/sme2-intrinsics-luti4-lane-x4.ll | 25 +++ 10 files changed, 525 insertions(+), 6 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x4.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-luti2-lane-x4.ll create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-luti4-lane-x4.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index b5655afdf419ecf..53319a57d73fdd2 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -298,3 +298,11 @@ multiclass ZAAddSub { defm SVADD : ZAAddSub<"add">; defm SVSUB : ZAAddSub<"sub">; + +// +// lookup table expand four contiguous registers +// +let TargetGuard = "sme2" in { + def SVLUTI2_LANE_ZT_X4 : Inst<"svluti2_lane_zt[_{d}]_x4", "4.didi", "cUcsUsiUi", MergeNone, "aarch64_sme_luti2_lane_zt_x4", [IsStreaming, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; + def SVLUTI4_LANE_ZT_X4 : 
Inst<"svluti4_lane_zt[_{d}]_x4", "4.didi", "sUsiUi", MergeNone, "aarch64_sme_luti4_lane_zt_x4", [IsStreaming, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_1>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c new file mode 100644 index 000..5479fa109e839c5 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c @@ -0,0 +1,201 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 +#endif + + + +// CHECK-LABEL: @test_svluti2_lane_zt_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv16i8(i32 0, [[ZN:%.*]], i32 0) +// CHECK-NEXT:[[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT:[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT:[[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT:[[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT:[[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT:[[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT:[[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT:[[TMP8:%.*]] = tail call @llvm.vector.inse
[clang] [llvm] [SME2] Add LUTI2 and LUTI4 single Builtins and Intrinsics (PR #73304)
https://github.com/MDevereau updated https://github.com/llvm/llvm-project/pull/73304 >From bca5297fe897edf6faf51ffde3e8fe1baa26b148 Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Mon, 20 Nov 2023 15:50:28 + Subject: [PATCH 1/3] [SME2] Add LUTI2 and LUTI4 single Builtins and Intrinsics See https://github.com/ARM-software/acle/pull/217 Patch by: Hassnaa Hamdi --- clang/include/clang/Basic/arm_sme.td | 8 ++ .../acle_sme2_luti2_lane_zt.c | 96 +++ .../acle_sme2_luti4_lane_zt.c | 95 ++ .../aarch64-sme2-intrinsics/acle_sme2_imm.cpp | 33 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 9 ++ .../Target/AArch64/AArch64ISelDAGToDAG.cpp| 7 +- .../Target/AArch64/AArch64RegisterInfo.cpp| 6 ++ .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 4 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 33 +-- .../AArch64/sme2-intrinsics-luti2-lane.ll | 35 +++ .../AArch64/sme2-intrinsics-luti4-lane.ll | 35 +++ 11 files changed, 349 insertions(+), 12 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt.c create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-luti2-lane.ll create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-luti4-lane.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index b5655afdf419ecf..1174e30cb0885e1 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -298,3 +298,11 @@ multiclass ZAAddSub { defm SVADD : ZAAddSub<"add">; defm SVSUB : ZAAddSub<"sub">; + +// +// lookup table expand one register +// +let TargetGuard = "sme2" in { + def SVLUTI2_LANE_ZT : Inst<"svluti2_lane_zt[_{d}]", "didi", "cUcsUsiUi", MergeNone, "aarch64_sme_luti2_lane_zt", [IsStreaming, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_15>]>; + def SVLUTI4_LANE_ZT : 
Inst<"svluti4_lane_zt[_{d}]", "didi", "cUcsUsiUi", MergeNone, "aarch64_sme_luti4_lane_zt", [IsStreaming, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt.c new file mode 100644 index 000..ebabbfc815c1dfe --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt.c @@ -0,0 +1,96 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + + +// CHECK-LABEL: @test_svluti2_lane_zt_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti2.lane.zt.nxv16i8(i32 0, [[ZN:%.*]], i32 2) +// CHECK-NEXT:ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z23test_svluti2_lane_zt_u8u11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti2.lane.zt.nxv16i8(i32 0, [[ZN:%.*]], i32 2) +// CPP-CHECK-NEXT:ret [[TMP0]] +// +svuint8_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { + return svluti2_lane_zt_u8(0, zn, 2); +} + + +// CHECK-LABEL: @test_svluti2_lane_zt_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti2.lane.zt.nxv16i8(i32 0, [[ZN:%.*]], i32 2) 
+// CHECK-NEXT:ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z23test_svluti2_lane_zt_s8u10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti2.lane.zt.nxv16i8(i32 0, [[ZN:%.*]], i32 2) +// CPP-CHECK-NEXT:ret [[TMP0]] +// +svint8_t test_svluti2_lane_zt_s8(svint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { + return svluti2_lane_zt_s8(0, zn, 2); +} + +// CHECK-LABEL: @test_svluti2_lane_zt_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti2.lane.zt.nxv8i16(i32 0, [[ZN:%.*]], i32 2) +// CHECK-NEXT:ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_u16u12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail
[llvm] [clang] [SME2] Add LUTI2 and LUTI4 double Builtins and Intrinsics (PR #73305)
https://github.com/MDevereau updated https://github.com/llvm/llvm-project/pull/73305 >From 5aba2f1d2fe34f721a8e85eef6eecc25cb60851f Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Mon, 20 Nov 2023 15:50:28 + Subject: [PATCH 1/5] [SME2] Add LUTI2 and LUTI4 double Builtins and Intrinsics See https://github.com/ARM-software/acle/pull/217 Patch by: Hassnaa Hamdi --- clang/include/clang/Basic/arm_sme.td | 8 + .../acle_sme2_luti2_lane_zt_x2.c | 153 ++ .../acle_sme2_luti4_lane_zt_x2.c | 153 ++ .../aarch64-sme2-intrinsics/acle_sme2_imm.cpp | 34 llvm/include/llvm/IR/IntrinsicsAArch64.td | 10 ++ .../Target/AArch64/AArch64ISelDAGToDAG.cpp| 63 .../Target/AArch64/AArch64RegisterInfo.cpp| 6 + llvm/lib/Target/AArch64/SMEInstrFormats.td| 1 + .../AArch64/sme2-intrinsics-luti2-lane-x2.ll | 35 .../AArch64/sme2-intrinsics-luti4-lane-x2.ll | 35 10 files changed, 498 insertions(+) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-luti2-lane-x2.ll create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-luti4-lane-x2.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index b5655afdf419ecf..c7b9dbacf071c43 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -298,3 +298,11 @@ multiclass ZAAddSub { defm SVADD : ZAAddSub<"add">; defm SVSUB : ZAAddSub<"sub">; + +// +// lookup table expand two contiguous registers +// +let TargetGuard = "sme2" in { + def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt[_{d}]_x2", "2.dmdm", "cUcsUsiUi", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; + def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt[_{d}]_x2", "2.dmdm", 
"cUcsUsiUi", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c new file mode 100644 index 000..51adc31c6968e7e --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c @@ -0,0 +1,153 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 +#endif + + +// CHECK-LABEL: @test_svluti2_lane_zt_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 0) +// CHECK-NEXT:[[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT:[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT:[[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT:[[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT:ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z23test_svluti2_lane_zt_u8u11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 0) +// CPP-CHECK-NEXT:[[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +//
[llvm] [clang] [AArch64][SME2] Add ldr_zt, str_zt builtins and intrinsics (PR #72849)
https://github.com/MDevereau updated https://github.com/llvm/llvm-project/pull/72849 >From 3b1d34afb1ae365f48716ae5eb9202a474adf234 Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Mon, 20 Nov 2023 10:49:27 + Subject: [PATCH 1/3] [AArch64][SME2] Add ldr_zt, str_zt builtins and intrinsics --- clang/include/clang/Basic/arm_sme.td | 8 +++ .../acle_sme2_ldr_str_zt.c| 51 +++ .../Target/AArch64/AArch64ISelDAGToDAG.cpp| 8 ++- .../Target/AArch64/AArch64ISelLowering.cpp| 20 llvm/lib/Target/AArch64/AArch64ISelLowering.h | 2 + .../Target/AArch64/AArch64RegisterInfo.cpp| 6 +++ .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 4 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 23 +++-- .../CodeGen/AArch64/sme2-intrinsics-zt0.ll| 27 ++ 9 files changed, 140 insertions(+), 9 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-zt0.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index b5655afdf419ecf..fb3f54ecff95080 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -298,3 +298,11 @@ multiclass ZAAddSub { defm SVADD : ZAAddSub<"add">; defm SVSUB : ZAAddSub<"sub">; + +// +// Spill and fill of ZT0 +// +let TargetGuard = "sme2" in { + def SVLDR_ZT : Inst<"svldr_zt", "viQ", "", MergeNone, "aarch64_sme_ldr_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; + def SVSTR_ZT : Inst<"svstr_zt", "vi%", "", MergeNone, "aarch64_sme_str_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c new file mode 100644 index 000..3d70ded6b469ba1 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c @@ -0,0 +1,51 @@ +// NOTE: Assertions 
have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1 +#else +#define SVE_ACLE_FUNC(A1,A2) A1##A2 +#endif + +// LDR ZT0 + +// CHECK-LABEL: @test_svldr_zt( +// CHECK-NEXT: entry: +// CHECK-NEXT:tail call void @llvm.aarch64.sme.ldr.zt(i32 0, ptr [[BASE:%.*]]) +// CHECK-NEXT:ret void +// +// CPP-CHECK-LABEL: @_Z13test_svldr_ztPKv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:tail call void @llvm.aarch64.sme.ldr.zt(i32 0, ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT:ret void +// +void test_svldr_zt(const void *base) __arm_streaming_compatible __arm_shared_za __arm_preserves_za { + svldr_zt(0, base); +} ; + + +// STR ZT0 + +// CHECK-LABEL: @test_svstr_zt( +// CHECK-NEXT: entry: +// CHECK-NEXT:tail call void @llvm.aarch64.sme.str.zt(i32 0, ptr [[BASE:%.*]]) +// CHECK-NEXT:ret void +// +// CPP-CHECK-LABEL: @_Z13test_svstr_ztPv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:tail call void @llvm.aarch64.sme.str.zt(i32 0, ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT:ret void +// +void test_svstr_zt(void *base) __arm_streaming_compatible __arm_shared_za __arm_preserves_za { + svstr_zt(0, base); +} diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 7617dccdeee397f..abfe14e52509d58 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -326,15 +326,19 @@ class AArch64DAGToDAGISel : public SelectionDAGISel { return false; } - template bool ImmToTile(SDValue N, SDValue &Imm) { + template + bool ImmToTile(SDValue N, SDValue &Imm) { if (auto *CI = dyn_cast(N)) {
[clang] [llvm] [AArch64][SME2] Add ldr_zt, str_zt builtins and intrinsics (PR #72849)
@@ -0,0 +1,51 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s MDevereau wrote: Done. https://github.com/llvm/llvm-project/pull/72849 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64][SME2] Add ldr_zt, str_zt builtins and intrinsics (PR #72849)
@@ -2748,6 +2748,22 @@ AArch64TargetLowering::EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const { return BB; } +MachineBasicBlock *AArch64TargetLowering::EmitZTSpillFill(MachineInstr &MI, + MachineBasicBlock *BB, + bool IsSpill) const { + const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + MachineInstrBuilder MIB; + if (IsSpill) { MDevereau wrote: This isn't quite equivalent as the different methods will implicitly define registers in different manners causing the added tests in this patch to fail the machine-verifier pass. However adding the line `auto Rs = IsSpill ? RegState::Kill : RegState::Define;` in addition to your suggestion works as intended. https://github.com/llvm/llvm-project/pull/72849 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [AArch64][SME2] Add ldr_zt, str_zt builtins and intrinsics (PR #72849)
MDevereau wrote: > It looks like a few other pull requests are changing the same code around > ImmToTile. Might be good to land this smaller patch first so you can rebase > the others and reduce the diffs! The idea was that the changes to `ImmToTile` were small and any of my in-flight PRs could land first. But this one is definitely easiest to get in first. https://github.com/llvm/llvm-project/pull/72849 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [SME2] Add LUTI2 and LUTI4 quad Builtins and Intrinsics (PR #73317)
@@ -0,0 +1,280 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +// CHECK-LABEL: @test_svluti2_lane_zt_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv16i8(i32 0, [[ZN:%.*]], i32 0) +// CHECK-NEXT:[[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT:[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT:[[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT:[[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT:[[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT:[[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT:[[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT:[[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT:ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z23test_svluti2_lane_zt_u8u11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv16i8(i32 0, [[ZN:%.*]], i32 0) +// CPP-CHECK-NEXT:[[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT:[[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT:[[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT:[[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT:[[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT:[[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT:[[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT:[[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT:ret [[TMP8]] +// +svuint8x4_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { MDevereau wrote: I believe the overloaded forms for LUTI have been removed a few days ago as per https://github.com/ARM-software/acle/pull/278/files#diff-516526d4a18101dc85300bc2033d0f86dc46c505b7510a7694baabea851aedfaR11241-R11245, unless I've misread the latest change to the ACLE (I believe things in square brackets represent the overloaded forms?) https://github.com/llvm/llvm-project/pull/73317 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [SME2] Add LUTI2 and LUTI4 quad Builtins and Intrinsics (PR #73317)
@@ -1859,6 +1867,34 @@ void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs, SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode); } +template +void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node, +unsigned NumOutVecs, +unsigned Opc) { + if (ConstantSDNode *Imm = dyn_cast(Node->getOperand(4))) +if (Imm->getZExtValue() > Max) + return; + + SDValue ZtValue; + ImmToTile(Node->getOperand(2), ZtValue); MDevereau wrote: I'm not quite sure what you mean here. Why would `Op2 != 0` crash? I think the added tests, such as [this one](https://github.com/llvm/llvm-project/pull/73317/files/32ce28d2fccda24ea9b223f204ca327133e3d0f9#diff-4626c48918de6a8e9ba8eecc9909d7a6febd2208294852599d4b147f6906656fR188-R189), should demonstrate that `Op2 != 0` is OK. https://github.com/llvm/llvm-project/pull/73317 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [SME2] Add LUTI2 and LUTI4 quad Builtins and Intrinsics (PR #73317)
https://github.com/MDevereau edited https://github.com/llvm/llvm-project/pull/73317 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [AArch64][SME2] Add ldr_zt, str_zt builtins and intrinsics (PR #72849)
https://github.com/MDevereau closed https://github.com/llvm/llvm-project/pull/72849 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Add SME2 builtins for zero { zt0 } (PR #72274)
https://github.com/MDevereau updated https://github.com/llvm/llvm-project/pull/72274 >From 86c61659cf99486965dffe201385b28420e93f41 Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Thu, 9 Nov 2023 16:08:57 + Subject: [PATCH 1/3] Add SME2 builtins for zero { zt0 } Patch by: Kerry McLaughlin kerry.mclaugh...@arm.com --- clang/include/clang/Basic/arm_sme.td | 5 +++ .../acle_sme2_zero_zt.c | 32 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 4 +++ .../Target/AArch64/AArch64ISelLowering.cpp| 26 --- llvm/lib/Target/AArch64/AArch64ISelLowering.h | 4 +-- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 2 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 11 +++ .../AArch64/sme2-intrinsics-zero-zt.ll| 13 8 files changed, 83 insertions(+), 14 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-zero-zt.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 7aae3c832bb1fe20..48afd6431fc8b692 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -321,4 +321,9 @@ let TargetGuard = "sme2" in { let TargetGuard = "sme2" in { def SVLDR_ZT : Inst<"svldr_zt", "viQ", "", MergeNone, "aarch64_sme_ldr_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; def SVSTR_ZT : Inst<"svstr_zt", "vi%", "", MergeNone, "aarch64_sme_str_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; + +// +// Zero ZT0 +// + def SVZERO_ZT : Inst<"svzero_zt", "vi", "", MergeNone, "aarch64_sme_zero_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; } diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c new file mode 100644 index ..4ea26119301cab23 --- /dev/null +++ 
b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c @@ -0,0 +1,32 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1) A1 +#else +#define SVE_ACLE_FUNC(A1) A1 +#endif + +// CHECK-LABEL: @test_svzero_zt( +// CHECK-NEXT: entry: +// CHECK-NEXT:tail call void @llvm.aarch64.sme.zero.zt(i32 0) +// CHECK-NEXT:ret void +// +// CPP-CHECK-LABEL: @_Z14test_svzero_ztv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:tail call void @llvm.aarch64.sme.zero.zt(i32 0) +// CPP-CHECK-NEXT:ret void +// +void test_svzero_zt(void) __arm_streaming_compatible __arm_shared_za __arm_preserves_za { + svzero_zt(0); +} diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 60a8d98f3bc0d262..3c0a07be50607bbc 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3544,6 +3544,10 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sme_ldr_zt : SME_LDR_STR_ZT_Intrinsic; def int_aarch64_sme_str_zt : SME_LDR_STR_ZT_Intrinsic; + // + // Zero ZT0 + // + def int_aarch64_sme_zero_zt : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg>, IntrWriteMem]>; } // SVE2.1 - ZIPQ1, ZIPQ2, UZPQ1, UZPQ2 diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 4379c3fde6f3c5dd..f0f0fe1e807b4be8 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2753,17 +2753,19 @@ AArch64TargetLowering::EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const { return BB; } -MachineBasicBlock *AArch64TargetLowering::EmitZ
[clang] [llvm] Add SME2 builtins for zero { zt0 } (PR #72274)
@@ -305,4 +305,9 @@ defm SVSUB : ZAAddSub<"sub">; let TargetGuard = "sme2" in { def SVLDR_ZT : Inst<"svldr_zt", "viQ", "", MergeNone, "aarch64_sme_ldr_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; def SVSTR_ZT : Inst<"svstr_zt", "vi%", "", MergeNone, "aarch64_sme_str_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; + +// +// Zero ZT0 +// + def SVZERO_ZT : Inst<"svzero_zt", "vi", "", MergeNone, "aarch64_sme_zero_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; MDevereau wrote: Removed. https://github.com/llvm/llvm-project/pull/72274 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] Add SME2 builtins for zero { zt0 } (PR #72274)
@@ -2746,19 +2746,25 @@ AArch64TargetLowering::EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const { return BB; } -MachineBasicBlock *AArch64TargetLowering::EmitZTSpillFill(MachineInstr &MI, - MachineBasicBlock *BB, - bool IsSpill) const { +MachineBasicBlock *AArch64TargetLowering::EmitZTInstr(MachineInstr &MI, + MachineBasicBlock *BB, + unsigned Opcode, + bool IsZTDest) const { const TargetInstrInfo *TII = Subtarget->getInstrInfo(); MachineInstrBuilder MIB; - if (IsSpill) { -MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::STR_TX)); -MIB.addReg(MI.getOperand(0).getReg()); - } else -MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::LDR_TX), + + if (IsZTDest) +MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opcode), MI.getOperand(0).getReg()); - MIB.add(MI.getOperand(1)); // Base - MI.eraseFromParent(); // The pseudo is gone now. + else { +MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opcode)); +MIB.addReg(MI.getOperand(0).getReg()); + } + + for (unsigned I = 1; I < MI.getNumOperands(); ++I) +MIB.add(MI.getOperand(I)); MDevereau wrote: I've rebased this patch, which now includes a refactor of this function from 5fe7ae8. Your suggestion is still an improvement on that, though, so I've added it. https://github.com/llvm/llvm-project/pull/72274 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] Add SME2 builtins for zero { zt0 } (PR #72274)
@@ -2746,19 +2746,25 @@ AArch64TargetLowering::EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const { return BB; } -MachineBasicBlock *AArch64TargetLowering::EmitZTSpillFill(MachineInstr &MI, - MachineBasicBlock *BB, - bool IsSpill) const { +MachineBasicBlock *AArch64TargetLowering::EmitZTInstr(MachineInstr &MI, + MachineBasicBlock *BB, + unsigned Opcode, + bool IsZTDest) const { MDevereau wrote: Done. https://github.com/llvm/llvm-project/pull/72274 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Add SME2 builtins for zero { zt0 } (PR #72274)
https://github.com/MDevereau updated https://github.com/llvm/llvm-project/pull/72274 >From 86c61659cf99486965dffe201385b28420e93f41 Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Thu, 9 Nov 2023 16:08:57 + Subject: [PATCH 1/4] Add SME2 builtins for zero { zt0 } Patch by: Kerry McLaughlin kerry.mclaugh...@arm.com --- clang/include/clang/Basic/arm_sme.td | 5 +++ .../acle_sme2_zero_zt.c | 32 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 4 +++ .../Target/AArch64/AArch64ISelLowering.cpp| 26 --- llvm/lib/Target/AArch64/AArch64ISelLowering.h | 4 +-- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 2 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 11 +++ .../AArch64/sme2-intrinsics-zero-zt.ll| 13 8 files changed, 83 insertions(+), 14 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-zero-zt.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 7aae3c832bb1fe2..48afd6431fc8b69 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -321,4 +321,9 @@ let TargetGuard = "sme2" in { let TargetGuard = "sme2" in { def SVLDR_ZT : Inst<"svldr_zt", "viQ", "", MergeNone, "aarch64_sme_ldr_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; def SVSTR_ZT : Inst<"svstr_zt", "vi%", "", MergeNone, "aarch64_sme_str_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; + +// +// Zero ZT0 +// + def SVZERO_ZT : Inst<"svzero_zt", "vi", "", MergeNone, "aarch64_sme_zero_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; } diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c new file mode 100644 index 000..4ea26119301cab2 --- /dev/null +++ 
b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c @@ -0,0 +1,32 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1) A1 +#else +#define SVE_ACLE_FUNC(A1) A1 +#endif + +// CHECK-LABEL: @test_svzero_zt( +// CHECK-NEXT: entry: +// CHECK-NEXT:tail call void @llvm.aarch64.sme.zero.zt(i32 0) +// CHECK-NEXT:ret void +// +// CPP-CHECK-LABEL: @_Z14test_svzero_ztv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:tail call void @llvm.aarch64.sme.zero.zt(i32 0) +// CPP-CHECK-NEXT:ret void +// +void test_svzero_zt(void) __arm_streaming_compatible __arm_shared_za __arm_preserves_za { + svzero_zt(0); +} diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 60a8d98f3bc0d26..3c0a07be50607bb 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3544,6 +3544,10 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sme_ldr_zt : SME_LDR_STR_ZT_Intrinsic; def int_aarch64_sme_str_zt : SME_LDR_STR_ZT_Intrinsic; + // + // Zero ZT0 + // + def int_aarch64_sme_zero_zt : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrWriteMem]>; } // SVE2.1 - ZIPQ1, ZIPQ2, UZPQ1, UZPQ2 diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 4379c3fde6f3c5d..f0f0fe1e807b4be 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2753,17 +2753,19 @@ AArch64TargetLowering::EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const { return BB; } -MachineBasicBlock *AArch64TargetLowering::EmitZTSpillFi
[clang] [llvm] Add SME2 builtins for zero { zt0 } (PR #72274)
@@ -0,0 +1,23 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +// CHECK-LABEL: @test_svzero_zt( +// CHECK-NEXT: entry: +// CHECK-NEXT:tail call void @llvm.aarch64.sme.zero.zt(i32 0) +// CHECK-NEXT:ret void +// +// CPP-CHECK-LABEL: @_Z14test_svzero_ztv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:tail call void @llvm.aarch64.sme.zero.zt(i32 0) +// CPP-CHECK-NEXT:ret void +// +void test_svzero_zt(void) __arm_streaming_compatible __arm_shared_za __arm_preserves_za { MDevereau wrote: Removed it. I expected this to fail when I tested it locally, but it didn't, so what you say is correct. https://github.com/llvm/llvm-project/pull/72274 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] Add SME2 builtins for zero { zt0 } (PR #72274)
https://github.com/MDevereau closed https://github.com/llvm/llvm-project/pull/72274 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits