[clang] 3d6cab2 - [AArch64][SVE] Add bfloat16 support to load intrinsics
Author: Kerry McLaughlin Date: 2020-06-24T10:32:19+01:00 New Revision: 3d6cab271c7cecf105b77834d837ccd4406700d7 URL: https://github.com/llvm/llvm-project/commit/3d6cab271c7cecf105b77834d837ccd4406700d7 DIFF: https://github.com/llvm/llvm-project/commit/3d6cab271c7cecf105b77834d837ccd4406700d7.diff LOG: [AArch64][SVE] Add bfloat16 support to load intrinsics Summary: Bfloat16 support added for the following intrinsics: - LD1 - LD1RQ - LDNT1 - LDNF1 - LDFF1 Reviewers: sdesmalen, c-rhodes, efriedma, stuij, fpetrogalli, david-arm Reviewed By: fpetrogalli Subscribers: tschuett, kristof.beyls, hiraditya, rkruppe, psnobl, danielkiss, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D82298 Added: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1-bfloat.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1rq-bfloat.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldff1-bfloat.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnf1-bfloat.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnt1-bfloat.c Modified: clang/include/clang/Basic/arm_sve.td llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-imm.ll llvm/test/CodeGen/AArch64/sve-intrinsics-ld1-addressing-mode-reg-reg.ll llvm/test/CodeGen/AArch64/sve-intrinsics-ld1.ll llvm/test/CodeGen/AArch64/sve-intrinsics-loads-ff.ll llvm/test/CodeGen/AArch64/sve-intrinsics-loads-nf.ll llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index c55af44bc5ad..091c9e230b51 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -271,6 +271,11 @@ def SVLD1UH : MInst<"svld1uh_{d}", "dPX", "ilUiUl", [IsLoad, IsZExtRetu def SVLD1SW : MInst<"svld1sw_{d}", "dPU", "lUl", [IsLoad], MemEltTyInt32, "aarch64_sve_ld1">; def SVLD1UW : MInst<"svld1uw_{d}", "dPY", "lUl", [IsLoad, IsZExtReturn], MemEltTyInt32, "aarch64_sve_ld1">; +let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in { + def SVLD1_BF : MInst<"svld1[_{2}]", "dPc", "b", [IsLoad], MemEltTyDefault, "aarch64_sve_ld1">; + def SVLD1_VNUM_BF : MInst<"svld1_vnum[_{2}]", "dPcl", "b", [IsLoad], MemEltTyDefault, "aarch64_sve_ld1">; +} + // Load one vector (scalar base, VL displacement) def SVLD1_VNUM : MInst<"svld1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfd", [IsLoad], MemEltTyDefault, "aarch64_sve_ld1">; def SVLD1SB_VNUM : MInst<"svld1sb_vnum_{d}", "dPSl", "silUsUiUl", [IsLoad], MemEltTyInt8,"aarch64_sve_ld1">; @@ -376,6 +381,11 @@ def SVLDFF1UH_VNUM : MInst<"svldff1uh_vnum_{d}", "dPXl", "ilUiUl", [IsL def SVLDFF1SW_VNUM : MInst<"svldff1sw_vnum_{d}", "dPUl", "lUl", [IsLoad], MemEltTyInt32, "aarch64_sve_ldff1">; def SVLDFF1UW_VNUM : MInst<"svldff1uw_vnum_{d}", "dPYl", "lUl", [IsLoad, IsZExtReturn], MemEltTyInt32, "aarch64_sve_ldff1">; +let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in { + def SVLDFF1_BF : MInst<"svldff1[_{2}]", "dPc", "b", [IsLoad], MemEltTyDefault, "aarch64_sve_ldff1">; + def SVLDFF1_VNUM_BF : MInst<"svldff1_vnum[_{2}]", "dPcl", "b", [IsLoad], MemEltTyDefault, "aarch64_sve_ldff1">; +} + // First-faulting load one vector (vector base) def SVLDFF1_GATHER_BASES_U : MInst<"svldff1_gather[_{2}base]_{d}", "dPu", "ilUiUlfd", [IsGatherLoad], MemEltTyDefault, "aarch64_sve_ldff1_gather_scalar_offset">; def SVLDFF1SB_GATHER_BASES_U : MInst<"svldff1sb_gather[_{2}base]_{d}", "dPu", "ilUiUl", [IsGatherLoad], MemEltTyInt8, "aarch64_sve_ldff1_gather_scalar_offset">; @@ -471,15 +481,29 @@ def SVLDNF1UH_VNUM : MInst<"svldnf1uh_vnum_{d}", "dPXl", "ilUiUl", [IsL def SVLDNF1SW_VNUM : MInst<"svldnf1sw_vnum_{d}", "dPUl", "lUl", [IsLoad], MemEltTyInt32, "aarch64_sve_ldnf1">; def SVLDNF1UW_VNUM : MInst<"svldnf1uw_vnum_{d}", "dPYl", "lUl", [IsLoad, IsZExtReturn], MemEltTyInt32, "aarch64_sve_ldnf1">; +let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in { + def SVLDNF1_BF : MInst<"svldnf1[_{2}]", "dPc", "b", [IsLoad], MemEltTyDefault, "aarch64_sve_ldnf1">; + def SVLDNF1_VNUM_BF : MInst<"svldnf1_vnum[_{2}]", "dPcl", "b", [IsLoad], MemEltTyDefault, "aarch64_sve_ldnf1">; +} + // Load one vector, unextended load, non-temporal (scalar base) def SVLDNT1 : MInst<"svldnt1[_{2}]", "dPc", "csilUcUsUiUlhfd", [IsLoad], MemEltTyDefault, "aarch64_sve_ldnt1">; // Load one vector, unextended load, non-temporal (scalar base, VL displ
[clang] edcfef8 - [AArch64][SVE] Add bfloat16 support to store intrinsics
Author: Kerry McLaughlin Date: 2020-06-26T11:05:56+01:00 New Revision: edcfef8fee134cf98e0e812a6569c4900045d31c URL: https://github.com/llvm/llvm-project/commit/edcfef8fee134cf98e0e812a6569c4900045d31c DIFF: https://github.com/llvm/llvm-project/commit/edcfef8fee134cf98e0e812a6569c4900045d31c.diff LOG: [AArch64][SVE] Add bfloat16 support to store intrinsics Summary: Bfloat16 support added for the following intrinsics: - ST1 - STNT1 Reviewers: sdesmalen, c-rhodes, fpetrogalli, efriedma, stuij, david-arm Reviewed By: fpetrogalli Subscribers: tschuett, kristof.beyls, hiraditya, rkruppe, psnobl, danielkiss, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D82448 Added: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1-bfloat.c clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_stnt1-bfloat.c Modified: clang/include/clang/Basic/arm_sve.td llvm/lib/Target/AArch64/AArch64ISelLowering.cpp llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td llvm/test/CodeGen/AArch64/sve-intrinsics-st1-addressing-mode-reg-imm.ll llvm/test/CodeGen/AArch64/sve-intrinsics-st1-addressing-mode-reg-reg.ll llvm/test/CodeGen/AArch64/sve-intrinsics-st1.ll llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll llvm/test/CodeGen/AArch64/sve-pred-contiguous-ldst-addressing-mode-reg-imm.ll llvm/test/CodeGen/AArch64/sve-pred-contiguous-ldst-addressing-mode-reg-reg.ll llvm/test/CodeGen/AArch64/sve-pred-non-temporal-ldst-addressing-mode-reg-imm.ll llvm/test/CodeGen/AArch64/sve-pred-non-temporal-ldst-addressing-mode-reg-reg.ll Removed: diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 59adbeaf645f..e2aab5f2c344 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -563,6 +563,11 @@ def SVST1H_VNUM_U : MInst<"svst1h_vnum[_{d}]", "vPFld", "UiUl",[IsSt def SVST1W_VNUM_S : MInst<"svst1w_vnum[_{d}]", "vPCld", "l", [IsStore], MemEltTyInt32, "aarch64_sve_st1">; def SVST1W_VNUM_U : MInst<"svst1w_vnum[_{d}]", "vPGld", "Ul", [IsStore], MemEltTyInt32, "aarch64_sve_st1">; +let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in { + def SVST1_BF : MInst<"svst1[_{d}]", "vPpd", "b", [IsStore], MemEltTyDefault, "aarch64_sve_st1">; + def SVST1_VNUM_BF : MInst<"svst1_vnum[_{d}]", "vPpld", "b", [IsStore], MemEltTyDefault, "aarch64_sve_st1">; +} + // Store one vector (vector base) def SVST1_SCATTER_BASES_U : MInst<"svst1_scatter[_{2}base_{d}]", "vPud", "ilUiUlfd", [IsScatterStore], MemEltTyDefault, "aarch64_sve_st1_scatter_scalar_offset">; def SVST1B_SCATTER_BASES_U: MInst<"svst1b_scatter[_{2}base_{d}]", "vPud", "ilUiUl", [IsScatterStore], MemEltTyInt8, "aarch64_sve_st1_scatter_scalar_offset">; @@ -654,6 +659,11 @@ def SVSTNT1 : MInst<"svstnt1[_{d}]", "vPpd", "csilUcUsUiUlhfd", [IsStore], MemEl // Store one vector, with no truncation, non-temporal (scalar base, VL displacement) def SVSTNT1_VNUM : MInst<"svstnt1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfd", [IsStore], MemEltTyDefault, "aarch64_sve_stnt1">; +let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in { + def SVSTNT1_BF : MInst<"svstnt1[_{d}]", "vPpd", "b", [IsStore], MemEltTyDefault, "aarch64_sve_stnt1">; + def SVSTNT1_VNUM_BF : MInst<"svstnt1_vnum[_{d}]", "vPpld", "b", [IsStore], MemEltTyDefault, "aarch64_sve_stnt1">; +} + // Prefetches diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1-bfloat.c new file mode 100644 index ..b3756c8f7f90 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1-bfloat.c @@ -0,0 +1,34 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#def
[clang] [Clang][SME2] Add multi-vector add/sub builtins (PR #69725)
@@ -257,7 +257,7 @@ class ImmCheck { } class Inst ft, list ch, MemEltType met> { + list ft, list ch, MemEltType met = MemEltTyDefault> { kmclaughlin-arm wrote: Hi @dtemirbulatov, this change is just to set the default MemEltType to MemEltTyDefault, so that we don't have to provide this in every builtin definition. https://github.com/llvm/llvm-project/pull/69725 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Add multi-vector add/sub builtins (PR #69725)
https://github.com/kmclaughlin-arm closed https://github.com/llvm/llvm-project/pull/69725 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Add single and multi min and max builtins (PR #71688)
@@ -0,0 +1,1562 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s kmclaughlin-arm wrote: I think the `-D__ARM_FEATURE` flags can be removed as you're also passing `-target-feature` https://github.com/llvm/llvm-project/pull/71688 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Add single and multi min and max builtins (PR #71688)
@@ -296,5 +296,28 @@ multiclass ZAAddSub { } } + +// SME2 - MIN, MAX + +multiclass MinMaxIntr { + def SVS # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "csil", MergeNone, "aarch64_sve_s" # i # zm # "_" # mul, [IsStreaming], []>; + def SVU # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "UcUsUiUl", MergeNone, "aarch64_sve_u" # i # zm # "_" # mul, [IsStreaming], []>; + def SVF # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "hfd", MergeNone, "aarch64_sve_f" # i # zm # "_" # mul, [IsStreaming], []>; +} + +let TargetGuard = "sme2" in { + // SMAX / UMAX / FMAX + defm MAX_SINGLE_X2 : MinMaxIntr<"max", "_single", "x2", "22d">; + defm MAX_MULTI_X2 : MinMaxIntr<"max", "","x2", "222">; + defm MAX_SINGLE_X4 : MinMaxIntr<"max", "_single", "x4", "44d">; + defm MAX_MULTI_X4 : MinMaxIntr<"max", "","x4", "444">; + + // SMIN / UMIN / FMIN + defm MIN_SINGLE_X2 : MinMaxIntr<"min", "_single", "x2", "22d">; + defm MIN_MULTI_X2 : MinMaxIntr<"min", "","x2", "222">; + defm MIN_SINGLE_X4 : MinMaxIntr<"min", "_single", "x4", "44d">; + defm MIN_MULTI_X4 : MinMaxIntr<"min", "","x4", "444">; kmclaughlin-arm wrote: Hi Sam, please can you add the definitions for any builtins that operate on vectors only (and not ZA) to arm_sve.td? The reason for this is that these builtins could possibly become SVE instructions in the future. The LLVM IR intrinsic names will also begin with "aarch64_sve" for this reason. https://github.com/llvm/llvm-project/pull/71688 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Add single and multi min and max builtins (PR #71688)
@@ -0,0 +1,1562 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1 +#else +#define SVE_ACLE_FUNC(A1,A2) A1##A2 +#endif + +// Single, x2 + +// CHECK-LABEL: @test_svmax_single_s8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN:%.*]], i64 0) +// CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN]], i64 16) +// CHECK-NEXT:[[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT:[[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT:[[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) +// CHECK-NEXT:[[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT:[[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT:ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z23test_svmax_single_s8_x210svint8x2_tu10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN:%.*]], i64 0) +// CPP-CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN]], i64 16) +// CPP-CHECK-NEXT:[[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT:[[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT:[[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT:[[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT:[[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT:ret [[TMP6]] +// +svint8x2_t test_svmax_single_s8_x2(svint8x2_t zdn, svint8_t zm) { kmclaughlin-arm wrote: Please can you add the `__arm_streaming` attribute to these tests? https://github.com/llvm/llvm-project/pull/71688 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Add single and multi min and max builtins (PR #71688)
https://github.com/kmclaughlin-arm approved this pull request. Thanks @SamTebbs33, I just have one small suggestion but otherwise this LGTM. https://github.com/llvm/llvm-project/pull/71688 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Add single and multi min and max builtins (PR #71688)
https://github.com/kmclaughlin-arm edited https://github.com/llvm/llvm-project/pull/71688 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Add single and multi min and max builtins (PR #71688)
@@ -1987,8 +1987,26 @@ defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUl", "aarch64_sve_revd">; // SME intrinsics which operate only on vectors and do not require ZA should be added here, // as they could possibly become SVE instructions in the future. +multiclass MinMaxIntr { + def SVS # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "csil", MergeNone, "aarch64_sve_s" # i # zm # "_" # mul, [IsStreaming], []>; + def SVU # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "UcUsUiUl", MergeNone, "aarch64_sve_u" # i # zm # "_" # mul, [IsStreaming], []>; + def SVF # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "hfd", MergeNone, "aarch64_sve_f" # i # zm # "_" # mul, [IsStreaming], []>; +} + let TargetGuard = "sme2" in { // == ADD (vectors) == def SVADD_SINGLE_X2 : SInst<"svadd[_single_{d}_x2]", "22d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x2", [IsStreaming], []>; def SVADD_SINGLE_X4 : SInst<"svadd[_single_{d}_x4]", "44d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x4", [IsStreaming], []>; + +// == SMAX / UMAX / FMAX == + defm MAX_SINGLE_X2 : MinMaxIntr<"max", "_single", "x2", "22d">; + defm MAX_MULTI_X2 : MinMaxIntr<"max", "","x2", "222">; + defm MAX_SINGLE_X4 : MinMaxIntr<"max", "_single", "x4", "44d">; + defm MAX_MULTI_X4 : MinMaxIntr<"max", "","x4", "444">; + +// == SMIN / UMIN / FMIN == + defm MIN_SINGLE_X2 : MinMaxIntr<"min", "_single", "x2", "22d">; + defm MIN_MULTI_X2 : MinMaxIntr<"min", "","x2", "222">; + defm MIN_SINGLE_X4 : MinMaxIntr<"min", "_single", "x4", "44d">; + defm MIN_MULTI_X4 : MinMaxIntr<"min", "","x4", "444">; kmclaughlin-arm wrote: This is just a suggestion, but I think it could be worth adding the definitions that use a multiclass separately from this `let TargetGuard = "sme2"` block. I find it quite useful to have the multiclasses close to the definitions that use them when reading how the builtins are defined, and when more are added here this could become harder to follow. https://github.com/llvm/llvm-project/pull/71688 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Add single and multi min/max by vector builtins (PR #71707)
@@ -0,0 +1,444 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED,A5) A1##A3##A5 kmclaughlin-arm wrote: The last few args are not needed by any these tests, so I think if you can remove them you won't need to add `,,,` when using `SVE_ACLE_FUNC` in the tests below, i.e. ``` #define SVE_ACLE_FUNC(A1,A2_UNUSED) A1 #else #define SVE_ACLE_FUNC(A1,A2) A1##A2 #endif ``` https://github.com/llvm/llvm-project/pull/71707 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Add SME2 builtins for pfalse and ptrue (PR #71953)
@@ -1981,6 +1979,11 @@ def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sv defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUl", "aarch64_sve_revd">; } +let TargetGuard = "sve2p1|sme2" in { + def SVPTRUE_COUNT : SInst<"svptrue_{d}", "}v", "QcQsQiQl", MergeNone, "aarch64_sve_ptrue_{d}", [IsOverloadNone], []>; + def SVPFALSE_COUNT_ALIAS : SInst<"svpfalse_c", "}v", "", MergeNone, "", [IsOverloadNone]>; kmclaughlin-arm wrote: The streaming mode attributes can now be added to these, I believe they are IsStreaming for ptrue and IsStreamingCompatible for pfalse. https://github.com/llvm/llvm-project/pull/71953 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Add SME2 builtins for pfalse and ptrue (PR #71953)
@@ -0,0 +1,34 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +#include + +// CHECK-LABEL: @test_svptrue_c8_attr( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() +// CHECK-NEXT:ret target("aarch64.svcount") [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z20test_svptrue_c8_attrv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() +// CPP-CHECK-NEXT:ret target("aarch64.svcount") [[TMP0]] +// +svcount_t test_svptrue_c8_attr(void) __arm_streaming { kmclaughlin-arm wrote: There are already tests for these builtins in `acle_sve2p1_ptrue.c` & `acle_sve2p1_pfalse.c`, so I think you can just add more RUN lines to these for SME2. https://github.com/llvm/llvm-project/pull/71953 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Add outer product and accumulate/subtract builtins (PR #71176)
https://github.com/kmclaughlin-arm updated https://github.com/llvm/llvm-project/pull/71176 >From 7ff0f13bdf5f81681145d63843c66a27e77ecc3b Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin Date: Thu, 2 Nov 2023 17:02:32 + Subject: [PATCH] [Clang][SME2] Add outer product and accumulate/subtract builtins Adds the following SME2 builtins: - svmop(a|s)_za32, - svbmop(a|s)_za32 See https://github.com/ARM-software/acle/pull/217 --- clang/include/clang/Basic/arm_sme.td | 16 ++ .../aarch64-sme2-intrinsics/acle_sme2_mop.c | 170 ++ .../aarch64-sme2-intrinsics/acle_sme2_imm.cpp | 18 ++ 3 files changed, 204 insertions(+) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mop.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index b5655afdf419ecf..1f88b39468105f2 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -298,3 +298,19 @@ multiclass ZAAddSub { defm SVADD : ZAAddSub<"add">; defm SVSUB : ZAAddSub<"sub">; + +// +// Outer produce and accumulate/subtract +// + +let TargetGuard = "sme2" in { + def SVSMOPA : Inst<"svmopa_za32[_{d}]_m", "viPPdd", "s", MergeNone, "aarch64_sme_smopa_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; + def SVUSMOPA : Inst<"svmopa_za32[_{d}]_m", "viPPdd", "Us", MergeNone, "aarch64_sme_umopa_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; + + def SVSMOPS : Inst<"svmops_za32[_{d}]_m", "viPPdd", "s", MergeNone, "aarch64_sme_smops_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; + def SVUSMOPS : Inst<"svmops_za32[_{d}]_m", "viPPdd", "Us", MergeNone, "aarch64_sme_umops_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; + + def SVBMOPA : Inst<"svbmopa_za32[_{d}]_m", "viPPdd", "iUi", MergeNone, "aarch64_sme_bmopa_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; + + def SVBMOPS : Inst<"svbmops_za32[_{d}]_m", "viPPdd", "iUi", MergeNone, "aarch64_sme_bmops_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mop.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mop.c new file mode 100644 index 000..bb804a523c449d3 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mop.c @@ -0,0 +1,170 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +// MOPA + +// CHECK-LABEL: @test_svmopa_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CHECK-NEXT:tail call void @llvm.aarch64.sme.smopa.za32.nxv8i16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT:ret void +// +// CPP-CHECK-LABEL: @_Z15test_svmopa_s16u10__SVBool_tS_u11__SVInt16_tS0_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CPP-CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CPP-CHECK-NEXT:tail call void @llvm.aarch64.sme.smopa.za32.nxv8i16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT:ret void +// +void test_svmopa_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) { + SVE_ACLE_FUNC(svmopa_za32,_s16,_m,)(3, pn, p
[clang] [Clang][SME2] Add outer product and accumulate/subtract builtins (PR #71176)
https://github.com/kmclaughlin-arm updated https://github.com/llvm/llvm-project/pull/71176 >From 7ff0f13bdf5f81681145d63843c66a27e77ecc3b Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin Date: Thu, 2 Nov 2023 17:02:32 + Subject: [PATCH 1/2] [Clang][SME2] Add outer product and accumulate/subtract builtins Adds the following SME2 builtins: - svmop(a|s)_za32, - svbmop(a|s)_za32 See https://github.com/ARM-software/acle/pull/217 --- clang/include/clang/Basic/arm_sme.td | 16 ++ .../aarch64-sme2-intrinsics/acle_sme2_mop.c | 170 ++ .../aarch64-sme2-intrinsics/acle_sme2_imm.cpp | 18 ++ 3 files changed, 204 insertions(+) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mop.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index b5655afdf419ecf..1f88b39468105f2 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -298,3 +298,19 @@ multiclass ZAAddSub { defm SVADD : ZAAddSub<"add">; defm SVSUB : ZAAddSub<"sub">; + +// +// Outer produce and accumulate/subtract +// + +let TargetGuard = "sme2" in { + def SVSMOPA : Inst<"svmopa_za32[_{d}]_m", "viPPdd", "s", MergeNone, "aarch64_sme_smopa_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; + def SVUSMOPA : Inst<"svmopa_za32[_{d}]_m", "viPPdd", "Us", MergeNone, "aarch64_sme_umopa_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; + + def SVSMOPS : Inst<"svmops_za32[_{d}]_m", "viPPdd", "s", MergeNone, "aarch64_sme_smops_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; + def SVUSMOPS : Inst<"svmops_za32[_{d}]_m", "viPPdd", "Us", MergeNone, "aarch64_sme_umops_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; + + def SVBMOPA : Inst<"svbmopa_za32[_{d}]_m", "viPPdd", "iUi", MergeNone, "aarch64_sme_bmopa_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; + + def SVBMOPS : Inst<"svbmops_za32[_{d}]_m", "viPPdd", "iUi", MergeNone, "aarch64_sme_bmops_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mop.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mop.c new file mode 100644 index 000..bb804a523c449d3 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mop.c @@ -0,0 +1,170 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +// MOPA + +// CHECK-LABEL: @test_svmopa_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CHECK-NEXT:tail call void @llvm.aarch64.sme.smopa.za32.nxv8i16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT:ret void +// +// CPP-CHECK-LABEL: @_Z15test_svmopa_s16u10__SVBool_tS_u11__SVInt16_tS0_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CPP-CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CPP-CHECK-NEXT:tail call void @llvm.aarch64.sme.smopa.za32.nxv8i16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT:ret void +// +void test_svmopa_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) { + SVE_ACLE_FUNC(svmopa_za32,_s16,_m,)(3, p
[clang] [llvm] [Clang][SME2] Add builtins for moving multi-vectors to/from ZA (PR #71191)
https://github.com/kmclaughlin-arm edited https://github.com/llvm/llvm-project/pull/71191 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME2] Add multi-vector builtins for cvt (PR #74450)
https://github.com/kmclaughlin-arm approved this pull request. LGTM! https://github.com/llvm/llvm-project/pull/74450 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SVE2p1]Add svboolx2 and svboolx4 types for svcreate, svget, s… (PR #74594)
@@ -1316,6 +1321,13 @@ def SVSET_3_BF16 : SInst<"svset3[_{d}]", "33id", "b", MergeNone, "", [IsTupleSet def SVSET_4_BF16 : SInst<"svset4[_{d}]", "44id", "b", MergeNone, "", [IsTupleSet], [ImmCheck<1, ImmCheck0_3>]>; } +let TargetGuard = "sve2p1" in { + def SVGET_2_B : SInst<"svget2[_{d}]", "d2i", "Pc", MergeNone, "", [IsTupleGet], [ImmCheck<1, ImmCheck0_1>]>; + def SVGET_4_B : SInst<"svget4[_{d}]", "d4i", "Pc", MergeNone, "", [IsTupleGet], [ImmCheck<1, ImmCheck0_3>]>; + + def SVSET_2_B : SInst<"svset2[_{d}]", "22id", "Pc", MergeNone, "", [IsTupleSet], [ImmCheck<1, ImmCheck0_1>]>; + def SVSET_4_B : SInst<"svset4[_{d}]", "44id", "Pc", MergeNone, "", [IsTupleSet], [ImmCheck<1, ImmCheck0_3>]>; kmclaughlin-arm wrote: Can you please add some tests for the ImmChecks used by svget/svset to acle_sve2p1_imm.cpp? https://github.com/llvm/llvm-project/pull/74594 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SVE2p1]Add svboolx2 and svboolx4 types for svcreate, svget, s… (PR #74594)
@@ -1316,6 +1321,13 @@ def SVSET_3_BF16 : SInst<"svset3[_{d}]", "33id", "b", MergeNone, "", [IsTupleSet def SVSET_4_BF16 : SInst<"svset4[_{d}]", "44id", "b", MergeNone, "", [IsTupleSet], [ImmCheck<1, ImmCheck0_3>]>; } +let TargetGuard = "sve2p1" in { + def SVGET_2_B : SInst<"svget2[_{d}]", "d2i", "Pc", MergeNone, "", [IsTupleGet], [ImmCheck<1, ImmCheck0_1>]>; + def SVGET_4_B : SInst<"svget4[_{d}]", "d4i", "Pc", MergeNone, "", [IsTupleGet], [ImmCheck<1, ImmCheck0_3>]>; + + def SVSET_2_B : SInst<"svset2[_{d}]", "22id", "Pc", MergeNone, "", [IsTupleSet], [ImmCheck<1, ImmCheck0_1>]>; + def SVSET_4_B : SInst<"svset4[_{d}]", "44id", "Pc", MergeNone, "", [IsTupleSet], [ImmCheck<1, ImmCheck0_3>]>; +} kmclaughlin-arm wrote: I think these builtins should also have the IsStreamingCompatible flag. https://github.com/llvm/llvm-project/pull/74594 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME2] Add builtins to cast svbool from/to svcount. (PR #74720)
@@ -2130,6 +2130,9 @@ let TargetGuard = "sme2" in { def SVURSHL_X2 : SInst<"svrshl[_{d}_x2]", "222", "UcUsUiUl", MergeNone, "aarch64_sve_urshl_x2", [IsStreaming], []>; def SVSRSHL_X4 : SInst<"svrshl[_{d}_x4]", "444", "csil", MergeNone, "aarch64_sve_srshl_x4", [IsStreaming], []>; def SVURSHL_X4 : SInst<"svrshl[_{d}_x4]", "444", "UcUsUiUl", MergeNone, "aarch64_sve_urshl_x4", [IsStreaming], []>; + + def REINTERPRET_SVBOOL_TO_SVCOUNT : Inst<"svreinterpret[_c]", "}P", "Pc", MergeNone, "", [IsStreamingCompatible], [], MemEltTyDefault>; + def REINTERPRET_SVCOUNT_TO_SVBOOL : Inst<"svreinterpret[_b]", "P}", "Pc", MergeNone, "", [IsStreamingCompatible], [], MemEltTyDefault>; kmclaughlin-arm wrote: I believe the Inst class uses a default MemEltType value of `MemEltTyDefault`, so you should be able to remove it from these definitions. https://github.com/llvm/llvm-project/pull/74720 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME2] Add builtins to cast svbool from/to svcount. (PR #74720)
https://github.com/kmclaughlin-arm approved this pull request. Thank you @dtemirbulatov, LGTM! https://github.com/llvm/llvm-project/pull/74720 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SVE2p1]Add svboolx2 and svboolx4 types for svcreate, svget, s… (PR #74594)
https://github.com/kmclaughlin-arm edited https://github.com/llvm/llvm-project/pull/74594 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SVE2p1]Add svboolx2 and svboolx4 types for svcreate, svget, s… (PR #74594)
@@ -167,3 +167,23 @@ void test_svpmov_lane(){ zn_u32 = svpmov_lane_u32_m(zn_u32, pn, 5); // expected-error {{argument value 5 is outside the valid range [1, 3]}} zn_u64 = svpmov_lane_u64_m(zn_u64, pn, 8); // expected-error {{argument value 8 is outside the valid range [1, 7]}} } + +__attribute__((target("+sve2p1"))) +void test_svget_b(uint64_t idx, svboolx2_t tuple2, svboolx4_t tuple4){ + svbool_t res; + + svset2(tuple2, -1, res); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + svset2(tuple2, 2, res); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + svset4(tuple4, -1, res); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svset4(tuple4, 4, res); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + + res = svget2(tuple2, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + res = svget2(tuple2, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + res = svget4(tuple4, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + res = svget4(tuple4, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} kmclaughlin-arm wrote: I don't think you need to use `res =` here, you can just call svget similar to svset above https://github.com/llvm/llvm-project/pull/74594 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SVE2p1]Add svboolx2 and svboolx4 types for svcreate, svget, s… (PR #74594)
@@ -167,3 +167,23 @@ void test_svpmov_lane(){ zn_u32 = svpmov_lane_u32_m(zn_u32, pn, 5); // expected-error {{argument value 5 is outside the valid range [1, 3]}} zn_u64 = svpmov_lane_u64_m(zn_u64, pn, 8); // expected-error {{argument value 8 is outside the valid range [1, 7]}} } + +__attribute__((target("+sve2p1"))) +void test_svget_b(uint64_t idx, svboolx2_t tuple2, svboolx4_t tuple4){ + svbool_t res; kmclaughlin-arm wrote: nit: can this be added to the list of args for `test_svget_b`, along with idx & tuple2/4? https://github.com/llvm/llvm-project/pull/74594 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SVE2p1]Add svboolx2 and svboolx4 types for svcreate, svget, s… (PR #74594)
https://github.com/kmclaughlin-arm commented: Thank you for adding the tests @CarolineConcatto! https://github.com/llvm/llvm-project/pull/74594 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SVE2p1]Add svboolx2 and svboolx4 types for svcreate, svget, s… (PR #74594)
@@ -1316,6 +1321,13 @@ def SVSET_3_BF16 : SInst<"svset3[_{d}]", "33id", "b", MergeNone, "", [IsTupleSet def SVSET_4_BF16 : SInst<"svset4[_{d}]", "44id", "b", MergeNone, "", [IsTupleSet], [ImmCheck<1, ImmCheck0_3>]>; } +let TargetGuard = "sve2p1" in { + def SVGET_2_B : SInst<"svget2[_{d}]", "d2i", "Pc", MergeNone, "", [IsTupleGet], [ImmCheck<1, ImmCheck0_1>]>; + def SVGET_4_B : SInst<"svget4[_{d}]", "d4i", "Pc", MergeNone, "", [IsTupleGet], [ImmCheck<1, ImmCheck0_3>]>; + + def SVSET_2_B : SInst<"svset2[_{d}]", "22id", "Pc", MergeNone, "", [IsTupleSet], [ImmCheck<1, ImmCheck0_1>]>; + def SVSET_4_B : SInst<"svset4[_{d}]", "44id", "Pc", MergeNone, "", [IsTupleSet], [ImmCheck<1, ImmCheck0_3>]>; +} kmclaughlin-arm wrote: You're right, we only need to add the flag once it's enabled for SME2 https://github.com/llvm/llvm-project/pull/74594 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SVE2p1]Add svboolx2 and svboolx4 types for svcreate, svget, s… (PR #74594)
https://github.com/kmclaughlin-arm approved this pull request. https://github.com/llvm/llvm-project/pull/74594 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Add multi-vector unpack builtins (PR #75075)
https://github.com/kmclaughlin-arm created https://github.com/llvm/llvm-project/pull/75075 Adds the following SME2 builtins: - svunpk (x2 & x4) See https://github.com/ARM-software/acle/pull/217/files Patch by David Sherwood >From 35ea4be3c6fe26a4dd956df6717df1e6f6e4ae6d Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin Date: Mon, 11 Dec 2023 17:12:58 + Subject: [PATCH] [Clang][SME2] Add multi-vector unpack builtins Adds the following SME2 builtins: - svunpk (x2 & x4) See https://github.com/ARM-software/acle/pull/217/files Patch by David Sherwood --- clang/include/clang/Basic/arm_sve.td | 11 + .../acle_sme2_unpkx2.c| 150 .../acle_sme2_unpkx4.c| 222 ++ 3 files changed, 383 insertions(+) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx2.c create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx4.c diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 85656c00c5b3e..62dc1c9e5b3a5 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2258,3 +2258,14 @@ let TargetGuard = "sme2" in { def SVQCVTN_U16_U64_X4 : SInst<"svqcvtn_u16[_{d}_x4]", "b4.d", "Ul", MergeNone, "aarch64_sve_uqcvtn_x4", [IsStreaming], []>; def SVQCVTN_U16_S64_X4 : SInst<"svqcvtn_u16[_{d}_x4]", "b4.d", "l", MergeNone, "aarch64_sve_sqcvtun_x4", [IsStreaming], []>; } + +// +// Multi-vector unpack +// + +let TargetGuard = "sme2" in { + def SVSUNPK_X2 : SInst<"svunpk_{d}[_{1}_x2]", "2h", "sil",MergeNone, "aarch64_sve_sunpk_x2", [IsStreaming], []>; + def SVUUNPK_X2 : SInst<"svunpk_{d}[_{1}_x2]", "2h", "UsUiUl", MergeNone, "aarch64_sve_uunpk_x2", [IsStreaming], []>; + def SVSUNPK_X4 : SInst<"svunpk_{d}[_{3}_x4]", "42.h", "sil",MergeNone, "aarch64_sve_sunpk_x4", [IsStreaming], []>; + def SVUUNPK_X4 : SInst<"svunpk_{d}[_{3}_x4]", "42.h", "UsUiUl", MergeNone, "aarch64_sve_uunpk_x4", [IsStreaming], []>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx2.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx2.c new file mode 100644 index 0..2f427689323b4 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx2.c @@ -0,0 +1,150 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -target-feature -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1 +#else +#define SVE_ACLE_FUNC(A1,A2) A1##A2 +#endif + +// CHECK-LABEL: @test_svunpk_s16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sunpk.x2.nxv8i16( [[ZN:%.*]]) +// CHECK-NEXT:[[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT:[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT:[[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT:[[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT:ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z18test_svunpk_s16_x2u10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sunpk.x2.nxv8i16( [[ZN:%.*]]) +// CPP-CHECK-NEXT:[[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT:[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT:[[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT:[[TMP4:%.*]]
[clang] [Clang][SME2] Add multi-vector add/sub builtins (PR #69725)
@@ -263,3 +263,38 @@ multiclass ZAFPOuterProd { defm SVMOPA : ZAFPOuterProd<"mopa">; defm SVMOPS : ZAFPOuterProd<"mops">; + + +// SME2 - ADD, SUB + +multiclass ZAAddSub { kmclaughlin-arm wrote: Hi @sdesmalen-arm, I'm happy to change this, though the reason I chose this name was to match other multiclasses in the file such as ZAStore, ZAAdd, ZAFPOuterProd, etc. https://github.com/llvm/llvm-project/pull/69725 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Add multi-vector add/sub builtins (PR #69725)
@@ -9571,22 +9571,17 @@ Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags, Value *BasePtr = Ops[1]; // Does the store have an offset? - if (Ops.size() > 3) + if (Ops.size() > (2 + N)) kmclaughlin-arm wrote: This change was intended to be part of this patch. Now that EmitSVEStructLoad is being called after GetAArch64SVEProcessedOperands, the first operand will have been split into a series of vector extracts and the number of operands expected here will change depending on the value of N. https://github.com/llvm/llvm-project/pull/69725 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Add multi-vector add/sub builtins (PR #69725)
@@ -10266,35 +10288,13 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { - // Find out if any arguments are required to be integer constant expressions. kmclaughlin-arm wrote: I've moved these changes to #70662 and I'll update this patch shortly to remove the non-functional changes. https://github.com/llvm/llvm-project/pull/69725 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][Clang] Refactor code to emit SVE & SME builtins (PR #70662)
@@ -9893,24 +9888,40 @@ Value *CodeGenFunction::FormSVEBuiltinResult(Value *Call) { return Call; } -Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, - const CallExpr *E) { +void CodeGenFunction::GetAArch64SVEProcessedOperands( +unsigned BuiltinID, const CallExpr *E, SmallVectorImpl &Ops, +SVETypeFlags TypeFlags) { // Find out if any arguments are required to be integer constant expressions. unsigned ICEArguments = 0; ASTContext::GetBuiltinTypeError Error; getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); assert(Error == ASTContext::GE_None && "Should not codegen an error"); - llvm::Type *Ty = ConvertType(E->getType()); - if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 && - BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64) { -Value *Val = EmitScalarExpr(E->getArg(0)); -return EmitSVEReinterpret(Val, Ty); - } + // Tuple set/get only requires one insert/extract vector, which is + // created by EmitSVETupleSetOrGet. + bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet(); - llvm::SmallVector Ops; for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { -if ((ICEArguments & (1 << i)) == 0) +bool IsICE = ICEArguments & (1 << i); +if (!IsTupleGetOrSet && !IsICE) { kmclaughlin-arm wrote: Hi @sdesmalen-arm, I've tried to rewrite this in such a way that we only have to check flags such as IsICE & IsTupleGetOrSet once https://github.com/llvm/llvm-project/pull/70662 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][Clang] Refactor code to emit SVE & SME builtins (PR #70662)
@@ -9893,24 +9888,40 @@ Value *CodeGenFunction::FormSVEBuiltinResult(Value *Call) { return Call; } -Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, - const CallExpr *E) { +void CodeGenFunction::GetAArch64SVEProcessedOperands( +unsigned BuiltinID, const CallExpr *E, SmallVectorImpl &Ops, +SVETypeFlags TypeFlags) { // Find out if any arguments are required to be integer constant expressions. unsigned ICEArguments = 0; ASTContext::GetBuiltinTypeError Error; getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); assert(Error == ASTContext::GE_None && "Should not codegen an error"); - llvm::Type *Ty = ConvertType(E->getType()); - if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 && - BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64) { -Value *Val = EmitScalarExpr(E->getArg(0)); -return EmitSVEReinterpret(Val, Ty); - } + // Tuple set/get only requires one insert/extract vector, which is + // created by EmitSVETupleSetOrGet. + bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet(); - llvm::SmallVector Ops; for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { -if ((ICEArguments & (1 << i)) == 0) +bool IsICE = ICEArguments & (1 << i); +if (!IsTupleGetOrSet && !IsICE) { + Value *Arg = EmitScalarExpr(E->getArg(i)); + if (auto *VTy = dyn_cast(Arg->getType())) { +unsigned MinElts = VTy->getMinNumElements(); +bool IsPred = VTy->getElementType()->isIntegerTy(1); +unsigned N = +(MinElts * VTy->getScalarSizeInBits()) / (IsPred ? 16 : 128); +for (unsigned I = 0; I < N; ++I) { + Value *Idx = ConstantInt::get(CGM.Int64Ty, (I * MinElts) / N); + auto *NewVTy = + ScalableVectorType::get(VTy->getElementType(), MinElts / N); + if (N == 1 && VTy == NewVTy) kmclaughlin-arm wrote: I don't think both checks are needed, so this can be hoisted out https://github.com/llvm/llvm-project/pull/70662 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[compiler-rt] [clang] [llvm] [AArch64][SME] Add support for sme-fa64 (PR #70809)
@@ -1363,6 +1364,8 @@ static void __init_cpu_features_constructor(unsigned long hwcap, setCPUFeature(FEAT_SME_I64); if (hwcap2 & HWCAP2_SME_F64F64) setCPUFeature(FEAT_SME_F64); + if (hwcap2 & HWCAP2_SME_FA64) kmclaughlin-arm wrote: Does HWCAP2_SME_FA64 need to be defined above? For example, the other SME features are defined around line 1127: ``` #ifndef HWCAP2_SME #define HWCAP2_SME (1 << 23) #endif #ifndef HWCAP2_SME_I16I64 #define HWCAP2_SME_I16I64 (1 << 24) #endif ``` https://github.com/llvm/llvm-project/pull/70809 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][Clang] Refactor code to emit SVE & SME builtins (PR #70662)
https://github.com/kmclaughlin-arm closed https://github.com/llvm/llvm-project/pull/70662 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][Clang] Refactor code to emit SVE & SME builtins (PR #70959)
kmclaughlin-arm wrote: This patch was originally committed in #70662, but it was reverted as it was missing updates to the acle_sve2p1_st1.c & acle_sve2p1_stnt1.c tests. The first commit in this pull request contains the original patch and the second contains the updated tests. https://github.com/llvm/llvm-project/pull/70959 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][Clang] Refactor code to emit SVE & SME builtins (PR #70959)
https://github.com/kmclaughlin-arm unassigned https://github.com/llvm/llvm-project/pull/70959 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][Clang] Refactor code to emit SVE & SME builtins (PR #70959)
https://github.com/kmclaughlin-arm ready_for_review https://github.com/llvm/llvm-project/pull/70959 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][Clang] Refactor code to emit SVE & SME builtins (PR #70959)
https://github.com/kmclaughlin-arm closed https://github.com/llvm/llvm-project/pull/70959 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Add outer product and accumulate/subtract builtins (PR #71176)
https://github.com/kmclaughlin-arm created https://github.com/llvm/llvm-project/pull/71176 Adds the following SME2 builtins: - svmop(a|s)_za32, - svbmop(a|s)_za32 See https://github.com/ARM-software/acle/pull/217 >From b8560b9a4496db32b730ba5715fcd7febf27b98d Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin Date: Thu, 2 Nov 2023 17:02:32 + Subject: [PATCH] [Clang][SME2] Add outer product and accumulate/subtract builtins Adds the following SME2 builtins: - svmop(a|s)_za32, - svbmop(a|s)_za32 See https://github.com/ARM-software/acle/pull/217 --- clang/include/clang/Basic/arm_sme.td | 15 ++ clang/include/clang/Basic/arm_sve_sme_incl.td | 2 +- .../aarch64-sme2-intrinsics/acle_sme2_mop.c | 170 ++ .../aarch64-sme2-intrinsics/acle_sme2_imm.cpp | 18 ++ 4 files changed, 204 insertions(+), 1 deletion(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mop.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 8d85327a86b1aaf..822c9c5621a8f18 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -263,3 +263,18 @@ multiclass ZAFPOuterProd { defm SVMOPA : ZAFPOuterProd<"mopa">; defm SVMOPS : ZAFPOuterProd<"mops">; + + +// SME2 - UMOPA, SMOPA, UMOPS, SMOPS, BMOPA, BMOPS + +let TargetGuard = "sme2" in { + def SVSMOPA : Inst<"svmopa_za32[_{d}]_m", "viPPdd", "s", MergeNone, "aarch64_sme_smopa_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; + def SVUSMOPA : Inst<"svmopa_za32[_{d}]_m", "viPPdd", "Us", MergeNone, "aarch64_sme_umopa_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; + + def SVSMOPS : Inst<"svmops_za32[_{d}]_m", "viPPdd", "s", MergeNone, "aarch64_sme_smops_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; + def SVUSMOPS : Inst<"svmops_za32[_{d}]_m", "viPPdd", "Us", MergeNone, "aarch64_sme_umops_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; + + def SVBMOPA : Inst<"svbmopa_za32[_{d}]_m", "viPPdd", "iUi", MergeNone, "aarch64_sme_bmopa_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; + + def SVBMOPS : Inst<"svbmops_za32[_{d}]_m", "viPPdd", "iUi", MergeNone, "aarch64_sme_bmops_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; +} diff --git a/clang/include/clang/Basic/arm_sve_sme_incl.td b/clang/include/clang/Basic/arm_sve_sme_incl.td index 3a7a5b51b25801e..22a2a3c5434d657 100644 --- a/clang/include/clang/Basic/arm_sve_sme_incl.td +++ b/clang/include/clang/Basic/arm_sve_sme_incl.td @@ -257,7 +257,7 @@ class ImmCheck { } class Inst ft, list ch, MemEltType met> { + list ft, list ch, MemEltType met = MemEltTyDefault> { string Name = n; string Prototype = p; string Types = t; diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mop.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mop.c new file mode 100644 index 000..bb804a523c449d3 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mop.c @@ -0,0 +1,170 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +// MOPA + +// CHECK-LABEL: @test_svmopa_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*
[clang] [Clang][SVE2.1] Add intrinsics for `WHILEcc` resulting in predicate pair (PR #75107)
@@ -1341,6 +1341,26 @@ def SVWHILEHS_U32 : SInst<"svwhilege_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNon def SVWHILEHS_U64 : SInst<"svwhilege_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehs", [IsOverloadWhile]>; } +let TargetGuard = "sve2p1|sme2" in { + def SVWHILEGE_S64_X2 : SInst<"svwhilege_{d}[_{1}]_x2", "2ll", "PcPsPiPl", MergeNone, "aarch64_sve_whilege_x2">; + def SVWHILEGT_S64_X2 : SInst<"svwhilegt_{d}[_{1}]_x2", "2ll", "PcPsPiPl", MergeNone, "aarch64_sve_whilegt_x2">; + def SVWHILEHI_S64_X2 : SInst<"svwhilehi_{d}[_{1}]_x2", "2ll", "PcPsPiPl", MergeNone, "aarch64_sve_whilehi_x2">; + def SVWHILEHS_S64_X2 : SInst<"svwhilehs_{d}[_{1}]_x2", "2ll", "PcPsPiPl", MergeNone, "aarch64_sve_whilehs_x2">; + def SVWHILELE_S64_X2 : SInst<"svwhilele_{d}[_{1}]_x2", "2ll", "PcPsPiPl", MergeNone, "aarch64_sve_whilele_x2">; + def SVWHILELO_S64_X2 : SInst<"svwhilelo_{d}[_{1}]_x2", "2ll", "PcPsPiPl", MergeNone, "aarch64_sve_whilelo_x2">; + def SVWHILELS_S64_X2 : SInst<"svwhilels_{d}[_{1}]_x2", "2ll", "PcPsPiPl", MergeNone, "aarch64_sve_whilels_x2">; + def SVWHILELT_S64_X2 : SInst<"svwhilelt_{d}[_{1}]_x2", "2ll", "PcPsPiPl", MergeNone, "aarch64_sve_whilelt_x2">; + + def SVWHILEGE_U64_X2 : SInst<"svwhilege_{d}[_{1}]_x2", "2nn", "PcPsPiPl", MergeNone, "aarch64_sve_whilege_x2">; + def SVWHILEGT_U64_X2 : SInst<"svwhilegt_{d}[_{1}]_x2", "2nn", "PcPsPiPl", MergeNone, "aarch64_sve_whilegt_x2">; + def SVWHILEHI_U64_X2 : SInst<"svwhilehi_{d}[_{1}]_x2", "2nn", "PcPsPiPl", MergeNone, "aarch64_sve_whilehi_x2">; + def SVWHILEHS_U64_X2 : SInst<"svwhilehs_{d}[_{1}]_x2", "2nn", "PcPsPiPl", MergeNone, "aarch64_sve_whilehs_x2">; + def SVWHILELE_U64_X2 : SInst<"svwhilele_{d}[_{1}]_x2", "2nn", "PcPsPiPl", MergeNone, "aarch64_sve_whilele_x2">; + def SVWHILELO_U64_X2 : SInst<"svwhilelo_{d}[_{1}]_x2", "2nn", "PcPsPiPl", MergeNone, "aarch64_sve_whilelo_x2">; + def SVWHILELS_U64_X2 : SInst<"svwhilels_{d}[_{1}]_x2", "2nn", "PcPsPiPl", MergeNone, "aarch64_sve_whilels_x2">; + def SVWHILELT_U64_X2 : SInst<"svwhilelt_{d}[_{1}]_x2", "2nn", "PcPsPiPl", MergeNone, "aarch64_sve_whilelt_x2">; +} kmclaughlin-arm wrote: Hi @momchil-velikov, There were some changes made to the SME2 ACLE recently which included unifying the svwhile signed/unsigned builtins to use the same mnemonics (see https://github.com/ARM-software/acle/pull/217/commits/cdaf16f2fbe329f8951acfe11a6ae227f753d521). I believe these changes should be applied for SVE2p1 as well? https://github.com/llvm/llvm-project/pull/75107 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Add multi-vector unpack builtins (PR #75075)
https://github.com/kmclaughlin-arm closed https://github.com/llvm/llvm-project/pull/75075 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SVE2.1] Make a part of the name optional for `svwhileXX` builtins with predicate-as-counter (PR #75200)
@@ -1950,19 +1950,17 @@ let TargetGuard = "sve2p1|sme2" in { //FIXME: Replace IsStreamingCompatible with IsStreamingOrHasSVE2p1 when available def SVPEXT_SINGLE : SInst<"svpext_lane_{d}", "P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_3>]>; def SVPEXT_X2 : SInst<"svpext_lane_{d}_x2", "2.P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext_x2", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_1>]>; -} -let TargetGuard = "sve2p1" in { def SVFCLAMP : SInst<"svclamp[_{d}]", "", "hfd", MergeNone, "aarch64_sve_fclamp", [], []>; -def SVWHILEGE_COUNT : SInst<"svwhilege_{d}", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilege_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILEGT_COUNT : SInst<"svwhilegt_{d}", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilegt_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILELE_COUNT : SInst<"svwhilele_{d}", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilele_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILELT_COUNT : SInst<"svwhilelt_{d}", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilelt_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILELO_COUNT : SInst<"svwhilelo_{d}", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilelo_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILELS_COUNT : SInst<"svwhilels_{d}", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilels_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILEHI_COUNT : SInst<"svwhilehi_{d}", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehi_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILEHS_COUNT : SInst<"svwhilehs_{d}", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehs_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILEGE_COUNT : SInst<"svwhilege_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilege_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILEGT_COUNT : SInst<"svwhilegt_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilegt_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILELE_COUNT : SInst<"svwhilele_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilele_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILELT_COUNT : SInst<"svwhilelt_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilelt_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILELO_COUNT : SInst<"svwhilelt_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilelo_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILELS_COUNT : SInst<"svwhilele_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilels_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILEHI_COUNT : SInst<"svwhilegt_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehi_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILEHS_COUNT : SInst<"svwhilege_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehs_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; kmclaughlin-arm wrote: Please can you add some tests for the immediate range to acle_sve2p1_imm.cpp for these builtins? https://github.com/llvm/llvm-project/pull/75200 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][AArch64]Add QCVTN builtin to SVE2.1 (PR #75454)
@@ -0,0 +1,78 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve1p1 -target-feature +sme2 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK kmclaughlin-arm wrote: This run line uses +sme2, but I think it should only be for sve2p1? It also contains +sve1p1, which I think is meant to be +sve2p1 :) https://github.com/llvm/llvm-project/pull/75454 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SVE2.1] Make a part of the name optional for `svwhileXX` builtins with predicate-as-counter (PR #75200)
@@ -1,12 +1,20 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s kmclaughlin-arm wrote: Do we need some extra run lines for testing these builtins with sme2 as well? https://github.com/llvm/llvm-project/pull/75200 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SVE2.1] Make a part of the name optional for `svwhileXX` builtins with predicate-as-counter (PR #75200)
@@ -1,12 +1,20 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s kmclaughlin-arm wrote: Thank you :) I agree that having a single line to test both sve2p1 and sme2 would be preferable, however I'm not sure if this will be enough coverage when we add the streaming mode attribute to the builtins? For SME2, we'll need to add `__arm_streaming` to the functions here as the tests should fail without it, but for SVE2p1 we do not need the attribute. @dtemirbulatov recently committed a patch for the pext & psel builtins which has both tests for SME2 & SVE2p1 and passes an extra option for the +sme2 lines to set the attribute: https://github.com/llvm/llvm-project/pull/72827. https://github.com/llvm/llvm-project/pull/75200 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][AArch64]Add QCVTN builtin to SVE2.1 (PR #75454)
https://github.com/kmclaughlin-arm approved this pull request. https://github.com/llvm/llvm-project/pull/75454 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME2] Add SME2 MLA/MLS builtins. (PR #75584)
@@ -0,0 +1,760 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED,A5) A1##A3##A5 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4,A5) A1##A2##A3##A4##A5 +#endif + +// +// Multi, multi +// CHECK-LABEL: @test_svmla2_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN:%.*]], i64 0) +// CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) +// CHECK-NEXT:[[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM:%.*]], i64 0) +// CHECK-NEXT:[[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM]], i64 8) +// CHECK-NEXT:tail call void @llvm.aarch64.sme.fmlal.vg2x2.nxv8f16(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT:ret void +// +// CPP-CHECK-LABEL: @_Z15test_svmla2_f16j13svfloat16x2_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) +// CPP-CHECK-NEXT:[[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT:[[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM]], i64 8) +// CPP-CHECK-NEXT:tail call void @llvm.aarch64.sme.fmlal.vg2x2.nxv8f16(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT:ret void +// +void test_svmla2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16x2_t zm) __arm_streaming __arm_shared_za +{ + SVE_ACLE_FUNC(svmla_za32,_f16,_vg2x2,,)(slice_base + 6, zn, zm); kmclaughlin-arm wrote: Since the slice base and offset are now combined, I don't think there is much value in adding an immediate to `slice_base` in any of these tests. https://github.com/llvm/llvm-project/pull/75584 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME2] Add SME2 MLA/MLS builtins. (PR #75584)
@@ -315,6 +315,219 @@ let TargetGuard = "sme2" in { def SVBMOPS : Inst<"svbmops_za32[_{d}]_m", "viPPdd", "iUi", MergeNone, "aarch64_sme_bmops_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; } +// FMLA/FMLS +let TargetGuard = "sme2" in { + def SVMLA_MULTI_VG1x2_F32 : Inst<"svmla_za32[_{d}]_vg1x2", "vm22", "f", MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsSharedZA], []>; + def SVMLA_MULTI_VG1x4_F32 : Inst<"svmla_za32[_{d}]_vg1x4", "vm44", "f", MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsSharedZA], []>; + def SVMLS_MULTI_VG1x2_F32 : Inst<"svmls_za32[_{d}]_vg1x2", "vm22", "f", MergeNone, "aarch64_sme_fmls_vg1x2", [IsStreaming, IsSharedZA], []>; + def SVMLS_MULTI_VG1x4_F32 : Inst<"svmls_za32[_{d}]_vg1x4", "vm44", "f", MergeNone, "aarch64_sme_fmls_vg1x4", [IsStreaming, IsSharedZA], []>; + + def SVMLA_SINGLE_VG1x2_F32 : Inst<"svmla[_single]_za32[_{d}]_vg1x2", "vm2d", "f", MergeNone, "aarch64_sme_fmla_single_vg1x2", [IsStreaming, IsSharedZA], []>; + def SVMLA_SINGLE_VG1x4_F32 : Inst<"svmla[_single]_za32[_{d}]_vg1x4", "vm4d", "f", MergeNone, "aarch64_sme_fmla_single_vg1x4", [IsStreaming, IsSharedZA], []>; + def SVMLS_SINGLE_VG1x2_F32 : Inst<"svmls[_single]_za32[_{d}]_vg1x2", "vm2d", "f", MergeNone, "aarch64_sme_fmls_single_vg1x2", [IsStreaming, IsSharedZA], []>; + def SVMLS_SINGLE_VG1x4_F32 : Inst<"svmls[_single]_za32[_{d}]_vg1x4", "vm4d", "f", MergeNone, "aarch64_sme_fmls_single_vg1x4", [IsStreaming, IsSharedZA], []>; + + def SVMLA_LANE_VG1x2_F32 : Inst<"svmla_lane_za32[_{d}]_vg1x2", "vm2di", "f", MergeNone, "aarch64_sme_fmla_lane_vg1x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_3>]>; + def SVMLA_LANE_VG1x4_F32 : Inst<"svmla_lane_za32[_{d}]_vg1x4", "vm4di", "f", MergeNone, "aarch64_sme_fmla_lane_vg1x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_3>]>; + def SVMLS_LANE_VG1x2_F32 : Inst<"svmls_lane_za32[_{d}]_vg1x2", "vm2di", "f", MergeNone, "aarch64_sme_fmls_lane_vg1x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_3>]>; + def SVMLS_LANE_VG1x4_F32 : Inst<"svmls_lane_za32[_{d}]_vg1x4", "vm4di", "f", MergeNone, "aarch64_sme_fmls_lane_vg1x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_3>]>; +} + +let TargetGuard = "sme2,sme-f64f64" in { + def SVMLA_MULTI_VG1x2_F64 : Inst<"svmla_za64[_{d}]_vg1x2", "vm22", "d", MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsSharedZA], []>; + def SVMLA_MULTI_VG1x4_F64 : Inst<"svmla_za64[_{d}]_vg1x4", "vm44", "d", MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsSharedZA], []>; + def SVMLS_MULTI_VG1x2_F64 : Inst<"svmls_za64[_{d}]_vg1x2", "vm22", "d", MergeNone, "aarch64_sme_fmls_vg1x2", [IsStreaming, IsSharedZA], []>; + def SVMLS_MULTI_VG1x4_F64 : Inst<"svmls_za64[_{d}]_vg1x4", "vm44", "d", MergeNone, "aarch64_sme_fmls_vg1x4", [IsStreaming, IsSharedZA], []>; + + def SVMLA_SINGLE_VG1x2_F64 : Inst<"svmla[_single]_za64[_{d}]_vg1x2", "vm2d", "d", MergeNone, "aarch64_sme_fmla_single_vg1x2", [IsStreaming, IsSharedZA], []>; + def SVMLA_SINGLE_VG1x4_F64 : Inst<"svmla[_single]_za64[_{d}]_vg1x4", "vm4d", "d", MergeNone, "aarch64_sme_fmla_single_vg1x4", [IsStreaming, IsSharedZA], []>; + def SVMLS_SINGLE_VG1x2_F64 : Inst<"svmls[_single]_za64[_{d}]_vg1x2", "vm2d", "d", MergeNone, "aarch64_sme_fmls_single_vg1x2", [IsStreaming, IsSharedZA], []>; + def SVMLS_SINGLE_VG1x4_F64 : Inst<"svmls[_single]_za64[_{d}]_vg1x4", "vm4d", "d", MergeNone, "aarch64_sme_fmls_single_vg1x4", [IsStreaming, IsSharedZA], []>; + + def SVMLA_LANE_VG1x2_F64 : Inst<"svmla_lane_za64[_{d}]_vg1x2", "vm2di", "d", MergeNone, "aarch64_sme_fmla_lane_vg1x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_1>]>; + def SVMLA_LANE_VG1x4_F64 : Inst<"svmla_lane_za64[_{d}]_vg1x4", "vm4di", "d", MergeNone, "aarch64_sme_fmla_lane_vg1x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_1>]>; + def SVMLS_LANE_VG1x2_F64 : Inst<"svmls_lane_za64[_{d}]_vg1x2", "vm2di", "d", MergeNone, "aarch64_sme_fmls_lane_vg1x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_1>]>; + def SVMLS_LANE_VG1x4_F64 : Inst<"svmls_lane_za64[_{d}]_vg1x4", "vm4di", "d", MergeNone, "aarch64_sme_fmls_lane_vg1x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_1>]>; kmclaughlin-arm wrote: I think there are tests missing for the vg1x2 and vg1x4 MLA/MLS builtins. Please can you also add some tests to acle_sme2_imm.cpp for those builtins which use an ImmCheck? https://github.com/llvm/llvm-project/pull/75584 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Enable multi-vector loads & stores for SME2 (PR #75821)
https://github.com/kmclaughlin-arm created https://github.com/llvm/llvm-project/pull/75821 This patch enables the following builtins for SME2: - svld1, svld1_vnum - svldnt1, svldnt1_vnum - svst1, svst1_vnum - svstnt1, svstnt1_vnum >From 1b2022f34ad3b038f714d8d0559f4e129d5e731a Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin Date: Mon, 18 Dec 2023 14:58:57 + Subject: [PATCH] [Clang][SME2] Enable multi-vector loads & stores for SME2 This patch enables the following builtins for SME2: - svld1, svld1_vnum - svldnt1, svldnt1_vnum - svst1, svst1_vnum - svstnt1, svstnt1_vnum --- clang/include/clang/Basic/arm_sve.td | 124 -- .../acle_sve2p1_ld1.c | 5 +- .../acle_sve2p1_ldnt1.c | 5 +- .../acle_sve2p1_st1.c | 4 +- .../acle_sve2p1_stnt1.c | 5 +- 5 files changed, 68 insertions(+), 75 deletions(-) diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index a429a3c5fe378a..e9a45c3e655f23 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1990,79 +1990,61 @@ def SVWHILELO_COUNT : SInst<"svwhilelo_{d}", "}nni", "QcQsQiQl", MergeNone, "a def SVWHILELS_COUNT : SInst<"svwhilels_{d}", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilels_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; def SVWHILEHI_COUNT : SInst<"svwhilehi_{d}", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehi_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; def SVWHILEHS_COUNT : SInst<"svwhilehs_{d}", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehs_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +} + +multiclass MultiVecLoad { + // FIXME: Add IsStreamingOrHasSVE2p1 when available (SME2 requires __arm_streaming) + def SV # NAME # B_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "cUc", [IsStructLoad], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # H_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "sUshb", [IsStructLoad], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # W_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "iUif", [IsStructLoad], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # D_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "lUld", [IsStructLoad], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # B_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "cUc", [IsStructLoad], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # H_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "sUshb", [IsStructLoad], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # W_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "iUif", [IsStructLoad], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # D_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "lUld", [IsStructLoad], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + + def SV # NAME # B_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "cUc", [IsStructLoad], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # H_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "sUshb", [IsStructLoad], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # W_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "iUif", [IsStructLoad], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # D_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "lUld", [IsStructLoad], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # B_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "cUc", [IsStructLoad], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # H_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "sUshb", [IsStructLoad], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # W_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "iUif", [IsStructLoad], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # D_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "lUld", [IsStructLoad], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; +} + +let TargetGuard = "sve2p1|sme2" in { + defm LD1 : MultiVecLoad<"ld1">; + defm LDNT1 : MultiVecLoad<"ldnt1">; +} + +multiclass MultiVecStore { + // FIXME: Add IsStreamingOrHasSVE2p1 when available (SME2 requires __arm_streaming) + def SV # NAME # B_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "cUc", [IsStructStore], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # H_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "sUshb", [IsStructStore], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # W_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "iUif", [IsStructStore], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # D_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "lUld", [IsStructStore], MemEltTyDefault, "aarch64_sve_" # i # "_p
[llvm] [clang-tools-extra] [clang] [Clang][SME2] Add multi-vector zip & unzip builtins (PR #74841)
https://github.com/kmclaughlin-arm closed https://github.com/llvm/llvm-project/pull/74841 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [Clang][SME2] Add builtins for moving multi-vectors to/from ZA (PR #71191)
https://github.com/kmclaughlin-arm closed https://github.com/llvm/llvm-project/pull/71191 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Add builtins for multi-vector fp round to integral value (PR #75941)
https://github.com/kmclaughlin-arm created https://github.com/llvm/llvm-project/pull/75941 Adds the following SME2 builtins: - svrinta, svrintm, svrintn, svrintp (x2 & x4) >From 85674fa6c6b568c900b3728555a3e276439818a2 Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin Date: Tue, 19 Dec 2023 14:53:06 + Subject: [PATCH] [Clang][SME2] Add builtins for multi-vector fp round to integral value Adds the following SME2 builtins: - svrinta, svrintm, svrintn, svrintp (x2 & x4) --- clang/include/clang/Basic/arm_sve.td | 15 + .../aarch64-sme2-intrinsics/acle_sme2_frint.c | 282 ++ 2 files changed, 297 insertions(+) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_frint.c diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 98d7028eb28309..1d601f2f2dd490 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2141,6 +2141,21 @@ let TargetGuard = "sme2" in { defm SVMAXNM : SInstMinMaxByVector<"max">; } +let TargetGuard = "sme2" in { + // FRINTA / FRINTM / FRINTN / FRINTP + def SVRINTA_X2 : SInst<"svrinta[_{d}_x2]", "22", "f", MergeNone, "aarch64_sve_frinta_x2", [IsStreaming], []>; + def SVRINTA_X4 : SInst<"svrinta[_{d}_x4]", "44", "f", MergeNone, "aarch64_sve_frinta_x4", [IsStreaming], []>; + + def SVRINTM_X2 : SInst<"svrintm[_{d}_x2]", "22", "f", MergeNone, "aarch64_sve_frintm_x2", [IsStreaming], []>; + def SVRINTM_X4 : SInst<"svrintm[_{d}_x4]", "44", "f", MergeNone, "aarch64_sve_frintm_x4", [IsStreaming], []>; + + def SVRINTN_X2 : SInst<"svrintn[_{d}_x2]", "22", "f", MergeNone, "aarch64_sve_frintn_x2", [IsStreaming], []>; + def SVRINTN_X4 : SInst<"svrintn[_{d}_x4]", "44", "f", MergeNone, "aarch64_sve_frintn_x4", [IsStreaming], []>; + + def SVRINTP_X2 : SInst<"svrintp[_{d}_x2]", "22", "f", MergeNone, "aarch64_sve_frintp_x2", [IsStreaming], []>; + def SVRINTP_X4 : SInst<"svrintp[_{d}_x4]", "44", "f", MergeNone, "aarch64_sve_frintp_x4", [IsStreaming], []>; +} + let TargetGuard = "sme2" in { def SVSCLAMP_X2 : SInst<"svclamp[_single_{d}_x2]", "22dd", "csil", MergeNone, "aarch64_sve_sclamp_single_x2", [IsStreaming], []>; def SVUCLAMP_X2 : SInst<"svclamp[_single_{d}_x2]", "22dd", "UcUsUiUl", MergeNone, "aarch64_sve_uclamp_single_x2", [IsStreaming], []>; diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_frint.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_frint.c new file mode 100644 index 00..2a34b0e2878ef9 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_frint.c @@ -0,0 +1,282 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1 +#else +#define SVE_ACLE_FUNC(A1,A2) A1##A2 +#endif + +// FRINTA + +// CHECK-LABEL: @test_svfrinta_f32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) +// CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) +// CHECK-NEXT:[[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.frinta.x2.nxv4f32( [[TMP0]], [[TMP1]]) +// CHECK-NEXT:[[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT:[[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP3]], i64 0) +// CHECK-NEXT:[[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT:[[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT:ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z20test_svfrinta_f32_x213svfloat32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[T
[clang] [Clang][SME] Add IsStreamingOrSVE2p1 (PR #75958)
@@ -11,10 +11,16 @@ // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve2p1 \ // RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve \ -// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: -S -DTEST_SME2 -disable-O0-optnone -Werror -Wall -o /dev/null %s #include +#ifndef TEST_SME2 +#define ATTR +#else +#define ATTR __arm_streaming kmclaughlin-arm wrote: I think this should be __arm_streaming_compatible? https://github.com/llvm/llvm-project/pull/75958 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME] Add IsStreamingOrSVE2p1 (PR #75958)
@@ -10,6 +10,10 @@ // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu \ // RUN: -target-feature +sve2p1 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \ +// RUN: -target-feature +sme2 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s kmclaughlin-arm wrote: Should this test also use the ATTR macro with `__arm_streaming_compatible` since it now tests sme2 & sve2p1? https://github.com/llvm/llvm-project/pull/75958 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME] Add IsStreamingOrSVE2p1 (PR #75958)
@@ -1,14 +1,20 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -DTEST_SME2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -DTEST_SME2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -DTEST_SME2 -disable-O0-optnone -Werror -Wall -o /dev/null %s #include +#ifndef TEST_SME2 +#define ATTR +#else +#define ATTR __arm_streaming kmclaughlin-arm wrote: I think svpfalse_c needs the `__arm_streaming_compatible` attribute https://github.com/llvm/llvm-project/pull/75958 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME] Add IsStreamingOrSVE2p1 (PR #75958)
@@ -11,10 +11,16 @@ // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve2p1 \ // RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve \ -// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: -S -DTEST_SME2 -disable-O0-optnone -Werror -Wall -o /dev/null %s #include +#ifndef TEST_SME2 +#define ATTR +#else +#define ATTR __arm_streaming kmclaughlin-arm wrote: Sorry, this was my mistake! You were right to add it as `__arm_streaming` :) https://github.com/llvm/llvm-project/pull/75958 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME] Add IsStreamingOrSVE2p1 (PR #75958)
@@ -1,14 +1,20 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -DTEST_SME2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -DTEST_SME2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -DTEST_SME2 -disable-O0-optnone -Werror -Wall -o /dev/null %s #include +#ifndef TEST_SME2 +#define ATTR +#else +#define ATTR __arm_streaming kmclaughlin-arm wrote: `__arm_streaming` is correct here too https://github.com/llvm/llvm-project/pull/75958 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Add outer product and accumulate/subtract builtins (PR #71176)
https://github.com/kmclaughlin-arm updated https://github.com/llvm/llvm-project/pull/71176 >From c975abe9015d5c9f5f7c7388101900cbcf738ab6 Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin Date: Thu, 2 Nov 2023 17:02:32 + Subject: [PATCH 1/3] [Clang][SME2] Add outer product and accumulate/subtract builtins Adds the following SME2 builtins: - svmop(a|s)_za32, - svbmop(a|s)_za32 See https://github.com/ARM-software/acle/pull/217 --- clang/include/clang/Basic/arm_sme.td | 16 ++ .../aarch64-sme2-intrinsics/acle_sme2_mop.c | 170 ++ .../aarch64-sme2-intrinsics/acle_sme2_imm.cpp | 18 ++ 3 files changed, 204 insertions(+) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mop.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index b5655afdf419ecf..1f88b39468105f2 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -298,3 +298,19 @@ multiclass ZAAddSub { defm SVADD : ZAAddSub<"add">; defm SVSUB : ZAAddSub<"sub">; + +// +// Outer produce and accumulate/subtract +// + +let TargetGuard = "sme2" in { + def SVSMOPA : Inst<"svmopa_za32[_{d}]_m", "viPPdd", "s", MergeNone, "aarch64_sme_smopa_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; + def SVUSMOPA : Inst<"svmopa_za32[_{d}]_m", "viPPdd", "Us", MergeNone, "aarch64_sme_umopa_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; + + def SVSMOPS : Inst<"svmops_za32[_{d}]_m", "viPPdd", "s", MergeNone, "aarch64_sme_smops_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; + def SVUSMOPS : Inst<"svmops_za32[_{d}]_m", "viPPdd", "Us", MergeNone, "aarch64_sme_umops_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; + + def SVBMOPA : Inst<"svbmopa_za32[_{d}]_m", "viPPdd", "iUi", MergeNone, "aarch64_sme_bmopa_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; + + def SVBMOPS : Inst<"svbmops_za32[_{d}]_m", "viPPdd", "iUi", MergeNone, "aarch64_sme_bmops_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mop.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mop.c new file mode 100644 index 000..bb804a523c449d3 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mop.c @@ -0,0 +1,170 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +// MOPA + +// CHECK-LABEL: @test_svmopa_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CHECK-NEXT:tail call void @llvm.aarch64.sme.smopa.za32.nxv8i16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT:ret void +// +// CPP-CHECK-LABEL: @_Z15test_svmopa_s16u10__SVBool_tS_u11__SVInt16_tS0_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CPP-CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CPP-CHECK-NEXT:tail call void @llvm.aarch64.sme.smopa.za32.nxv8i16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT:ret void +// +void test_svmopa_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) { + SVE_ACLE_FUNC(svmopa_za32,_s16,_m,)(3, p
[clang] [Clang][SME2] Add outer product and accumulate/subtract builtins (PR #71176)
@@ -298,3 +298,19 @@ multiclass ZAAddSub { defm SVADD : ZAAddSub<"add">; defm SVSUB : ZAAddSub<"sub">; + +// +// Outer produce and accumulate/subtract +// + +let TargetGuard = "sme2" in { + def SVSMOPA : Inst<"svmopa_za32[_{d}]_m", "viPPdd", "s", MergeNone, "aarch64_sme_smopa_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; + def SVUSMOPA : Inst<"svmopa_za32[_{d}]_m", "viPPdd", "Us", MergeNone, "aarch64_sme_umopa_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; + + def SVSMOPS : Inst<"svmops_za32[_{d}]_m", "viPPdd", "s", MergeNone, "aarch64_sme_smops_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; + def SVUSMOPS : Inst<"svmops_za32[_{d}]_m", "viPPdd", "Us", MergeNone, "aarch64_sme_umops_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; + kmclaughlin-arm wrote: I think the "Outer product and accumulate/subtract" header applies to all of these, including the bmopa/s builtins? I've fixed the typo though :) https://github.com/llvm/llvm-project/pull/71176 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Add outer product and accumulate/subtract builtins (PR #71176)
@@ -0,0 +1,170 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +// MOPA + +// CHECK-LABEL: @test_svmopa_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CHECK-NEXT:tail call void @llvm.aarch64.sme.smopa.za32.nxv8i16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT:ret void +// +// CPP-CHECK-LABEL: @_Z15test_svmopa_s16u10__SVBool_tS_u11__SVInt16_tS0_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CPP-CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CPP-CHECK-NEXT:tail call void @llvm.aarch64.sme.smopa.za32.nxv8i16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT:ret void +// +void test_svmopa_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) __arm_streaming __arm_shared_za { + SVE_ACLE_FUNC(svmopa_za32,_s16,_m,)(3, pn, pm, zn, zm); +} + +// CHECK-LABEL: @test_svmopa_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CHECK-NEXT:tail call void @llvm.aarch64.sme.umopa.za32.nxv8i16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT:ret void +// +// CPP-CHECK-LABEL: @_Z15test_svmopa_u16u10__SVBool_tS_u12__SVUint16_tS0_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CPP-CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CPP-CHECK-NEXT:tail call void @llvm.aarch64.sme.umopa.za32.nxv8i16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT:ret void +// +void test_svmopa_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_shared_za { + SVE_ACLE_FUNC(svmopa_za32,_u16,_m,)(3, pn, pm, zn, zm); +} + +// MOPS + +// CHECK-LABEL: @test_svmops_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CHECK-NEXT:tail call void @llvm.aarch64.sme.smops.za32.nxv8i16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT:ret void +// +// CPP-CHECK-LABEL: @_Z15test_svmops_s16u10__SVBool_tS_u11__SVInt16_tS0_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CPP-CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CPP-CHECK-NEXT:tail call void @llvm.aarch64.sme.smops.za32.nxv8i16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT:ret void +// +void test_svmops_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) __arm_streaming __arm_shared_za { + SVE_ACLE_FUNC(svmops_za32,_s16,_m,)(3, pn, pm, zn, zm); +} + +// CHECK-LABEL: @test_svmops_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CHECK-NEXT:[[TMP1:%.*]] =
[clang] [Clang][SME2] Add outer product and accumulate/subtract builtins (PR #71176)
https://github.com/kmclaughlin-arm closed https://github.com/llvm/llvm-project/pull/71176 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [SME2] Add LUTI2 and LUTI4 quad Builtins and Intrinsics (PR #73317)
@@ -5098,6 +5099,12 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { AArch64::LUTI2_4ZTZI_S})) // Second Immediate must be <= 3: SelectMultiVectorLuti<3>(Node, 4, Opc); + else if (auto Opc = SelectOpcodeFromVT( kmclaughlin-arm wrote: Can you instead just pass `` in the if statement above? https://github.com/llvm/llvm-project/pull/73317 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [SME2] Add LUTI2 and LUTI4 quad Builtins and Intrinsics (PR #73317)
@@ -5098,6 +5099,12 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { AArch64::LUTI2_4ZTZI_S})) // Second Immediate must be <= 3: SelectMultiVectorLuti<3>(Node, 4, Opc); + else if (auto Opc = SelectOpcodeFromVT( kmclaughlin-arm wrote: If AnyType is used and i1 is passed, `SelectOpcodeFromVT` will return 0. I think it would be the same for i64/f64 as the size of `Opcodes` for these intrinsics is 3 and the function will check this when trying to return the correct opcode from the list: `return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];` https://github.com/llvm/llvm-project/pull/73317 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [SME2] Add LUTI2 and LUTI4 quad Builtins and Intrinsics (PR #73317)
@@ -1666,7 +1674,8 @@ static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef Opcodes) { return 0; break; case SelectTypeKind::FP: -if (EltVT != MVT::f16 && EltVT != MVT::f32 && EltVT != MVT::f64) +if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 && +EltVT != MVT::f64) kmclaughlin-arm wrote: Is this change needed now that you're using AnyType? https://github.com/llvm/llvm-project/pull/73317 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)
@@ -2995,6 +2995,134 @@ static QualType getNeonEltType(NeonTypeFlags Flags, ASTContext &Context, enum ArmStreamingType { ArmNonStreaming, ArmStreaming, ArmStreamingCompatible }; +bool Sema::ParseSVEImmChecks( +CallExpr *TheCall, SmallVector, 3> &ImmChecks) { + // Perform all the immediate checks for this builtin call. + bool HasError = false; + for (auto &I : ImmChecks) { +int ArgNum, CheckTy, ElementSizeInBits; +std::tie(ArgNum, CheckTy, ElementSizeInBits) = I; + +typedef bool (*OptionSetCheckFnTy)(int64_t Value); + +// Function that checks whether the operand (ArgNum) is an immediate +// that is one of the predefined values. +auto CheckImmediateInSet = [&](OptionSetCheckFnTy CheckImm, + int ErrDiag) -> bool { + // We can't check the value of a dependent argument. + Expr *Arg = TheCall->getArg(ArgNum); + if (Arg->isTypeDependent() || Arg->isValueDependent()) +return false; + + // Check constant-ness first. + llvm::APSInt Imm; + if (SemaBuiltinConstantArg(TheCall, ArgNum, Imm)) +return true; + + if (!CheckImm(Imm.getSExtValue())) +return Diag(TheCall->getBeginLoc(), ErrDiag) << Arg->getSourceRange(); + return false; +}; + +switch ((SVETypeFlags::ImmCheckType)CheckTy) { +case SVETypeFlags::ImmCheck0_31: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 31)) +HasError = true; + break; +case SVETypeFlags::ImmCheck0_13: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 13)) +HasError = true; + break; +case SVETypeFlags::ImmCheck1_16: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 16)) +HasError = true; + break; +case SVETypeFlags::ImmCheck0_7: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 7)) +HasError = true; + break; +case SVETypeFlags::ImmCheckExtract: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, + (2048 / ElementSizeInBits) - 1)) +HasError = true; + break; +case SVETypeFlags::ImmCheckShiftRight: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, ElementSizeInBits)) +HasError = true; + break; +case SVETypeFlags::ImmCheckShiftRightNarrow: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, + ElementSizeInBits / 2)) +HasError = true; + break; +case SVETypeFlags::ImmCheckShiftLeft: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, + ElementSizeInBits - 1)) +HasError = true; + break; +case SVETypeFlags::ImmCheckLaneIndex: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, + (128 / (1 * ElementSizeInBits)) - 1)) +HasError = true; + break; +case SVETypeFlags::ImmCheckLaneIndexCompRotate: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, + (128 / (2 * ElementSizeInBits)) - 1)) +HasError = true; + break; +case SVETypeFlags::ImmCheckLaneIndexDot: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, + (128 / (4 * ElementSizeInBits)) - 1)) +HasError = true; + break; +case SVETypeFlags::ImmCheckComplexRot90_270: + if (CheckImmediateInSet([](int64_t V) { return V == 90 || V == 270; }, + diag::err_rotation_argument_to_cadd)) +HasError = true; + break; +case SVETypeFlags::ImmCheckComplexRotAll90: + if (CheckImmediateInSet( + [](int64_t V) { +return V == 0 || V == 90 || V == 180 || V == 270; + }, + diag::err_rotation_argument_to_cmla)) +HasError = true; + break; +case SVETypeFlags::ImmCheck0_1: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 1)) +HasError = true; + break; +case SVETypeFlags::ImmCheck0_2: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 2)) +HasError = true; + break; +case SVETypeFlags::ImmCheck0_3: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 3)) +HasError = true; + break; +case SVETypeFlags::ImmCheck0_0: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 0)) +HasError = true; + break; +case SVETypeFlags::ImmCheck0_15: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 15)) +HasError = true; + break; +case SVETypeFlags::ImmCheck0_255: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 255)) +HasError = true; + break; +case SVETypeFlags::ImmCheck2_4_Mul2: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 2, 4) || + SemaBuiltinConstantArgMultiple(TheCall, ArgNum, 2)) +HasError = true; + break; kmclaughlin-arm wrote
[clang] [AArch64][SME2] Add _x2/_x4 svqrshr builtins. (PR #74100)
@@ -2119,6 +2119,21 @@ let TargetGuard = "sme2" in { // 2-way and 4-way selects def SVSEL_X2 : SInst<"svsel[_{d}_x2]", "2}22", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_sel_x2", [IsStreaming], []>; def SVSEL_X4 : SInst<"svsel[_{d}_x4]", "4}44", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_sel_x4", [IsStreaming], []>; + + def SVQRSHRN_X4 : SInst<"svqrshrn[_{0}_{d}_x4]", "q4i", "il", MergeNone, "aarch64_sve_sqrshrn_x4", [IsStreaming], [ImmCheck<1, ImmCheckShiftRight, 0>]>; + def SVUQRSHRN_X4 : SInst<"svqrshrn[_{0}_{d}_x4]", "b4i", "UiUl", MergeNone, "aarch64_sve_uqrshrn_x4", [IsStreaming], [ImmCheck<1, ImmCheckShiftRight, 0>]>; + + // SQRSHR / UQRSHR + def SVQRSHR_X2 : SInst<"svqrshr[_{0}_{d}_x2]", "h2i", "i",MergeNone, "aarch64_sve_sqrshr_x2", [IsStreaming], [ImmCheck<1, ImmCheck1_16>]>; + def SVUQRSHR_X2 : SInst<"svqrshr[_{0}_{d}_x2]", "e2i", "Ui", MergeNone, "aarch64_sve_uqrshr_x2", [IsStreaming], [ImmCheck<1, ImmCheck1_16>]>; + def SVQRSHR_X4 : SInst<"svqrshr[_{0}_{d}_x4]", "q4i", "il", MergeNone, "aarch64_sve_sqrshr_x4", [IsStreaming], [ImmCheck<1, ImmCheckShiftRight, 0>]>; + def SVUQRSHR_X4 : SInst<"svqrshr[_{0}_{d}_x4]", "b4i", "UiUl", MergeNone, "aarch64_sve_uqrshr_x4", [IsStreaming], [ImmCheck<1, ImmCheckShiftRight, 0>]>; + + // SQRSHRU + def SVSQRSHRU_X2 : SInst<"svqrshru[_{0}_{d}_x2]", "e2i", "i", MergeNone, "aarch64_sve_sqrshru_x2", [IsStreaming], [ImmCheck<1, ImmCheck1_16>]>; + def SVSQRSHRU_X4 : SInst<"svqrshru[_{0}_{d}_x4]", "b4i", "il", MergeNone, "aarch64_sve_sqrshru_x4", [IsStreaming], [ImmCheck<1, ImmCheckShiftRight, 0>]>; + + def SVSQRSHRUN_X4 : SInst<"svqrshrun[_{0}_{d}_x4]", "b4i", "il", MergeNone, "aarch64_sve_sqrshrun_x4", [IsStreaming], [ImmCheck<1, ImmCheckShiftRight, 0>]>; } kmclaughlin-arm wrote: The qrshr* builtins have changed again in the latest ACLE update (https://github.com/ARM-software/acle/pull/278): ``` arm_neon.h shift-by-immediate instructions use an _n suffix to indicate that the shift amount is scalar. arm_sve.h carried this across to the full/non-overloaded forms of SVE immediate shifts. It seems worth doing the same here for consistency, and to protect against vector-vector forms being added in future. ``` Please can you apply this change here? https://github.com/llvm/llvm-project/pull/74100 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME] Add IsStreamingOrSVE2p1 (PR #75958)
@@ -2069,21 +2070,20 @@ def SVDOT_LANE_X2_U : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "Ui", MergeNone def SVDOT_LANE_X2_F : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "f", MergeNone, "aarch64_sve_fdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>; } -let TargetGuard = "sve2p1|sme" in { -def SVSCLAMP : SInst<"svclamp[_{d}]", "", "csil", MergeNone, "aarch64_sve_sclamp", [], []>; -def SVUCLAMP : SInst<"svclamp[_{d}]", "", "UcUsUiUl", MergeNone, "aarch64_sve_uclamp", [], []>; +let TargetGuard = "sve2p1|sme2" in { +def SVSCLAMP : SInst<"svclamp[_{d}]", "", "csil", MergeNone, "aarch64_sve_sclamp", [IsStreamingCompatible], []>; +def SVUCLAMP : SInst<"svclamp[_{d}]", "", "UcUsUiUl", MergeNone, "aarch64_sve_uclamp", [IsStreamingCompatible], []>; kmclaughlin-arm wrote: The `SVFCLAMP` builtin a few lines below this is using `IsStreamingOrSVE2p1`, should these be the same? https://github.com/llvm/llvm-project/pull/75958 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Add builtins for multi-vector fp round to integral value (PR #75941)
https://github.com/kmclaughlin-arm closed https://github.com/llvm/llvm-project/pull/75941 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [clang-tools-extra] [Clang][SME2] Enable multi-vector loads & stores for SME2 (PR #75821)
https://github.com/kmclaughlin-arm closed https://github.com/llvm/llvm-project/pull/75821 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME2] Fix SME2 mla/mls tests (PR #76711)
https://github.com/kmclaughlin-arm commented: Thanks for fixing these tests @MDevereau! There are also some tests in acle_sme2_mlal.c, acle_sme2_mlall.c & acle_sme2_mlsl.c which have a similar issue, could you please update them in this patch too? https://github.com/llvm/llvm-project/pull/76711 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME2] Fix SME2 mla/mls tests (PR #76711)
https://github.com/kmclaughlin-arm edited https://github.com/llvm/llvm-project/pull/76711 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME2] Fix SME2 mla/mls tests (PR #76711)
@@ -246,7 +246,7 @@ void test_svmls_single2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64_t z // CPP-CHECK-NEXT:ret void // void test_svmls_single4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64_t zm) __arm_streaming __arm_shared_za { - SVE_ACLE_FUNC(svmls_single_za64,,_f64,,_vg1x4)(slice_base, zn, zm); + SVE_ACLE_FUNC(svmls,_single,_za64,_f64,_vg1x4)(slice_base, zn, zm); kmclaughlin-arm wrote: I think there are some `svmls` & `svmls_lane` tests that should be updated in this test file as well, for example on line 39: `SVE_ACLE_FUNC(svmls_za32,,_f32,,_vg1x2)(slice_base, zn, zm);` https://github.com/llvm/llvm-project/pull/76711 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME2] Fix SME2 mla/mls tests (PR #76711)
@@ -246,7 +246,7 @@ void test_svmls_single2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64_t z // CPP-CHECK-NEXT:ret void // void test_svmls_single4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64_t zm) __arm_streaming __arm_shared_za { - SVE_ACLE_FUNC(svmls_single_za64,,_f64,,_vg1x4)(slice_base, zn, zm); + SVE_ACLE_FUNC(svmls,_single,_za64,_f64,_vg1x4)(slice_base, zn, zm); kmclaughlin-arm wrote: I think it is incorrect, for `svmls_za32[_f32]_vg1x2` I would have expected something like `SVE_ACLE_FUNC(svmls_za32,_f32,_vg1x2,,)`, since `_f32` is the optional part of the name. https://github.com/llvm/llvm-project/pull/76711 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Fix PSEL builtin predicates (PR #77097)
https://github.com/kmclaughlin-arm created https://github.com/llvm/llvm-project/pull/77097 PSEL intrinsics which return a predicate-as-counter are available in SVE2p1 & SME2. >From 0cea7a1c7d72493de5533815903aec868543d544 Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin Date: Fri, 5 Jan 2024 11:44:53 + Subject: [PATCH] [Clang][SME2] Fix PSEL builtin predicates PSEL intrinsics which return a predicate-as-counter are available in SVE2p1 & SME2. --- clang/include/clang/Basic/arm_sve.td | 9 +- .../acle_sve2p1_psel.c| 92 +-- .../acle_sve2p1_psel_svcount.c| 110 ++ 3 files changed, 118 insertions(+), 93 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_psel_svcount.c diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 7f80fb0386cc77..ae1a154d63355a 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1952,10 +1952,6 @@ def SVPSEL_B : SInst<"svpsel_lane_b8", "PPPm", "Pc", MergeNone, "", [IsStreamin def SVPSEL_H : SInst<"svpsel_lane_b16", "PPPm", "Ps", MergeNone, "", [IsStreamingCompatible], []>; def SVPSEL_S : SInst<"svpsel_lane_b32", "PPPm", "Pi", MergeNone, "", [IsStreamingCompatible], []>; def SVPSEL_D : SInst<"svpsel_lane_b64", "PPPm", "Pl", MergeNone, "", [IsStreamingCompatible], []>; -def SVPSEL_COUNT_ALIAS_B : SInst<"svpsel_lane_c8", "}}Pm", "Pc", MergeNone, "", [IsStreamingCompatible], []>; -def SVPSEL_COUNT_ALIAS_H : SInst<"svpsel_lane_c16", "}}Pm", "Ps", MergeNone, "", [IsStreamingCompatible], []>; -def SVPSEL_COUNT_ALIAS_S : SInst<"svpsel_lane_c32", "}}Pm", "Pi", MergeNone, "", [IsStreamingCompatible], []>; -def SVPSEL_COUNT_ALIAS_D : SInst<"svpsel_lane_c64", "}}Pm", "Pl", MergeNone, "", [IsStreamingCompatible], []>; } // Standalone sve2.1 builtins @@ -1979,6 +1975,11 @@ let TargetGuard = "sve2p1|sme2" in { def SVPEXT_SINGLE : SInst<"svpext_lane_{d}", "P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext", [IsStreamingOrSVE2p1], [ImmCheck<1, ImmCheck0_3>]>; def SVPEXT_X2 : SInst<"svpext_lane_{d}_x2", "2.P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext_x2", [IsStreamingOrSVE2p1], [ImmCheck<1, ImmCheck0_1>]>; +def SVPSEL_COUNT_ALIAS_B : SInst<"svpsel_lane_c8", "}}Pm", "Pc", MergeNone, "", [IsStreamingCompatible], []>; +def SVPSEL_COUNT_ALIAS_H : SInst<"svpsel_lane_c16", "}}Pm", "Ps", MergeNone, "", [IsStreamingCompatible], []>; +def SVPSEL_COUNT_ALIAS_S : SInst<"svpsel_lane_c32", "}}Pm", "Pi", MergeNone, "", [IsStreamingCompatible], []>; +def SVPSEL_COUNT_ALIAS_D : SInst<"svpsel_lane_c64", "}}Pm", "Pl", MergeNone, "", [IsStreamingCompatible], []>; + def SVWHILEGE_COUNT : SInst<"svwhilege_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilege_{d}", [IsOverloadNone, IsStreamingOrSVE2p1], [ImmCheck<2, ImmCheck2_4_Mul2>]>; def SVWHILEGT_COUNT : SInst<"svwhilegt_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilegt_{d}", [IsOverloadNone, IsStreamingOrSVE2p1], [ImmCheck<2, ImmCheck2_4_Mul2>]>; def SVWHILELE_COUNT : SInst<"svwhilele_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilele_{d}", [IsOverloadNone, IsStreamingOrSVE2p1], [ImmCheck<2, ImmCheck2_4_Mul2>]>; diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_psel.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_psel.c index de3f6a9a57bfeb..7cf3d94ccd55d4 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_psel.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_psel.c @@ -11,13 +11,13 @@ // RUN: -target-feature +sve2p1 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // RUN: %clang_cc1 -triple aarch64-none-linux-gnu \ -// RUN: -target-feature +sme2 -S -DTEST_SME2 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -DTEST_SME2 -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: -target-feature +sme -S -DTEST_SME -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -DTEST_SME -disable-O0-optnone -Werror -Wall -o /dev/null %s // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include -#ifndef TEST_SME2 +#ifndef TEST_SME #define ATTR #else #define ATTR __arm_streaming_compatible @@ -92,89 +92,3 @@ svbool_t test_svpsel_lane_b32(svbool_t p1, svbool_t p2, uint32_t idx) ATTR { svbool_t test_svpsel_lane_b64(svbool_t p1, svbool_t p2, uint32_t idx) ATTR { return svpsel_lane_b64(p1, p2, idx + 1); } - -// CHECK-LABEL: @test_svpsel_
[clang] [Clang][SME2] Fix PSEL builtin predicates (PR #77097)
https://github.com/kmclaughlin-arm updated https://github.com/llvm/llvm-project/pull/77097 >From 0cea7a1c7d72493de5533815903aec868543d544 Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin Date: Fri, 5 Jan 2024 11:44:53 + Subject: [PATCH 1/2] [Clang][SME2] Fix PSEL builtin predicates PSEL intrinsics which return a predicate-as-counter are available in SVE2p1 & SME2. --- clang/include/clang/Basic/arm_sve.td | 9 +- .../acle_sve2p1_psel.c| 92 +-- .../acle_sve2p1_psel_svcount.c| 110 ++ 3 files changed, 118 insertions(+), 93 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_psel_svcount.c diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 7f80fb0386cc77..ae1a154d63355a 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1952,10 +1952,6 @@ def SVPSEL_B : SInst<"svpsel_lane_b8", "PPPm", "Pc", MergeNone, "", [IsStreamin def SVPSEL_H : SInst<"svpsel_lane_b16", "PPPm", "Ps", MergeNone, "", [IsStreamingCompatible], []>; def SVPSEL_S : SInst<"svpsel_lane_b32", "PPPm", "Pi", MergeNone, "", [IsStreamingCompatible], []>; def SVPSEL_D : SInst<"svpsel_lane_b64", "PPPm", "Pl", MergeNone, "", [IsStreamingCompatible], []>; -def SVPSEL_COUNT_ALIAS_B : SInst<"svpsel_lane_c8", "}}Pm", "Pc", MergeNone, "", [IsStreamingCompatible], []>; -def SVPSEL_COUNT_ALIAS_H : SInst<"svpsel_lane_c16", "}}Pm", "Ps", MergeNone, "", [IsStreamingCompatible], []>; -def SVPSEL_COUNT_ALIAS_S : SInst<"svpsel_lane_c32", "}}Pm", "Pi", MergeNone, "", [IsStreamingCompatible], []>; -def SVPSEL_COUNT_ALIAS_D : SInst<"svpsel_lane_c64", "}}Pm", "Pl", MergeNone, "", [IsStreamingCompatible], []>; } // Standalone sve2.1 builtins @@ -1979,6 +1975,11 @@ let TargetGuard = "sve2p1|sme2" in { def SVPEXT_SINGLE : SInst<"svpext_lane_{d}", "P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext", [IsStreamingOrSVE2p1], [ImmCheck<1, ImmCheck0_3>]>; def SVPEXT_X2 : SInst<"svpext_lane_{d}_x2", "2.P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext_x2", [IsStreamingOrSVE2p1], [ImmCheck<1, ImmCheck0_1>]>; +def SVPSEL_COUNT_ALIAS_B : SInst<"svpsel_lane_c8", "}}Pm", "Pc", MergeNone, "", [IsStreamingCompatible], []>; +def SVPSEL_COUNT_ALIAS_H : SInst<"svpsel_lane_c16", "}}Pm", "Ps", MergeNone, "", [IsStreamingCompatible], []>; +def SVPSEL_COUNT_ALIAS_S : SInst<"svpsel_lane_c32", "}}Pm", "Pi", MergeNone, "", [IsStreamingCompatible], []>; +def SVPSEL_COUNT_ALIAS_D : SInst<"svpsel_lane_c64", "}}Pm", "Pl", MergeNone, "", [IsStreamingCompatible], []>; + def SVWHILEGE_COUNT : SInst<"svwhilege_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilege_{d}", [IsOverloadNone, IsStreamingOrSVE2p1], [ImmCheck<2, ImmCheck2_4_Mul2>]>; def SVWHILEGT_COUNT : SInst<"svwhilegt_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilegt_{d}", [IsOverloadNone, IsStreamingOrSVE2p1], [ImmCheck<2, ImmCheck2_4_Mul2>]>; def SVWHILELE_COUNT : SInst<"svwhilele_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilele_{d}", [IsOverloadNone, IsStreamingOrSVE2p1], [ImmCheck<2, ImmCheck2_4_Mul2>]>; diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_psel.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_psel.c index de3f6a9a57bfeb..7cf3d94ccd55d4 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_psel.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_psel.c @@ -11,13 +11,13 @@ // RUN: -target-feature +sve2p1 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // RUN: %clang_cc1 -triple aarch64-none-linux-gnu \ -// RUN: -target-feature +sme2 -S -DTEST_SME2 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -DTEST_SME2 -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: -target-feature +sme -S -DTEST_SME -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -DTEST_SME -disable-O0-optnone -Werror -Wall -o /dev/null %s // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include -#ifndef TEST_SME2 +#ifndef TEST_SME #define ATTR #else #define ATTR __arm_streaming_compatible @@ -92,89 +92,3 @@ svbool_t test_svpsel_lane_b32(svbool_t p1, svbool_t p2, uint32_t idx) ATTR { svbool_t test_svpsel_lane_b64(svbool_t p1, svbool_t p2, uint32_t idx) ATTR { return svpsel_lane_b64(p1, p2, idx + 1); } - -// CHECK-LABEL: @test_svpsel_lane_c8( -// CHECK-NEXT: entry: -// CHECK-NEXT:[[ADD:%.*]] = add i32 [[IDX:%
[clang] [Clang][SME2] Fix PSEL builtin predicates (PR #77097)
@@ -1952,10 +1952,6 @@ def SVPSEL_B : SInst<"svpsel_lane_b8", "PPPm", "Pc", MergeNone, "", [IsStreamin def SVPSEL_H : SInst<"svpsel_lane_b16", "PPPm", "Ps", MergeNone, "", [IsStreamingCompatible], []>; def SVPSEL_S : SInst<"svpsel_lane_b32", "PPPm", "Pi", MergeNone, "", [IsStreamingCompatible], []>; def SVPSEL_D : SInst<"svpsel_lane_b64", "PPPm", "Pl", MergeNone, "", [IsStreamingCompatible], []>; kmclaughlin-arm wrote: I think it should use `IsStreamingOrSVE2p1` in both cases. https://github.com/llvm/llvm-project/pull/77097 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME2] Fix SME2 mla/mls tests (PR #76711)
https://github.com/kmclaughlin-arm edited https://github.com/llvm/llvm-project/pull/76711 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME2] Fix SME2 mla/mls tests (PR #76711)
https://github.com/kmclaughlin-arm commented: Thanks for updating this @MDevereau, I think there are just a few more tests that should be included in this PR. https://github.com/llvm/llvm-project/pull/76711 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME2] Fix SME2 mla/mls tests (PR #76711)
@@ -460,7 +460,7 @@ void test_svmla_single4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) // void test_svmla_single4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_shared_za { - SVE_ACLE_FUNC(svmla_single_za32,,_s16,,_vg2x4)(slice_base, zn, zm); + SVE_ACLE_FUNC(svmla,_single,_za32,_s16,_vg2x4)(slice_base, zn, zm); } kmclaughlin-arm wrote: The tests for the `svmla_lane` builtins in this file also need to be updated. https://github.com/llvm/llvm-project/pull/76711 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME2] Fix SME2 mla/mls tests (PR #76711)
@@ -494,7 +494,7 @@ void test_svmls_lane1_f16(uint32_t slice_base, svfloat16_t zn, svfloat16_t zm) _ // void test_svmls_lane1_bf16(uint32_t slice_base, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_shared_za { - SVE_ACLE_FUNC(svmls_lane_za32,,_bf16,,_vg2x1)(slice_base, zn, zm, 7); + SVE_ACLE_FUNC(svmls_lane_za32,_bf16,_vg2x1,,)(slice_base, zn, zm, 7); kmclaughlin-arm wrote: There is one extra `svmls_lane` test above this that also needs to be fixed :) https://github.com/llvm/llvm-project/pull/76711 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang] Rename and enable boolean get, set, create and undef for sme2 (PR #77338)
@@ -1321,12 +1321,17 @@ def SVSET_3_BF16 : SInst<"svset3[_{d}]", "33id", "b", MergeNone, "", [IsTupleSet def SVSET_4_BF16 : SInst<"svset4[_{d}]", "44id", "b", MergeNone, "", [IsTupleSet], [ImmCheck<1, ImmCheck0_3>]>; } -let TargetGuard = "sve2p1" in { - def SVGET_2_B : SInst<"svget2[_{d}]", "d2i", "Pc", MergeNone, "", [IsTupleGet], [ImmCheck<1, ImmCheck0_1>]>; - def SVGET_4_B : SInst<"svget4[_{d}]", "d4i", "Pc", MergeNone, "", [IsTupleGet], [ImmCheck<1, ImmCheck0_3>]>; +let TargetGuard = "sve2p1|sme2" in { + def SVGET_2_B : SInst<"svget2_b", "d2i", "Pc", MergeNone, "", [IsTupleGet], [ImmCheck<1, ImmCheck0_1>]>; + def SVGET_4_B : SInst<"svget4_b", "d4i", "Pc", MergeNone, "", [IsTupleGet], [ImmCheck<1, ImmCheck0_3>]>; kmclaughlin-arm wrote: Does the `_b` still need to be in square brackets? I think this part of the name is optional for get, set & create (but not undef). https://github.com/llvm/llvm-project/pull/77338 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang] Rename and enable boolean get, set, create and undef for sme2 (PR #77338)
@@ -0,0 +1,35 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O2 -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O2 -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -O2 -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -O2 -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK kmclaughlin-arm wrote: Please can you also add a line to test these builtins end to end? For example: `RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s` https://github.com/llvm/llvm-project/pull/77338 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME2] Fix SME2 mla/mls tests (PR #76711)
https://github.com/kmclaughlin-arm approved this pull request. LGTM! https://github.com/llvm/llvm-project/pull/76711 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][AArch64] Add missing SME macros (PR #80293)
https://github.com/kmclaughlin-arm approved this pull request. https://github.com/llvm/llvm-project/pull/80293 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [Clang] Make sdot builtins available to SME (PR #77792)
https://github.com/kmclaughlin-arm approved this pull request. LGTM! https://github.com/llvm/llvm-project/pull/77792 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Fix PSEL builtin predicates (PR #77097)
https://github.com/kmclaughlin-arm closed https://github.com/llvm/llvm-project/pull/77097 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang] Rename and enable boolean get, set, create and undef for sme2 (PR #77338)
https://github.com/kmclaughlin-arm approved this pull request. https://github.com/llvm/llvm-project/pull/77338 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME] Add missing IsStreamingCompatible flag to svget, svcreate & svset (PR #78430)
https://github.com/kmclaughlin-arm closed https://github.com/llvm/llvm-project/pull/78430 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang] Refactor diagnostics for SME builtins. (PR #78258)
@@ -342,331 +342,331 @@ let TargetGuard = "sme2" in { // let TargetGuard = "sme2" in { - def SVSMOPA : Inst<"svmopa_za32[_{d}]_m", "viPPdd", "s", MergeNone, "aarch64_sme_smopa_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; - def SVUSMOPA : Inst<"svmopa_za32[_{d}]_m", "viPPdd", "Us", MergeNone, "aarch64_sme_umopa_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; + def SVSMOPA : Inst<"svmopa_za32[_{d}]_m", "viPPdd", "s", MergeNone, "aarch64_sme_smopa_za32", [IsInOutZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; + def SVUSMOPA : Inst<"svmopa_za32[_{d}]_m", "viPPdd", "Us", MergeNone, "aarch64_sme_umopa_za32", [IsInOutZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; - def SVSMOPS : Inst<"svmops_za32[_{d}]_m", "viPPdd", "s", MergeNone, "aarch64_sme_smops_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; - def SVUSMOPS : Inst<"svmops_za32[_{d}]_m", "viPPdd", "Us", MergeNone, "aarch64_sme_umops_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; + def SVSMOPS : Inst<"svmops_za32[_{d}]_m", "viPPdd", "s", MergeNone, "aarch64_sme_smops_za32", [IsInOutZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; + def SVUSMOPS : Inst<"svmops_za32[_{d}]_m", "viPPdd", "Us", MergeNone, "aarch64_sme_umops_za32", [IsInOutZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; - def SVBMOPA : Inst<"svbmopa_za32[_{d}]_m", "viPPdd", "iUi", MergeNone, "aarch64_sme_bmopa_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; + def SVBMOPA : Inst<"svbmopa_za32[_{d}]_m", "viPPdd", "iUi", MergeNone, "aarch64_sme_bmopa_za32", [IsInOutZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; - def SVBMOPS : Inst<"svbmops_za32[_{d}]_m", "viPPdd", "iUi", MergeNone, "aarch64_sme_bmops_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; + def SVBMOPS : Inst<"svbmops_za32[_{d}]_m", "viPPdd", "iUi", MergeNone, "aarch64_sme_bmops_za32", [IsInOutZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; // VERTICAL DOT-PRODUCT - def SVVDOT_LANE_ZA32_VG1x2_S : Inst<"svvdot_lane_za32[_{d}]_vg1x2", "vm2di", "s", MergeNone, "aarch64_sme_svdot_lane_za32_vg1x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_3>]>; - def SVVDOT_LANE_ZA32_VG1x4_S : Inst<"svvdot_lane_za32[_{d}]_vg1x4", "vm4di", "c", MergeNone, "aarch64_sme_svdot_lane_za32_vg1x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_3>]>; - def SVVDOT_LANE_ZA32_VG1x2_U : Inst<"svvdot_lane_za32[_{d}]_vg1x2", "vm2di", "Us", MergeNone, "aarch64_sme_uvdot_lane_za32_vg1x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_3>]>; - def SVVDOT_LANE_ZA32_VG1x4_U : Inst<"svvdot_lane_za32[_{d}]_vg1x4", "vm4di", "Uc", MergeNone, "aarch64_sme_uvdot_lane_za32_vg1x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_3>]>; - def SVVDOT_LANE_ZA32_VG1x2_F : Inst<"svvdot_lane_za32[_{d}]_vg1x2", "vm2di", "hb", MergeNone, "aarch64_sme_fvdot_lane_za32_vg1x2", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_3>]>; - def SVSUVDOT_LANE_ZA32_VG1x4 : Inst<"svsuvdot_lane_za32[_{d}]_vg1x4", "vm4di", "c", MergeNone, "aarch64_sme_suvdot_lane_za32_vg1x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_3>]>; - def SVUSVDOT_LANE_ZA32_VG1x4 : Inst<"svusvdot_lane_za32[_{d}]_vg1x4", "vm4di", "Uc", MergeNone, "aarch64_sme_usvdot_lane_za32_vg1x4", [IsStreaming, IsSharedZA], [ImmCheck<3, ImmCheck0_3>]>; + def SVVDOT_LANE_ZA32_VG1x2_S : Inst<"svvdot_lane_za32[_{d}]_vg1x2", "vm2di", "s", MergeNone, "aarch64_sme_svdot_lane_za32_vg1x2", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_3>]>; + def SVVDOT_LANE_ZA32_VG1x4_S : Inst<"svvdot_lane_za32[_{d}]_vg1x4", "vm4di", "c", MergeNone, "aarch64_sme_svdot_lane_za32_vg1x4", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_3>]>; + def SVVDOT_LANE_ZA32_VG1x2_U : Inst<"svvdot_lane_za32[_{d}]_vg1x2", "vm2di", "Us", MergeNone, "aarch64_sme_uvdot_lane_za32_vg1x2", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_3>]>; + def SVVDOT_LANE_ZA32_VG1x4_U : Inst<"svvdot_lane_za32[_{d}]_vg1x4", "vm4di", "Uc", MergeNone, "aarch64_sme_uvdot_lane_za32_vg1x4", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_3>]>; + def SVVDOT_LANE_ZA32_VG1x2_F : Inst<"svvdot_lane_za32[_{d}]_vg1x2", "vm2di", "hb", MergeNone, "aarch64_sme_fvdot_lane_za32_vg1x2", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_3>]>; + def SVSUVDOT_LANE_ZA32_VG1x4 : Inst<"svsuvdot_lane_za32[_{d}]_vg1x4", "vm4di", "c", MergeNone, "aarch64_sme_suvdot_lane_za32_vg1x4", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_3>]>; + def SVUSVDOT_LANE_ZA32_VG1x4 : Inst<"svusvdot_lane_za32[_{d}]_vg1x4", "vm4di", "Uc", MergeNone, "aarch64_sme_usvdot_lane_za32_vg1x4", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_3>]>; // Multi-vector signed & unsigned integer dot-product - def SVDOT_MULTI_ZA32_VG1x2_S : Inst<"svdot_za32[_{d}]_vg1x2", "vm22", "cs", MergeNone, "aarch64_sme_sdot_za32_vg1x2", [IsStreaming, IsSharedZA], []>; - def SVDOT_MULTI_ZA32_VG1x4_S : Inst<"svdot_za32[_{d}]_vg1x4", "vm44", "cs", M
[clang-tools-extra] [clang] [compiler-rt] [llvm] [flang] [libcxx] [lldb] [lld] [libc] [AArch64][SME2] Preserve ZT0 state around function calls (PR #78321)
https://github.com/kmclaughlin-arm updated https://github.com/llvm/llvm-project/pull/78321 >From 11dce217ed307601d0ea1eb5b016b47f80e67786 Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin Date: Thu, 11 Jan 2024 17:46:00 + Subject: [PATCH 1/7] [SME2][Clang] Add tests with ZT0 state --- llvm/test/CodeGen/AArch64/sme-zt0-state.ll | 125 + 1 file changed, 125 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/sme-zt0-state.ll diff --git a/llvm/test/CodeGen/AArch64/sme-zt0-state.ll b/llvm/test/CodeGen/AArch64/sme-zt0-state.ll new file mode 100644 index 00..ff560681665f8b --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme-zt0-state.ll @@ -0,0 +1,125 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -start-after=simplifycfg -enable-tail-merge=false -verify-machineinstrs < %s | FileCheck %s + +; Callee with no ZT state +declare void @no_state_callee(); + +; Callees with ZT0 state +declare void @zt0_shared_callee() "aarch64_in_zt0"; + +; Callees with ZA state + +declare void @za_shared_callee() "aarch64_pstate_za_shared"; +declare void @za_zt0_shared_callee() "aarch64_pstate_za_shared" "aarch64_in_zt0"; + +; +; Private-ZA Callee +; + +; Expect spill & fill of ZT0 around call +; Expect smstop/smstart za around call +define void @zt0_in_caller_no_state_callee() "aarch64_in_zt0" nounwind { +; CHECK-LABEL: zt0_in_caller_no_state_callee: +; CHECK: // %bb.0: +; CHECK-NEXT:str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT:bl no_state_callee +; CHECK-NEXT:ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT:ret + call void @no_state_callee(); + ret void; +} + +; Expect spill & fill of ZT0 around call +; Expect setup and restore lazy-save around call +; Expect smstart za after call +define void @za_zt0_shared_caller_no_state_callee() "aarch64_pstate_za_shared" "aarch64_in_zt0" nounwind { +; CHECK-LABEL: za_zt0_shared_caller_no_state_callee: +; CHECK: // %bb.0: +; CHECK-NEXT:stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT:mov x29, sp +; CHECK-NEXT:sub sp, sp, #16 +; CHECK-NEXT:rdsvl x8, #1 +; CHECK-NEXT:mov x9, sp +; CHECK-NEXT:msub x9, x8, x8, x9 +; CHECK-NEXT:mov sp, x9 +; CHECK-NEXT:sub x10, x29, #16 +; CHECK-NEXT:stur wzr, [x29, #-4] +; CHECK-NEXT:sturh wzr, [x29, #-6] +; CHECK-NEXT:stur x9, [x29, #-16] +; CHECK-NEXT:sturh w8, [x29, #-8] +; CHECK-NEXT:msr TPIDR2_EL0, x10 +; CHECK-NEXT:bl no_state_callee +; CHECK-NEXT:smstart za +; CHECK-NEXT:mrs x8, TPIDR2_EL0 +; CHECK-NEXT:sub x0, x29, #16 +; CHECK-NEXT:cbnz x8, .LBB1_2 +; CHECK-NEXT: // %bb.1: +; CHECK-NEXT:bl __arm_tpidr2_restore +; CHECK-NEXT: .LBB1_2: +; CHECK-NEXT:msr TPIDR2_EL0, xzr +; CHECK-NEXT:mov sp, x29 +; CHECK-NEXT:ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT:ret + call void @no_state_callee(); + ret void; +} + +; +; Shared-ZA Callee +; + +; Caller and callee have shared ZT0 state, no spill/fill of ZT0 required +define void @zt0_shared_caller_zt0_shared_callee() "aarch64_in_zt0" nounwind { +; CHECK-LABEL: zt0_shared_caller_zt0_shared_callee: +; CHECK: // %bb.0: +; CHECK-NEXT:str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT:bl zt0_shared_callee +; CHECK-NEXT:ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT:ret + call void @zt0_shared_callee(); + ret void; +} + +; Expect spill & fill of ZT0 around call +define void @za_zt0_shared_caller_za_shared_callee() "aarch64_pstate_za_shared" "aarch64_in_zt0" nounwind { +; CHECK-LABEL: za_zt0_shared_caller_za_shared_callee: +; CHECK: // %bb.0: +; CHECK-NEXT:stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT:mov x29, sp +; CHECK-NEXT:sub sp, sp, #16 +; CHECK-NEXT:rdsvl x8, #1 +; CHECK-NEXT:mov x9, sp +; CHECK-NEXT:msub x8, x8, x8, x9 +; CHECK-NEXT:mov sp, x8 +; CHECK-NEXT:stur wzr, [x29, #-4] +; CHECK-NEXT:sturh wzr, [x29, #-6] +; CHECK-NEXT:stur x8, [x29, #-16] +; CHECK-NEXT:bl za_shared_callee +; CHECK-NEXT:mov sp, x29 +; CHECK-NEXT:ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT:ret + call void @za_shared_callee(); + ret void; +} + +; Caller and callee have shared ZA & ZT0 +define void @za_zt0_shared_caller_za_zt0_shared_callee() "aarch64_pstate_za_shared" "aarch64_in_zt0" nounwind { +; CHECK-LABEL: za_zt0_shared_caller_za_zt0_shared_callee: +; CHECK: // %bb.0: +; CHECK-NEXT:stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT:mov x29, sp +; CHECK-NEXT:sub sp, sp, #16 +; CHECK-NEXT:rdsvl x8, #1 +; CHECK-NEXT:mov x9, sp +; CHECK-NEXT:msub x8, x8, x8, x9 +; CHECK-NEXT:mov sp, x8 +; CHECK-NEXT:stur wzr, [x29, #-4] +; CHECK-NEXT:sturh wzr, [x29, #-6] +; CHECK-NEXT:stur x8, [x29, #-16] +; CHECK-NEXT:bl za
[clang-tools-extra] [clang] [compiler-rt] [llvm] [flang] [libcxx] [lldb] [lld] [libc] [AArch64][SME2] Preserve ZT0 state around function calls (PR #78321)
https://github.com/kmclaughlin-arm closed https://github.com/llvm/llvm-project/pull/78321 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64][SME] Take arm_sme.h out of draft (PR #78961)
https://github.com/kmclaughlin-arm approved this pull request. https://github.com/llvm/llvm-project/pull/78961 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][AArch64] Add diagnostics for builtins that use ZT0. (PR #79140)
@@ -108,3 +108,11 @@ svint8_t new_za(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { // expected-no-warning return svread_hor_za8_s8_m(zd, pg, 0, slice_base); } + +void missing_zt0(void) __arm_streaming { + // expected-warning@+1 {{builtin call is not valid when calling from a function without active ZT0 state}} + svzero_zt(0); +} + +__arm_new("zt0") +void new_zt0(void) __arm_streaming { svzero_zt(0); } // no warning kmclaughlin-arm wrote: Can this be changed to: `// expected-no-warning` to match similar tests above? https://github.com/llvm/llvm-project/pull/79140 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Add multi-vector add/sub builtins (PR #69725)
https://github.com/kmclaughlin-arm commented: Thank you for reviewing this @david-arm! https://github.com/llvm/llvm-project/pull/69725 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Add multi-vector add/sub builtins (PR #69725)
https://github.com/kmclaughlin-arm edited https://github.com/llvm/llvm-project/pull/69725 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Add multi-vector add/sub builtins (PR #69725)
@@ -9893,24 +9888,37 @@ Value *CodeGenFunction::FormSVEBuiltinResult(Value *Call) { return Call; } -Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, - const CallExpr *E) { +void CodeGenFunction::GetAArch64SMEProcessedOperands( +unsigned BuiltinID, const CallExpr *E, SmallVectorImpl &Ops, +SVETypeFlags TypeFlags) { // Find out if any arguments are required to be integer constant expressions. unsigned ICEArguments = 0; ASTContext::GetBuiltinTypeError Error; getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); assert(Error == ASTContext::GE_None && "Should not codegen an error"); - llvm::Type *Ty = ConvertType(E->getType()); - if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 && - BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64) { -Value *Val = EmitScalarExpr(E->getArg(0)); -return EmitSVEReinterpret(Val, Ty); - } + bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet(); - llvm::SmallVector Ops; for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { -if ((ICEArguments & (1 << i)) == 0) kmclaughlin-arm wrote: Added the following comment: ``` // Tuple set/get only requires one insert/extract vector, which is // created by EmitSVETupleSetOrGet. ``` https://github.com/llvm/llvm-project/pull/69725 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Add multi-vector add/sub builtins (PR #69725)
@@ -1016,29 +1021,24 @@ std::string Intrinsic::mangleName(ClassKind LocalCK) const { getMergeSuffix(); } -void Intrinsic::emitIntrinsic(raw_ostream &OS, SVEEmitter &Emitter) const { +void Intrinsic::emitIntrinsic(raw_ostream &OS, ACLEKind Kind) const { bool IsOverloaded = getClassKind() == ClassG && getProto().size() > 1; std::string FullName = mangleName(ClassS); std::string ProtoName = mangleName(getClassKind()); std::string SMEAttrs = ""; - if (Flags & Emitter.getEnumValueForFlag("IsStreaming")) -SMEAttrs += ", arm_streaming"; - if (Flags & Emitter.getEnumValueForFlag("IsStreamingCompatible")) -SMEAttrs += ", arm_streaming_compatible"; - if (Flags & Emitter.getEnumValueForFlag("IsSharedZA")) -SMEAttrs += ", arm_shared_za"; - if (Flags & Emitter.getEnumValueForFlag("IsPreservesZA")) -SMEAttrs += ", arm_preserves_za"; - OS << (IsOverloaded ? "__aio " : "__ai ") - << "__attribute__((__clang_arm_builtin_alias(" - << (SMEAttrs.empty() ? "__builtin_sve_" : "__builtin_sme_") - << FullName << ")"; - if (!SMEAttrs.empty()) -OS << SMEAttrs; kmclaughlin-arm wrote: The attributes aren't currently handled elsewhere in Clang so this shouldn't have been removed. I've added most of this code back in, but kept the ACLEKind switch in below as this is needed when the builtin kind is SVE but it has SME attributes. https://github.com/llvm/llvm-project/pull/69725 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Add multi-vector add/sub builtins (PR #69725)
@@ -10272,29 +10291,13 @@ Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); kmclaughlin-arm wrote: I've removed this, it wasn't needed here now that it's checked in GetAArch64SVEProcessedOperands. https://github.com/llvm/llvm-project/pull/69725 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Add multi-vector add/sub builtins (PR #69725)
@@ -9893,24 +9888,37 @@ Value *CodeGenFunction::FormSVEBuiltinResult(Value *Call) { return Call; } -Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, - const CallExpr *E) { +void CodeGenFunction::GetAArch64SMEProcessedOperands( kmclaughlin-arm wrote: I think this makes sense, I've changed it to GetAArch64SVEProcessedOperands https://github.com/llvm/llvm-project/pull/69725 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits