r305551 - Expand vector oparation to as IR constants, PR28129.

2017-06-16 Thread Dinar Temirbulatov via cfe-commits
Author: dinar
Date: Fri Jun 16 07:09:52 2017
New Revision: 305551

URL: http://llvm.org/viewvc/llvm-project?rev=305551&view=rev
Log:
Expand vector oparation to as IR constants, PR28129.

Modified:
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/test/CodeGen/avx-builtins.c

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=305551&r1=305550&r2=305551&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Fri Jun 16 07:09:52 2017
@@ -7923,6 +7923,7 @@ Value *CodeGenFunction::EmitX86BuiltinEx
 }
 
 // We can't handle 8-31 immediates with native IR, use the intrinsic.
+// Except for predicates that create constants.
 Intrinsic::ID ID;
 switch (BuiltinID) {
 default: llvm_unreachable("Unsupported intrinsic!");
@@ -7930,12 +7931,32 @@ Value *CodeGenFunction::EmitX86BuiltinEx
   ID = Intrinsic::x86_sse_cmp_ps;
   break;
 case X86::BI__builtin_ia32_cmpps256:
+  // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector
+  // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0...
+  if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
+ Value *Constant = (CC == 0xf || CC == 0x1f) ?
+llvm::Constant::getAllOnesValue(Builder.getInt32Ty()) :
+llvm::Constant::getNullValue(Builder.getInt32Ty());
+ Value *Vec = Builder.CreateVectorSplat(
+Ops[0]->getType()->getVectorNumElements(), Constant);
+ return Builder.CreateBitCast(Vec, Ops[0]->getType());
+  }
   ID = Intrinsic::x86_avx_cmp_ps_256;
   break;
 case X86::BI__builtin_ia32_cmppd:
   ID = Intrinsic::x86_sse2_cmp_pd;
   break;
 case X86::BI__builtin_ia32_cmppd256:
+  // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector
+  // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0...
+  if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
+ Value *Constant = (CC == 0xf || CC == 0x1f) ?
+llvm::Constant::getAllOnesValue(Builder.getInt64Ty()) :
+llvm::Constant::getNullValue(Builder.getInt64Ty());
+ Value *Vec = Builder.CreateVectorSplat(
+Ops[0]->getType()->getVectorNumElements(), Constant);
+ return Builder.CreateBitCast(Vec, Ops[0]->getType());
+  }
   ID = Intrinsic::x86_avx_cmp_pd_256;
   break;
 }

Modified: cfe/trunk/test/CodeGen/avx-builtins.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx-builtins.c?rev=305551&r1=305550&r2=305551&view=diff
==
--- cfe/trunk/test/CodeGen/avx-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx-builtins.c Fri Jun 16 07:09:52 2017
@@ -1427,3 +1427,51 @@ float test_mm256_cvtss_f32(__m256 __a)
  // CHECK: extractelement <8 x float> %{{.*}}, i32 0
  return _mm256_cvtss_f32(__a);
 }
+
+__m256 test_mm256_cmp_ps_true(__m256 a, __m256 b) {
+ // CHECK-LABEL: @test_mm256_cmp_ps_true
+ // CHECK: store <8 x float>   zeroinitializer, <8 x float>* %tmp, align 32
+ return _mm256_cmp_ps(a, b, _CMP_FALSE_OQ);
+}
+
+__m256 test_mm256_cmp_pd_false(__m256 a, __m256 b) {
+ // CHECK-LABEL: @test_mm256_cmp_pd_false
+ // CHECK: store <4 x double> zeroinitializer, <4 x double>* %tmp, align 32
+  return _mm256_cmp_pd(a, b, _CMP_FALSE_OQ);
+}
+
+__m256 test_mm256_cmp_ps_strue(__m256 a, __m256 b) {
+ // CHECK-LABEL: @test_mm256_cmp_ps_strue
+ // CHECK: store <8 x float>   zeroinitializer, <8 x float>* %tmp, align 32
+ return _mm256_cmp_ps(a, b, _CMP_FALSE_OS);
+}
+
+__m256 test_mm256_cmp_pd_sfalse(__m256 a, __m256 b) {
+ // CHECK-LABEL: @test_mm256_cmp_pd_sfalse
+ // CHECK: store <4 x double> zeroinitializer, <4 x double>* %tmp, align 32
+  return _mm256_cmp_pd(a, b, _CMP_FALSE_OS);
+}


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][SME2] Add multi-vector add/sub builtins (PR #69725)

2023-11-07 Thread Dinar Temirbulatov via cfe-commits


@@ -257,7 +257,7 @@ class ImmCheck {
 }
 
 class Inst ft, list ch, MemEltType met> {
+   list ft, list ch, MemEltType met = 
MemEltTyDefault> {

dtemirbulatov wrote:

I am not sure about propose of this change "met = MemEltTyDefault"?  The last 
parameter in multiclass ZAAddSub with Inst class usage is always []?

https://github.com/llvm/llvm-project/pull/69725
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][SME2] Add single and multi min/max by vector builtins (PR #71707)

2023-11-10 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/71707
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME2] Enable CLAMP multi-vector builtins for SME2 (PR #72272)

2023-11-14 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov created 
https://github.com/llvm/llvm-project/pull/72272

Thing change add builtins for SME2:
sclamp.single.x2
uclamp.single.x2
fclamp.single.x2
sclamp.single.x4
uclamp.single.x4
fclamp.single.x4

Patch by: Hassnaa Hamdi 

>From 3baa1da7fac4d628e2d92b1db5afeac34b5acb28 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov 
Date: Tue, 14 Nov 2023 15:25:45 +
Subject: [PATCH] [AArch64][SME2] Enable CLAMP multi-vector builtins for SME2

Thing change add builtins for SME2:
sclamp.single.x2
uclamp.single.x2
fclamp.single.x2
sclamp.single.x4
uclamp.single.x4
fclamp.single.x4

Patch by: Hassnaa Hamdi 
---
 clang/include/clang/Basic/arm_sve.td  |  10 +
 .../aarch64-sme2-intrinsics/acle_sme2_clamp.c | 747 ++
 2 files changed, 757 insertions(+)
 create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_clamp.c

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index 40e474d5f0a8f4e..1d03a3c9d99f9bf 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2007,6 +2007,16 @@ let TargetGuard = "sme2" in {
   defm MIN_MULTI_X4  : MinMaxIntr<"min", "","x4", "444">;
 }
 
+let TargetGuard = "sme2" in {
+  def SVSCLAMP_X2 : SInst<"svclamp[_single_{d}_x2]",  "22dd",   "csil", 
MergeNone, "aarch64_sve_sclamp_single_x2",  [IsStreaming], []>;
+  def SVUCLAMP_X2 : SInst<"svclamp[_single_{d}_x2]",  "22dd",   "UcUsUiUl", 
MergeNone, "aarch64_sve_uclamp_single_x2",  [IsStreaming], []>;
+  def SVFCLAMP_X2 : SInst<"svclamp[_single_{d}_x2]",  "22dd",   "hfd",  
MergeNone, "aarch64_sve_fclamp_single_x2",  [IsStreaming], []>;
+
+  def SVSCLAMP_X4 : SInst<"svclamp[_single_{d}_x4]",  "44dd",   "csil", 
MergeNone, "aarch64_sve_sclamp_single_x4",  [IsStreaming], []>;
+  def SVUCLAMP_X4 : SInst<"svclamp[_single_{d}_x4]",  "44dd",   "UcUsUiUl", 
MergeNone, "aarch64_sve_uclamp_single_x4",  [IsStreaming], []>;
+  def SVFCLAMP_X4 : SInst<"svclamp[_single_{d}_x4]",  "44dd",   "hfd",  
MergeNone, "aarch64_sve_fclamp_single_x4",  [IsStreaming], []>;
+}
+
 let TargetGuard = "sme2" in {
 // == ADD (vectors) ==
   def SVADD_SINGLE_X2 : SInst<"svadd[_single_{d}_x2]", "22d", "cUcsUsiUilUl", 
MergeNone, "aarch64_sve_add_single_x2", [IsStreaming], []>;
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_clamp.c 
b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_clamp.c
new file mode 100644
index 000..4dce1699c264fbf
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_clamp.c
@@ -0,0 +1,747 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 
-target-feature +sve \
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sme2 -target-feature +sve \
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 
-target-feature +sve \
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sme2 -target-feature +sve \
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 
-target-feature +sve \
+// RUN:   -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#include 
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+// SCLAMP_X2
+
+// CHECK-LABEL: @test_svclamp_single_s8_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = tail call  
@llvm.vector.extract.nxv16i8.nxv32i8( [[OP1:%.*]], i64 0)
+// CHECK-NEXT:[[TMP1:%.*]] = tail call  
@llvm.vector.extract.nxv16i8.nxv32i8( [[OP1]], i64 16)
+// CHECK-NEXT:[[TMP2:%.*]] = tail call { ,  } @llvm.aarch64.sve.sclamp.single.x2.nxv16i8( [[TMP0]], 
 [[TMP1]],  [[OP2:%.*]],  
[[OP3:%.*]])
+// CHECK-NEXT:[[TMP3:%.*]] = extractvalue { ,  } [[TMP2]], 0
+// CHECK-NEXT:[[TMP4:%.*]] = tail call  
@llvm.vector.insert.nxv32i8.nxv16i8( poison,  [[TMP3]], i64 0)
+// CHECK-NEXT:[[TMP5:%.*]] = extractvalue { ,  } [[TMP2]], 1
+// CHECK-NEXT:[[TMP6:%.*]] = tail call  
@llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]],  [[TMP5]], i64 16)
+// CHECK-NEXT:ret  [[TMP6]]

[clang] [AArch64][SME2] Add multi-vector builtins for cvt (PR #74450)

2023-12-06 Thread Dinar Temirbulatov via cfe-commits


@@ -0,0 +1,242 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sme2 -target-feature +bf16 -S -disable-O0-optnone -Werror 
-Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | 
FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sme2 -target-feature +bf16 -S -disable-O0-optnone -Werror 
-Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | 
FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1  -D__SVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sve -target-feature +sme2 -target-feature +bf16 -S 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s

dtemirbulatov wrote:

I used **-target-feature +b16b16** for BF16 support, not sure now how correct 
now, I am waiting for Carol to resolve a correct nameing after that I can 
rebase my change. 

https://github.com/llvm/llvm-project/pull/74450
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME2] Add PEXT, PSEL builtins for SME2 (PR #72827)

2023-12-06 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov updated 
https://github.com/llvm/llvm-project/pull/72827

>From 7aa69c9cb936b3883de7922f72ed9417be5a16f5 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov 
Date: Mon, 20 Nov 2023 07:04:18 +
Subject: [PATCH 1/2] [AArch64][SME2] Add PEXT, PSEL builtins for SME2

This change enables PEXT, PSEL builtins for SME2 target.
---
 clang/include/clang/Basic/arm_sve.td  |  30 +--
 .../acle_sve2p1_pext.c| 207 +-
 .../acle_sve2p1_psel.c|   5 +
 3 files changed, 218 insertions(+), 24 deletions(-)

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index 40e474d5f0a8f..96c5ac366574b 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1859,19 +1859,28 @@ def SVBGRP   : SInst<"svbgrp[_{d}]",   "ddd", 
"UcUsUiUl", MergeNone, "aarch64_sv
 def SVBGRP_N : SInst<"svbgrp[_n_{d}]", "dda", "UcUsUiUl", MergeNone, 
"aarch64_sve_bgrp_x">;
 }
 
+let TargetGuard = "sve2p1|sme" in {
+def SVPSEL_B : SInst<"svpsel_lane_b8",  "PPPm", "Pc", MergeNone, "", 
[IsStreamingCompatible], []>;
+def SVPSEL_H : SInst<"svpsel_lane_b16", "PPPm", "Ps", MergeNone, "", 
[IsStreamingCompatible], []>;
+def SVPSEL_S : SInst<"svpsel_lane_b32", "PPPm", "Pi", MergeNone, "", 
[IsStreamingCompatible], []>;
+def SVPSEL_D : SInst<"svpsel_lane_b64", "PPPm", "Pl", MergeNone, "", 
[IsStreamingCompatible], []>;
+}
+
+let TargetGuard = "sve2p1|sme2" in {
+def SVPSEL_COUNT_ALIAS_B : SInst<"svpsel_lane_c8",  "}}Pm", "Pc", MergeNone, 
"", [IsStreamingCompatible], []>;
+def SVPSEL_COUNT_ALIAS_H : SInst<"svpsel_lane_c16", "}}Pm", "Ps", MergeNone, 
"", [IsStreamingCompatible], []>;
+def SVPSEL_COUNT_ALIAS_S : SInst<"svpsel_lane_c32", "}}Pm", "Pi", MergeNone, 
"", [IsStreamingCompatible], []>;
+def SVPSEL_COUNT_ALIAS_D : SInst<"svpsel_lane_c64", "}}Pm", "Pl", MergeNone, 
"", [IsStreamingCompatible], []>;
+
+def SVPEXT_SINGLE : SInst<"svpext_lane_{d}", "P}i", "QcQsQiQl", MergeNone, 
"aarch64_sve_pext", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_3>]>;
+def SVPEXT_X2 : SInst<"svpext_lane_{d}_x2", "2.P}i", "QcQsQiQl", 
MergeNone, "aarch64_sve_pext_x2", [IsStreamingCompatible], [ImmCheck<1, 
ImmCheck0_1>]>;
+}
+
 let TargetGuard = "sve2p1" in {
 def SVFCLAMP   : SInst<"svclamp[_{d}]", "", "hfd", MergeNone, 
"aarch64_sve_fclamp", [], []>;
 def SVPTRUE_COUNT  : SInst<"svptrue_{d}", "}v", "QcQsQiQl", MergeNone, 
"aarch64_sve_ptrue_{d}", [IsOverloadNone], []>;
 def SVPFALSE_COUNT_ALIAS : SInst<"svpfalse_c", "}v", "", MergeNone, "", 
[IsOverloadNone]>;
 
-def SVPEXT_SINGLE : SInst<"svpext_lane_{d}", "P}i", "QcQsQiQl", MergeNone, 
"aarch64_sve_pext", [], [ImmCheck<1, ImmCheck0_3>]>;
-def SVPEXT_X2 : SInst<"svpext_lane_{d}_x2", "2.P}i", "QcQsQiQl", 
MergeNone, "aarch64_sve_pext_x2", [], [ImmCheck<1, ImmCheck0_1>]>;
-
-def SVPSEL_COUNT_ALIAS_B : SInst<"svpsel_lane_c8",  "}}Pm", "Pc", MergeNone, 
"", [], []>;
-def SVPSEL_COUNT_ALIAS_H : SInst<"svpsel_lane_c16", "}}Pm", "Ps", MergeNone, 
"", [], []>;
-def SVPSEL_COUNT_ALIAS_S : SInst<"svpsel_lane_c32", "}}Pm", "Pi", MergeNone, 
"", [], []>;
-def SVPSEL_COUNT_ALIAS_D : SInst<"svpsel_lane_c64", "}}Pm", "Pl", MergeNone, 
"", [], []>;
-
 def SVWHILEGE_COUNT  : SInst<"svwhilege_{d}",  "}lli", "QcQsQiQl", MergeNone, 
"aarch64_sve_whilege_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
 def SVWHILEGT_COUNT  : SInst<"svwhilegt_{d}",  "}lli", "QcQsQiQl", MergeNone, 
"aarch64_sve_whilegt_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
 def SVWHILELE_COUNT  : SInst<"svwhilele_{d}",  "}lli", "QcQsQiQl", MergeNone, 
"aarch64_sve_whilele_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
@@ -1971,11 +1980,6 @@ let TargetGuard = "sve2p1" in {
 def SVSCLAMP : SInst<"svclamp[_{d}]", "", "csil", MergeNone, 
"aarch64_sve_sclamp", [], []>;
 def SVUCLAMP : SInst<"svclamp[_{d}]", "", "UcUsUiUl", MergeNone, 
"aarch64_sve_uclamp", [], []>;
 
-def SVPSEL_B : SInst<"svpsel_lane_b8",  "PPPm", "Pc", MergeNone, "", [], []>;
-def SVPSEL_H : SInst<"svpsel_lane_b16", "PPPm", "Ps", MergeNone, "", [], []>;
-def SVPSEL_S : SInst<"svpsel_lane_b32", "PPPm", "Pi", MergeNone, "", [], []>;
-def SVPSEL_D : SInst<"svpsel_lane_b64", "PPPm", "Pl", MergeNone, "", [], []>;
-
 def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, 
"aarch64_sve_cntp_{d}", [IsOverloadNone], [ImmCheck<1, ImmCheck2_4_Mul2>]>;
 
 defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUl", "aarch64_sve_revd">;
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c 
b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c
index fe15d5a9db81f..76603e384b99c 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c
@@ -1,10 +1,17 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: aarch64-registered-targ

[clang] [AArch64][SME2] Add builtins to cast svbool from/to svcount. (PR #74720)

2023-12-07 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov created 
https://github.com/llvm/llvm-project/pull/74720

Add builtin: 'svreinterpret_b' to cast from svcount_t to svbool_t.
Add builtin: 'svreinterpret_c'  to cast from svbool_t  to svcount_t.

Patch by: Hassnaa Hamdi 

>From 837a5240ab812eb609bd1db446035a21f42d22e3 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov 
Date: Thu, 7 Dec 2023 14:46:11 +
Subject: [PATCH] [AArch64][SME2] Add builtins to cast svbool from/to svcount.

Add builtin: 'svreinterpret_b' to cast from svcount_t to svbool_t.
Add builtin: 'svreinterpret_c'  to cast from svbool_t  to svcount_t.

Patch by: Hassnaa Hamdi 
---
 clang/include/clang/Basic/arm_sve.td  |  3 ++
 clang/lib/CodeGen/CGBuiltin.cpp   | 16 +++
 .../acle_sme2_reinterpret_svcount_svbool.c| 47 +++
 3 files changed, 66 insertions(+)
 create mode 100644 
clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_reinterpret_svcount_svbool.c

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index 3f69a3df9e616..ec68489effefa 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2130,6 +2130,9 @@ let TargetGuard = "sme2" in {
   def SVURSHL_X2 : SInst<"svrshl[_{d}_x2]", "222", "UcUsUiUl", MergeNone, 
"aarch64_sve_urshl_x2", [IsStreaming], []>;
   def SVSRSHL_X4 : SInst<"svrshl[_{d}_x4]", "444", "csil", MergeNone, 
"aarch64_sve_srshl_x4", [IsStreaming], []>;
   def SVURSHL_X4 : SInst<"svrshl[_{d}_x4]", "444", "UcUsUiUl", MergeNone, 
"aarch64_sve_urshl_x4", [IsStreaming], []>;
+
+  def REINTERPRET_SVBOOL_TO_SVCOUNT : Inst<"svreinterpret[_c]", "}P", "Pc", 
MergeNone, "", [IsStreamingCompatible], [], MemEltTyDefault>;
+  def REINTERPRET_SVCOUNT_TO_SVBOOL : Inst<"svreinterpret[_b]", "P}", "Pc", 
MergeNone, "", [IsStreamingCompatible], [], MemEltTyDefault>;
 }
 
 let TargetGuard = "sve2p1" in {
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 65d9862621061..b1d8edcd99123 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -10214,6 +10214,22 @@ Value 
*CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
   switch (BuiltinID) {
   default:
 return nullptr;
+
+  case SVE::BI__builtin_sve_svreinterpret_b: {
+auto SVCountTy =
+llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
+Function *CastFromSVCountF =
+CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
+return Builder.CreateCall(CastFromSVCountF, Ops[0]);
+  }
+  case SVE::BI__builtin_sve_svreinterpret_c: {
+auto SVCountTy =
+llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
+Function *CastToSVCountF =
+CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, 
SVCountTy);
+return Builder.CreateCall(CastToSVCountF, Ops[0]);
+  }
+
   case SVE::BI__builtin_sve_svpsel_lane_b8:
   case SVE::BI__builtin_sve_svpsel_lane_b16:
   case SVE::BI__builtin_sve_svpsel_lane_b32:
diff --git 
a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_reinterpret_svcount_svbool.c
 
b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_reinterpret_svcount_svbool.c
new file mode 100644
index 0..82b30e8bbe9b9
--- /dev/null
+++ 
b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_reinterpret_svcount_svbool.c
@@ -0,0 +1,47 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | 
opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x 
c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s 
-check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | 
FileCheck %s -check-prefix=CPP-CHECK
+
+#include 
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.§
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+// CHECK-LABEL: @test_svreinterpret_svbool_svcnt(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = tail call  
@llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(target("aarch64.svcount") 
[[CNT:%.*]])
+// CHECK-NEXT:ret  [[TMP0]]
+//
+// CPP-CHECK-LABEL: @

[clang] [AArch64][SME2] Add PEXT, PSEL builtins for SME2 (PR #72827)

2023-12-07 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov updated 
https://github.com/llvm/llvm-project/pull/72827

>From 7aa69c9cb936b3883de7922f72ed9417be5a16f5 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov 
Date: Mon, 20 Nov 2023 07:04:18 +
Subject: [PATCH 1/3] [AArch64][SME2] Add PEXT, PSEL builtins for SME2

This change enables PEXT, PSEL builtins for SME2 target.
---
 clang/include/clang/Basic/arm_sve.td  |  30 +--
 .../acle_sve2p1_pext.c| 207 +-
 .../acle_sve2p1_psel.c|   5 +
 3 files changed, 218 insertions(+), 24 deletions(-)

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index 40e474d5f0a8f..96c5ac366574b 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1859,19 +1859,28 @@ def SVBGRP   : SInst<"svbgrp[_{d}]",   "ddd", 
"UcUsUiUl", MergeNone, "aarch64_sv
 def SVBGRP_N : SInst<"svbgrp[_n_{d}]", "dda", "UcUsUiUl", MergeNone, 
"aarch64_sve_bgrp_x">;
 }
 
+let TargetGuard = "sve2p1|sme" in {
+def SVPSEL_B : SInst<"svpsel_lane_b8",  "PPPm", "Pc", MergeNone, "", 
[IsStreamingCompatible], []>;
+def SVPSEL_H : SInst<"svpsel_lane_b16", "PPPm", "Ps", MergeNone, "", 
[IsStreamingCompatible], []>;
+def SVPSEL_S : SInst<"svpsel_lane_b32", "PPPm", "Pi", MergeNone, "", 
[IsStreamingCompatible], []>;
+def SVPSEL_D : SInst<"svpsel_lane_b64", "PPPm", "Pl", MergeNone, "", 
[IsStreamingCompatible], []>;
+}
+
+let TargetGuard = "sve2p1|sme2" in {
+def SVPSEL_COUNT_ALIAS_B : SInst<"svpsel_lane_c8",  "}}Pm", "Pc", MergeNone, 
"", [IsStreamingCompatible], []>;
+def SVPSEL_COUNT_ALIAS_H : SInst<"svpsel_lane_c16", "}}Pm", "Ps", MergeNone, 
"", [IsStreamingCompatible], []>;
+def SVPSEL_COUNT_ALIAS_S : SInst<"svpsel_lane_c32", "}}Pm", "Pi", MergeNone, 
"", [IsStreamingCompatible], []>;
+def SVPSEL_COUNT_ALIAS_D : SInst<"svpsel_lane_c64", "}}Pm", "Pl", MergeNone, 
"", [IsStreamingCompatible], []>;
+
+def SVPEXT_SINGLE : SInst<"svpext_lane_{d}", "P}i", "QcQsQiQl", MergeNone, 
"aarch64_sve_pext", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_3>]>;
+def SVPEXT_X2 : SInst<"svpext_lane_{d}_x2", "2.P}i", "QcQsQiQl", 
MergeNone, "aarch64_sve_pext_x2", [IsStreamingCompatible], [ImmCheck<1, 
ImmCheck0_1>]>;
+}
+
 let TargetGuard = "sve2p1" in {
 def SVFCLAMP   : SInst<"svclamp[_{d}]", "", "hfd", MergeNone, 
"aarch64_sve_fclamp", [], []>;
 def SVPTRUE_COUNT  : SInst<"svptrue_{d}", "}v", "QcQsQiQl", MergeNone, 
"aarch64_sve_ptrue_{d}", [IsOverloadNone], []>;
 def SVPFALSE_COUNT_ALIAS : SInst<"svpfalse_c", "}v", "", MergeNone, "", 
[IsOverloadNone]>;
 
-def SVPEXT_SINGLE : SInst<"svpext_lane_{d}", "P}i", "QcQsQiQl", MergeNone, 
"aarch64_sve_pext", [], [ImmCheck<1, ImmCheck0_3>]>;
-def SVPEXT_X2 : SInst<"svpext_lane_{d}_x2", "2.P}i", "QcQsQiQl", 
MergeNone, "aarch64_sve_pext_x2", [], [ImmCheck<1, ImmCheck0_1>]>;
-
-def SVPSEL_COUNT_ALIAS_B : SInst<"svpsel_lane_c8",  "}}Pm", "Pc", MergeNone, 
"", [], []>;
-def SVPSEL_COUNT_ALIAS_H : SInst<"svpsel_lane_c16", "}}Pm", "Ps", MergeNone, 
"", [], []>;
-def SVPSEL_COUNT_ALIAS_S : SInst<"svpsel_lane_c32", "}}Pm", "Pi", MergeNone, 
"", [], []>;
-def SVPSEL_COUNT_ALIAS_D : SInst<"svpsel_lane_c64", "}}Pm", "Pl", MergeNone, 
"", [], []>;
-
 def SVWHILEGE_COUNT  : SInst<"svwhilege_{d}",  "}lli", "QcQsQiQl", MergeNone, 
"aarch64_sve_whilege_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
 def SVWHILEGT_COUNT  : SInst<"svwhilegt_{d}",  "}lli", "QcQsQiQl", MergeNone, 
"aarch64_sve_whilegt_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
 def SVWHILELE_COUNT  : SInst<"svwhilele_{d}",  "}lli", "QcQsQiQl", MergeNone, 
"aarch64_sve_whilele_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
@@ -1971,11 +1980,6 @@ let TargetGuard = "sve2p1" in {
 def SVSCLAMP : SInst<"svclamp[_{d}]", "", "csil", MergeNone, 
"aarch64_sve_sclamp", [], []>;
 def SVUCLAMP : SInst<"svclamp[_{d}]", "", "UcUsUiUl", MergeNone, 
"aarch64_sve_uclamp", [], []>;
 
-def SVPSEL_B : SInst<"svpsel_lane_b8",  "PPPm", "Pc", MergeNone, "", [], []>;
-def SVPSEL_H : SInst<"svpsel_lane_b16", "PPPm", "Ps", MergeNone, "", [], []>;
-def SVPSEL_S : SInst<"svpsel_lane_b32", "PPPm", "Pi", MergeNone, "", [], []>;
-def SVPSEL_D : SInst<"svpsel_lane_b64", "PPPm", "Pl", MergeNone, "", [], []>;
-
 def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, 
"aarch64_sve_cntp_{d}", [IsOverloadNone], [ImmCheck<1, ImmCheck2_4_Mul2>]>;
 
 defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUl", "aarch64_sve_revd">;
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c 
b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c
index fe15d5a9db81f..76603e384b99c 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c
@@ -1,10 +1,17 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: aarch64-registered-targ

[clang] [AArch64][SME2] Add _x2/_x4 svqrshr builtins. (PR #74100)

2023-12-07 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov closed 
https://github.com/llvm/llvm-project/pull/74100
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME2] Add builtins to cast svbool from/to svcount. (PR #74720)

2023-12-07 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov updated 
https://github.com/llvm/llvm-project/pull/74720

>From 837a5240ab812eb609bd1db446035a21f42d22e3 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov 
Date: Thu, 7 Dec 2023 14:46:11 +
Subject: [PATCH 1/2] [AArch64][SME2] Add builtins to cast svbool from/to
 svcount.

Add builtin: 'svreinterpret_b' to cast from svcount_t to svbool_t.
Add builtin: 'svreinterpret_c'  to cast from svbool_t  to svcount_t.

Patch by: Hassnaa Hamdi 
---
 clang/include/clang/Basic/arm_sve.td  |  3 ++
 clang/lib/CodeGen/CGBuiltin.cpp   | 16 +++
 .../acle_sme2_reinterpret_svcount_svbool.c| 47 +++
 3 files changed, 66 insertions(+)
 create mode 100644 
clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_reinterpret_svcount_svbool.c

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index 3f69a3df9e616..ec68489effefa 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2130,6 +2130,9 @@ let TargetGuard = "sme2" in {
   def SVURSHL_X2 : SInst<"svrshl[_{d}_x2]", "222", "UcUsUiUl", MergeNone, 
"aarch64_sve_urshl_x2", [IsStreaming], []>;
   def SVSRSHL_X4 : SInst<"svrshl[_{d}_x4]", "444", "csil", MergeNone, 
"aarch64_sve_srshl_x4", [IsStreaming], []>;
   def SVURSHL_X4 : SInst<"svrshl[_{d}_x4]", "444", "UcUsUiUl", MergeNone, 
"aarch64_sve_urshl_x4", [IsStreaming], []>;
+
+  def REINTERPRET_SVBOOL_TO_SVCOUNT : Inst<"svreinterpret[_c]", "}P", "Pc", 
MergeNone, "", [IsStreamingCompatible], [], MemEltTyDefault>;
+  def REINTERPRET_SVCOUNT_TO_SVBOOL : Inst<"svreinterpret[_b]", "P}", "Pc", 
MergeNone, "", [IsStreamingCompatible], [], MemEltTyDefault>;
 }
 
 let TargetGuard = "sve2p1" in {
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 65d9862621061..b1d8edcd99123 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -10214,6 +10214,22 @@ Value 
*CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
   switch (BuiltinID) {
   default:
 return nullptr;
+
+  case SVE::BI__builtin_sve_svreinterpret_b: {
+auto SVCountTy =
+llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
+Function *CastFromSVCountF =
+CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
+return Builder.CreateCall(CastFromSVCountF, Ops[0]);
+  }
+  case SVE::BI__builtin_sve_svreinterpret_c: {
+auto SVCountTy =
+llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
+Function *CastToSVCountF =
+CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, 
SVCountTy);
+return Builder.CreateCall(CastToSVCountF, Ops[0]);
+  }
+
   case SVE::BI__builtin_sve_svpsel_lane_b8:
   case SVE::BI__builtin_sve_svpsel_lane_b16:
   case SVE::BI__builtin_sve_svpsel_lane_b32:
diff --git 
a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_reinterpret_svcount_svbool.c
 
b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_reinterpret_svcount_svbool.c
new file mode 100644
index 0..82b30e8bbe9b9
--- /dev/null
+++ 
b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_reinterpret_svcount_svbool.c
@@ -0,0 +1,47 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | 
opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x 
c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s 
-check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | 
FileCheck %s -check-prefix=CPP-CHECK
+
+#include 
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.§
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+// CHECK-LABEL: @test_svreinterpret_svbool_svcnt(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = tail call  
@llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(target("aarch64.svcount") 
[[CNT:%.*]])
+// CHECK-NEXT:ret  [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z31test_svreinterpret_svbool_svcntu11__SVCount_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call  
@llvm.aarch64.sve.convert.to.

[clang] [AArch64][SME2] Add PEXT, PSEL builtins for SME2 (PR #72827)

2023-12-08 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov updated 
https://github.com/llvm/llvm-project/pull/72827

>From 7aa69c9cb936b3883de7922f72ed9417be5a16f5 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov 
Date: Mon, 20 Nov 2023 07:04:18 +
Subject: [PATCH 1/4] [AArch64][SME2] Add PEXT, PSEL builtins for SME2

This change enables PEXT, PSEL builtins for SME2 target.
---
 clang/include/clang/Basic/arm_sve.td  |  30 +--
 .../acle_sve2p1_pext.c| 207 +-
 .../acle_sve2p1_psel.c|   5 +
 3 files changed, 218 insertions(+), 24 deletions(-)

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index 40e474d5f0a8f4..96c5ac366574bf 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1859,19 +1859,28 @@ def SVBGRP   : SInst<"svbgrp[_{d}]",   "ddd", 
"UcUsUiUl", MergeNone, "aarch64_sv
 def SVBGRP_N : SInst<"svbgrp[_n_{d}]", "dda", "UcUsUiUl", MergeNone, 
"aarch64_sve_bgrp_x">;
 }
 
+let TargetGuard = "sve2p1|sme" in {
+def SVPSEL_B : SInst<"svpsel_lane_b8",  "PPPm", "Pc", MergeNone, "", 
[IsStreamingCompatible], []>;
+def SVPSEL_H : SInst<"svpsel_lane_b16", "PPPm", "Ps", MergeNone, "", 
[IsStreamingCompatible], []>;
+def SVPSEL_S : SInst<"svpsel_lane_b32", "PPPm", "Pi", MergeNone, "", 
[IsStreamingCompatible], []>;
+def SVPSEL_D : SInst<"svpsel_lane_b64", "PPPm", "Pl", MergeNone, "", 
[IsStreamingCompatible], []>;
+}
+
+let TargetGuard = "sve2p1|sme2" in {
+def SVPSEL_COUNT_ALIAS_B : SInst<"svpsel_lane_c8",  "}}Pm", "Pc", MergeNone, 
"", [IsStreamingCompatible], []>;
+def SVPSEL_COUNT_ALIAS_H : SInst<"svpsel_lane_c16", "}}Pm", "Ps", MergeNone, 
"", [IsStreamingCompatible], []>;
+def SVPSEL_COUNT_ALIAS_S : SInst<"svpsel_lane_c32", "}}Pm", "Pi", MergeNone, 
"", [IsStreamingCompatible], []>;
+def SVPSEL_COUNT_ALIAS_D : SInst<"svpsel_lane_c64", "}}Pm", "Pl", MergeNone, 
"", [IsStreamingCompatible], []>;
+
+def SVPEXT_SINGLE : SInst<"svpext_lane_{d}", "P}i", "QcQsQiQl", MergeNone, 
"aarch64_sve_pext", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_3>]>;
+def SVPEXT_X2 : SInst<"svpext_lane_{d}_x2", "2.P}i", "QcQsQiQl", 
MergeNone, "aarch64_sve_pext_x2", [IsStreamingCompatible], [ImmCheck<1, 
ImmCheck0_1>]>;
+}
+
 let TargetGuard = "sve2p1" in {
 def SVFCLAMP   : SInst<"svclamp[_{d}]", "", "hfd", MergeNone, 
"aarch64_sve_fclamp", [], []>;
 def SVPTRUE_COUNT  : SInst<"svptrue_{d}", "}v", "QcQsQiQl", MergeNone, 
"aarch64_sve_ptrue_{d}", [IsOverloadNone], []>;
 def SVPFALSE_COUNT_ALIAS : SInst<"svpfalse_c", "}v", "", MergeNone, "", 
[IsOverloadNone]>;
 
-def SVPEXT_SINGLE : SInst<"svpext_lane_{d}", "P}i", "QcQsQiQl", MergeNone, 
"aarch64_sve_pext", [], [ImmCheck<1, ImmCheck0_3>]>;
-def SVPEXT_X2 : SInst<"svpext_lane_{d}_x2", "2.P}i", "QcQsQiQl", 
MergeNone, "aarch64_sve_pext_x2", [], [ImmCheck<1, ImmCheck0_1>]>;
-
-def SVPSEL_COUNT_ALIAS_B : SInst<"svpsel_lane_c8",  "}}Pm", "Pc", MergeNone, 
"", [], []>;
-def SVPSEL_COUNT_ALIAS_H : SInst<"svpsel_lane_c16", "}}Pm", "Ps", MergeNone, 
"", [], []>;
-def SVPSEL_COUNT_ALIAS_S : SInst<"svpsel_lane_c32", "}}Pm", "Pi", MergeNone, 
"", [], []>;
-def SVPSEL_COUNT_ALIAS_D : SInst<"svpsel_lane_c64", "}}Pm", "Pl", MergeNone, 
"", [], []>;
-
 def SVWHILEGE_COUNT  : SInst<"svwhilege_{d}",  "}lli", "QcQsQiQl", MergeNone, 
"aarch64_sve_whilege_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
 def SVWHILEGT_COUNT  : SInst<"svwhilegt_{d}",  "}lli", "QcQsQiQl", MergeNone, 
"aarch64_sve_whilegt_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
 def SVWHILELE_COUNT  : SInst<"svwhilele_{d}",  "}lli", "QcQsQiQl", MergeNone, 
"aarch64_sve_whilele_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
@@ -1971,11 +1980,6 @@ let TargetGuard = "sve2p1" in {
 def SVSCLAMP : SInst<"svclamp[_{d}]", "", "csil", MergeNone, 
"aarch64_sve_sclamp", [], []>;
 def SVUCLAMP : SInst<"svclamp[_{d}]", "", "UcUsUiUl", MergeNone, 
"aarch64_sve_uclamp", [], []>;
 
-def SVPSEL_B : SInst<"svpsel_lane_b8",  "PPPm", "Pc", MergeNone, "", [], []>;
-def SVPSEL_H : SInst<"svpsel_lane_b16", "PPPm", "Ps", MergeNone, "", [], []>;
-def SVPSEL_S : SInst<"svpsel_lane_b32", "PPPm", "Pi", MergeNone, "", [], []>;
-def SVPSEL_D : SInst<"svpsel_lane_b64", "PPPm", "Pl", MergeNone, "", [], []>;
-
 def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, 
"aarch64_sve_cntp_{d}", [IsOverloadNone], [ImmCheck<1, ImmCheck2_4_Mul2>]>;
 
 defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUl", "aarch64_sve_revd">;
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c 
b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c
index fe15d5a9db81f2..76603e384b99ca 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c
@@ -1,10 +1,17 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: aarch64-registered-

[clang] [AArch64][SME2] Add builtins to cast svbool from/to svcount. (PR #74720)

2023-12-08 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov updated 
https://github.com/llvm/llvm-project/pull/74720

>From e54baef2fe3f6249464ba55dcd18809a5018b146 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov 
Date: Thu, 7 Dec 2023 14:46:11 +
Subject: [PATCH 1/2] [AArch64][SME2] Add builtins to cast svbool from/to
 svcount.

Add builtin: 'svreinterpret_b' to cast from svcount_t to svbool_t.
Add builtin: 'svreinterpret_c'  to cast from svbool_t  to svcount_t.

Patch by: Hassnaa Hamdi 
---
 clang/include/clang/Basic/arm_sve.td  |  3 ++
 clang/lib/CodeGen/CGBuiltin.cpp   | 16 +++
 .../acle_sme2_reinterpret_svcount_svbool.c| 47 +++
 3 files changed, 66 insertions(+)
 create mode 100644 
clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_reinterpret_svcount_svbool.c

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index ba338ee6331e8..5ed4914dc5b52 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2161,6 +2161,9 @@ let TargetGuard = "sme2" in {
   def SVSQRSHRU_X4 : SInst<"svqrshru[_n]_{0}[_{d}_x4]", "b4i", "il", 
MergeNone, "aarch64_sve_sqrshru_x4", [IsStreaming], [ImmCheck<1, 
ImmCheckShiftRight, 0>]>;
 
   def SVSQRSHRUN_X4 : SInst<"svqrshrun[_n]_{0}[_{d}_x4]", "b4i", "il", 
MergeNone, "aarch64_sve_sqrshrun_x4", [IsStreaming], [ImmCheck<1, 
ImmCheckShiftRight, 0>]>;
+
+  def REINTERPRET_SVBOOL_TO_SVCOUNT : Inst<"svreinterpret[_c]", "}P", "Pc", 
MergeNone, "", [IsStreamingCompatible], [], MemEltTyDefault>;
+  def REINTERPRET_SVCOUNT_TO_SVBOOL : Inst<"svreinterpret[_b]", "P}", "Pc", 
MergeNone, "", [IsStreamingCompatible], [], MemEltTyDefault>;
 }
 
 let TargetGuard = "sve2p1" in {
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 0d8b3e4aaad47..83d0a72aac549 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -10211,6 +10211,22 @@ Value 
*CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
   switch (BuiltinID) {
   default:
 return nullptr;
+
+  case SVE::BI__builtin_sve_svreinterpret_b: {
+auto SVCountTy =
+llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
+Function *CastFromSVCountF =
+CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
+return Builder.CreateCall(CastFromSVCountF, Ops[0]);
+  }
+  case SVE::BI__builtin_sve_svreinterpret_c: {
+auto SVCountTy =
+llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
+Function *CastToSVCountF =
+CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, 
SVCountTy);
+return Builder.CreateCall(CastToSVCountF, Ops[0]);
+  }
+
   case SVE::BI__builtin_sve_svpsel_lane_b8:
   case SVE::BI__builtin_sve_svpsel_lane_b16:
   case SVE::BI__builtin_sve_svpsel_lane_b32:
diff --git 
a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_reinterpret_svcount_svbool.c
 
b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_reinterpret_svcount_svbool.c
new file mode 100644
index 0..82b30e8bbe9b9
--- /dev/null
+++ 
b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_reinterpret_svcount_svbool.c
@@ -0,0 +1,47 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | 
opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x 
c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s 
-check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | 
FileCheck %s -check-prefix=CPP-CHECK
+
+#include 
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.§
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+// CHECK-LABEL: @test_svreinterpret_svbool_svcnt(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = tail call  
@llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(target("aarch64.svcount") 
[[CNT:%.*]])
+// CHECK-NEXT:ret  [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z31test_svreinterpret_svbool_svcntu11__SVCount_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call  
@llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(targ

[clang] [AArch64][SME2] Add builtins to cast svbool from/to svcount. (PR #74720)

2023-12-08 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov closed 
https://github.com/llvm/llvm-project/pull/74720
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME2] Add PEXT, PSEL builtins for SME2 (PR #72827)

2023-12-11 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov updated 
https://github.com/llvm/llvm-project/pull/72827

>From ae1b183325c08ec8eb1cb82977591563410c0361 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov 
Date: Mon, 20 Nov 2023 07:04:18 +
Subject: [PATCH 1/5] [AArch64][SME2] Add PEXT, PSEL builtins for SME2

This change enables PEXT, PSEL builtins for SME2 target.
---
 clang/include/clang/Basic/arm_sve.td  |  30 +--
 .../acle_sve2p1_pext.c| 207 +-
 .../acle_sve2p1_psel.c|   5 +
 3 files changed, 218 insertions(+), 24 deletions(-)

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index 85656c00c5b3e..acba13bcc14c5 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1935,16 +1935,25 @@ def SVBGRP   : SInst<"svbgrp[_{d}]",   "ddd", 
"UcUsUiUl", MergeNone, "aarch64_sv
 def SVBGRP_N : SInst<"svbgrp[_n_{d}]", "dda", "UcUsUiUl", MergeNone, 
"aarch64_sve_bgrp_x">;
 }
 
-let TargetGuard = "sve2p1" in {
-def SVFCLAMP   : SInst<"svclamp[_{d}]", "", "hfd", MergeNone, 
"aarch64_sve_fclamp", [], []>;
+let TargetGuard = "sve2p1|sme" in {
+def SVPSEL_B : SInst<"svpsel_lane_b8",  "PPPm", "Pc", MergeNone, "", 
[IsStreamingCompatible], []>;
+def SVPSEL_H : SInst<"svpsel_lane_b16", "PPPm", "Ps", MergeNone, "", 
[IsStreamingCompatible], []>;
+def SVPSEL_S : SInst<"svpsel_lane_b32", "PPPm", "Pi", MergeNone, "", 
[IsStreamingCompatible], []>;
+def SVPSEL_D : SInst<"svpsel_lane_b64", "PPPm", "Pl", MergeNone, "", 
[IsStreamingCompatible], []>;
+}
 
-def SVPEXT_SINGLE : SInst<"svpext_lane_{d}", "P}i", "QcQsQiQl", MergeNone, 
"aarch64_sve_pext", [], [ImmCheck<1, ImmCheck0_3>]>;
-def SVPEXT_X2 : SInst<"svpext_lane_{d}_x2", "2.P}i", "QcQsQiQl", 
MergeNone, "aarch64_sve_pext_x2", [], [ImmCheck<1, ImmCheck0_1>]>;
+let TargetGuard = "sve2p1|sme2" in {
+def SVPSEL_COUNT_ALIAS_B : SInst<"svpsel_lane_c8",  "}}Pm", "Pc", MergeNone, 
"", [IsStreamingCompatible], []>;
+def SVPSEL_COUNT_ALIAS_H : SInst<"svpsel_lane_c16", "}}Pm", "Ps", MergeNone, 
"", [IsStreamingCompatible], []>;
+def SVPSEL_COUNT_ALIAS_S : SInst<"svpsel_lane_c32", "}}Pm", "Pi", MergeNone, 
"", [IsStreamingCompatible], []>;
+def SVPSEL_COUNT_ALIAS_D : SInst<"svpsel_lane_c64", "}}Pm", "Pl", MergeNone, 
"", [IsStreamingCompatible], []>;
 
-def SVPSEL_COUNT_ALIAS_B : SInst<"svpsel_lane_c8",  "}}Pm", "Pc", MergeNone, 
"", [], []>;
-def SVPSEL_COUNT_ALIAS_H : SInst<"svpsel_lane_c16", "}}Pm", "Ps", MergeNone, 
"", [], []>;
-def SVPSEL_COUNT_ALIAS_S : SInst<"svpsel_lane_c32", "}}Pm", "Pi", MergeNone, 
"", [], []>;
-def SVPSEL_COUNT_ALIAS_D : SInst<"svpsel_lane_c64", "}}Pm", "Pl", MergeNone, 
"", [], []>;
+def SVPEXT_SINGLE : SInst<"svpext_lane_{d}", "P}i", "QcQsQiQl", MergeNone, 
"aarch64_sve_pext", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_3>]>;
+def SVPEXT_X2 : SInst<"svpext_lane_{d}_x2", "2.P}i", "QcQsQiQl", 
MergeNone, "aarch64_sve_pext_x2", [IsStreamingCompatible], [ImmCheck<1, 
ImmCheck0_1>]>;
+}
+
+let TargetGuard = "sve2p1" in {
+def SVFCLAMP   : SInst<"svclamp[_{d}]", "", "hfd", MergeNone, 
"aarch64_sve_fclamp", [], []>;
 
 def SVWHILEGE_COUNT  : SInst<"svwhilege_{d}",  "}lli", "QcQsQiQl", MergeNone, 
"aarch64_sve_whilege_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
 def SVWHILEGT_COUNT  : SInst<"svwhilegt_{d}",  "}lli", "QcQsQiQl", MergeNone, 
"aarch64_sve_whilegt_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
@@ -2045,11 +2054,6 @@ let TargetGuard = "sve2p1" in {
 def SVSCLAMP : SInst<"svclamp[_{d}]", "", "csil", MergeNone, 
"aarch64_sve_sclamp", [], []>;
 def SVUCLAMP : SInst<"svclamp[_{d}]", "", "UcUsUiUl", MergeNone, 
"aarch64_sve_uclamp", [], []>;
 
-def SVPSEL_B : SInst<"svpsel_lane_b8",  "PPPm", "Pc", MergeNone, "", [], []>;
-def SVPSEL_H : SInst<"svpsel_lane_b16", "PPPm", "Ps", MergeNone, "", [], []>;
-def SVPSEL_S : SInst<"svpsel_lane_b32", "PPPm", "Pi", MergeNone, "", [], []>;
-def SVPSEL_D : SInst<"svpsel_lane_b64", "PPPm", "Pl", MergeNone, "", [], []>;
-
 def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, 
"aarch64_sve_cntp_{d}", [IsOverloadNone], [ImmCheck<1, ImmCheck2_4_Mul2>]>;
 
 defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUl", "aarch64_sve_revd">;
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c 
b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c
index fe15d5a9db81f..76603e384b99c 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c
@@ -1,10 +1,17 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S 
-O1 -Werror -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S 
-O1 -Werror -emit-llvm -o - -x c++ %s 

[clang] [AArch64][SME2] Add PEXT, PSEL builtins for SME2 (PR #72827)

2023-12-11 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov closed 
https://github.com/llvm/llvm-project/pull/72827
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME2] Add FCLAMP, CNTP builtins for SME2 (PR #72487)

2023-11-15 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov created 
https://github.com/llvm/llvm-project/pull/72487

This change enables FCLAMP, FCLAMP_BF16, CNTP builtins for SME2 target.

>From dc691934814029de64494272697d562ddb86dfee Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov 
Date: Thu, 16 Nov 2023 07:21:17 +
Subject: [PATCH] [AArch64][SME2] Add FCLAMP, CNTP builtins for SME2

This change enables FCLAMP, FCLAMP_BF16, CNTP builtins for SME2 target.
---
 clang/include/clang/Basic/arm_sve.td  | 10 +++---
 .../acle_sve2p1_bfclamp.c | 35 +++
 .../acle_sve2p1_cntp.c|  3 ++
 .../acle_sve2p1_fclamp.c  |  8 +
 4 files changed, 52 insertions(+), 4 deletions(-)
 create mode 100644 
clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfclamp.c

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index 40e474d5f0a8f4e..5947a1a1054cc16 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1860,7 +1860,6 @@ def SVBGRP_N : SInst<"svbgrp[_n_{d}]", "dda", "UcUsUiUl", 
MergeNone, "aarch64_sv
 }
 
 let TargetGuard = "sve2p1" in {
-def SVFCLAMP   : SInst<"svclamp[_{d}]", "", "hfd", MergeNone, 
"aarch64_sve_fclamp", [], []>;
 def SVPTRUE_COUNT  : SInst<"svptrue_{d}", "}v", "QcQsQiQl", MergeNone, 
"aarch64_sve_ptrue_{d}", [IsOverloadNone], []>;
 def SVPFALSE_COUNT_ALIAS : SInst<"svpfalse_c", "}v", "", MergeNone, "", 
[IsOverloadNone]>;
 
@@ -1970,17 +1969,20 @@ def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", 
"dd$$i", "f", MergeNone, "aar
 let TargetGuard = "sve2p1" in {
 def SVSCLAMP : SInst<"svclamp[_{d}]", "", "csil", MergeNone, 
"aarch64_sve_sclamp", [], []>;
 def SVUCLAMP : SInst<"svclamp[_{d}]", "", "UcUsUiUl", MergeNone, 
"aarch64_sve_uclamp", [], []>;
-
 def SVPSEL_B : SInst<"svpsel_lane_b8",  "PPPm", "Pc", MergeNone, "", [], []>;
 def SVPSEL_H : SInst<"svpsel_lane_b16", "PPPm", "Ps", MergeNone, "", [], []>;
 def SVPSEL_S : SInst<"svpsel_lane_b32", "PPPm", "Pi", MergeNone, "", [], []>;
 def SVPSEL_D : SInst<"svpsel_lane_b64", "PPPm", "Pl", MergeNone, "", [], []>;
 
-def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, 
"aarch64_sve_cntp_{d}", [IsOverloadNone], [ImmCheck<1, ImmCheck2_4_Mul2>]>;
-
 defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUl", "aarch64_sve_revd">;
 }
 
+let TargetGuard = "sve2p1|sme2" in {
+  def SVFCLAMP   : SInst<"svclamp[_{d}]", "", "hfd", MergeNone, 
"aarch64_sve_fclamp", [IsStreamingCompatible], []>;
+  def SVFCLAMP_BF   : SInst<"svclamp[_{d}]", "", "b", MergeNone, 
"aarch64_sve_fclamp", [IsStreamingCompatible], []>;
+  def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, 
"aarch64_sve_cntp_{d}", [IsOverloadNone, IsStreamingCompatible], [ImmCheck<1, 
ImmCheck2_4_Mul2>]>;
+}
+
 

 // SME2
 
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfclamp.c 
b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfclamp.c
new file mode 100644
index 000..4cd5627eb5c4dd8
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfclamp.c
@@ -0,0 +1,35 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 
-target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | 
opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 
-target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x 
c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s 
-check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror 
-Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | 
FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror 
-Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | 
FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 
-target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 
-target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | 
FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sme2 -target-feature +sve -target-feature +bf16 -S 

[clang] [AArch64][SME2] Add FCLAMP, CNTP builtins for SME2 (PR #72487)

2023-11-16 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov updated 
https://github.com/llvm/llvm-project/pull/72487

>From dc691934814029de64494272697d562ddb86dfee Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov 
Date: Thu, 16 Nov 2023 07:21:17 +
Subject: [PATCH 1/2] [AArch64][SME2] Add FCLAMP, CNTP builtins for SME2

This change enables FCLAMP, FCLAMP_BF16, CNTP builtins for SME2 target.
---
 clang/include/clang/Basic/arm_sve.td  | 10 +++---
 .../acle_sve2p1_bfclamp.c | 35 +++
 .../acle_sve2p1_cntp.c|  3 ++
 .../acle_sve2p1_fclamp.c  |  8 +
 4 files changed, 52 insertions(+), 4 deletions(-)
 create mode 100644 
clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfclamp.c

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index 40e474d5f0a8f4e..5947a1a1054cc16 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1860,7 +1860,6 @@ def SVBGRP_N : SInst<"svbgrp[_n_{d}]", "dda", "UcUsUiUl", 
MergeNone, "aarch64_sv
 }
 
 let TargetGuard = "sve2p1" in {
-def SVFCLAMP   : SInst<"svclamp[_{d}]", "", "hfd", MergeNone, 
"aarch64_sve_fclamp", [], []>;
 def SVPTRUE_COUNT  : SInst<"svptrue_{d}", "}v", "QcQsQiQl", MergeNone, 
"aarch64_sve_ptrue_{d}", [IsOverloadNone], []>;
 def SVPFALSE_COUNT_ALIAS : SInst<"svpfalse_c", "}v", "", MergeNone, "", 
[IsOverloadNone]>;
 
@@ -1970,17 +1969,20 @@ def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", 
"dd$$i", "f", MergeNone, "aar
 let TargetGuard = "sve2p1" in {
 def SVSCLAMP : SInst<"svclamp[_{d}]", "", "csil", MergeNone, 
"aarch64_sve_sclamp", [], []>;
 def SVUCLAMP : SInst<"svclamp[_{d}]", "", "UcUsUiUl", MergeNone, 
"aarch64_sve_uclamp", [], []>;
-
 def SVPSEL_B : SInst<"svpsel_lane_b8",  "PPPm", "Pc", MergeNone, "", [], []>;
 def SVPSEL_H : SInst<"svpsel_lane_b16", "PPPm", "Ps", MergeNone, "", [], []>;
 def SVPSEL_S : SInst<"svpsel_lane_b32", "PPPm", "Pi", MergeNone, "", [], []>;
 def SVPSEL_D : SInst<"svpsel_lane_b64", "PPPm", "Pl", MergeNone, "", [], []>;
 
-def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, 
"aarch64_sve_cntp_{d}", [IsOverloadNone], [ImmCheck<1, ImmCheck2_4_Mul2>]>;
-
 defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUl", "aarch64_sve_revd">;
 }
 
+let TargetGuard = "sve2p1|sme2" in {
+  def SVFCLAMP   : SInst<"svclamp[_{d}]", "", "hfd", MergeNone, 
"aarch64_sve_fclamp", [IsStreamingCompatible], []>;
+  def SVFCLAMP_BF   : SInst<"svclamp[_{d}]", "", "b", MergeNone, 
"aarch64_sve_fclamp", [IsStreamingCompatible], []>;
+  def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, 
"aarch64_sve_cntp_{d}", [IsOverloadNone, IsStreamingCompatible], [ImmCheck<1, 
ImmCheck2_4_Mul2>]>;
+}
+
 

 // SME2
 
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfclamp.c 
b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfclamp.c
new file mode 100644
index 000..4cd5627eb5c4dd8
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfclamp.c
@@ -0,0 +1,35 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 
-target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | 
opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 
-target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x 
c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s 
-check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror 
-Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | 
FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror 
-Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | 
FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 
-target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 
-target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | 
FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sme2 -target-feature +sve -target-feature +bf16 -S 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p 
me

[clang] [AArch64][SME2] Add FCLAMP, CNTP builtins for SME2 (PR #72487)

2023-11-16 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov updated 
https://github.com/llvm/llvm-project/pull/72487

>From dc691934814029de64494272697d562ddb86dfee Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov 
Date: Thu, 16 Nov 2023 07:21:17 +
Subject: [PATCH 1/3] [AArch64][SME2] Add FCLAMP, CNTP builtins for SME2

This change enables FCLAMP, FCLAMP_BF16, CNTP builtins for SME2 target.
---
 clang/include/clang/Basic/arm_sve.td  | 10 +++---
 .../acle_sve2p1_bfclamp.c | 35 +++
 .../acle_sve2p1_cntp.c|  3 ++
 .../acle_sve2p1_fclamp.c  |  8 +
 4 files changed, 52 insertions(+), 4 deletions(-)
 create mode 100644 
clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfclamp.c

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index 40e474d5f0a8f4e..5947a1a1054cc16 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1860,7 +1860,6 @@ def SVBGRP_N : SInst<"svbgrp[_n_{d}]", "dda", "UcUsUiUl", 
MergeNone, "aarch64_sv
 }
 
 let TargetGuard = "sve2p1" in {
-def SVFCLAMP   : SInst<"svclamp[_{d}]", "", "hfd", MergeNone, 
"aarch64_sve_fclamp", [], []>;
 def SVPTRUE_COUNT  : SInst<"svptrue_{d}", "}v", "QcQsQiQl", MergeNone, 
"aarch64_sve_ptrue_{d}", [IsOverloadNone], []>;
 def SVPFALSE_COUNT_ALIAS : SInst<"svpfalse_c", "}v", "", MergeNone, "", 
[IsOverloadNone]>;
 
@@ -1970,17 +1969,20 @@ def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", 
"dd$$i", "f", MergeNone, "aar
 let TargetGuard = "sve2p1" in {
 def SVSCLAMP : SInst<"svclamp[_{d}]", "", "csil", MergeNone, 
"aarch64_sve_sclamp", [], []>;
 def SVUCLAMP : SInst<"svclamp[_{d}]", "", "UcUsUiUl", MergeNone, 
"aarch64_sve_uclamp", [], []>;
-
 def SVPSEL_B : SInst<"svpsel_lane_b8",  "PPPm", "Pc", MergeNone, "", [], []>;
 def SVPSEL_H : SInst<"svpsel_lane_b16", "PPPm", "Ps", MergeNone, "", [], []>;
 def SVPSEL_S : SInst<"svpsel_lane_b32", "PPPm", "Pi", MergeNone, "", [], []>;
 def SVPSEL_D : SInst<"svpsel_lane_b64", "PPPm", "Pl", MergeNone, "", [], []>;
 
-def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, 
"aarch64_sve_cntp_{d}", [IsOverloadNone], [ImmCheck<1, ImmCheck2_4_Mul2>]>;
-
 defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUl", "aarch64_sve_revd">;
 }
 
+let TargetGuard = "sve2p1|sme2" in {
+  def SVFCLAMP   : SInst<"svclamp[_{d}]", "", "hfd", MergeNone, 
"aarch64_sve_fclamp", [IsStreamingCompatible], []>;
+  def SVFCLAMP_BF   : SInst<"svclamp[_{d}]", "", "b", MergeNone, 
"aarch64_sve_fclamp", [IsStreamingCompatible], []>;
+  def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, 
"aarch64_sve_cntp_{d}", [IsOverloadNone, IsStreamingCompatible], [ImmCheck<1, 
ImmCheck2_4_Mul2>]>;
+}
+
 

 // SME2
 
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfclamp.c 
b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfclamp.c
new file mode 100644
index 000..4cd5627eb5c4dd8
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfclamp.c
@@ -0,0 +1,35 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 
-target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | 
opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 
-target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x 
c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s 
-check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror 
-Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | 
FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror 
-Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | 
FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 
-target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 
-target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | 
FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sme2 -target-feature +sve -target-feature +bf16 -S 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p 
me

[clang] [AArch64][SME2] Add FCLAMP, CNTP builtins for SME2 (PR #72487)

2023-11-17 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov updated 
https://github.com/llvm/llvm-project/pull/72487

>From dc691934814029de64494272697d562ddb86dfee Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov 
Date: Thu, 16 Nov 2023 07:21:17 +
Subject: [PATCH 1/4] [AArch64][SME2] Add FCLAMP, CNTP builtins for SME2

This change enables FCLAMP, FCLAMP_BF16, CNTP builtins for SME2 target.
---
 clang/include/clang/Basic/arm_sve.td  | 10 +++---
 .../acle_sve2p1_bfclamp.c | 35 +++
 .../acle_sve2p1_cntp.c|  3 ++
 .../acle_sve2p1_fclamp.c  |  8 +
 4 files changed, 52 insertions(+), 4 deletions(-)
 create mode 100644 
clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfclamp.c

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index 40e474d5f0a8f4e..5947a1a1054cc16 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1860,7 +1860,6 @@ def SVBGRP_N : SInst<"svbgrp[_n_{d}]", "dda", "UcUsUiUl", 
MergeNone, "aarch64_sv
 }
 
 let TargetGuard = "sve2p1" in {
-def SVFCLAMP   : SInst<"svclamp[_{d}]", "", "hfd", MergeNone, 
"aarch64_sve_fclamp", [], []>;
 def SVPTRUE_COUNT  : SInst<"svptrue_{d}", "}v", "QcQsQiQl", MergeNone, 
"aarch64_sve_ptrue_{d}", [IsOverloadNone], []>;
 def SVPFALSE_COUNT_ALIAS : SInst<"svpfalse_c", "}v", "", MergeNone, "", 
[IsOverloadNone]>;
 
@@ -1970,17 +1969,20 @@ def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", 
"dd$$i", "f", MergeNone, "aar
 let TargetGuard = "sve2p1" in {
 def SVSCLAMP : SInst<"svclamp[_{d}]", "", "csil", MergeNone, 
"aarch64_sve_sclamp", [], []>;
 def SVUCLAMP : SInst<"svclamp[_{d}]", "", "UcUsUiUl", MergeNone, 
"aarch64_sve_uclamp", [], []>;
-
 def SVPSEL_B : SInst<"svpsel_lane_b8",  "PPPm", "Pc", MergeNone, "", [], []>;
 def SVPSEL_H : SInst<"svpsel_lane_b16", "PPPm", "Ps", MergeNone, "", [], []>;
 def SVPSEL_S : SInst<"svpsel_lane_b32", "PPPm", "Pi", MergeNone, "", [], []>;
 def SVPSEL_D : SInst<"svpsel_lane_b64", "PPPm", "Pl", MergeNone, "", [], []>;
 
-def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, 
"aarch64_sve_cntp_{d}", [IsOverloadNone], [ImmCheck<1, ImmCheck2_4_Mul2>]>;
-
 defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUl", "aarch64_sve_revd">;
 }
 
+let TargetGuard = "sve2p1|sme2" in {
+  def SVFCLAMP   : SInst<"svclamp[_{d}]", "", "hfd", MergeNone, 
"aarch64_sve_fclamp", [IsStreamingCompatible], []>;
+  def SVFCLAMP_BF   : SInst<"svclamp[_{d}]", "", "b", MergeNone, 
"aarch64_sve_fclamp", [IsStreamingCompatible], []>;
+  def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, 
"aarch64_sve_cntp_{d}", [IsOverloadNone, IsStreamingCompatible], [ImmCheck<1, 
ImmCheck2_4_Mul2>]>;
+}
+
 

 // SME2
 
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfclamp.c 
b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfclamp.c
new file mode 100644
index 000..4cd5627eb5c4dd8
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfclamp.c
@@ -0,0 +1,35 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 
-target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | 
opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 
-target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x 
c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s 
-check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror 
-Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | 
FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror 
-Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | 
FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 
-target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 
-target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | 
FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sme2 -target-feature +sve -target-feature +bf16 -S 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p 
me

[clang] [AArch64][SME2] Add PEXT, PSEL builtins for SME2 (PR #72827)

2023-11-19 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov created 
https://github.com/llvm/llvm-project/pull/72827

This change enables PEXT, PSEL builtins for SME2 target.

>From 7aa69c9cb936b3883de7922f72ed9417be5a16f5 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov 
Date: Mon, 20 Nov 2023 07:04:18 +
Subject: [PATCH] [AArch64][SME2] Add PEXT, PSEL builtins for SME2

This change enables PEXT, PSEL builtins for SME2 target.
---
 clang/include/clang/Basic/arm_sve.td  |  30 +--
 .../acle_sve2p1_pext.c| 207 +-
 .../acle_sve2p1_psel.c|   5 +
 3 files changed, 218 insertions(+), 24 deletions(-)

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index 40e474d5f0a8f4e..96c5ac366574bf2 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1859,19 +1859,28 @@ def SVBGRP   : SInst<"svbgrp[_{d}]",   "ddd", 
"UcUsUiUl", MergeNone, "aarch64_sv
 def SVBGRP_N : SInst<"svbgrp[_n_{d}]", "dda", "UcUsUiUl", MergeNone, 
"aarch64_sve_bgrp_x">;
 }
 
+let TargetGuard = "sve2p1|sme" in {
+def SVPSEL_B : SInst<"svpsel_lane_b8",  "PPPm", "Pc", MergeNone, "", 
[IsStreamingCompatible], []>;
+def SVPSEL_H : SInst<"svpsel_lane_b16", "PPPm", "Ps", MergeNone, "", 
[IsStreamingCompatible], []>;
+def SVPSEL_S : SInst<"svpsel_lane_b32", "PPPm", "Pi", MergeNone, "", 
[IsStreamingCompatible], []>;
+def SVPSEL_D : SInst<"svpsel_lane_b64", "PPPm", "Pl", MergeNone, "", 
[IsStreamingCompatible], []>;
+}
+
+let TargetGuard = "sve2p1|sme2" in {
+def SVPSEL_COUNT_ALIAS_B : SInst<"svpsel_lane_c8",  "}}Pm", "Pc", MergeNone, 
"", [IsStreamingCompatible], []>;
+def SVPSEL_COUNT_ALIAS_H : SInst<"svpsel_lane_c16", "}}Pm", "Ps", MergeNone, 
"", [IsStreamingCompatible], []>;
+def SVPSEL_COUNT_ALIAS_S : SInst<"svpsel_lane_c32", "}}Pm", "Pi", MergeNone, 
"", [IsStreamingCompatible], []>;
+def SVPSEL_COUNT_ALIAS_D : SInst<"svpsel_lane_c64", "}}Pm", "Pl", MergeNone, 
"", [IsStreamingCompatible], []>;
+
+def SVPEXT_SINGLE : SInst<"svpext_lane_{d}", "P}i", "QcQsQiQl", MergeNone, 
"aarch64_sve_pext", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_3>]>;
+def SVPEXT_X2 : SInst<"svpext_lane_{d}_x2", "2.P}i", "QcQsQiQl", 
MergeNone, "aarch64_sve_pext_x2", [IsStreamingCompatible], [ImmCheck<1, 
ImmCheck0_1>]>;
+}
+
 let TargetGuard = "sve2p1" in {
 def SVFCLAMP   : SInst<"svclamp[_{d}]", "", "hfd", MergeNone, 
"aarch64_sve_fclamp", [], []>;
 def SVPTRUE_COUNT  : SInst<"svptrue_{d}", "}v", "QcQsQiQl", MergeNone, 
"aarch64_sve_ptrue_{d}", [IsOverloadNone], []>;
 def SVPFALSE_COUNT_ALIAS : SInst<"svpfalse_c", "}v", "", MergeNone, "", 
[IsOverloadNone]>;
 
-def SVPEXT_SINGLE : SInst<"svpext_lane_{d}", "P}i", "QcQsQiQl", MergeNone, 
"aarch64_sve_pext", [], [ImmCheck<1, ImmCheck0_3>]>;
-def SVPEXT_X2 : SInst<"svpext_lane_{d}_x2", "2.P}i", "QcQsQiQl", 
MergeNone, "aarch64_sve_pext_x2", [], [ImmCheck<1, ImmCheck0_1>]>;
-
-def SVPSEL_COUNT_ALIAS_B : SInst<"svpsel_lane_c8",  "}}Pm", "Pc", MergeNone, 
"", [], []>;
-def SVPSEL_COUNT_ALIAS_H : SInst<"svpsel_lane_c16", "}}Pm", "Ps", MergeNone, 
"", [], []>;
-def SVPSEL_COUNT_ALIAS_S : SInst<"svpsel_lane_c32", "}}Pm", "Pi", MergeNone, 
"", [], []>;
-def SVPSEL_COUNT_ALIAS_D : SInst<"svpsel_lane_c64", "}}Pm", "Pl", MergeNone, 
"", [], []>;
-
 def SVWHILEGE_COUNT  : SInst<"svwhilege_{d}",  "}lli", "QcQsQiQl", MergeNone, 
"aarch64_sve_whilege_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
 def SVWHILEGT_COUNT  : SInst<"svwhilegt_{d}",  "}lli", "QcQsQiQl", MergeNone, 
"aarch64_sve_whilegt_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
 def SVWHILELE_COUNT  : SInst<"svwhilele_{d}",  "}lli", "QcQsQiQl", MergeNone, 
"aarch64_sve_whilele_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
@@ -1971,11 +1980,6 @@ let TargetGuard = "sve2p1" in {
 def SVSCLAMP : SInst<"svclamp[_{d}]", "", "csil", MergeNone, 
"aarch64_sve_sclamp", [], []>;
 def SVUCLAMP : SInst<"svclamp[_{d}]", "", "UcUsUiUl", MergeNone, 
"aarch64_sve_uclamp", [], []>;
 
-def SVPSEL_B : SInst<"svpsel_lane_b8",  "PPPm", "Pc", MergeNone, "", [], []>;
-def SVPSEL_H : SInst<"svpsel_lane_b16", "PPPm", "Ps", MergeNone, "", [], []>;
-def SVPSEL_S : SInst<"svpsel_lane_b32", "PPPm", "Pi", MergeNone, "", [], []>;
-def SVPSEL_D : SInst<"svpsel_lane_b64", "PPPm", "Pl", MergeNone, "", [], []>;
-
 def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, 
"aarch64_sve_cntp_{d}", [IsOverloadNone], [ImmCheck<1, ImmCheck2_4_Mul2>]>;
 
 defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUl", "aarch64_sve_revd">;
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c 
b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c
index fe15d5a9db81f2f..76603e384b99ca3 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c
@@ -1,10 +1,17 @@
 // NOTE: Assertions have been autogenerated by utils/

[llvm] [compiler-rt] [clang] [AArch64][SME] Add support for sme-fa64 (PR #70809)

2023-11-03 Thread Dinar Temirbulatov via cfe-commits

dtemirbulatov wrote:

LGTM.

https://github.com/llvm/llvm-project/pull/70809
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [compiler-rt] [llvm] [AArch64][SME] Add support for sme-fa64 (PR #70809)

2023-11-03 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov approved this pull request.

LGTM.

https://github.com/llvm/llvm-project/pull/70809
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME2] Add FCLAMP, CNTP builtins for SME2 (PR #72487)

2023-12-12 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov updated 
https://github.com/llvm/llvm-project/pull/72487

>From 6e2c1015c6cf6b226bc9e28c563167e07b9c4074 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov 
Date: Thu, 16 Nov 2023 07:21:17 +
Subject: [PATCH] [AArch64][SME2] Add FCLAMP, CNTP builtins for SME2

This change enables FCLAMP, CNTP builtins for SME2 target.
---
 clang/include/clang/Basic/arm_sve.td  | 7 +++
 .../CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_cntp.c  | 3 +++
 .../aarch64-sve2p1-intrinsics/acle_sve2p1_fclamp.c| 8 
 3 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index aa9b105364a51..e10076a766385 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1953,8 +1953,6 @@ def SVPEXT_X2 : SInst<"svpext_lane_{d}_x2", "2.P}i", 
"QcQsQiQl", MergeNone,
 }
 
 let TargetGuard = "sve2p1" in {
-def SVFCLAMP   : SInst<"svclamp[_{d}]", "", "hfd", MergeNone, 
"aarch64_sve_fclamp", [], []>;
-
 def SVWHILEGE_COUNT  : SInst<"svwhilege_{d}",  "}lli", "QcQsQiQl", MergeNone, 
"aarch64_sve_whilege_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
 def SVWHILEGT_COUNT  : SInst<"svwhilegt_{d}",  "}lli", "QcQsQiQl", MergeNone, 
"aarch64_sve_whilegt_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
 def SVWHILELE_COUNT  : SInst<"svwhilele_{d}",  "}lli", "QcQsQiQl", MergeNone, 
"aarch64_sve_whilele_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
@@ -2054,8 +2052,6 @@ let TargetGuard = "sve2p1" in {
 def SVSCLAMP : SInst<"svclamp[_{d}]", "", "csil", MergeNone, 
"aarch64_sve_sclamp", [], []>;
 def SVUCLAMP : SInst<"svclamp[_{d}]", "", "UcUsUiUl", MergeNone, 
"aarch64_sve_uclamp", [], []>;
 
-def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, 
"aarch64_sve_cntp_{d}", [IsOverloadNone], [ImmCheck<1, ImmCheck2_4_Mul2>]>;
-
 defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUl", "aarch64_sve_revd">;
 }
 
@@ -2064,6 +2060,9 @@ let TargetGuard = "sve2p1|sme2" in {
   def SVPTRUE_COUNT  : SInst<"svptrue_{d}", "}v", "QcQsQiQl", MergeNone, 
"aarch64_sve_ptrue_{d}", [IsOverloadNone, IsStreamingCompatible], []>;
 
   def SVPFALSE_COUNT_ALIAS : SInst<"svpfalse_c", "}v", "", MergeNone, "", 
[IsOverloadNone, IsStreamingCompatible]>;
+
+  def SVFCLAMP   : SInst<"svclamp[_{d}]", "", "hfd", MergeNone, 
"aarch64_sve_fclamp", [IsStreamingCompatible], []>;
+  def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, 
"aarch64_sve_cntp_{d}", [IsOverloadNone, IsStreamingCompatible], [ImmCheck<1, 
ImmCheck2_4_Mul2>]>;
 }
 
 let TargetGuard = "sve2p1,b16b16" in {
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_cntp.c 
b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_cntp.c
index 18973a6467450..01b995c1b0583 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_cntp.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_cntp.c
@@ -3,6 +3,9 @@
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S 
-O1 -Werror -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S 
-O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S 
-disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 
-target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 
-target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s 
-check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 
-target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include 
 
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_fclamp.c 
b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_fclamp.c
index a9f482cab3969..3adb28b8cc71b 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_fclamp.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_fclamp.c
@@ -8,6 +8,14 @@
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple 
aarch64-none-linux-gnu -target-feature +sve2p1 \
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sme2 -target-feature +sve \
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -fcl

[clang] [AArch64][SME2] Add FCLAMP, CNTP builtins for SME2 (PR #72487)

2023-12-12 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov edited 
https://github.com/llvm/llvm-project/pull/72487
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME2] Add SQRSHRN, UQRSHRN, SQRSHRUN builtins for SME2, SVE2p1 (PR #75325)

2023-12-13 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov created 
https://github.com/llvm/llvm-project/pull/75325

Add SQRSHRN, UQRSHRN, SQRSHRUN builtins for SME2, SVE2p1.

>From 83d1e213a5dce7621f8f87668a4ab1f15306fc5b Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov 
Date: Wed, 13 Dec 2023 11:28:06 +
Subject: [PATCH] [AArch64][SME2] Add SQRSHRN, UQRSHRN, SQRSHRUN builtins for
 SME2, SVE2p1

Add SQRSHRN, UQRSHRN, SQRSHRUN builtins for SME2, SVE2p1.
---
 clang/include/clang/Basic/arm_sve.td  |  9 +++
 .../acle_sve2p1_qrshr.c   | 78 +++
 2 files changed, 87 insertions(+)
 create mode 100644 
clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_qrshr.c

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index aa9b105364a51a..4e9e318e7ade85 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2170,6 +2170,15 @@ let TargetGuard = "sme2" in {
   def REINTERPRET_SVCOUNT_TO_SVBOOL : Inst<"svreinterpret[_b]", "P}", "Pc", 
MergeNone, "", [IsStreamingCompatible], []>;
 }
 
+let TargetGuard = "sve2p1|sme2" in {
+  // SQRSHRN / UQRSHRN
+  def SVQRSHRN_X2   : SInst<"svqrshrn[_n]_{0}[_{d}_x2]", "h2i", "i",
MergeNone, "aarch64_sve_sqrshrn_x2", [IsStreamingCompatible], [ImmCheck<1, 
ImmCheck1_16>]>;
+  def SVUQRSHRN_X2  : SInst<"svqrshrn[_n]_{0}[_{d}_x2]", "e2i", "Ui",   
MergeNone, "aarch64_sve_uqrshrn_x2", [IsStreamingCompatible], [ImmCheck<1, 
ImmCheck1_16>]>;
+
+  // SQRSHRUN
+  def SVSQRSHRUN_X2 : SInst<"svqrshrun[_n]_{0}[_{d}_x2]", "e2i", "i",  
MergeNone, "aarch64_sve_sqrshrun_x2", [IsStreamingCompatible], [ImmCheck<1, 
ImmCheck1_16>]>;
+}
+
 let TargetGuard = "sve2p1" in {
   // ZIPQ1, ZIPQ2, UZPQ1, UZPQ2
   def SVZIPQ1 : SInst<"svzipq1[_{d}]", "ddd", "cUcsUsiUilUlbhfd", MergeNone, 
"aarch64_sve_zipq1", [], []>;
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_qrshr.c 
b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_qrshr.c
new file mode 100644
index 00..8e8b7203148934
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_qrshr.c
@@ -0,0 +1,78 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | 
opt -S  -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S  
-passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x 
c++ %s | opt -S  -passes=mem2reg,instcombine,tailcallelim | FileCheck %s 
-check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - %s | opt -S  -passes=mem2reg,instcombine,tailcallelim | 
FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - -x c++ %s | opt -S  -passes=mem2reg,instcombine,tailcallelim | 
FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sme2 -target-feature +sme-f64f64 -S -disable-O0-optnone 
-Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S 
-disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#include 
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED,A5) A1##A3##A5
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4,A5) A1##A2##A3##A4##A5
+#endif
+
+
+// SQRSHRN x 2
+
+// CHECK-LABEL: @test_svqrshrn_s16_s32_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = tail call  
@llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0)
+// CHECK-NEXT:[[TMP1:%.*]] = tail call  
@llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4)
+// CHECK-NEXT:[[TMP2:%.*]] = tail call  
@llvm.aarch64.sve.sqrshrn.x2.nxv4i32( [[TMP0]],  [[TMP1]], i32 16)
+// CHECK-NEXT:ret  [[TMP2]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svqrshrn_s16_s32_x211svint32x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call  
@llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0)
+// CPP-CHECK-NEXT:[[TMP1:%.*]] = tail call  
@llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4)
+// CPP-CHECK-NEXT:[[TMP2:%.*]] = tail call  
@llvm.aarch64.sve.sqrshrn.x2.nxv4i32( [[TMP0]],  [[TMP1]], i32 16)
+// CPP-CHECK-NEXT:ret  [[TMP2]]
+//
+svint16_t test_svqrshrn_s16_s32_x2(svint32x2_t zn) __arm_streaming_compatible {
+  return SVE_ACLE_FUNC(svqrshrn

[clang] [AArch64][SME2] Add builtins for SQDMULH (PR #75326)

2023-12-13 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov created 
https://github.com/llvm/llvm-project/pull/75326

Patch by: Kerry McLaughlin 

>From 2b0d280b59e244600ebc4b8373ab5494b88942df Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov 
Date: Wed, 13 Dec 2023 12:00:11 +
Subject: [PATCH] [AArch64][SME2] Add builtins for SQDMULH

Patch by: Kerry McLaughlin 
---
 clang/include/clang/Basic/arm_sve.td  |   6 +
 .../acle_sme2_sqdmulh.c   | 584 ++
 2 files changed, 590 insertions(+)
 create mode 100644 
clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sqdmulh.c

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index aa9b105364a51a..7f46f20ba7a1a7 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2168,6 +2168,12 @@ let TargetGuard = "sme2" in {
 
   def REINTERPRET_SVBOOL_TO_SVCOUNT : Inst<"svreinterpret[_c]", "}P", "Pc", 
MergeNone, "", [IsStreamingCompatible], []>;
   def REINTERPRET_SVCOUNT_TO_SVBOOL : Inst<"svreinterpret[_b]", "P}", "Pc", 
MergeNone, "", [IsStreamingCompatible], []>;
+
+  // SQDMULH
+  def SVSQDMULH_SINGLE_X2 : SInst<"svqdmulh[_single_{d}_x2]", "22d", "csil", 
MergeNone, "aarch64_sve_sqdmulh_single_vgx2", [IsStreaming], []>;
+  def SVSQDMULH_SINGLE_X4 : SInst<"svqdmulh[_single_{d}_x4]", "44d", "csil", 
MergeNone, "aarch64_sve_sqdmulh_single_vgx4", [IsStreaming], []>;
+  def SVSQDMULH_X2: SInst<"svqdmulh[_{d}_x2]","222", "csil", 
MergeNone, "aarch64_sve_sqdmulh_vgx2",[IsStreaming], []>;
+  def SVSQDMULH_X4: SInst<"svqdmulh[_{d}_x4]","444", "csil", 
MergeNone, "aarch64_sve_sqdmulh_vgx4",[IsStreaming], []>;
 }
 
 let TargetGuard = "sve2p1" in {
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sqdmulh.c 
b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sqdmulh.c
new file mode 100644
index 00..6bbd23ccd32a52
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sqdmulh.c
@@ -0,0 +1,584 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | 
opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x 
c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s 
-check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S 
-disable-O0-optnone -Werror -Wall -o /dev/null %s
+#include 
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED,A5) A1##A3##A5
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4,A5) A1##A2##A3##A4##A5
+#endif
+
+// Single, x2
+
+// CHECK-LABEL: @test_svqdmulh_single_s8_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = tail call  
@llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN:%.*]], i64 0)
+// CHECK-NEXT:[[TMP1:%.*]] = tail call  
@llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN]], i64 16)
+// CHECK-NEXT:[[TMP2:%.*]] = tail call { ,  } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv16i8( 
[[TMP0]],  [[TMP1]],  [[ZM:%.*]])
+// CHECK-NEXT:[[TMP3:%.*]] = extractvalue { ,  } [[TMP2]], 0
+// CHECK-NEXT:[[TMP4:%.*]] = tail call  
@llvm.vector.insert.nxv32i8.nxv16i8( poison,  [[TMP3]], i64 0)
+// CHECK-NEXT:[[TMP5:%.*]] = extractvalue { ,  } [[TMP2]], 1
+// CHECK-NEXT:[[TMP6:%.*]] = tail call  
@llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]],  [[TMP5]], i64 16)
+// CHECK-NEXT:ret  [[TMP6]]
+//
+// CPP-CHECK-LABEL: @_Z26test_svqdmulh_single_s8_x210svint8x2_tu10__SVInt8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call  
@llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN:%.*]], i64 0)
+// CPP-CHECK-NEXT:[[TMP1:%.*]] = tail call  
@llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN]], i64 16)
+// CPP-CHECK-NEXT:[[TMP2:%.*]] = tail call { ,  } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv16i8( 
[[TMP0]],  [[TMP1]],  [[ZM:%.*]])
+// CPP-CHECK-NEXT:[[TMP3:%.*]] = extractvalue { , 
 } [[TMP2]], 0
+// CPP-CHECK-NEXT:[[TMP4:%.*]] = tail call  
@llvm.vector.insert.nxv32i8.nxv16i8( poison,  [[TMP3]], i64 0)
+// CPP-CHECK-NEXT:[[TMP5:%.*]] = extractvalue { , 
 } [[TMP2]], 1
+// CPP-CHECK-NEXT:[[TMP6:%.*]] = tai

[clang] [AArch64][SME2] Add builtins for SQDMULH (PR #75326)

2023-12-14 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov closed 
https://github.com/llvm/llvm-project/pull/75326
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME2] Add SQRSHRN, UQRSHRN, SQRSHRUN builtins for SME2, SVE2p1 (PR #75325)

2023-12-14 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov closed 
https://github.com/llvm/llvm-project/pull/75325
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][SME2] Add multi-vector zip & unzip builtins (PR #74841)

2023-12-14 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov approved this pull request.

LGTM, with David's request to rename acle_sme2_* acle_sme2_vector_* tests in 
clang/test/CodeGen/aarch64-sme2-intrinsics

https://github.com/llvm/llvm-project/pull/74841
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][AArch64]Add QCVTN builtin to SVE2.1 (PR #75454)

2023-12-14 Thread Dinar Temirbulatov via cfe-commits


@@ -0,0 +1,78 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py

dtemirbulatov wrote:

Hmm, I could only see aarch64_sve_sqcvtn_x2 tests here, can you add 
aarch64_sve_sqcvtn_x4 as well?

https://github.com/llvm/llvm-project/pull/75454
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][AArch64]Add QCVTN builtin to SVE2.1 (PR #75454)

2023-12-14 Thread Dinar Temirbulatov via cfe-commits


@@ -0,0 +1,78 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py

dtemirbulatov wrote:

oh, sorry, I see now.

https://github.com/llvm/llvm-project/pull/75454
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][AArch64]Add QCVTN builtin to SVE2.1 (PR #75454)

2023-12-14 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov approved this pull request.

LGTM.

https://github.com/llvm/llvm-project/pull/75454
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][AArch64]Add QCVTN builtin to SVE2.1 (PR #75454)

2023-12-14 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov edited 
https://github.com/llvm/llvm-project/pull/75454
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME2] Add FCLAMP, CNTP builtins for SME2 (PR #72487)

2023-12-15 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov updated 
https://github.com/llvm/llvm-project/pull/72487

>From 6e2c1015c6cf6b226bc9e28c563167e07b9c4074 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov 
Date: Thu, 16 Nov 2023 07:21:17 +
Subject: [PATCH 1/2] [AArch64][SME2] Add FCLAMP, CNTP builtins for SME2

This change enables FCLAMP, CNTP builtins for SME2 target.
---
 clang/include/clang/Basic/arm_sve.td  | 7 +++
 .../CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_cntp.c  | 3 +++
 .../aarch64-sve2p1-intrinsics/acle_sve2p1_fclamp.c| 8 
 3 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index aa9b105364a51a..e10076a7663856 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1953,8 +1953,6 @@ def SVPEXT_X2 : SInst<"svpext_lane_{d}_x2", "2.P}i", 
"QcQsQiQl", MergeNone,
 }
 
 let TargetGuard = "sve2p1" in {
-def SVFCLAMP   : SInst<"svclamp[_{d}]", "", "hfd", MergeNone, 
"aarch64_sve_fclamp", [], []>;
-
 def SVWHILEGE_COUNT  : SInst<"svwhilege_{d}",  "}lli", "QcQsQiQl", MergeNone, 
"aarch64_sve_whilege_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
 def SVWHILEGT_COUNT  : SInst<"svwhilegt_{d}",  "}lli", "QcQsQiQl", MergeNone, 
"aarch64_sve_whilegt_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
 def SVWHILELE_COUNT  : SInst<"svwhilele_{d}",  "}lli", "QcQsQiQl", MergeNone, 
"aarch64_sve_whilele_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
@@ -2054,8 +2052,6 @@ let TargetGuard = "sve2p1" in {
 def SVSCLAMP : SInst<"svclamp[_{d}]", "", "csil", MergeNone, 
"aarch64_sve_sclamp", [], []>;
 def SVUCLAMP : SInst<"svclamp[_{d}]", "", "UcUsUiUl", MergeNone, 
"aarch64_sve_uclamp", [], []>;
 
-def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, 
"aarch64_sve_cntp_{d}", [IsOverloadNone], [ImmCheck<1, ImmCheck2_4_Mul2>]>;
-
 defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUl", "aarch64_sve_revd">;
 }
 
@@ -2064,6 +2060,9 @@ let TargetGuard = "sve2p1|sme2" in {
   def SVPTRUE_COUNT  : SInst<"svptrue_{d}", "}v", "QcQsQiQl", MergeNone, 
"aarch64_sve_ptrue_{d}", [IsOverloadNone, IsStreamingCompatible], []>;
 
   def SVPFALSE_COUNT_ALIAS : SInst<"svpfalse_c", "}v", "", MergeNone, "", 
[IsOverloadNone, IsStreamingCompatible]>;
+
+  def SVFCLAMP   : SInst<"svclamp[_{d}]", "", "hfd", MergeNone, 
"aarch64_sve_fclamp", [IsStreamingCompatible], []>;
+  def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, 
"aarch64_sve_cntp_{d}", [IsOverloadNone, IsStreamingCompatible], [ImmCheck<1, 
ImmCheck2_4_Mul2>]>;
 }
 
 let TargetGuard = "sve2p1,b16b16" in {
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_cntp.c 
b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_cntp.c
index 18973a6467450a..01b995c1b05833 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_cntp.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_cntp.c
@@ -3,6 +3,9 @@
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S 
-O1 -Werror -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S 
-O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S 
-disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 
-target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 
-target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s 
-check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 
-target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include 
 
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_fclamp.c 
b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_fclamp.c
index a9f482cab39698..3adb28b8cc71bf 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_fclamp.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_fclamp.c
@@ -8,6 +8,14 @@
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple 
aarch64-none-linux-gnu -target-feature +sve2p1 \
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sme2 -target-feature +sve \
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clan

[clang] [AArch64][SME2] Add FCLAMP, CNTP builtins for SME2 (PR #72487)

2023-12-15 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov updated 
https://github.com/llvm/llvm-project/pull/72487

>From 6e2c1015c6cf6b226bc9e28c563167e07b9c4074 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov 
Date: Thu, 16 Nov 2023 07:21:17 +
Subject: [PATCH 1/3] [AArch64][SME2] Add FCLAMP, CNTP builtins for SME2

This change enables FCLAMP, CNTP builtins for SME2 target.
---
 clang/include/clang/Basic/arm_sve.td  | 7 +++
 .../CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_cntp.c  | 3 +++
 .../aarch64-sve2p1-intrinsics/acle_sve2p1_fclamp.c| 8 
 3 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index aa9b105364a51a..e10076a7663856 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1953,8 +1953,6 @@ def SVPEXT_X2 : SInst<"svpext_lane_{d}_x2", "2.P}i", 
"QcQsQiQl", MergeNone,
 }
 
 let TargetGuard = "sve2p1" in {
-def SVFCLAMP   : SInst<"svclamp[_{d}]", "", "hfd", MergeNone, 
"aarch64_sve_fclamp", [], []>;
-
 def SVWHILEGE_COUNT  : SInst<"svwhilege_{d}",  "}lli", "QcQsQiQl", MergeNone, 
"aarch64_sve_whilege_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
 def SVWHILEGT_COUNT  : SInst<"svwhilegt_{d}",  "}lli", "QcQsQiQl", MergeNone, 
"aarch64_sve_whilegt_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
 def SVWHILELE_COUNT  : SInst<"svwhilele_{d}",  "}lli", "QcQsQiQl", MergeNone, 
"aarch64_sve_whilele_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
@@ -2054,8 +2052,6 @@ let TargetGuard = "sve2p1" in {
 def SVSCLAMP : SInst<"svclamp[_{d}]", "", "csil", MergeNone, 
"aarch64_sve_sclamp", [], []>;
 def SVUCLAMP : SInst<"svclamp[_{d}]", "", "UcUsUiUl", MergeNone, 
"aarch64_sve_uclamp", [], []>;
 
-def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, 
"aarch64_sve_cntp_{d}", [IsOverloadNone], [ImmCheck<1, ImmCheck2_4_Mul2>]>;
-
 defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUl", "aarch64_sve_revd">;
 }
 
@@ -2064,6 +2060,9 @@ let TargetGuard = "sve2p1|sme2" in {
   def SVPTRUE_COUNT  : SInst<"svptrue_{d}", "}v", "QcQsQiQl", MergeNone, 
"aarch64_sve_ptrue_{d}", [IsOverloadNone, IsStreamingCompatible], []>;
 
   def SVPFALSE_COUNT_ALIAS : SInst<"svpfalse_c", "}v", "", MergeNone, "", 
[IsOverloadNone, IsStreamingCompatible]>;
+
+  def SVFCLAMP   : SInst<"svclamp[_{d}]", "", "hfd", MergeNone, 
"aarch64_sve_fclamp", [IsStreamingCompatible], []>;
+  def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, 
"aarch64_sve_cntp_{d}", [IsOverloadNone, IsStreamingCompatible], [ImmCheck<1, 
ImmCheck2_4_Mul2>]>;
 }
 
 let TargetGuard = "sve2p1,b16b16" in {
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_cntp.c 
b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_cntp.c
index 18973a6467450a..01b995c1b05833 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_cntp.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_cntp.c
@@ -3,6 +3,9 @@
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S 
-O1 -Werror -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S 
-O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S 
-disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 
-target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 
-target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s 
-check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 
-target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include 
 
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_fclamp.c 
b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_fclamp.c
index a9f482cab39698..3adb28b8cc71bf 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_fclamp.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_fclamp.c
@@ -8,6 +8,14 @@
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple 
aarch64-none-linux-gnu -target-feature +sve2p1 \
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sme2 -target-feature +sve \
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clan

[clang] [AArch64][SME2] Add SME2 MLA/MLS builtins. (PR #75584)

2023-12-15 Thread Dinar Temirbulatov via cfe-commits


@@ -315,6 +315,223 @@ let TargetGuard = "sme2" in {
   def SVBMOPS : Inst<"svbmops_za32[_{d}]_m", "viPPdd", "iUi", MergeNone, 
"aarch64_sme_bmops_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, 
ImmCheck0_3>]>;
 }
 
+// FMLA/FMLS
+let TargetGuard = "sme2" in {
+  def SVMLA_MULTI_VG1x2_F32 : Inst<"svmla_za32[_{d}]_vg1x2", "vm22", "f", 
MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLA_MULTI_VG1x4_F32 : Inst<"svmla_za32[_{d}]_vg1x4", "vm44", "f", 
MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_MULTI_VG1x2_F32 : Inst<"svmls_za32[_{d}]_vg1x2", "vm22", "f", 
MergeNone, "aarch64_sme_fmls_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_MULTI_VG1x4_F32 : Inst<"svmls_za32[_{d}]_vg1x4", "vm44", "f", 
MergeNone, "aarch64_sme_fmls_vg1x4", [IsStreaming, IsSharedZA], []>;
+
+  def SVMLA_SINGLE_VG1x2_F32 : Inst<"svmla[_single]_za32[_{d}]_vg1x2", "vm2d", 
"f", MergeNone, "aarch64_sme_fmla_single_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLA_SINGLE_VG1x4_F32 : Inst<"svmla[_single]_za32[_{d}]_vg1x4", "vm4d", 
"f", MergeNone, "aarch64_sme_fmla_single_vg1x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_SINGLE_VG1x2_F32 : Inst<"svmls[_single]_za32[_{d}]_vg1x2", "vm2d", 
"f", MergeNone, "aarch64_sme_fmls_single_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_SINGLE_VG1x4_F32 : Inst<"svmls[_single]_za32[_{d}]_vg1x4", "vm4d", 
"f", MergeNone, "aarch64_sme_fmls_single_vg1x4", [IsStreaming, IsSharedZA], []>;
+
+  def SVMLA_LANE_VG1x2_F32 : Inst<"svmla_lane_za32[_{d}]_vg1x2", "vm2di", "f", 
MergeNone, "aarch64_sme_fmla_lane_vg1x2", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_3>]>;
+  def SVMLA_LANE_VG1x4_F32 : Inst<"svmla_lane_za32[_{d}]_vg1x4", "vm4di", "f", 
MergeNone, "aarch64_sme_fmla_lane_vg1x4", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_3>]>;
+  def SVMLS_LANE_VG1x2_F32 : Inst<"svmls_lane_za32[_{d}]_vg1x2", "vm2di", "f", 
MergeNone, "aarch64_sme_fmls_lane_vg1x2", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_3>]>;
+  def SVMLS_LANE_VG1x4_F32 : Inst<"svmls_lane_za32[_{d}]_vg1x4", "vm4di", "f", 
MergeNone, "aarch64_sme_fmls_lane_vg1x4", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_3>]>;
+}
+
+let TargetGuard = "sme2,sme-f64f64" in {
+  def SVMLA_MULTI_VG1x2_F64 : Inst<"svmla_za64[_{d}]_vg1x2", "vm22", "d", 
MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLA_MULTI_VG1x4_F64 : Inst<"svmla_za64[_{d}]_vg1x4", "vm44", "d", 
MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_MULTI_VG1x2_F64 : Inst<"svmls_za64[_{d}]_vg1x2", "vm22", "d", 
MergeNone, "aarch64_sme_fmls_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_MULTI_VG1x4_F64 : Inst<"svmls_za64[_{d}]_vg1x4", "vm44", "d", 
MergeNone, "aarch64_sme_fmls_vg1x4", [IsStreaming, IsSharedZA], []>;
+
+  def SVMLA_SINGLE_VG1x2_F64 : Inst<"svmla[_single]_za64[_{d}]_vg1x2", "vm2d", 
"d", MergeNone, "aarch64_sme_fmla_single_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLA_SINGLE_VG1x4_F64 : Inst<"svmla[_single]_za64[_{d}]_vg1x4", "vm4d", 
"d", MergeNone, "aarch64_sme_fmla_single_vg1x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_SINGLE_VG1x2_F64 : Inst<"svmls[_single]_za64[_{d}]_vg1x2", "vm2d", 
"d", MergeNone, "aarch64_sme_fmls_single_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_SINGLE_VG1x4_F64 : Inst<"svmls[_single]_za64[_{d}]_vg1x4", "vm4d", 
"d", MergeNone, "aarch64_sme_fmls_single_vg1x4", [IsStreaming, IsSharedZA], []>;
+
+  def SVMLA_LANE_VG1x2_F64 : Inst<"svmla_lane_za64[_{d}]_vg1x2", "vm2di", "d", 
MergeNone, "aarch64_sme_fmla_lane_vg1x2", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_1>]>;
+  def SVMLA_LANE_VG1x4_F64 : Inst<"svmla_lane_za64[_{d}]_vg1x4", "vm4di", "d", 
MergeNone, "aarch64_sme_fmla_lane_vg1x4", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_1>]>;
+  def SVMLS_LANE_VG1x2_F64 : Inst<"svmls_lane_za64[_{d}]_vg1x2", "vm2di", "d", 
MergeNone, "aarch64_sme_fmls_lane_vg1x2", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_1>]>;
+  def SVMLS_LANE_VG1x4_F64 : Inst<"svmls_lane_za64[_{d}]_vg1x4", "vm4di", "d", 
MergeNone, "aarch64_sme_fmls_lane_vg1x4", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_1>]>;
+}
+
+// FMLAL/FMLSL/UMLAL/SMLAL
+// SMLALL/UMLALL/USMLALL/SUMLALL
+let TargetGuard = "sme2" in {
+  // MULTI MLAL
+  def SVMLAL_MULTI_VG2x2_F16 : Inst<"svmla_za32[_{d}]_vg2x2", "vm22", "bh", 
MergeNone, "aarch64_sme_fmlal_vg2x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_MULTI_VG2x4_F16 : Inst<"svmla_za32[_{d}]_vg2x4", "vm44", "bh", 
MergeNone, "aarch64_sme_fmlal_vg2x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_MULTI_VG2x2_S16 : Inst<"svmla_za32[_{d}]_vg2x2", "vm22", "s", 
MergeNone, "aarch64_sme_smlal_vg2x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_MULTI_VG2x4_S16 : Inst<"svmla_za32[_{d}]_vg2x4", "vm44", "s", 
MergeNone, "aarch64_sme_smlal_vg2x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_MULTI_VG2x2_U16 : Inst<"svmla_za32[_{d}]_vg2x2", "v

[clang] [AArch64][SME2] Add FCLAMP, CNTP builtins for SME2 (PR #72487)

2023-12-15 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov updated 
https://github.com/llvm/llvm-project/pull/72487

>From 6e2c1015c6cf6b226bc9e28c563167e07b9c4074 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov 
Date: Thu, 16 Nov 2023 07:21:17 +
Subject: [PATCH 1/4] [AArch64][SME2] Add FCLAMP, CNTP builtins for SME2

This change enables FCLAMP, CNTP builtins for SME2 target.
---
 clang/include/clang/Basic/arm_sve.td  | 7 +++
 .../CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_cntp.c  | 3 +++
 .../aarch64-sve2p1-intrinsics/acle_sve2p1_fclamp.c| 8 
 3 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index aa9b105364a51a..e10076a7663856 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1953,8 +1953,6 @@ def SVPEXT_X2 : SInst<"svpext_lane_{d}_x2", "2.P}i", 
"QcQsQiQl", MergeNone,
 }
 
 let TargetGuard = "sve2p1" in {
-def SVFCLAMP   : SInst<"svclamp[_{d}]", "", "hfd", MergeNone, 
"aarch64_sve_fclamp", [], []>;
-
 def SVWHILEGE_COUNT  : SInst<"svwhilege_{d}",  "}lli", "QcQsQiQl", MergeNone, 
"aarch64_sve_whilege_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
 def SVWHILEGT_COUNT  : SInst<"svwhilegt_{d}",  "}lli", "QcQsQiQl", MergeNone, 
"aarch64_sve_whilegt_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
 def SVWHILELE_COUNT  : SInst<"svwhilele_{d}",  "}lli", "QcQsQiQl", MergeNone, 
"aarch64_sve_whilele_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
@@ -2054,8 +2052,6 @@ let TargetGuard = "sve2p1" in {
 def SVSCLAMP : SInst<"svclamp[_{d}]", "", "csil", MergeNone, 
"aarch64_sve_sclamp", [], []>;
 def SVUCLAMP : SInst<"svclamp[_{d}]", "", "UcUsUiUl", MergeNone, 
"aarch64_sve_uclamp", [], []>;
 
-def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, 
"aarch64_sve_cntp_{d}", [IsOverloadNone], [ImmCheck<1, ImmCheck2_4_Mul2>]>;
-
 defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUl", "aarch64_sve_revd">;
 }
 
@@ -2064,6 +2060,9 @@ let TargetGuard = "sve2p1|sme2" in {
   def SVPTRUE_COUNT  : SInst<"svptrue_{d}", "}v", "QcQsQiQl", MergeNone, 
"aarch64_sve_ptrue_{d}", [IsOverloadNone, IsStreamingCompatible], []>;
 
   def SVPFALSE_COUNT_ALIAS : SInst<"svpfalse_c", "}v", "", MergeNone, "", 
[IsOverloadNone, IsStreamingCompatible]>;
+
+  def SVFCLAMP   : SInst<"svclamp[_{d}]", "", "hfd", MergeNone, 
"aarch64_sve_fclamp", [IsStreamingCompatible], []>;
+  def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, 
"aarch64_sve_cntp_{d}", [IsOverloadNone, IsStreamingCompatible], [ImmCheck<1, 
ImmCheck2_4_Mul2>]>;
 }
 
 let TargetGuard = "sve2p1,b16b16" in {
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_cntp.c 
b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_cntp.c
index 18973a6467450a..01b995c1b05833 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_cntp.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_cntp.c
@@ -3,6 +3,9 @@
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S 
-O1 -Werror -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S 
-O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S 
-disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 
-target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 
-target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s 
-check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 
-target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include 
 
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_fclamp.c 
b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_fclamp.c
index a9f482cab39698..3adb28b8cc71bf 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_fclamp.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_fclamp.c
@@ -8,6 +8,14 @@
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple 
aarch64-none-linux-gnu -target-feature +sve2p1 \
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sme2 -target-feature +sve \
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clan

[clang] [AArch64][SME2] Add FCLAMP, CNTP builtins for SME2 (PR #72487)

2023-12-16 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov closed 
https://github.com/llvm/llvm-project/pull/72487
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][SME2] Enable multi-vector loads & stores for SME2 (PR #75821)

2023-12-18 Thread Dinar Temirbulatov via cfe-commits

dtemirbulatov wrote:

> Should these builtins be `IsStreamingCompatible` until we add 
> `IsStreamingOrSVE2p1`?

yes, it looks like this is the case. LGTM.

https://github.com/llvm/llvm-project/pull/75821
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][SME2] Enable multi-vector loads & stores for SME2 (PR #75821)

2023-12-18 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov approved this pull request.


https://github.com/llvm/llvm-project/pull/75821
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME2] Add SME2 MLA/MLS builtins. (PR #75584)

2023-12-19 Thread Dinar Temirbulatov via cfe-commits


@@ -315,6 +315,219 @@ let TargetGuard = "sme2" in {
   def SVBMOPS : Inst<"svbmops_za32[_{d}]_m", "viPPdd", "iUi", MergeNone, 
"aarch64_sme_bmops_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, 
ImmCheck0_3>]>;
 }
 
+// FMLA/FMLS
+let TargetGuard = "sme2" in {
+  def SVMLA_MULTI_VG1x2_F32 : Inst<"svmla_za32[_{d}]_vg1x2", "vm22", "f", 
MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLA_MULTI_VG1x4_F32 : Inst<"svmla_za32[_{d}]_vg1x4", "vm44", "f", 
MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_MULTI_VG1x2_F32 : Inst<"svmls_za32[_{d}]_vg1x2", "vm22", "f", 
MergeNone, "aarch64_sme_fmls_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_MULTI_VG1x4_F32 : Inst<"svmls_za32[_{d}]_vg1x4", "vm44", "f", 
MergeNone, "aarch64_sme_fmls_vg1x4", [IsStreaming, IsSharedZA], []>;

dtemirbulatov wrote:

hmm, I see for SVMLA_MULTI_VG1x2_F32 in
void test_svmla2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32x2_t zm) 
__arm_streaming __arm_shared_za {
  SVE_ACLE_FUNC(svmla,,_za32,_f32,_vg1x2)(slice_base, zn, zm);
} in acle_sme2_mla.c
and the rest of acle_sme2_mla.c, acle_sme2_mls.c 

https://github.com/llvm/llvm-project/pull/75584
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME2] Add SME2 MLA/MLS builtins. (PR #75584)

2023-12-19 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov edited 
https://github.com/llvm/llvm-project/pull/75584
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME2] Add SME2 MLA/MLS builtins. (PR #75584)

2023-12-19 Thread Dinar Temirbulatov via cfe-commits


@@ -315,6 +315,219 @@ let TargetGuard = "sme2" in {
   def SVBMOPS : Inst<"svbmops_za32[_{d}]_m", "viPPdd", "iUi", MergeNone, 
"aarch64_sme_bmops_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, 
ImmCheck0_3>]>;
 }
 
+// FMLA/FMLS
+let TargetGuard = "sme2" in {
+  def SVMLA_MULTI_VG1x2_F32 : Inst<"svmla_za32[_{d}]_vg1x2", "vm22", "f", 
MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLA_MULTI_VG1x4_F32 : Inst<"svmla_za32[_{d}]_vg1x4", "vm44", "f", 
MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_MULTI_VG1x2_F32 : Inst<"svmls_za32[_{d}]_vg1x2", "vm22", "f", 
MergeNone, "aarch64_sme_fmls_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_MULTI_VG1x4_F32 : Inst<"svmls_za32[_{d}]_vg1x4", "vm44", "f", 
MergeNone, "aarch64_sme_fmls_vg1x4", [IsStreaming, IsSharedZA], []>;
+
+  def SVMLA_SINGLE_VG1x2_F32 : Inst<"svmla[_single]_za32[_{d}]_vg1x2", "vm2d", 
"f", MergeNone, "aarch64_sme_fmla_single_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLA_SINGLE_VG1x4_F32 : Inst<"svmla[_single]_za32[_{d}]_vg1x4", "vm4d", 
"f", MergeNone, "aarch64_sme_fmla_single_vg1x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_SINGLE_VG1x2_F32 : Inst<"svmls[_single]_za32[_{d}]_vg1x2", "vm2d", 
"f", MergeNone, "aarch64_sme_fmls_single_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_SINGLE_VG1x4_F32 : Inst<"svmls[_single]_za32[_{d}]_vg1x4", "vm4d", 
"f", MergeNone, "aarch64_sme_fmls_single_vg1x4", [IsStreaming, IsSharedZA], []>;
+
+  def SVMLA_LANE_VG1x2_F32 : Inst<"svmla_lane_za32[_{d}]_vg1x2", "vm2di", "f", 
MergeNone, "aarch64_sme_fmla_lane_vg1x2", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_3>]>;
+  def SVMLA_LANE_VG1x4_F32 : Inst<"svmla_lane_za32[_{d}]_vg1x4", "vm4di", "f", 
MergeNone, "aarch64_sme_fmla_lane_vg1x4", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_3>]>;
+  def SVMLS_LANE_VG1x2_F32 : Inst<"svmls_lane_za32[_{d}]_vg1x2", "vm2di", "f", 
MergeNone, "aarch64_sme_fmls_lane_vg1x2", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_3>]>;
+  def SVMLS_LANE_VG1x4_F32 : Inst<"svmls_lane_za32[_{d}]_vg1x4", "vm4di", "f", 
MergeNone, "aarch64_sme_fmls_lane_vg1x4", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_3>]>;

dtemirbulatov wrote:

similarly in acle_sme2_mla.c, acle_sme2_mls.c with **test_svmla_lane2_f32()**, 
**test_svmla_lane4_f32()**, **test_svmls_lane2_f32()**, 
**test_svmls_lane4_f32()** functions.

https://github.com/llvm/llvm-project/pull/75584
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME2] Add SME2 MLA/MLS builtins. (PR #75584)

2023-12-19 Thread Dinar Temirbulatov via cfe-commits


@@ -315,6 +315,219 @@ let TargetGuard = "sme2" in {
   def SVBMOPS : Inst<"svbmops_za32[_{d}]_m", "viPPdd", "iUi", MergeNone, 
"aarch64_sme_bmops_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, 
ImmCheck0_3>]>;
 }
 
+// FMLA/FMLS
+let TargetGuard = "sme2" in {
+  def SVMLA_MULTI_VG1x2_F32 : Inst<"svmla_za32[_{d}]_vg1x2", "vm22", "f", 
MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLA_MULTI_VG1x4_F32 : Inst<"svmla_za32[_{d}]_vg1x4", "vm44", "f", 
MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_MULTI_VG1x2_F32 : Inst<"svmls_za32[_{d}]_vg1x2", "vm22", "f", 
MergeNone, "aarch64_sme_fmls_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_MULTI_VG1x4_F32 : Inst<"svmls_za32[_{d}]_vg1x4", "vm44", "f", 
MergeNone, "aarch64_sme_fmls_vg1x4", [IsStreaming, IsSharedZA], []>;
+
+  def SVMLA_SINGLE_VG1x2_F32 : Inst<"svmla[_single]_za32[_{d}]_vg1x2", "vm2d", 
"f", MergeNone, "aarch64_sme_fmla_single_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLA_SINGLE_VG1x4_F32 : Inst<"svmla[_single]_za32[_{d}]_vg1x4", "vm4d", 
"f", MergeNone, "aarch64_sme_fmla_single_vg1x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_SINGLE_VG1x2_F32 : Inst<"svmls[_single]_za32[_{d}]_vg1x2", "vm2d", 
"f", MergeNone, "aarch64_sme_fmls_single_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_SINGLE_VG1x4_F32 : Inst<"svmls[_single]_za32[_{d}]_vg1x4", "vm4d", 
"f", MergeNone, "aarch64_sme_fmls_single_vg1x4", [IsStreaming, IsSharedZA], []>;
+
+  def SVMLA_LANE_VG1x2_F32 : Inst<"svmla_lane_za32[_{d}]_vg1x2", "vm2di", "f", 
MergeNone, "aarch64_sme_fmla_lane_vg1x2", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_3>]>;
+  def SVMLA_LANE_VG1x4_F32 : Inst<"svmla_lane_za32[_{d}]_vg1x4", "vm4di", "f", 
MergeNone, "aarch64_sme_fmla_lane_vg1x4", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_3>]>;
+  def SVMLS_LANE_VG1x2_F32 : Inst<"svmls_lane_za32[_{d}]_vg1x2", "vm2di", "f", 
MergeNone, "aarch64_sme_fmls_lane_vg1x2", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_3>]>;
+  def SVMLS_LANE_VG1x4_F32 : Inst<"svmls_lane_za32[_{d}]_vg1x4", "vm4di", "f", 
MergeNone, "aarch64_sme_fmls_lane_vg1x4", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_3>]>;
+}
+
+let TargetGuard = "sme2,sme-f64f64" in {
+  def SVMLA_MULTI_VG1x2_F64 : Inst<"svmla_za64[_{d}]_vg1x2", "vm22", "d", 
MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLA_MULTI_VG1x4_F64 : Inst<"svmla_za64[_{d}]_vg1x4", "vm44", "d", 
MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_MULTI_VG1x2_F64 : Inst<"svmls_za64[_{d}]_vg1x2", "vm22", "d", 
MergeNone, "aarch64_sme_fmls_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_MULTI_VG1x4_F64 : Inst<"svmls_za64[_{d}]_vg1x4", "vm44", "d", 
MergeNone, "aarch64_sme_fmls_vg1x4", [IsStreaming, IsSharedZA], []>;
+
+  def SVMLA_SINGLE_VG1x2_F64 : Inst<"svmla[_single]_za64[_{d}]_vg1x2", "vm2d", 
"d", MergeNone, "aarch64_sme_fmla_single_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLA_SINGLE_VG1x4_F64 : Inst<"svmla[_single]_za64[_{d}]_vg1x4", "vm4d", 
"d", MergeNone, "aarch64_sme_fmla_single_vg1x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_SINGLE_VG1x2_F64 : Inst<"svmls[_single]_za64[_{d}]_vg1x2", "vm2d", 
"d", MergeNone, "aarch64_sme_fmls_single_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_SINGLE_VG1x4_F64 : Inst<"svmls[_single]_za64[_{d}]_vg1x4", "vm4d", 
"d", MergeNone, "aarch64_sme_fmls_single_vg1x4", [IsStreaming, IsSharedZA], []>;
+
+  def SVMLA_LANE_VG1x2_F64 : Inst<"svmla_lane_za64[_{d}]_vg1x2", "vm2di", "d", 
MergeNone, "aarch64_sme_fmla_lane_vg1x2", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_1>]>;
+  def SVMLA_LANE_VG1x4_F64 : Inst<"svmla_lane_za64[_{d}]_vg1x4", "vm4di", "d", 
MergeNone, "aarch64_sme_fmla_lane_vg1x4", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_1>]>;
+  def SVMLS_LANE_VG1x2_F64 : Inst<"svmls_lane_za64[_{d}]_vg1x2", "vm2di", "d", 
MergeNone, "aarch64_sme_fmls_lane_vg1x2", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_1>]>;
+  def SVMLS_LANE_VG1x4_F64 : Inst<"svmls_lane_za64[_{d}]_vg1x4", "vm4di", "d", 
MergeNone, "aarch64_sme_fmls_lane_vg1x4", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_1>]>;

dtemirbulatov wrote:

Again, acle_sme2_mla.c, acle_sme2_mls.c in **test_svmla_lane2_f64()**, 
**test_svmla_lane4_f64()**, , **test_svmls_lane2_f64**, 
**test_svmla_lane4_f64()**, 

https://github.com/llvm/llvm-project/pull/75584
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME2] Add SME2 MLA/MLS builtins. (PR #75584)

2023-12-19 Thread Dinar Temirbulatov via cfe-commits


@@ -315,6 +315,219 @@ let TargetGuard = "sme2" in {
   def SVBMOPS : Inst<"svbmops_za32[_{d}]_m", "viPPdd", "iUi", MergeNone, 
"aarch64_sme_bmops_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, 
ImmCheck0_3>]>;
 }
 
+// FMLA/FMLS
+let TargetGuard = "sme2" in {
+  def SVMLA_MULTI_VG1x2_F32 : Inst<"svmla_za32[_{d}]_vg1x2", "vm22", "f", 
MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLA_MULTI_VG1x4_F32 : Inst<"svmla_za32[_{d}]_vg1x4", "vm44", "f", 
MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_MULTI_VG1x2_F32 : Inst<"svmls_za32[_{d}]_vg1x2", "vm22", "f", 
MergeNone, "aarch64_sme_fmls_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_MULTI_VG1x4_F32 : Inst<"svmls_za32[_{d}]_vg1x4", "vm44", "f", 
MergeNone, "aarch64_sme_fmls_vg1x4", [IsStreaming, IsSharedZA], []>;
+
+  def SVMLA_SINGLE_VG1x2_F32 : Inst<"svmla[_single]_za32[_{d}]_vg1x2", "vm2d", 
"f", MergeNone, "aarch64_sme_fmla_single_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLA_SINGLE_VG1x4_F32 : Inst<"svmla[_single]_za32[_{d}]_vg1x4", "vm4d", 
"f", MergeNone, "aarch64_sme_fmla_single_vg1x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_SINGLE_VG1x2_F32 : Inst<"svmls[_single]_za32[_{d}]_vg1x2", "vm2d", 
"f", MergeNone, "aarch64_sme_fmls_single_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_SINGLE_VG1x4_F32 : Inst<"svmls[_single]_za32[_{d}]_vg1x4", "vm4d", 
"f", MergeNone, "aarch64_sme_fmls_single_vg1x4", [IsStreaming, IsSharedZA], []>;
+
+  def SVMLA_LANE_VG1x2_F32 : Inst<"svmla_lane_za32[_{d}]_vg1x2", "vm2di", "f", 
MergeNone, "aarch64_sme_fmla_lane_vg1x2", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_3>]>;
+  def SVMLA_LANE_VG1x4_F32 : Inst<"svmla_lane_za32[_{d}]_vg1x4", "vm4di", "f", 
MergeNone, "aarch64_sme_fmla_lane_vg1x4", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_3>]>;
+  def SVMLS_LANE_VG1x2_F32 : Inst<"svmls_lane_za32[_{d}]_vg1x2", "vm2di", "f", 
MergeNone, "aarch64_sme_fmls_lane_vg1x2", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_3>]>;
+  def SVMLS_LANE_VG1x4_F32 : Inst<"svmls_lane_za32[_{d}]_vg1x4", "vm4di", "f", 
MergeNone, "aarch64_sme_fmls_lane_vg1x4", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_3>]>;
+}
+
+let TargetGuard = "sme2,sme-f64f64" in {
+  def SVMLA_MULTI_VG1x2_F64 : Inst<"svmla_za64[_{d}]_vg1x2", "vm22", "d", 
MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLA_MULTI_VG1x4_F64 : Inst<"svmla_za64[_{d}]_vg1x4", "vm44", "d", 
MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_MULTI_VG1x2_F64 : Inst<"svmls_za64[_{d}]_vg1x2", "vm22", "d", 
MergeNone, "aarch64_sme_fmls_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_MULTI_VG1x4_F64 : Inst<"svmls_za64[_{d}]_vg1x4", "vm44", "d", 
MergeNone, "aarch64_sme_fmls_vg1x4", [IsStreaming, IsSharedZA], []>;
+
+  def SVMLA_SINGLE_VG1x2_F64 : Inst<"svmla[_single]_za64[_{d}]_vg1x2", "vm2d", 
"d", MergeNone, "aarch64_sme_fmla_single_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLA_SINGLE_VG1x4_F64 : Inst<"svmla[_single]_za64[_{d}]_vg1x4", "vm4d", 
"d", MergeNone, "aarch64_sme_fmla_single_vg1x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_SINGLE_VG1x2_F64 : Inst<"svmls[_single]_za64[_{d}]_vg1x2", "vm2d", 
"d", MergeNone, "aarch64_sme_fmls_single_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_SINGLE_VG1x4_F64 : Inst<"svmls[_single]_za64[_{d}]_vg1x4", "vm4d", 
"d", MergeNone, "aarch64_sme_fmls_single_vg1x4", [IsStreaming, IsSharedZA], []>;
+
+  def SVMLA_LANE_VG1x2_F64 : Inst<"svmla_lane_za64[_{d}]_vg1x2", "vm2di", "d", 
MergeNone, "aarch64_sme_fmla_lane_vg1x2", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_1>]>;
+  def SVMLA_LANE_VG1x4_F64 : Inst<"svmla_lane_za64[_{d}]_vg1x4", "vm4di", "d", 
MergeNone, "aarch64_sme_fmla_lane_vg1x4", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_1>]>;
+  def SVMLS_LANE_VG1x2_F64 : Inst<"svmls_lane_za64[_{d}]_vg1x2", "vm2di", "d", 
MergeNone, "aarch64_sme_fmls_lane_vg1x2", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_1>]>;
+  def SVMLS_LANE_VG1x4_F64 : Inst<"svmls_lane_za64[_{d}]_vg1x4", "vm4di", "d", 
MergeNone, "aarch64_sme_fmls_lane_vg1x4", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_1>]>;
+}
+
+// FMLAL/FMLSL/UMLAL/SMLAL
+// SMLALL/UMLALL/USMLALL/SUMLALL
+let TargetGuard = "sme2" in {
+  // MULTI MLAL
+  def SVMLAL_MULTI_VG2x2_F16 : Inst<"svmla_za32[_{d}]_vg2x2", "vm22", "bh", 
MergeNone, "aarch64_sme_fmlal_vg2x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_MULTI_VG2x4_F16 : Inst<"svmla_za32[_{d}]_vg2x4", "vm44", "bh", 
MergeNone, "aarch64_sme_fmlal_vg2x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_MULTI_VG2x2_S16 : Inst<"svmla_za32[_{d}]_vg2x2", "vm22", "s", 
MergeNone, "aarch64_sme_smlal_vg2x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_MULTI_VG2x4_S16 : Inst<"svmla_za32[_{d}]_vg2x4", "vm44", "s", 
MergeNone, "aarch64_sme_smlal_vg2x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_MULTI_VG2x2_U16 : Inst<"svmla_za32[_{d}]_vg2x2", "v

[clang] [AArch64][SME2] Add SME2 MLA/MLS builtins. (PR #75584)

2023-12-19 Thread Dinar Temirbulatov via cfe-commits


@@ -315,6 +315,219 @@ let TargetGuard = "sme2" in {
   def SVBMOPS : Inst<"svbmops_za32[_{d}]_m", "viPPdd", "iUi", MergeNone, 
"aarch64_sme_bmops_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, 
ImmCheck0_3>]>;
 }
 
+// FMLA/FMLS
+let TargetGuard = "sme2" in {
+  def SVMLA_MULTI_VG1x2_F32 : Inst<"svmla_za32[_{d}]_vg1x2", "vm22", "f", 
MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLA_MULTI_VG1x4_F32 : Inst<"svmla_za32[_{d}]_vg1x4", "vm44", "f", 
MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_MULTI_VG1x2_F32 : Inst<"svmls_za32[_{d}]_vg1x2", "vm22", "f", 
MergeNone, "aarch64_sme_fmls_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_MULTI_VG1x4_F32 : Inst<"svmls_za32[_{d}]_vg1x4", "vm44", "f", 
MergeNone, "aarch64_sme_fmls_vg1x4", [IsStreaming, IsSharedZA], []>;
+
+  def SVMLA_SINGLE_VG1x2_F32 : Inst<"svmla[_single]_za32[_{d}]_vg1x2", "vm2d", 
"f", MergeNone, "aarch64_sme_fmla_single_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLA_SINGLE_VG1x4_F32 : Inst<"svmla[_single]_za32[_{d}]_vg1x4", "vm4d", 
"f", MergeNone, "aarch64_sme_fmla_single_vg1x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_SINGLE_VG1x2_F32 : Inst<"svmls[_single]_za32[_{d}]_vg1x2", "vm2d", 
"f", MergeNone, "aarch64_sme_fmls_single_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_SINGLE_VG1x4_F32 : Inst<"svmls[_single]_za32[_{d}]_vg1x4", "vm4d", 
"f", MergeNone, "aarch64_sme_fmls_single_vg1x4", [IsStreaming, IsSharedZA], []>;
+
+  def SVMLA_LANE_VG1x2_F32 : Inst<"svmla_lane_za32[_{d}]_vg1x2", "vm2di", "f", 
MergeNone, "aarch64_sme_fmla_lane_vg1x2", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_3>]>;
+  def SVMLA_LANE_VG1x4_F32 : Inst<"svmla_lane_za32[_{d}]_vg1x4", "vm4di", "f", 
MergeNone, "aarch64_sme_fmla_lane_vg1x4", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_3>]>;
+  def SVMLS_LANE_VG1x2_F32 : Inst<"svmls_lane_za32[_{d}]_vg1x2", "vm2di", "f", 
MergeNone, "aarch64_sme_fmls_lane_vg1x2", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_3>]>;
+  def SVMLS_LANE_VG1x4_F32 : Inst<"svmls_lane_za32[_{d}]_vg1x4", "vm4di", "f", 
MergeNone, "aarch64_sme_fmls_lane_vg1x4", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_3>]>;
+}
+
+let TargetGuard = "sme2,sme-f64f64" in {
+  def SVMLA_MULTI_VG1x2_F64 : Inst<"svmla_za64[_{d}]_vg1x2", "vm22", "d", 
MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLA_MULTI_VG1x4_F64 : Inst<"svmla_za64[_{d}]_vg1x4", "vm44", "d", 
MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_MULTI_VG1x2_F64 : Inst<"svmls_za64[_{d}]_vg1x2", "vm22", "d", 
MergeNone, "aarch64_sme_fmls_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_MULTI_VG1x4_F64 : Inst<"svmls_za64[_{d}]_vg1x4", "vm44", "d", 
MergeNone, "aarch64_sme_fmls_vg1x4", [IsStreaming, IsSharedZA], []>;
+

dtemirbulatov wrote:

in acle_sme2_mla.c following functions **test_svmla2_f64()**, 
**test_svmla4_f64()** and in acle_sme2_mls.c **test_svmls2_f64()**, 
**test_svmls4_f64()** functions.

https://github.com/llvm/llvm-project/pull/75584
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME2] Add SME2 MLA/MLS builtins. (PR #75584)

2023-12-19 Thread Dinar Temirbulatov via cfe-commits


@@ -315,6 +315,219 @@ let TargetGuard = "sme2" in {
   def SVBMOPS : Inst<"svbmops_za32[_{d}]_m", "viPPdd", "iUi", MergeNone, 
"aarch64_sme_bmops_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, 
ImmCheck0_3>]>;
 }
 
+// FMLA/FMLS
+let TargetGuard = "sme2" in {
+  def SVMLA_MULTI_VG1x2_F32 : Inst<"svmla_za32[_{d}]_vg1x2", "vm22", "f", 
MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLA_MULTI_VG1x4_F32 : Inst<"svmla_za32[_{d}]_vg1x4", "vm44", "f", 
MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_MULTI_VG1x2_F32 : Inst<"svmls_za32[_{d}]_vg1x2", "vm22", "f", 
MergeNone, "aarch64_sme_fmls_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_MULTI_VG1x4_F32 : Inst<"svmls_za32[_{d}]_vg1x4", "vm44", "f", 
MergeNone, "aarch64_sme_fmls_vg1x4", [IsStreaming, IsSharedZA], []>;
+
+  def SVMLA_SINGLE_VG1x2_F32 : Inst<"svmla[_single]_za32[_{d}]_vg1x2", "vm2d", 
"f", MergeNone, "aarch64_sme_fmla_single_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLA_SINGLE_VG1x4_F32 : Inst<"svmla[_single]_za32[_{d}]_vg1x4", "vm4d", 
"f", MergeNone, "aarch64_sme_fmla_single_vg1x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_SINGLE_VG1x2_F32 : Inst<"svmls[_single]_za32[_{d}]_vg1x2", "vm2d", 
"f", MergeNone, "aarch64_sme_fmls_single_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_SINGLE_VG1x4_F32 : Inst<"svmls[_single]_za32[_{d}]_vg1x4", "vm4d", 
"f", MergeNone, "aarch64_sme_fmls_single_vg1x4", [IsStreaming, IsSharedZA], []>;
+
+  def SVMLA_LANE_VG1x2_F32 : Inst<"svmla_lane_za32[_{d}]_vg1x2", "vm2di", "f", 
MergeNone, "aarch64_sme_fmla_lane_vg1x2", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_3>]>;
+  def SVMLA_LANE_VG1x4_F32 : Inst<"svmla_lane_za32[_{d}]_vg1x4", "vm4di", "f", 
MergeNone, "aarch64_sme_fmla_lane_vg1x4", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_3>]>;
+  def SVMLS_LANE_VG1x2_F32 : Inst<"svmls_lane_za32[_{d}]_vg1x2", "vm2di", "f", 
MergeNone, "aarch64_sme_fmls_lane_vg1x2", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_3>]>;
+  def SVMLS_LANE_VG1x4_F32 : Inst<"svmls_lane_za32[_{d}]_vg1x4", "vm4di", "f", 
MergeNone, "aarch64_sme_fmls_lane_vg1x4", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_3>]>;
+}
+
+let TargetGuard = "sme2,sme-f64f64" in {
+  def SVMLA_MULTI_VG1x2_F64 : Inst<"svmla_za64[_{d}]_vg1x2", "vm22", "d", 
MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLA_MULTI_VG1x4_F64 : Inst<"svmla_za64[_{d}]_vg1x4", "vm44", "d", 
MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_MULTI_VG1x2_F64 : Inst<"svmls_za64[_{d}]_vg1x2", "vm22", "d", 
MergeNone, "aarch64_sme_fmls_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_MULTI_VG1x4_F64 : Inst<"svmls_za64[_{d}]_vg1x4", "vm44", "d", 
MergeNone, "aarch64_sme_fmls_vg1x4", [IsStreaming, IsSharedZA], []>;
+
+  def SVMLA_SINGLE_VG1x2_F64 : Inst<"svmla[_single]_za64[_{d}]_vg1x2", "vm2d", 
"d", MergeNone, "aarch64_sme_fmla_single_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLA_SINGLE_VG1x4_F64 : Inst<"svmla[_single]_za64[_{d}]_vg1x4", "vm4d", 
"d", MergeNone, "aarch64_sme_fmla_single_vg1x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_SINGLE_VG1x2_F64 : Inst<"svmls[_single]_za64[_{d}]_vg1x2", "vm2d", 
"d", MergeNone, "aarch64_sme_fmls_single_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLS_SINGLE_VG1x4_F64 : Inst<"svmls[_single]_za64[_{d}]_vg1x4", "vm4d", 
"d", MergeNone, "aarch64_sme_fmls_single_vg1x4", [IsStreaming, IsSharedZA], []>;
+
+  def SVMLA_LANE_VG1x2_F64 : Inst<"svmla_lane_za64[_{d}]_vg1x2", "vm2di", "d", 
MergeNone, "aarch64_sme_fmla_lane_vg1x2", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_1>]>;
+  def SVMLA_LANE_VG1x4_F64 : Inst<"svmla_lane_za64[_{d}]_vg1x4", "vm4di", "d", 
MergeNone, "aarch64_sme_fmla_lane_vg1x4", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_1>]>;
+  def SVMLS_LANE_VG1x2_F64 : Inst<"svmls_lane_za64[_{d}]_vg1x2", "vm2di", "d", 
MergeNone, "aarch64_sme_fmls_lane_vg1x2", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_1>]>;
+  def SVMLS_LANE_VG1x4_F64 : Inst<"svmls_lane_za64[_{d}]_vg1x4", "vm4di", "d", 
MergeNone, "aarch64_sme_fmls_lane_vg1x4", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_1>]>;
+}
+
+// FMLAL/FMLSL/UMLAL/SMLAL
+// SMLALL/UMLALL/USMLALL/SUMLALL
+let TargetGuard = "sme2" in {
+  // MULTI MLAL
+  def SVMLAL_MULTI_VG2x2_F16 : Inst<"svmla_za32[_{d}]_vg2x2", "vm22", "bh", 
MergeNone, "aarch64_sme_fmlal_vg2x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_MULTI_VG2x4_F16 : Inst<"svmla_za32[_{d}]_vg2x4", "vm44", "bh", 
MergeNone, "aarch64_sme_fmlal_vg2x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_MULTI_VG2x2_S16 : Inst<"svmla_za32[_{d}]_vg2x2", "vm22", "s", 
MergeNone, "aarch64_sme_smlal_vg2x2", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_MULTI_VG2x4_S16 : Inst<"svmla_za32[_{d}]_vg2x4", "vm44", "s", 
MergeNone, "aarch64_sme_smlal_vg2x4", [IsStreaming, IsSharedZA], []>;
+  def SVMLAL_MULTI_VG2x2_U16 : Inst<"svmla_za32[_{d}]_vg2x2", "v

[clang] [AArch64][SME2] Add builtins for FDOT, BFDOT, SUDOT, USDOT, SDOT, UDOT. (PR #75737)

2023-12-19 Thread Dinar Temirbulatov via cfe-commits


@@ -313,6 +313,72 @@ let TargetGuard = "sme2" in {
   def SVBMOPA : Inst<"svbmopa_za32[_{d}]_m", "viPPdd", "iUi", MergeNone, 
"aarch64_sme_bmopa_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, 
ImmCheck0_3>]>;
 
   def SVBMOPS : Inst<"svbmops_za32[_{d}]_m", "viPPdd", "iUi", MergeNone, 
"aarch64_sme_bmops_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, 
ImmCheck0_3>]>;
+
+  // VERTICAL DOT-PRODUCT
+  def SVVDOT_LANE_ZA32_VG1x2_S : Inst<"svvdot_lane_za32[_{d}]_vg1x2", "vm2di", 
"s", MergeNone, "aarch64_sme_svdot_lane_za32_vg1x2", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_3>]>;
+  def SVVDOT_LANE_ZA32_VG1x4_S : Inst<"svvdot_lane_za32[_{d}]_vg1x4", "vm4di", 
"c", MergeNone, "aarch64_sme_svdot_lane_za32_vg1x4", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_3>]>;
+  def SVVDOT_LANE_ZA32_VG1x2_U : Inst<"svvdot_lane_za32[_{d}]_vg1x2", "vm2di", 
"Us", MergeNone, "aarch64_sme_uvdot_lane_za32_vg1x2", [IsStreaming, 
IsSharedZA], [ImmCheck<3, ImmCheck0_3>]>;
+  def SVVDOT_LANE_ZA32_VG1x4_U : Inst<"svvdot_lane_za32[_{d}]_vg1x4", "vm4di", 
"Uc", MergeNone, "aarch64_sme_uvdot_lane_za32_vg1x4", [IsStreaming, 
IsSharedZA], [ImmCheck<3, ImmCheck0_3>]>;
+  def SVVDOT_LANE_ZA32_VG1x2_F : Inst<"svvdot_lane_za32[_{d}]_vg1x2", "vm2di", 
"hb", MergeNone, "aarch64_sme_fvdot_lane_za32_vg1x2", [IsStreaming, 
IsSharedZA], [ImmCheck<3, ImmCheck0_3>]>;
+  def SVSUVDOT_LANE_ZA32_VG1x4 : Inst<"svsuvdot_lane_za32[_{d}]_vg1x4", 
"vm4di", "c", MergeNone, "aarch64_sme_suvdot_lane_za32_vg1x4", [IsStreaming, 
IsSharedZA], [ImmCheck<3, ImmCheck0_3>]>;
+  def SVUSVDOT_LANE_ZA32_VG1x4 : Inst<"svusvdot_lane_za32[_{d}]_vg1x4", 
"vm4di", "Uc", MergeNone, "aarch64_sme_usvdot_lane_za32_vg1x4", [IsStreaming, 
IsSharedZA], [ImmCheck<3, ImmCheck0_3>]>;
+
+  // Multi-vector signed & unsigned integer dot-product
+  def SVDOT_SINGLE_ZA32_VG1x2_S : Inst<"svdot[_single]_za32[_{d}]_vg1x2", 
"vm2d", "cs", MergeNone, "aarch64_sme_sdot_single_za32_vg1x2", [IsStreaming, 
IsSharedZA], []>;
+  def SVDOT_SINGLE_ZA32_VG1x4_S : Inst<"svdot[_single]_za32[_{d}]_vg1x4", 
"vm4d", "cs", MergeNone, "aarch64_sme_sdot_single_za32_vg1x4", [IsStreaming, 
IsSharedZA], []>;
+  def SVDOT_SINGLE_ZA32_VG1x2_U : Inst<"svdot[_single]_za32[_{d}]_vg1x2", 
"vm2d", "UcUs", MergeNone, "aarch64_sme_udot_single_za32_vg1x2", [IsStreaming, 
IsSharedZA], []>;
+  def SVDOT_SINGLE_ZA32_VG1x4_U : Inst<"svdot[_single]_za32[_{d}]_vg1x4", 
"vm4d", "UcUs", MergeNone, "aarch64_sme_udot_single_za32_vg1x4", [IsStreaming, 
IsSharedZA], []>;
+  def SVDOT_MULTI_ZA32_VG1x2_S  : Inst<"svdot_za32[_{d}]_vg1x2", "vm22", "cs", 
MergeNone, "aarch64_sme_sdot_za32_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVDOT_MULTI_ZA32_VG1x4_S  : Inst<"svdot_za32[_{d}]_vg1x4", "vm44", "cs", 
MergeNone, "aarch64_sme_sdot_za32_vg1x4", [IsStreaming, IsSharedZA], []>;
+  def SVDOT_MULTI_ZA32_VG1x2_U  : Inst<"svdot_za32[_{d}]_vg1x2", "vm22", 
"UcUs", MergeNone, "aarch64_sme_udot_za32_vg1x2", [IsStreaming, IsSharedZA], 
[]>;
+  def SVDOT_MULTI_ZA32_VG1x4_U  : Inst<"svdot_za32[_{d}]_vg1x4", "vm44", 
"UcUs", MergeNone, "aarch64_sme_udot_za32_vg1x4", [IsStreaming, IsSharedZA], 
[]>;
+  def SVDOT_LANE_ZA32_VG1x2_S   : Inst<"svdot_lane_za32[_{d}]_vg1x2", "vm2di", 
"cs", MergeNone, "aarch64_sme_sdot_lane_za32_vg1x2", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_3>]>;
+  def SVDOT_LANE_ZA32_VG1x4_S   : Inst<"svdot_lane_za32[_{d}]_vg1x4", "vm4di", 
"cs", MergeNone, "aarch64_sme_sdot_lane_za32_vg1x4", [IsStreaming, IsSharedZA], 
[ImmCheck<3, ImmCheck0_3>]>;
+  def SVDOT_LANE_ZA32_VG1x2_U   : Inst<"svdot_lane_za32[_{d}]_vg1x2", "vm2di", 
"UcUs", MergeNone, "aarch64_sme_udot_lane_za32_vg1x2", [IsStreaming, 
IsSharedZA], [ImmCheck<3, ImmCheck0_3>]>;
+  def SVDOT_LANE_ZA32_VG1x4_U   : Inst<"svdot_lane_za32[_{d}]_vg1x4", "vm4di", 
"UcUs", MergeNone, "aarch64_sme_udot_lane_za32_vg1x4", [IsStreaming, 
IsSharedZA], [ImmCheck<3, ImmCheck0_3>]>;
+
+  def SVUSDOT_SINGLE_ZA32_VG1x2 : Inst<"svusdot[_single]_za32[_{d}]_vg1x2", 
"vm2.dx", "Uc", MergeNone, "aarch64_sme_usdot_single_za32_vg1x2", [IsStreaming, 
IsSharedZA], []>;
+  def SVUSDOT_SINGLE_ZA32_VG1x4 : Inst<"svusdot[_single]_za32[_{d}]_vg1x4", 
"vm4.dx", "Uc", MergeNone, "aarch64_sme_usdot_single_za32_vg1x4", [IsStreaming, 
IsSharedZA], []>;
+  def SVUSDOT_MULTI_ZA32_VG1x2  : Inst<"svusdot_za32[_{d}]_vg1x2", "vm2.d2.x", 
"Uc", MergeNone, "aarch64_sme_usdot_za32_vg1x2", [IsStreaming, IsSharedZA], []>;
+  def SVUSDOT_MULTI_ZA32_VG1x4  : Inst<"svusdot_za32[_{d}]_vg1x4", "vm4.d4.x", 
"Uc", MergeNone, "aarch64_sme_usdot_za32_vg1x4", [IsStreaming, IsSharedZA], []>;
+  def SVUSDOT_LANE_ZA32_VG1x2   : Inst<"svusdot_lane_za32[_{d}]_vg1x2", 
"vm2.dxi", "Uc", MergeNone, "aarch64_sme_usdot_lane_za32_vg1x2", [IsStreaming, 
IsSharedZA], [ImmCheck<3, ImmCheck0_3>]>;
+  def SVUSDOT_LANE_ZA32_VG1x4   : Inst<"svusdot_lane_za32[_{d}]_vg1x4", 
"vm4.dxi", "Uc", MergeNone, "aarch64_sme_usdot_lane_za32_vg1x4", [IsStreaming, 
IsSharedZA], [ImmCheck<3, ImmCheck0_3>]>;
+
+  def SVSUDOT_SI

[clang] [AArch64][SME2] Add PEXT, PSEL builtins for SME2 (PR #72827)

2023-11-27 Thread Dinar Temirbulatov via cfe-commits


@@ -1,10 +1,17 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S 
-O1 -Werror -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S 
-O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sme -target-feature +sme2 -S -O1 -Werror -emit-llvm -o - %s | 
FileCheck %s

dtemirbulatov wrote:

According to 
https://developer.arm.com/documentation/ddi0602/2022-09/SVE-Instructions/PSEL--Predicate-select-between-predicate-register-or-all-false-?lang=en
 , I think that we should enable `-target-feature +sme` and `-target-feature 
+sve2p1 ` and remove `-target-feature +sme2` in order to produce PSEL 
instruction.

https://github.com/llvm/llvm-project/pull/72827
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME2] Add PEXT, PSEL builtins for SME2 (PR #72827)

2023-11-27 Thread Dinar Temirbulatov via cfe-commits


@@ -1859,19 +1859,28 @@ def SVBGRP   : SInst<"svbgrp[_{d}]",   "ddd", 
"UcUsUiUl", MergeNone, "aarch64_sv
 def SVBGRP_N : SInst<"svbgrp[_n_{d}]", "dda", "UcUsUiUl", MergeNone, 
"aarch64_sve_bgrp_x">;
 }
 
+let TargetGuard = "sve2p1|sme" in {

dtemirbulatov wrote:

Accroding to 
https://developer.arm.com/documentation/ddi0602/2022-09/SVE-Instructions/PSEL--Predicate-select-between-predicate-register-or-all-false-?lang=en,
 it is "sve2p1" and "sme" both enabled, I think now it is `sve2p1,sme` should 
work.

https://github.com/llvm/llvm-project/pull/72827
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME2] Enable CLAMP multi-vector builtins for SME2 (PR #72272)

2023-11-29 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov updated 
https://github.com/llvm/llvm-project/pull/72272

>From 11d7759523aa7d8c7f581da176f2e4e5a03f76ca Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov 
Date: Tue, 14 Nov 2023 15:25:45 +
Subject: [PATCH] [AArch64][SME2] Enable CLAMP multi-vector builtins for SME2

Thing change add builtins for SME2:
sclamp.single.x2
uclamp.single.x2
fclamp.single.x2
sclamp.single.x4
uclamp.single.x4
fclamp.single.x4

Patch by: Hassnaa Hamdi 
---
 clang/include/clang/Basic/arm_sve.td  |  10 +
 .../aarch64-sme2-intrinsics/acle_sme2_clamp.c | 747 ++
 2 files changed, 757 insertions(+)
 create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_clamp.c

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index cd4c09a3ad7a81c..4fcc9327f22fe61 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2101,6 +2101,16 @@ let TargetGuard = "sme2" in {
   defm SVMAXNM : SInstMinMaxByVector<"max">;
 }
 
+let TargetGuard = "sme2" in {
+  def SVSCLAMP_X2 : SInst<"svclamp[_single_{d}_x2]",  "22dd",   "csil", 
MergeNone, "aarch64_sve_sclamp_single_x2",  [IsStreaming], []>;
+  def SVUCLAMP_X2 : SInst<"svclamp[_single_{d}_x2]",  "22dd",   "UcUsUiUl", 
MergeNone, "aarch64_sve_uclamp_single_x2",  [IsStreaming], []>;
+  def SVFCLAMP_X2 : SInst<"svclamp[_single_{d}_x2]",  "22dd",   "hfd",  
MergeNone, "aarch64_sve_fclamp_single_x2",  [IsStreaming], []>;
+
+  def SVSCLAMP_X4 : SInst<"svclamp[_single_{d}_x4]",  "44dd",   "csil", 
MergeNone, "aarch64_sve_sclamp_single_x4",  [IsStreaming], []>;
+  def SVUCLAMP_X4 : SInst<"svclamp[_single_{d}_x4]",  "44dd",   "UcUsUiUl", 
MergeNone, "aarch64_sve_uclamp_single_x4",  [IsStreaming], []>;
+  def SVFCLAMP_X4 : SInst<"svclamp[_single_{d}_x4]",  "44dd",   "hfd",  
MergeNone, "aarch64_sve_fclamp_single_x4",  [IsStreaming], []>;
+}
+
 let TargetGuard = "sme2" in {
 // == ADD (vectors) ==
   def SVADD_SINGLE_X2 : SInst<"svadd[_single_{d}_x2]", "22d", "cUcsUsiUilUl", 
MergeNone, "aarch64_sve_add_single_x2", [IsStreaming], []>;
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_clamp.c 
b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_clamp.c
new file mode 100644
index 000..4dce1699c264fbf
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_clamp.c
@@ -0,0 +1,747 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 
-target-feature +sve \
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sme2 -target-feature +sve \
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 
-target-feature +sve \
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sme2 -target-feature +sve \
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 
-target-feature +sve \
+// RUN:   -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#include 
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+// SCLAMP_X2
+
+// CHECK-LABEL: @test_svclamp_single_s8_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = tail call  
@llvm.vector.extract.nxv16i8.nxv32i8( [[OP1:%.*]], i64 0)
+// CHECK-NEXT:[[TMP1:%.*]] = tail call  
@llvm.vector.extract.nxv16i8.nxv32i8( [[OP1]], i64 16)
+// CHECK-NEXT:[[TMP2:%.*]] = tail call { ,  } @llvm.aarch64.sve.sclamp.single.x2.nxv16i8( [[TMP0]], 
 [[TMP1]],  [[OP2:%.*]],  
[[OP3:%.*]])
+// CHECK-NEXT:[[TMP3:%.*]] = extractvalue { ,  } [[TMP2]], 0
+// CHECK-NEXT:[[TMP4:%.*]] = tail call  
@llvm.vector.insert.nxv32i8.nxv16i8( poison,  [[TMP3]], i64 0)
+// CHECK-NEXT:[[TMP5:%.*]] = extractvalue { ,  } [[TMP2]], 1
+// CHECK-NEXT:[[TMP6:%.*]] = tail call  
@llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]],  [[TMP5]], i64 16)
+// CHECK-NEXT:ret  [[TMP6]]
+//
+// CPP-CHECK-LABEL: @_Z25test_svclamp_single_s8_x210svint8x2_tu10__SVInt8_tS0_(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call  
@llvm.vector.extract.nxv16i8.nxv32i8( [[OP1:%.*

[clang] [AArch64][SME2] Enable CLAMP multi-vector builtins for SME2 (PR #72272)

2023-11-29 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov closed 
https://github.com/llvm/llvm-project/pull/72272
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME2] Add multi-vector SEL (x2, x4) ACLE builtins & intrinsics (PR #73188)

2023-11-30 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov closed 
https://github.com/llvm/llvm-project/pull/73188
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME2] Add _x2/_x4 svqrshr builtins. (PR #74100)

2023-12-01 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov created 
https://github.com/llvm/llvm-project/pull/74100

Patch by: Kerry McLaughlin 

>From 68fdadc7fc5c6541ac56cab0240d24df3f003eb0 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov 
Date: Fri, 1 Dec 2023 16:27:42 +
Subject: [PATCH] [AArch64][SME2] Add _x2/_x4 svqrshr builtins.

Patch by: Kerry McLaughlin 
---
 clang/include/clang/Basic/arm_sve.td  |  15 +
 .../acle_sme2_vector_qrshr.c  | 341 ++
 2 files changed, 356 insertions(+)
 create mode 100644 
clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_qrshr.c

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index 7d1af51fb30ba10..deb0884b7526ddd 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2119,6 +2119,21 @@ let TargetGuard = "sme2" in {
   // 2-way and 4-way selects
   def SVSEL_X2  : SInst<"svsel[_{d}_x2]", "2}22", "cUcsUsiUilUlbhfd", 
MergeNone, "aarch64_sve_sel_x2", [IsStreaming], []>;
   def SVSEL_X4  : SInst<"svsel[_{d}_x4]", "4}44", "cUcsUsiUilUlbhfd", 
MergeNone, "aarch64_sve_sel_x4", [IsStreaming], []>;
+
+  def SVQRSHRN_X4   : SInst<"svqrshrn[_{0}_{d}_x4]", "q4i", "il",   MergeNone, 
"aarch64_sve_sqrshrn_x4", [IsStreaming], [ImmCheck<1, ImmCheckShiftRight, 0>]>;
+  def SVUQRSHRN_X4  : SInst<"svqrshrn[_{0}_{d}_x4]", "b4i", "UiUl", MergeNone, 
"aarch64_sve_uqrshrn_x4", [IsStreaming], [ImmCheck<1, ImmCheckShiftRight, 0>]>;
+
+  // SQRSHR / UQRSHR
+  def SVQRSHR_X2  : SInst<"svqrshr[_{0}_{d}_x2]", "h2i", "i",MergeNone, 
"aarch64_sve_sqrshr_x2", [IsStreaming], [ImmCheck<1, ImmCheck1_16>]>;
+  def SVUQRSHR_X2 : SInst<"svqrshr[_{0}_{d}_x2]", "e2i", "Ui",   MergeNone, 
"aarch64_sve_uqrshr_x2", [IsStreaming], [ImmCheck<1, ImmCheck1_16>]>;
+  def SVQRSHR_X4  : SInst<"svqrshr[_{0}_{d}_x4]", "q4i", "il",   MergeNone, 
"aarch64_sve_sqrshr_x4", [IsStreaming], [ImmCheck<1, ImmCheckShiftRight, 0>]>;
+  def SVUQRSHR_X4 : SInst<"svqrshr[_{0}_{d}_x4]", "b4i", "UiUl", MergeNone, 
"aarch64_sve_uqrshr_x4", [IsStreaming], [ImmCheck<1, ImmCheckShiftRight, 0>]>;
+
+  // SQRSHRU
+  def SVSQRSHRU_X2 : SInst<"svqrshru[_{0}_{d}_x2]", "e2i", "i",  MergeNone, 
"aarch64_sve_sqrshru_x2", [IsStreaming], [ImmCheck<1, ImmCheck1_16>]>;
+  def SVSQRSHRU_X4 : SInst<"svqrshru[_{0}_{d}_x4]", "b4i", "il", MergeNone, 
"aarch64_sve_sqrshru_x4", [IsStreaming], [ImmCheck<1, ImmCheckShiftRight, 0>]>;
+
+  def SVSQRSHRUN_X4 : SInst<"svqrshrun[_{0}_{d}_x4]", "b4i", "il", MergeNone, 
"aarch64_sve_sqrshrun_x4", [IsStreaming], [ImmCheck<1, ImmCheckShiftRight, 0>]>;
 }
 
 let TargetGuard = "sve2p1" in {
diff --git 
a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_qrshr.c 
b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_qrshr.c
new file mode 100644
index 000..84f561b7a81f4a4
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_qrshr.c
@@ -0,0 +1,341 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | 
opt -S  -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x 
c++ %s | opt -S  -passes=mem2reg,instcombine,tailcallelim | FileCheck %s 
-check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - %s | opt -S  -passes=mem2reg,instcombine,tailcallelim | 
FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - -x c++ %s | opt -S  -passes=mem2reg,instcombine,tailcallelim | 
FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sme2 -target-feature +sme-f64f64 -S -disable-O0-optnone 
-Werror -Wall -o /dev/null %s
+
+#include 
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED,A5) A1##A3##A5
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4,A5) A1##A2##A3##A4##A5
+#endif
+
+// SVQRSHR
+
+// CHECK-LABEL: @test_svsqrshr_u16_u32_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = tail call  
@llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0)
+// CHECK-NEXT:[[TMP1:%.*]] = tail call  
@llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4)
+// CHECK-NEXT:[[TMP2:%.*]] = tail call  
@llvm.aarch64.sve.uqrshr.x2.nxv4i32( [[TMP0]],  [[TMP1]], i32 16)
+// CHECK-NEXT:ret  [[TMP2]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svsqrshr_u16_u32_x412svuint32x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHEC

[clang] [AArch64][SME2] Add intrinsics & builtins for S/URSHL (single, multi) (PR #74066)

2023-12-04 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov closed 
https://github.com/llvm/llvm-project/pull/74066
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME2] Add _x2/_x4 svqrshr builtins. (PR #74100)

2023-12-04 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov updated 
https://github.com/llvm/llvm-project/pull/74100

>From f1fb62ba84d94d2e63efa97e2a3d98c633509589 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov 
Date: Fri, 1 Dec 2023 16:27:42 +
Subject: [PATCH 1/2] [AArch64][SME2] Add _x2/_x4 svqrshr builtins.

Patch by: Kerry McLaughlin 
---
 clang/include/clang/Basic/arm_sve.td  |  15 +
 .../acle_sme2_vector_qrshr.c  | 341 ++
 2 files changed, 356 insertions(+)
 create mode 100644 
clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_qrshr.c

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index 3f69a3df9e616..0d247155f17e6 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2130,6 +2130,21 @@ let TargetGuard = "sme2" in {
   def SVURSHL_X2 : SInst<"svrshl[_{d}_x2]", "222", "UcUsUiUl", MergeNone, 
"aarch64_sve_urshl_x2", [IsStreaming], []>;
   def SVSRSHL_X4 : SInst<"svrshl[_{d}_x4]", "444", "csil", MergeNone, 
"aarch64_sve_srshl_x4", [IsStreaming], []>;
   def SVURSHL_X4 : SInst<"svrshl[_{d}_x4]", "444", "UcUsUiUl", MergeNone, 
"aarch64_sve_urshl_x4", [IsStreaming], []>;
+
+  def SVQRSHRN_X4   : SInst<"svqrshrn[_{0}_{d}_x4]", "q4i", "il",   MergeNone, 
"aarch64_sve_sqrshrn_x4", [IsStreaming], [ImmCheck<1, ImmCheckShiftRight, 0>]>;
+  def SVUQRSHRN_X4  : SInst<"svqrshrn[_{0}_{d}_x4]", "b4i", "UiUl", MergeNone, 
"aarch64_sve_uqrshrn_x4", [IsStreaming], [ImmCheck<1, ImmCheckShiftRight, 0>]>;
+
+  // SQRSHR / UQRSHR
+  def SVQRSHR_X2  : SInst<"svqrshr[_{0}_{d}_x2]", "h2i", "i",MergeNone, 
"aarch64_sve_sqrshr_x2", [IsStreaming], [ImmCheck<1, ImmCheck1_16>]>;
+  def SVUQRSHR_X2 : SInst<"svqrshr[_{0}_{d}_x2]", "e2i", "Ui",   MergeNone, 
"aarch64_sve_uqrshr_x2", [IsStreaming], [ImmCheck<1, ImmCheck1_16>]>;
+  def SVQRSHR_X4  : SInst<"svqrshr[_{0}_{d}_x4]", "q4i", "il",   MergeNone, 
"aarch64_sve_sqrshr_x4", [IsStreaming], [ImmCheck<1, ImmCheckShiftRight, 0>]>;
+  def SVUQRSHR_X4 : SInst<"svqrshr[_{0}_{d}_x4]", "b4i", "UiUl", MergeNone, 
"aarch64_sve_uqrshr_x4", [IsStreaming], [ImmCheck<1, ImmCheckShiftRight, 0>]>;
+
+  // SQRSHRU
+  def SVSQRSHRU_X2 : SInst<"svqrshru[_{0}_{d}_x2]", "e2i", "i",  MergeNone, 
"aarch64_sve_sqrshru_x2", [IsStreaming], [ImmCheck<1, ImmCheck1_16>]>;
+  def SVSQRSHRU_X4 : SInst<"svqrshru[_{0}_{d}_x4]", "b4i", "il", MergeNone, 
"aarch64_sve_sqrshru_x4", [IsStreaming], [ImmCheck<1, ImmCheckShiftRight, 0>]>;
+
+  def SVSQRSHRUN_X4 : SInst<"svqrshrun[_{0}_{d}_x4]", "b4i", "il", MergeNone, 
"aarch64_sve_sqrshrun_x4", [IsStreaming], [ImmCheck<1, ImmCheckShiftRight, 0>]>;
 }
 
 let TargetGuard = "sve2p1" in {
diff --git 
a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_qrshr.c 
b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_qrshr.c
new file mode 100644
index 0..84f561b7a81f4
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_qrshr.c
@@ -0,0 +1,341 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | 
opt -S  -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x 
c++ %s | opt -S  -passes=mem2reg,instcombine,tailcallelim | FileCheck %s 
-check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - %s | opt -S  -passes=mem2reg,instcombine,tailcallelim | 
FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - -x c++ %s | opt -S  -passes=mem2reg,instcombine,tailcallelim | 
FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sme2 -target-feature +sme-f64f64 -S -disable-O0-optnone 
-Werror -Wall -o /dev/null %s
+
+#include 
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED,A5) A1##A3##A5
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4,A5) A1##A2##A3##A4##A5
+#endif
+
+// SVQRSHR
+
+// CHECK-LABEL: @test_svsqrshr_u16_u32_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = tail call  
@llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0)
+// CHECK-NEXT:[[TMP1:%.*]] = tail call  
@llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4)
+// CHECK-NEXT:[[TMP2:%.*]] = tail call  
@llvm.aarch64.sve.uqrshr.x2.nxv4i32( [[TMP0]],  [[TMP1]], i32 16)
+// CHECK-NEXT:ret  [[TMP2]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svsqrshr_u16_u32_x412svuint3

[clang] [AArch64][SME2] Add SME2 MLA/MLS builtins. (PR #75584)

2023-12-21 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov closed 
https://github.com/llvm/llvm-project/pull/75584
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME2] Add builtins for FDOT, BFDOT, SUDOT, USDOT, SDOT, UDOT. (PR #75737)

2023-12-21 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov closed 
https://github.com/llvm/llvm-project/pull/75737
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME2] Add __arm_streaming_compatible attribute to CLAMP bui… (PR #76712)

2024-01-02 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov created 
https://github.com/llvm/llvm-project/pull/76712

…ltins.

Patch-by: Caroline Concatto 

>From d21c30bb3d655ab66e1ecb2f9b26dc441ca8928a Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov 
Date: Tue, 2 Jan 2024 12:01:17 +
Subject: [PATCH] [AArch64][SME2] Add __arm_streaming_compatible attribute to
 CLAMP builtins.

Patch-by: Caroline Concatto 
---
 clang/include/clang/Basic/arm_sve.td|  4 ++--
 .../acle_sve2p1_fclamp.c| 14 ++
 .../acle_sve2p1_sclamp.c| 17 -
 .../acle_sve2p1_uclamp.c| 17 -
 4 files changed, 36 insertions(+), 16 deletions(-)

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index 91f62c4c76339d..9b88be633aa55c 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2052,8 +2052,8 @@ def SVDOT_LANE_X2_F : SInst<"svdot_lane[_{d}_{2}_{3}]", 
"ddhhi", "f",  MergeNone
 }
 
 let TargetGuard = "sve2p1|sme" in {
-def SVSCLAMP : SInst<"svclamp[_{d}]", "", "csil", MergeNone, 
"aarch64_sve_sclamp", [], []>;
-def SVUCLAMP : SInst<"svclamp[_{d}]", "", "UcUsUiUl", MergeNone, 
"aarch64_sve_uclamp", [], []>;
+def SVSCLAMP : SInst<"svclamp[_{d}]", "", "csil", MergeNone, 
"aarch64_sve_sclamp", [IsStreamingCompatible], []>;
+def SVUCLAMP : SInst<"svclamp[_{d}]", "", "UcUsUiUl", MergeNone, 
"aarch64_sve_uclamp", [IsStreamingCompatible], []>;
 
 defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUlbhfd", "aarch64_sve_revd">;
 }
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_fclamp.c 
b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_fclamp.c
index 5d8c5b7b8a18c6..fd397292d1f2b6 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_fclamp.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_fclamp.c
@@ -10,7 +10,7 @@
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sve2p1 \
 // RUN:   -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
-// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sme2 -target-feature +sve \
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sme2 -target-feature +sve -DTEST_SME2 \
 // RUN:   -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include 
@@ -22,6 +22,12 @@
 #define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
 #endif
 
+#ifndef TEST_SME2
+#define ATTR
+#else
+#define ATTR __arm_streaming_compatible
+#endif
+
 // CHECK-LABEL: @test_svclamp_f16(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:[[TMP0:%.*]] = tail call  
@llvm.aarch64.sve.fclamp.nxv8f16( [[OP1:%.*]],  [[OP2:%.*]],  [[OP3:%.*]])
@@ -32,7 +38,7 @@
 // CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call  
@llvm.aarch64.sve.fclamp.nxv8f16( [[OP1:%.*]],  [[OP2:%.*]],  [[OP3:%.*]])
 // CPP-CHECK-NEXT:ret  [[TMP0]]
 //
-svfloat16_t test_svclamp_f16(svfloat16_t op1, svfloat16_t op2, svfloat16_t 
op3) {
+svfloat16_t test_svclamp_f16(svfloat16_t op1, svfloat16_t op2, svfloat16_t 
op3) ATTR {
   return SVE_ACLE_FUNC(svclamp, _f16, , )(op1, op2, op3);
 }
 
@@ -46,7 +52,7 @@ svfloat16_t test_svclamp_f16(svfloat16_t op1, svfloat16_t 
op2, svfloat16_t op3)
 // CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call  
@llvm.aarch64.sve.fclamp.nxv4f32( [[OP1:%.*]],  [[OP2:%.*]],  [[OP3:%.*]])
 // CPP-CHECK-NEXT:ret  [[TMP0]]
 //
-svfloat32_t test_svclamp_f32(svfloat32_t op1, svfloat32_t op2, svfloat32_t 
op3) {
+svfloat32_t test_svclamp_f32(svfloat32_t op1, svfloat32_t op2, svfloat32_t 
op3) ATTR {
   return SVE_ACLE_FUNC(svclamp, _f32, , )(op1, op2, op3);
 }
 
@@ -60,7 +66,7 @@ svfloat32_t test_svclamp_f32(svfloat32_t op1, svfloat32_t 
op2, svfloat32_t op3)
 // CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call  
@llvm.aarch64.sve.fclamp.nxv2f64( [[OP1:%.*]],  [[OP2:%.*]],  [[OP3:%.*]])
 // CPP-CHECK-NEXT:ret  [[TMP0]]
 //
-svfloat64_t test_svclamp_f64(svfloat64_t op1, svfloat64_t op2, svfloat64_t 
op3) {
+svfloat64_t test_svclamp_f64(svfloat64_t op1, svfloat64_t op2, svfloat64_t 
op3) ATTR {
   return SVE_ACLE_FUNC(svclamp, _f64, , )(op1, op2, op3);
 }
 
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_sclamp.c 
b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_sclamp.c
index 8c63a7455c79f4..54c4c1c1679d7a 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_sclamp.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_sclamp.c
@@ -10,6 +10,8 @@
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-f

[clang] [AArch64][SME2] Add __arm_streaming_compatible attribute to CLAMP bui… (PR #76712)

2024-01-02 Thread Dinar Temirbulatov via cfe-commits

dtemirbulatov wrote:

I missed that implemtation in https://github.com/llvm/llvm-project/pull/75958, 
Closing then.

https://github.com/llvm/llvm-project/pull/76712
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME2] Add __arm_streaming_compatible attribute to CLAMP bui… (PR #76712)

2024-01-02 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov closed 
https://github.com/llvm/llvm-project/pull/76712
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][SME2] Fix PSEL builtin predicates (PR #77097)

2024-01-10 Thread Dinar Temirbulatov via cfe-commits

dtemirbulatov wrote:

LGTM.

https://github.com/llvm/llvm-project/pull/77097
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][SME2] Fix PSEL builtin predicates (PR #77097)

2024-01-10 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov approved this pull request.


https://github.com/llvm/llvm-project/pull/77097
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][AArch64] Warn when calling streaming/non-streaming about vect… (PR #79842)

2024-03-13 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov updated 
https://github.com/llvm/llvm-project/pull/79842

>From af323998a63a72f569d543cf5167d5d28e784682 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov 
Date: Mon, 29 Jan 2024 14:43:13 +
Subject: [PATCH 1/9] [Clang][AArch64] Warn when calling
 streaming/non-streaming about vector size might be different.

The compiler doesn't know in advance if the streaming and non-streaming
vector-lengths are different, so it should be safe to give a warning diagnostic
to warn the user about possible undefined behaviour. If the user knows
the vector lengths are equal, they can disable the warning separately.
---
 .../clang/Basic/DiagnosticSemaKinds.td| 24 +++
 clang/lib/Sema/SemaChecking.cpp   | 42 
 clang/test/Sema/aarch64-sme-func-attrs.c  | 68 ++-
 3 files changed, 132 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 24d32cb87c89e2..37fea5746936c7 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3717,6 +3717,30 @@ def err_sme_definition_using_za_in_non_sme_target : 
Error<
   "function using ZA state requires 'sme'">;
 def err_sme_definition_using_zt0_in_non_sme2_target : Error<
   "function using ZT0 state requires 'sme2'">;
+def warn_sme_streaming_caller_pass_args_to_non_streaming : Warning<
+  "streaming caller passes a VL-dependent argument to non-streaming callee, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;
+def warn_sme_non_streaming_callee_returns_to_streaming : Warning<
+  "non-streaming callee returns a VL-dependent value to streaming caller, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;
+def warn_sme_non_streaming_caller_pass_args_to_streaming : Warning<
+  "non-streaming caller passes a VL-dependent argument to streaming callee, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;
+def warn_sme_non_streaming_caller_returns_to_streaming : Warning<
+  "non-streaming callee returns a VL-dependent value to streaming caller, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;
+def warn_sme_locally_streaming_has_vl_args : Warning<
+  "non-streaming callee receives a VL-dependent argument and the callee has an 
arm_locally_streaming attribute, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;
+def warn_sme_locally_streaming_returns_vl : Warning<
+  "non-streaming callee returns a VL-dependent value and the callee has an 
arm_locally_streaming attribute, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;
 def err_conflicting_attributes_arm_state : Error<
   "conflicting attributes for state '%0'">;
 def err_unknown_arm_state : Error<
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 502b24bcdf8b42..e668a45c69e5f9 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -7480,6 +7480,7 @@ void Sema::checkCall(NamedDecl *FDecl, const 
FunctionProtoType *Proto,
 // For variadic functions, we may have more args than parameters.
 // For some K&R functions, we may have less args than parameters.
 const auto N = std::min(Proto->getNumParams(), Args.size());
+bool AnyScalableArgs = false;
 for (unsigned ArgIdx = 0; ArgIdx < N; ++ArgIdx) {
   // Args[ArgIdx] can be null in malformed code.
   if (const Expr *Arg = Args[ArgIdx]) {
@@ -7493,6 +7494,8 @@ void Sema::checkCall(NamedDecl *FDecl, const 
FunctionProtoType *Proto,
   checkAIXMemberAlignment((Arg->getExprLoc()), Arg);
 
 QualType ParamTy = Proto->getParamType(ArgIdx);
+if (ParamTy->isSizelessVectorType())
+  AnyScalableArgs = true;
 QualType ArgTy = Arg->getType();
 CheckArgAlignment(Arg->getExprLoc(), FDecl, std::to_string(ArgIdx + 1),
   ArgTy, ParamTy);
@@ -7513,6 +7516,45 @@ void Sema::checkCall(NamedDecl *FDecl, const 
FunctionProtoType *Proto,
   }
 }
 
+auto *CallerFD = dyn_cast(CurContext);
+if (FD && CallerFD && Context.getTargetInfo().hasFeature("sme") &&
+!FD->getBuiltinID()) {
+  // If the callee has an AArch64 SME __arm_locally_streaming attribute
+  // warn if this function returns VL-based value or pass any such 
argument,
+  // the streaming and non-streaming vector lengths may be different.
+  ArmStreamingType CalleeFnType = getArmStreamingFnType(FD);
+  ArmStreamingType CallerFnType = getArmStreamingFnType(CallerFD);
+  if (FD->hasAttr() &&
+  CallerFnType != ArmStreaming) {
+if (AnyScalableArgs)
+  Diag(Loc, diag::warn_sme_locally_streaming_has_vl_args);
+if (FD->getReturnType()->isSizelessVectorType())
+  Diag(Loc, diag::warn

[clang] [Clang][AArch64] Warn when calling streaming/non-streaming about vect… (PR #79842)

2024-03-13 Thread Dinar Temirbulatov via cfe-commits


@@ -7531,19 +7531,15 @@ void Sema::checkCall(NamedDecl *FDecl, const 
FunctionProtoType *Proto,
 // vector lengths may be different.
 if (CallerFD && Context.getTargetInfo().hasFeature("sme") && !IsBuiltin) {
   ArmStreamingType CallerFnType = getArmStreamingFnType(CallerFD);
-  if (CallerFnType != ArmStreaming &&
-  CallerFnType != ArmStreamingCompatible) {
-if (IsCalleeStreaming && AnyScalableArgsOrRet)
-  Diag(Loc, diag::warn_sme_streaming_pass_return_vl_to_non_streaming);
-  } else if (CallerFnType == ArmStreaming && !IsCalleeStreaming &&
- !IsCalleeStreamingCompatible) {
-if (AnyScalableArgsOrRet)
-  Diag(Loc, diag::warn_sme_streaming_pass_return_vl_to_non_streaming);
-  } else if (CallerFnType == ArmStreamingCompatible) {
-if ((IsCalleeStreaming || !IsCalleeStreamingCompatible) &&
-AnyScalableArgsOrRet)
-  Diag(Loc, diag::warn_sme_streaming_pass_return_vl_to_non_streaming);
-  }
+  if ((CallerFnType != ArmStreaming &&
+   CallerFnType != ArmStreamingCompatible && IsCalleeStreaming &&
+   AnyScalableArgsOrRet) ||

dtemirbulatov wrote:

Done.

https://github.com/llvm/llvm-project/pull/79842
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][AArch64] Warn when calling streaming/non-streaming about vect… (PR #79842)

2024-03-15 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov updated 
https://github.com/llvm/llvm-project/pull/79842

>From af323998a63a72f569d543cf5167d5d28e784682 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov 
Date: Mon, 29 Jan 2024 14:43:13 +
Subject: [PATCH 01/10] [Clang][AArch64] Warn when calling
 streaming/non-streaming about vector size might be different.

The compiler doesn't know in advance if the streaming and non-streaming
vector-lengths are different, so it should be safe to give a warning diagnostic
to warn the user about possible undefined behaviour. If the user knows
the vector lengths are equal, they can disable the warning separately.
---
 .../clang/Basic/DiagnosticSemaKinds.td| 24 +++
 clang/lib/Sema/SemaChecking.cpp   | 42 
 clang/test/Sema/aarch64-sme-func-attrs.c  | 68 ++-
 3 files changed, 132 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 24d32cb87c89e2..37fea5746936c7 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3717,6 +3717,30 @@ def err_sme_definition_using_za_in_non_sme_target : 
Error<
   "function using ZA state requires 'sme'">;
 def err_sme_definition_using_zt0_in_non_sme2_target : Error<
   "function using ZT0 state requires 'sme2'">;
+def warn_sme_streaming_caller_pass_args_to_non_streaming : Warning<
+  "streaming caller passes a VL-dependent argument to non-streaming callee, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;
+def warn_sme_non_streaming_callee_returns_to_streaming : Warning<
+  "non-streaming callee returns a VL-dependent value to streaming caller, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;
+def warn_sme_non_streaming_caller_pass_args_to_streaming : Warning<
+  "non-streaming caller passes a VL-dependent argument to streaming callee, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;
+def warn_sme_non_streaming_caller_returns_to_streaming : Warning<
+  "non-streaming callee returns a VL-dependent value to streaming caller, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;
+def warn_sme_locally_streaming_has_vl_args : Warning<
+  "non-streaming callee receives a VL-dependent argument and the callee has an 
arm_locally_streaming attribute, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;
+def warn_sme_locally_streaming_returns_vl : Warning<
+  "non-streaming callee returns a VL-dependent value and the callee has an 
arm_locally_streaming attribute, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;
 def err_conflicting_attributes_arm_state : Error<
   "conflicting attributes for state '%0'">;
 def err_unknown_arm_state : Error<
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 502b24bcdf8b42..e668a45c69e5f9 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -7480,6 +7480,7 @@ void Sema::checkCall(NamedDecl *FDecl, const 
FunctionProtoType *Proto,
 // For variadic functions, we may have more args than parameters.
 // For some K&R functions, we may have less args than parameters.
 const auto N = std::min(Proto->getNumParams(), Args.size());
+bool AnyScalableArgs = false;
 for (unsigned ArgIdx = 0; ArgIdx < N; ++ArgIdx) {
   // Args[ArgIdx] can be null in malformed code.
   if (const Expr *Arg = Args[ArgIdx]) {
@@ -7493,6 +7494,8 @@ void Sema::checkCall(NamedDecl *FDecl, const 
FunctionProtoType *Proto,
   checkAIXMemberAlignment((Arg->getExprLoc()), Arg);
 
 QualType ParamTy = Proto->getParamType(ArgIdx);
+if (ParamTy->isSizelessVectorType())
+  AnyScalableArgs = true;
 QualType ArgTy = Arg->getType();
 CheckArgAlignment(Arg->getExprLoc(), FDecl, std::to_string(ArgIdx + 1),
   ArgTy, ParamTy);
@@ -7513,6 +7516,45 @@ void Sema::checkCall(NamedDecl *FDecl, const 
FunctionProtoType *Proto,
   }
 }
 
+auto *CallerFD = dyn_cast(CurContext);
+if (FD && CallerFD && Context.getTargetInfo().hasFeature("sme") &&
+!FD->getBuiltinID()) {
+  // If the callee has an AArch64 SME __arm_locally_streaming attribute
+  // warn if this function returns VL-based value or pass any such 
argument,
+  // the streaming and non-streaming vector lengths may be different.
+  ArmStreamingType CalleeFnType = getArmStreamingFnType(FD);
+  ArmStreamingType CallerFnType = getArmStreamingFnType(CallerFD);
+  if (FD->hasAttr() &&
+  CallerFnType != ArmStreaming) {
+if (AnyScalableArgs)
+  Diag(Loc, diag::warn_sme_locally_streaming_has_vl_args);
+if (FD->getReturnType()->isSizelessVectorType())
+  Diag(Loc, diag::wa

[clang] [Clang][AArch64] Warn when calling streaming/non-streaming about vect… (PR #79842)

2024-03-19 Thread Dinar Temirbulatov via cfe-commits


@@ -3717,6 +3717,16 @@ def err_sme_definition_using_za_in_non_sme_target : 
Error<
   "function using ZA state requires 'sme'">;
 def err_sme_definition_using_zt0_in_non_sme2_target : Error<
   "function using ZT0 state requires 'sme2'">;
+def warn_sme_streaming_pass_return_vl_to_non_streaming : Warning<
+  "passing a VL-dependent argument to/from a function that has a different"
+  " streaming-mode. The streaming and non-streaming vector lengths may be"
+  " different">,
+  InGroup, DefaultIgnore;
+def warn_sme_locally_streaming_has_vl_args_returns : Warning<
+  "passing/returning a VL-dependent argument from a function"
+  " arm_locally_streaming attribute. The streaming and non-streaming vector"

dtemirbulatov wrote:

Done.

https://github.com/llvm/llvm-project/pull/79842
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][AArch64] Warn when calling streaming/non-streaming about vect… (PR #79842)

2024-03-19 Thread Dinar Temirbulatov via cfe-commits


@@ -7513,6 +7516,30 @@ void Sema::checkCall(NamedDecl *FDecl, const 
FunctionProtoType *Proto,
   }
 }
 
+auto *CallerFD = dyn_cast(CurContext);
+bool IsCalleeStreaming =
+(ExtInfo.AArch64SMEAttributes & FunctionType::SME_PStateSMEnabledMask);

dtemirbulatov wrote:

Done.

https://github.com/llvm/llvm-project/pull/79842
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][AArch64] Warn when calling streaming/non-streaming about vect… (PR #79842)

2024-03-19 Thread Dinar Temirbulatov via cfe-commits


@@ -7513,6 +7516,30 @@ void Sema::checkCall(NamedDecl *FDecl, const 
FunctionProtoType *Proto,
   }
 }
 
+auto *CallerFD = dyn_cast(CurContext);
+bool IsCalleeStreaming =
+(ExtInfo.AArch64SMEAttributes & FunctionType::SME_PStateSMEnabledMask);
+bool IsCalleeStreamingCompatible =
+(ExtInfo.AArch64SMEAttributes &
+ FunctionType::SME_PStateSMCompatibleMask);

dtemirbulatov wrote:

Done.

https://github.com/llvm/llvm-project/pull/79842
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][AArch64] Warn when calling streaming/non-streaming about vect… (PR #79842)

2024-03-19 Thread Dinar Temirbulatov via cfe-commits


@@ -7513,6 +7516,30 @@ void Sema::checkCall(NamedDecl *FDecl, const 
FunctionProtoType *Proto,
   }
 }
 
+auto *CallerFD = dyn_cast(CurContext);
+bool IsCalleeStreaming =
+(ExtInfo.AArch64SMEAttributes & FunctionType::SME_PStateSMEnabledMask);
+bool IsCalleeStreamingCompatible =
+(ExtInfo.AArch64SMEAttributes &
+ FunctionType::SME_PStateSMCompatibleMask);
+bool IsBuiltin = (FD && FD->getBuiltinID());

dtemirbulatov wrote:

Done.

https://github.com/llvm/llvm-project/pull/79842
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][AArch64] Warn when calling streaming/non-streaming about vect… (PR #79842)

2024-03-19 Thread Dinar Temirbulatov via cfe-commits


@@ -7513,6 +7516,30 @@ void Sema::checkCall(NamedDecl *FDecl, const 
FunctionProtoType *Proto,
   }
 }
 
+auto *CallerFD = dyn_cast(CurContext);
+bool IsCalleeStreaming =
+(ExtInfo.AArch64SMEAttributes & FunctionType::SME_PStateSMEnabledMask);
+bool IsCalleeStreamingCompatible =
+(ExtInfo.AArch64SMEAttributes &
+ FunctionType::SME_PStateSMCompatibleMask);
+bool IsBuiltin = (FD && FD->getBuiltinID());
+AnyScalableArgsOrRet |= Proto->getReturnType()->isSizelessVectorType();

dtemirbulatov wrote:

Done.

https://github.com/llvm/llvm-project/pull/79842
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][AArch64] Warn when calling streaming/non-streaming about vect… (PR #79842)

2024-03-19 Thread Dinar Temirbulatov via cfe-commits


@@ -7513,6 +7516,30 @@ void Sema::checkCall(NamedDecl *FDecl, const 
FunctionProtoType *Proto,
   }
 }
 
+auto *CallerFD = dyn_cast(CurContext);

dtemirbulatov wrote:

Done.

https://github.com/llvm/llvm-project/pull/79842
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][AArch64] Warn when calling streaming/non-streaming about vect… (PR #79842)

2024-03-19 Thread Dinar Temirbulatov via cfe-commits


@@ -7513,6 +7516,30 @@ void Sema::checkCall(NamedDecl *FDecl, const 
FunctionProtoType *Proto,
   }
 }
 
+auto *CallerFD = dyn_cast(CurContext);
+bool IsCalleeStreaming =
+(ExtInfo.AArch64SMEAttributes & FunctionType::SME_PStateSMEnabledMask);
+bool IsCalleeStreamingCompatible =
+(ExtInfo.AArch64SMEAttributes &
+ FunctionType::SME_PStateSMCompatibleMask);
+bool IsBuiltin = (FD && FD->getBuiltinID());
+AnyScalableArgsOrRet |= Proto->getReturnType()->isSizelessVectorType();
+
+// If the caller is a function and the callee has a different
+// non-compitable streaming attribute. If it passed any VL-based arguments
+// or return VL-based value, then warn that the streaming and non-streaming
+// vector lengths may be different.
+if (CallerFD && !IsBuiltin && AnyScalableArgsOrRet) {
+  ArmStreamingType CallerFnType = getArmStreamingFnType(CallerFD);
+  if ((CallerFnType != ArmStreaming &&
+   CallerFnType != ArmStreamingCompatible && IsCalleeStreaming) ||
+  (CallerFnType == ArmStreaming && !IsCalleeStreaming &&
+   !IsCalleeStreamingCompatible) ||
+  (CallerFnType == ArmStreamingCompatible &&
+   (IsCalleeStreaming || !IsCalleeStreamingCompatible)))

dtemirbulatov wrote:

Done.

https://github.com/llvm/llvm-project/pull/79842
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][AArch64] Warn when calling streaming/non-streaming about vect… (PR #79842)

2024-03-19 Thread Dinar Temirbulatov via cfe-commits


@@ -7513,6 +7516,30 @@ void Sema::checkCall(NamedDecl *FDecl, const 
FunctionProtoType *Proto,
   }
 }
 
+auto *CallerFD = dyn_cast(CurContext);
+bool IsCalleeStreaming =
+(ExtInfo.AArch64SMEAttributes & FunctionType::SME_PStateSMEnabledMask);
+bool IsCalleeStreamingCompatible =
+(ExtInfo.AArch64SMEAttributes &
+ FunctionType::SME_PStateSMCompatibleMask);
+bool IsBuiltin = (FD && FD->getBuiltinID());
+AnyScalableArgsOrRet |= Proto->getReturnType()->isSizelessVectorType();
+
+// If the caller is a function and the callee has a different
+// non-compitable streaming attribute. If it passed any VL-based arguments
+// or return VL-based value, then warn that the streaming and non-streaming
+// vector lengths may be different.

dtemirbulatov wrote:

Done.

https://github.com/llvm/llvm-project/pull/79842
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][AArch64] Warn when calling streaming/non-streaming about vect… (PR #79842)

2024-03-19 Thread Dinar Temirbulatov via cfe-commits


@@ -7516,28 +7516,23 @@ void Sema::checkCall(NamedDecl *FDecl, const 
FunctionProtoType *Proto,
   }
 }
 
-auto *CallerFD = dyn_cast(CurContext);
-bool IsCalleeStreaming =
-(ExtInfo.AArch64SMEAttributes & FunctionType::SME_PStateSMEnabledMask);
-bool IsCalleeStreamingCompatible =
-(ExtInfo.AArch64SMEAttributes &
- FunctionType::SME_PStateSMCompatibleMask);
-bool IsBuiltin = (FD && FD->getBuiltinID());
-AnyScalableArgsOrRet |= Proto->getReturnType()->isSizelessVectorType();
-
-// If the caller is a function and the callee has a different
-// non-compitable streaming attribute. If it passed any VL-based arguments
-// or return VL-based value, then warn that the streaming and non-streaming
-// vector lengths may be different.
-if (CallerFD && !IsBuiltin && AnyScalableArgsOrRet) {
-  ArmStreamingType CallerFnType = getArmStreamingFnType(CallerFD);
-  if ((CallerFnType != ArmStreaming &&
-   CallerFnType != ArmStreamingCompatible && IsCalleeStreaming) ||
-  (CallerFnType == ArmStreaming && !IsCalleeStreaming &&
-   !IsCalleeStreamingCompatible) ||
-  (CallerFnType == ArmStreamingCompatible &&
-   (IsCalleeStreaming || !IsCalleeStreamingCompatible)))
-Diag(Loc, diag::warn_sme_streaming_pass_return_vl_to_non_streaming);
+// If the call requires a streaming-mode change and has scalable vector
+// arguments or return values, then warn the user that the streaming and
+// non-streaming vector lengths may be different.
+bool IsBuiltin = FD && FD->getBuiltinID();

dtemirbulatov wrote:

Done.

https://github.com/llvm/llvm-project/pull/79842
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][AArch64] Warn when calling streaming/non-streaming about vect… (PR #79842)

2024-03-19 Thread Dinar Temirbulatov via cfe-commits


@@ -3723,8 +3723,8 @@ def warn_sme_streaming_pass_return_vl_to_non_streaming : 
Warning<
   " different">,
   InGroup, DefaultIgnore;
 def warn_sme_locally_streaming_has_vl_args_returns : Warning<
-  "passing/returning a VL-dependent argument from a function"
-  " arm_locally_streaming attribute. The streaming and non-streaming vector"
+  "passing/returning a VL-dependent argument from a arm_locally_streaming"

dtemirbulatov wrote:

Done.

https://github.com/llvm/llvm-project/pull/79842
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][AArch64] Warn when calling streaming/non-streaming about vect… (PR #79842)

2024-03-28 Thread Dinar Temirbulatov via cfe-commits


@@ -1390,6 +1390,9 @@ def MultiGPU: DiagGroup<"multi-gpu">;
 // libc and the CRT to be skipped.
 def AVRRtlibLinkingQuirks : DiagGroup<"avr-rtlib-linking-quirks">;
 
+// A warning group AArch64 related to SME function attribues.

dtemirbulatov wrote:

Done.

https://github.com/llvm/llvm-project/pull/79842
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][AArch64] Warn when calling streaming/non-streaming about vect… (PR #79842)

2024-03-28 Thread Dinar Temirbulatov via cfe-commits


@@ -3717,6 +3717,16 @@ def err_sme_definition_using_za_in_non_sme_target : 
Error<
   "function using ZA state requires 'sme'">;
 def err_sme_definition_using_zt0_in_non_sme2_target : Error<
   "function using ZT0 state requires 'sme2'">;
+def warn_sme_streaming_pass_return_vl_to_non_streaming : Warning<
+  "passing a VL-dependent argument to/from a function that has a different"
+  " streaming-mode. The streaming and non-streaming vector lengths may be"
+  " different">,
+  InGroup, DefaultIgnore;
+def warn_sme_locally_streaming_has_vl_args_returns : Warning<
+  "passing/returning a VL-dependent argument from a __arm_locally_streaming"

dtemirbulatov wrote:

Done.

https://github.com/llvm/llvm-project/pull/79842
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][AArch64] Warn when calling streaming/non-streaming about vect… (PR #79842)

2024-02-06 Thread Dinar Temirbulatov via cfe-commits


@@ -1,5 +1,6 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme 
-fsyntax-only -verify %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme 
-fsyntax-only -verify=expected-cpp -x c++ %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sme -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sme -fsyntax-only -verify=expected-cpp -x c++ %s
+#include 

dtemirbulatov wrote:

ok, but then I have to define __clang_svint32x4_t.

https://github.com/llvm/llvm-project/pull/79842
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][AArch64] Warn when calling streaming/non-streaming about vect… (PR #79842)

2024-02-06 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov edited 
https://github.com/llvm/llvm-project/pull/79842
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][AArch64] Warn when calling streaming/non-streaming about vect… (PR #79842)

2024-02-07 Thread Dinar Temirbulatov via cfe-commits


@@ -3717,6 +3717,30 @@ def err_sme_definition_using_za_in_non_sme_target : 
Error<
   "function using ZA state requires 'sme'">;
 def err_sme_definition_using_zt0_in_non_sme2_target : Error<
   "function using ZT0 state requires 'sme2'">;
+def warn_sme_streaming_caller_pass_args_to_non_streaming : Warning<
+  "streaming caller passes a VL-dependent argument to non-streaming callee, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;

dtemirbulatov wrote:

But why not, There is `warn_riscv_repeated_interrupt_attribute` also uses 
`IgnoredAttributes`.

https://github.com/llvm/llvm-project/pull/79842
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][AArch64] Warn when calling streaming/non-streaming about vect… (PR #79842)

2024-02-07 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov updated 
https://github.com/llvm/llvm-project/pull/79842

>From af323998a63a72f569d543cf5167d5d28e784682 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov 
Date: Mon, 29 Jan 2024 14:43:13 +
Subject: [PATCH 1/3] [Clang][AArch64] Warn when calling
 streaming/non-streaming about vector size might be different.

The compiler doesn't know in advance if the streaming and non-streaming
vector-lengths are different, so it should be safe to give a warning diagnostic
to warn the user about possible undefined behaviour. If the user knows
the vector lengths are equal, they can disable the warning separately.
---
 .../clang/Basic/DiagnosticSemaKinds.td| 24 +++
 clang/lib/Sema/SemaChecking.cpp   | 42 
 clang/test/Sema/aarch64-sme-func-attrs.c  | 68 ++-
 3 files changed, 132 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 24d32cb87c89e2..37fea5746936c7 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3717,6 +3717,30 @@ def err_sme_definition_using_za_in_non_sme_target : 
Error<
   "function using ZA state requires 'sme'">;
 def err_sme_definition_using_zt0_in_non_sme2_target : Error<
   "function using ZT0 state requires 'sme2'">;
+def warn_sme_streaming_caller_pass_args_to_non_streaming : Warning<
+  "streaming caller passes a VL-dependent argument to non-streaming callee, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;
+def warn_sme_non_streaming_callee_returns_to_streaming : Warning<
+  "non-streaming callee returns a VL-dependent value to streaming caller, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;
+def warn_sme_non_streaming_caller_pass_args_to_streaming : Warning<
+  "non-streaming caller passes a VL-dependent argument to streaming callee, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;
+def warn_sme_non_streaming_caller_returns_to_streaming : Warning<
+  "non-streaming callee returns a VL-dependent value to streaming caller, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;
+def warn_sme_locally_streaming_has_vl_args : Warning<
+  "non-streaming callee receives a VL-dependent argument and the callee has an 
arm_locally_streaming attribute, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;
+def warn_sme_locally_streaming_returns_vl : Warning<
+  "non-streaming callee returns a VL-dependent value and the callee has an 
arm_locally_streaming attribute, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;
 def err_conflicting_attributes_arm_state : Error<
   "conflicting attributes for state '%0'">;
 def err_unknown_arm_state : Error<
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 502b24bcdf8b42..e668a45c69e5f9 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -7480,6 +7480,7 @@ void Sema::checkCall(NamedDecl *FDecl, const 
FunctionProtoType *Proto,
 // For variadic functions, we may have more args than parameters.
 // For some K&R functions, we may have less args than parameters.
 const auto N = std::min(Proto->getNumParams(), Args.size());
+bool AnyScalableArgs = false;
 for (unsigned ArgIdx = 0; ArgIdx < N; ++ArgIdx) {
   // Args[ArgIdx] can be null in malformed code.
   if (const Expr *Arg = Args[ArgIdx]) {
@@ -7493,6 +7494,8 @@ void Sema::checkCall(NamedDecl *FDecl, const 
FunctionProtoType *Proto,
   checkAIXMemberAlignment((Arg->getExprLoc()), Arg);
 
 QualType ParamTy = Proto->getParamType(ArgIdx);
+if (ParamTy->isSizelessVectorType())
+  AnyScalableArgs = true;
 QualType ArgTy = Arg->getType();
 CheckArgAlignment(Arg->getExprLoc(), FDecl, std::to_string(ArgIdx + 1),
   ArgTy, ParamTy);
@@ -7513,6 +7516,45 @@ void Sema::checkCall(NamedDecl *FDecl, const 
FunctionProtoType *Proto,
   }
 }
 
+auto *CallerFD = dyn_cast(CurContext);
+if (FD && CallerFD && Context.getTargetInfo().hasFeature("sme") &&
+!FD->getBuiltinID()) {
+  // If the callee has an AArch64 SME __arm_locally_streaming attribute
+  // warn if this function returns VL-based value or pass any such 
argument,
+  // the streaming and non-streaming vector lengths may be different.
+  ArmStreamingType CalleeFnType = getArmStreamingFnType(FD);
+  ArmStreamingType CallerFnType = getArmStreamingFnType(CallerFD);
+  if (FD->hasAttr() &&
+  CallerFnType != ArmStreaming) {
+if (AnyScalableArgs)
+  Diag(Loc, diag::warn_sme_locally_streaming_has_vl_args);
+if (FD->getReturnType()->isSizelessVectorType())
+  Diag(Loc, diag::warn

[clang] [Clang][AArch64] Warn when calling streaming/non-streaming about vect… (PR #79842)

2024-02-07 Thread Dinar Temirbulatov via cfe-commits


@@ -1,5 +1,6 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme 
-fsyntax-only -verify %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme 
-fsyntax-only -verify=expected-cpp -x c++ %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sme -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sme -fsyntax-only -verify=expected-cpp -x c++ %s
+#include 

dtemirbulatov wrote:

Done.

https://github.com/llvm/llvm-project/pull/79842
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][AArch64] Warn when calling streaming/non-streaming about vect… (PR #79842)

2024-02-07 Thread Dinar Temirbulatov via cfe-commits


@@ -7513,6 +7516,44 @@ void Sema::checkCall(NamedDecl *FDecl, const 
FunctionProtoType *Proto,
   }
 }
 
+auto *CallerFD = dyn_cast(CurContext);
+if (FD && CallerFD && Context.getTargetInfo().hasFeature("sme") &&
+!FD->getBuiltinID()) {
+  // If the callee has an AArch64 SME __arm_locally_streaming attribute
+  // warn if this function returns VL-based value or pass any such 
argument,
+  // the streaming and non-streaming vector lengths may be different.
+  ArmStreamingType CalleeFnType = getArmStreamingFnType(FD);
+  ArmStreamingType CallerFnType = getArmStreamingFnType(CallerFD);
+  if (FD->hasAttr()) {
+if (AnyScalableArgs)
+  Diag(Loc, diag::warn_sme_locally_streaming_has_vl_args);
+if (FD->getReturnType()->isSizelessVectorType())
+  Diag(Loc, diag::warn_sme_locally_streaming_returns_vl);
+  }
+  // If the caller is a non-streaming function and the callee has a
+  // streaming attribute. If it passed any VL-based arguments or return
+  // VL-based value, then warn that the streaming and non-streaming vector
+  // lengths may be different.
+  if (CallerFnType != ArmStreaming) {
+if (CalleeFnType == ArmStreaming) {
+  if (AnyScalableArgs)
+Diag(Loc,
+ diag::warn_sme_non_streaming_caller_pass_args_to_streaming);
+  if (FD->getReturnType()->isSizelessVectorType())
+Diag(Loc, 
diag::warn_sme_non_streaming_caller_returns_to_streaming);
+}
+  } else if (!FD->hasAttr()) {

dtemirbulatov wrote:

Done.

https://github.com/llvm/llvm-project/pull/79842
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][AArch64] Warn when calling streaming/non-streaming about vect… (PR #79842)

2024-02-07 Thread Dinar Temirbulatov via cfe-commits


@@ -7513,6 +7516,44 @@ void Sema::checkCall(NamedDecl *FDecl, const 
FunctionProtoType *Proto,
   }
 }
 
+auto *CallerFD = dyn_cast(CurContext);
+if (FD && CallerFD && Context.getTargetInfo().hasFeature("sme") &&
+!FD->getBuiltinID()) {
+  // If the callee has an AArch64 SME __arm_locally_streaming attribute
+  // warn if this function returns VL-based value or pass any such 
argument,
+  // the streaming and non-streaming vector lengths may be different.
+  ArmStreamingType CalleeFnType = getArmStreamingFnType(FD);
+  ArmStreamingType CallerFnType = getArmStreamingFnType(CallerFD);
+  if (FD->hasAttr()) {
+if (AnyScalableArgs)
+  Diag(Loc, diag::warn_sme_locally_streaming_has_vl_args);
+if (FD->getReturnType()->isSizelessVectorType())
+  Diag(Loc, diag::warn_sme_locally_streaming_returns_vl);
+  }

dtemirbulatov wrote:

Done.

https://github.com/llvm/llvm-project/pull/79842
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][AArch64] Warn when calling streaming/non-streaming about vect… (PR #79842)

2024-02-08 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov updated 
https://github.com/llvm/llvm-project/pull/79842

>From af323998a63a72f569d543cf5167d5d28e784682 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov 
Date: Mon, 29 Jan 2024 14:43:13 +
Subject: [PATCH 1/4] [Clang][AArch64] Warn when calling
 streaming/non-streaming about vector size might be different.

The compiler doesn't know in advance if the streaming and non-streaming
vector-lengths are different, so it should be safe to give a warning diagnostic
to warn the user about possible undefined behaviour. If the user knows
the vector lengths are equal, they can disable the warning separately.
---
 .../clang/Basic/DiagnosticSemaKinds.td| 24 +++
 clang/lib/Sema/SemaChecking.cpp   | 42 
 clang/test/Sema/aarch64-sme-func-attrs.c  | 68 ++-
 3 files changed, 132 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 24d32cb87c89e2..37fea5746936c7 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3717,6 +3717,30 @@ def err_sme_definition_using_za_in_non_sme_target : 
Error<
   "function using ZA state requires 'sme'">;
 def err_sme_definition_using_zt0_in_non_sme2_target : Error<
   "function using ZT0 state requires 'sme2'">;
+def warn_sme_streaming_caller_pass_args_to_non_streaming : Warning<
+  "streaming caller passes a VL-dependent argument to non-streaming callee, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;
+def warn_sme_non_streaming_callee_returns_to_streaming : Warning<
+  "non-streaming callee returns a VL-dependent value to streaming caller, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;
+def warn_sme_non_streaming_caller_pass_args_to_streaming : Warning<
+  "non-streaming caller passes a VL-dependent argument to streaming callee, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;
+def warn_sme_non_streaming_caller_returns_to_streaming : Warning<
+  "non-streaming callee returns a VL-dependent value to streaming caller, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;
+def warn_sme_locally_streaming_has_vl_args : Warning<
+  "non-streaming callee receives a VL-dependent argument and the callee has an 
arm_locally_streaming attribute, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;
+def warn_sme_locally_streaming_returns_vl : Warning<
+  "non-streaming callee returns a VL-dependent value and the callee has an 
arm_locally_streaming attribute, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;
 def err_conflicting_attributes_arm_state : Error<
   "conflicting attributes for state '%0'">;
 def err_unknown_arm_state : Error<
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 502b24bcdf8b42..e668a45c69e5f9 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -7480,6 +7480,7 @@ void Sema::checkCall(NamedDecl *FDecl, const 
FunctionProtoType *Proto,
 // For variadic functions, we may have more args than parameters.
 // For some K&R functions, we may have less args than parameters.
 const auto N = std::min(Proto->getNumParams(), Args.size());
+bool AnyScalableArgs = false;
 for (unsigned ArgIdx = 0; ArgIdx < N; ++ArgIdx) {
   // Args[ArgIdx] can be null in malformed code.
   if (const Expr *Arg = Args[ArgIdx]) {
@@ -7493,6 +7494,8 @@ void Sema::checkCall(NamedDecl *FDecl, const 
FunctionProtoType *Proto,
   checkAIXMemberAlignment((Arg->getExprLoc()), Arg);
 
 QualType ParamTy = Proto->getParamType(ArgIdx);
+if (ParamTy->isSizelessVectorType())
+  AnyScalableArgs = true;
 QualType ArgTy = Arg->getType();
 CheckArgAlignment(Arg->getExprLoc(), FDecl, std::to_string(ArgIdx + 1),
   ArgTy, ParamTy);
@@ -7513,6 +7516,45 @@ void Sema::checkCall(NamedDecl *FDecl, const 
FunctionProtoType *Proto,
   }
 }
 
+auto *CallerFD = dyn_cast(CurContext);
+if (FD && CallerFD && Context.getTargetInfo().hasFeature("sme") &&
+!FD->getBuiltinID()) {
+  // If the callee has an AArch64 SME __arm_locally_streaming attribute
+  // warn if this function returns VL-based value or pass any such 
argument,
+  // the streaming and non-streaming vector lengths may be different.
+  ArmStreamingType CalleeFnType = getArmStreamingFnType(FD);
+  ArmStreamingType CallerFnType = getArmStreamingFnType(CallerFD);
+  if (FD->hasAttr() &&
+  CallerFnType != ArmStreaming) {
+if (AnyScalableArgs)
+  Diag(Loc, diag::warn_sme_locally_streaming_has_vl_args);
+if (FD->getReturnType()->isSizelessVectorType())
+  Diag(Loc, diag::warn

[clang] [Clang][AArch64] Warn when calling streaming/non-streaming about vect… (PR #79842)

2024-02-08 Thread Dinar Temirbulatov via cfe-commits


@@ -3717,6 +3717,30 @@ def err_sme_definition_using_za_in_non_sme_target : 
Error<
   "function using ZA state requires 'sme'">;
 def err_sme_definition_using_zt0_in_non_sme2_target : Error<
   "function using ZT0 state requires 'sme2'">;
+def warn_sme_streaming_caller_pass_args_to_non_streaming : Warning<
+  "streaming caller passes a VL-dependent argument to non-streaming callee, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;

dtemirbulatov wrote:

Add new warning group AArch64SMEAttributes.

https://github.com/llvm/llvm-project/pull/79842
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][AArch64] Warn when calling streaming/non-streaming about vect… (PR #79842)

2024-02-12 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov updated 
https://github.com/llvm/llvm-project/pull/79842

>From af323998a63a72f569d543cf5167d5d28e784682 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov 
Date: Mon, 29 Jan 2024 14:43:13 +
Subject: [PATCH 1/5] [Clang][AArch64] Warn when calling
 streaming/non-streaming about vector size might be different.

The compiler doesn't know in advance if the streaming and non-streaming
vector-lengths are different, so it should be safe to give a warning diagnostic
to warn the user about possible undefined behaviour. If the user knows
the vector lengths are equal, they can disable the warning separately.
---
 .../clang/Basic/DiagnosticSemaKinds.td| 24 +++
 clang/lib/Sema/SemaChecking.cpp   | 42 
 clang/test/Sema/aarch64-sme-func-attrs.c  | 68 ++-
 3 files changed, 132 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 24d32cb87c89e2..37fea5746936c7 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3717,6 +3717,30 @@ def err_sme_definition_using_za_in_non_sme_target : 
Error<
   "function using ZA state requires 'sme'">;
 def err_sme_definition_using_zt0_in_non_sme2_target : Error<
   "function using ZT0 state requires 'sme2'">;
+def warn_sme_streaming_caller_pass_args_to_non_streaming : Warning<
+  "streaming caller passes a VL-dependent argument to non-streaming callee, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;
+def warn_sme_non_streaming_callee_returns_to_streaming : Warning<
+  "non-streaming callee returns a VL-dependent value to streaming caller, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;
+def warn_sme_non_streaming_caller_pass_args_to_streaming : Warning<
+  "non-streaming caller passes a VL-dependent argument to streaming callee, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;
+def warn_sme_non_streaming_caller_returns_to_streaming : Warning<
+  "non-streaming callee returns a VL-dependent value to streaming caller, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;
+def warn_sme_locally_streaming_has_vl_args : Warning<
+  "non-streaming callee receives a VL-dependent argument and the callee has an 
arm_locally_streaming attribute, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;
+def warn_sme_locally_streaming_returns_vl : Warning<
+  "non-streaming callee returns a VL-dependent value and the callee has an 
arm_locally_streaming attribute, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;
 def err_conflicting_attributes_arm_state : Error<
   "conflicting attributes for state '%0'">;
 def err_unknown_arm_state : Error<
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 502b24bcdf8b42..e668a45c69e5f9 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -7480,6 +7480,7 @@ void Sema::checkCall(NamedDecl *FDecl, const 
FunctionProtoType *Proto,
 // For variadic functions, we may have more args than parameters.
 // For some K&R functions, we may have less args than parameters.
 const auto N = std::min(Proto->getNumParams(), Args.size());
+bool AnyScalableArgs = false;
 for (unsigned ArgIdx = 0; ArgIdx < N; ++ArgIdx) {
   // Args[ArgIdx] can be null in malformed code.
   if (const Expr *Arg = Args[ArgIdx]) {
@@ -7493,6 +7494,8 @@ void Sema::checkCall(NamedDecl *FDecl, const 
FunctionProtoType *Proto,
   checkAIXMemberAlignment((Arg->getExprLoc()), Arg);
 
 QualType ParamTy = Proto->getParamType(ArgIdx);
+if (ParamTy->isSizelessVectorType())
+  AnyScalableArgs = true;
 QualType ArgTy = Arg->getType();
 CheckArgAlignment(Arg->getExprLoc(), FDecl, std::to_string(ArgIdx + 1),
   ArgTy, ParamTy);
@@ -7513,6 +7516,45 @@ void Sema::checkCall(NamedDecl *FDecl, const 
FunctionProtoType *Proto,
   }
 }
 
+auto *CallerFD = dyn_cast(CurContext);
+if (FD && CallerFD && Context.getTargetInfo().hasFeature("sme") &&
+!FD->getBuiltinID()) {
+  // If the callee has an AArch64 SME __arm_locally_streaming attribute
+  // warn if this function returns VL-based value or pass any such 
argument,
+  // the streaming and non-streaming vector lengths may be different.
+  ArmStreamingType CalleeFnType = getArmStreamingFnType(FD);
+  ArmStreamingType CallerFnType = getArmStreamingFnType(CallerFD);
+  if (FD->hasAttr() &&
+  CallerFnType != ArmStreaming) {
+if (AnyScalableArgs)
+  Diag(Loc, diag::warn_sme_locally_streaming_has_vl_args);
+if (FD->getReturnType()->isSizelessVectorType())
+  Diag(Loc, diag::warn

[clang] [Clang][AArch64] Warn when calling streaming/non-streaming about vect… (PR #79842)

2024-02-12 Thread Dinar Temirbulatov via cfe-commits


@@ -7513,6 +7516,38 @@ void Sema::checkCall(NamedDecl *FDecl, const 
FunctionProtoType *Proto,
   }
 }
 
+auto *CallerFD = dyn_cast(CurContext);
+if (FD && CallerFD && Context.getTargetInfo().hasFeature("sme") &&

dtemirbulatov wrote:

Done.

https://github.com/llvm/llvm-project/pull/79842
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][AArch64] Warn when calling streaming/non-streaming about vect… (PR #79842)

2024-02-12 Thread Dinar Temirbulatov via cfe-commits


@@ -7513,6 +7516,38 @@ void Sema::checkCall(NamedDecl *FDecl, const 
FunctionProtoType *Proto,
   }
 }
 
+auto *CallerFD = dyn_cast(CurContext);
+if (FD && CallerFD && Context.getTargetInfo().hasFeature("sme") &&
+!FD->getBuiltinID()) {
+  // If the callee has an AArch64 SME __arm_locally_streaming attribute
+  // warn if this function returns VL-based value or pass any such 
argument,
+  // the streaming and non-streaming vector lengths may be different.
+  ArmStreamingType CalleeFnType = getArmStreamingFnType(FD);
+  ArmStreamingType CallerFnType = getArmStreamingFnType(CallerFD);
+  // If the caller is a non-streaming function and the callee has a
+  // streaming attribute. If it passed any VL-based arguments or return
+  // VL-based value, then warn that the streaming and non-streaming vector
+  // lengths may be different.
+  if (CallerFnType != ArmStreaming) {
+if (CalleeFnType == ArmStreaming) {
+  if (AnyScalableArgs)
+Diag(Loc,
+ diag::warn_sme_non_streaming_caller_pass_args_to_streaming);
+  if (FD->getReturnType()->isSizelessVectorType())
+Diag(Loc, 
diag::warn_sme_non_streaming_caller_returns_to_streaming);
+}
+  } else if (CalleeFnType != ArmStreaming) {

dtemirbulatov wrote:

Done.

https://github.com/llvm/llvm-project/pull/79842
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][AArch64] Warn when calling streaming/non-streaming about vect… (PR #79842)

2024-02-12 Thread Dinar Temirbulatov via cfe-commits


@@ -3717,6 +3717,30 @@ def err_sme_definition_using_za_in_non_sme_target : 
Error<
   "function using ZA state requires 'sme'">;
 def err_sme_definition_using_zt0_in_non_sme2_target : Error<
   "function using ZT0 state requires 'sme2'">;
+def warn_sme_streaming_caller_pass_args_to_non_streaming : Warning<
+  "streaming caller passes a VL-dependent argument to non-streaming callee, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;
+def warn_sme_non_streaming_callee_returns_to_streaming : Warning<
+  "non-streaming callee returns a VL-dependent value to streaming caller, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;
+def warn_sme_non_streaming_caller_pass_args_to_streaming : Warning<
+  "non-streaming caller passes a VL-dependent argument to streaming callee, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;
+def warn_sme_non_streaming_caller_returns_to_streaming : Warning<
+  "non-streaming callee returns a VL-dependent value to streaming caller, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;

dtemirbulatov wrote:

Done.

https://github.com/llvm/llvm-project/pull/79842
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][AArch64] Warn when calling streaming/non-streaming about vect… (PR #79842)

2024-02-12 Thread Dinar Temirbulatov via cfe-commits


@@ -3717,6 +3717,30 @@ def err_sme_definition_using_za_in_non_sme_target : 
Error<
   "function using ZA state requires 'sme'">;
 def err_sme_definition_using_zt0_in_non_sme2_target : Error<
   "function using ZT0 state requires 'sme2'">;
+def warn_sme_streaming_caller_pass_args_to_non_streaming : Warning<
+  "streaming caller passes a VL-dependent argument to non-streaming callee, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;
+def warn_sme_non_streaming_callee_returns_to_streaming : Warning<
+  "non-streaming callee returns a VL-dependent value to streaming caller, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;
+def warn_sme_non_streaming_caller_pass_args_to_streaming : Warning<
+  "non-streaming caller passes a VL-dependent argument to streaming callee, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;
+def warn_sme_non_streaming_caller_returns_to_streaming : Warning<
+  "non-streaming callee returns a VL-dependent value to streaming caller, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;
+def warn_sme_locally_streaming_has_vl_args : Warning<
+  "non-streaming callee receives a VL-dependent argument and the callee has an 
arm_locally_streaming attribute, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;
+def warn_sme_locally_streaming_returns_vl : Warning<
+  "non-streaming callee returns a VL-dependent value and the callee has an 
arm_locally_streaming attribute, "
+  "the streaming and non-streaming vector lengths may be different">,
+  InGroup;

dtemirbulatov wrote:

Done.

https://github.com/llvm/llvm-project/pull/79842
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][AArch64] Warn when calling streaming/non-streaming about vect… (PR #79842)

2024-02-12 Thread Dinar Temirbulatov via cfe-commits


@@ -445,3 +448,54 @@ void conflicting_state_attrs_preserves_out_zt0(void) 
__arm_preserves("zt0") __ar
 // expected-cpp-error@+2 {{conflicting attributes for state 'zt0'}}
 // expected-error@+1 {{conflicting attributes for state 'zt0'}}
 void conflicting_state_attrs_preserves_inout_zt0(void) __arm_preserves("zt0") 
__arm_inout("zt0");
+

dtemirbulatov wrote:

Done.

https://github.com/llvm/llvm-project/pull/79842
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [Clang][SME] Detect always_inline used with mismatched streaming attributes (PR #77936)

2024-02-12 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov approved this pull request.

LGTM.

https://github.com/llvm/llvm-project/pull/77936
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [Clang][SME] Detect always_inline used with mismatched streaming attributes (PR #77936)

2024-02-12 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov edited 
https://github.com/llvm/llvm-project/pull/77936
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [Clang][SME] Detect always_inline used with mismatched streaming attributes (PR #77936)

2024-02-12 Thread Dinar Temirbulatov via cfe-commits


@@ -814,6 +820,49 @@ Address AArch64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, 
Address VAListAddr,
   /*allowHigherAlign*/ false);
 }
 
+class SMEAttributes {
+public:
+  bool IsStreaming = false;
+  bool IsStreamingCompatible = false;
+  bool HasNewZA = false;
+
+  SMEAttributes(const FunctionDecl *F) {
+if (F->hasAttr())
+  IsStreaming = true;
+if (auto *NewAttr = F->getAttr()) {
+  if (NewAttr->isNewZA())
+HasNewZA = true;
+}
+if (const auto *T = F->getType()->getAs()) {
+  if (T->getAArch64SMEAttributes() & FunctionType::SME_PStateSMEnabledMask)
+IsStreaming = true;
+  if (T->getAArch64SMEAttributes() &
+  FunctionType::SME_PStateSMCompatibleMask)
+IsStreamingCompatible = true;
+}
+  }
+};
+
+void AArch64TargetCodeGenInfo::checkFunctionCallABI(
+CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller,
+const FunctionDecl *Callee, const CallArgList &Args) const {
+  if (!Callee->hasAttr())
+return;
+
+  SMEAttributes CalleeAttrs(Callee);
+  SMEAttributes CallerAttrs(Caller);

dtemirbulatov wrote:

Caller or Callee might be Null?

https://github.com/llvm/llvm-project/pull/77936
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [Clang][SME] Detect always_inline used with mismatched streaming attributes (PR #77936)

2024-02-12 Thread Dinar Temirbulatov via cfe-commits

https://github.com/dtemirbulatov requested changes to this pull request.


https://github.com/llvm/llvm-project/pull/77936
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][AArch64] Warn when calling streaming/non-streaming about vect… (PR #79842)

2024-03-04 Thread Dinar Temirbulatov via cfe-commits


@@ -7513,6 +7516,36 @@ void Sema::checkCall(NamedDecl *FDecl, const 
FunctionProtoType *Proto,
   }
 }
 
+auto *CallerFD = dyn_cast(CurContext);
+bool IsCalleeStreaming =
+(ExtInfo.AArch64SMEAttributes & FunctionType::SME_PStateSMEnabledMask);
+bool IsCalleeStreamingCompatible =
+(ExtInfo.AArch64SMEAttributes &
+ FunctionType::SME_PStateSMCompatibleMask);
+bool IsBuiltin = (FD && FD->getBuiltinID());
+AnyScalableArgsOrRet |= Proto->getReturnType()->isSizelessVectorType();
+
+// If the caller is a function and the callee has a different
+// non-compitable streaming attribute. If it passed any VL-based arguments
+// or return VL-based value, then warn that the streaming and non-streaming
+// vector lengths may be different.
+if (CallerFD && Context.getTargetInfo().hasFeature("sme") && !IsBuiltin) {

dtemirbulatov wrote:

Done.

https://github.com/llvm/llvm-project/pull/79842
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][AArch64] Warn when calling streaming/non-streaming about vect… (PR #79842)

2024-03-04 Thread Dinar Temirbulatov via cfe-commits


@@ -7513,6 +7516,36 @@ void Sema::checkCall(NamedDecl *FDecl, const 
FunctionProtoType *Proto,
   }
 }
 
+auto *CallerFD = dyn_cast(CurContext);
+bool IsCalleeStreaming =
+(ExtInfo.AArch64SMEAttributes & FunctionType::SME_PStateSMEnabledMask);
+bool IsCalleeStreamingCompatible =
+(ExtInfo.AArch64SMEAttributes &
+ FunctionType::SME_PStateSMCompatibleMask);
+bool IsBuiltin = (FD && FD->getBuiltinID());
+AnyScalableArgsOrRet |= Proto->getReturnType()->isSizelessVectorType();
+
+// If the caller is a function and the callee has a different
+// non-compitable streaming attribute. If it passed any VL-based arguments
+// or return VL-based value, then warn that the streaming and non-streaming
+// vector lengths may be different.
+if (CallerFD && Context.getTargetInfo().hasFeature("sme") && !IsBuiltin) {
+  ArmStreamingType CallerFnType = getArmStreamingFnType(CallerFD);
+  if (CallerFnType != ArmStreaming &&
+  CallerFnType != ArmStreamingCompatible) {
+if (IsCalleeStreaming && AnyScalableArgsOrRet)
+  Diag(Loc, diag::warn_sme_streaming_pass_return_vl_to_non_streaming);
+  } else if (CallerFnType == ArmStreaming && !IsCalleeStreaming &&
+ !IsCalleeStreamingCompatible) {
+if (AnyScalableArgsOrRet)
+  Diag(Loc, diag::warn_sme_streaming_pass_return_vl_to_non_streaming);
+  } else if (CallerFnType == ArmStreamingCompatible) {
+if ((IsCalleeStreaming || !IsCalleeStreamingCompatible) &&
+AnyScalableArgsOrRet)
+  Diag(Loc, diag::warn_sme_streaming_pass_return_vl_to_non_streaming);
+  }

dtemirbulatov wrote:

Done.

https://github.com/llvm/llvm-project/pull/79842
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][AArch64] Warn when calling streaming/non-streaming about vect… (PR #79842)

2024-03-04 Thread Dinar Temirbulatov via cfe-commits


@@ -12230,12 +12230,22 @@ bool Sema::CheckFunctionDeclaration(Scope *S, 
FunctionDecl *NewFD,
   }
 
   // Check if the function definition uses any AArch64 SME features without
-  // having the '+sme' feature enabled.
+  // having the '+sme' feature enabled and warn user if sme locally streaming
+  // function returns or uses arguments with VL-based types.
   if (DeclIsDefn) {
 const auto *Attr = NewFD->getAttr();
 bool UsesSM = NewFD->hasAttr();
 bool UsesZA = Attr && Attr->isNewZA();
 bool UsesZT0 = Attr && Attr->isNewZT0();
+
+if (UsesSM) {

dtemirbulatov wrote:

Done.

https://github.com/llvm/llvm-project/pull/79842
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


  1   2   >