[PATCH] D40299: [Complex] Don't use __div?c3 when building with fast-math.

2017-12-19 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm updated this revision to Diff 127528.
paulwalker-arm added a comment.

Query LangOpts for FastMath rather than the IRBuilder, and fleshed out the tests.
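For reference, the expansion the new AARCH64-FASTMATH checks below verify is the textbook complex division (A+iB)/(C+iD) = ((AC+BD) + i(BC-AD))/(CC+DD), with no NaN/infinity fix-up. A minimal standalone C++ sketch of that expansion (illustrative only; the helper name below is not part of the patch):

  #include <cstdio>

  struct CFloat { float re, im; };

  // Textbook complex division, as emitted inline under -ffast-math instead of
  // calling __divsc3/__divdc3 (no special-case handling, matching fast-math).
  static CFloat divFastMath(CFloat a, CFloat b) {
    float denom = b.re * b.re + b.im * b.im;        // CC + DD
    return { (a.re * b.re + a.im * b.im) / denom,   // (AC + BD) / (CC + DD)
             (a.im * b.re - a.re * b.im) / denom }; // (BC - AD) / (CC + DD)
  }

  int main() {
    CFloat r = divFastMath({1.0f, 2.0f}, {3.0f, 4.0f});
    std::printf("%f + %fi\n", r.re, r.im); // prints 0.440000 + 0.080000i
    return 0;
  }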


https://reviews.llvm.org/D40299

Files:
  lib/CodeGen/CGExprComplex.cpp
  test/CodeGen/complex-math.c

Index: test/CodeGen/complex-math.c
===
--- test/CodeGen/complex-math.c
+++ test/CodeGen/complex-math.c
@@ -5,6 +5,7 @@
 // RUN: %clang_cc1 %s -O1 -emit-llvm -triple armv7-none-linux-gnueabi -o - | FileCheck %s --check-prefix=ARM
 // RUN: %clang_cc1 %s -O1 -emit-llvm -triple armv7-none-linux-gnueabihf -o - | FileCheck %s --check-prefix=ARMHF
 // RUN: %clang_cc1 %s -O1 -emit-llvm -triple thumbv7k-apple-watchos2.0 -o - -target-abi aapcs16 | FileCheck %s --check-prefix=ARM7K
+// RUN: %clang_cc1 %s -O1 -emit-llvm -triple aarch64-unknown-unknown -ffast-math -o - | FileCheck %s --check-prefix=AARCH64-FASTMATH
 
 float _Complex add_float_rr(float a, float b) {
   // X86-LABEL: @add_float_rr(
@@ -128,13 +129,59 @@
   // X86-NOT: fdiv
   // X86: call {{.*}} @__divsc3(
   // X86: ret
+
+  // a / b = (A+iB) / (C+iD) = ((AC+BD)/(CC+DD)) + i((BC-AD)/(CC+DD))
+  // AARCH64-FASTMATH-LABEL: @div_float_rc(float %a, [2 x float] %b.coerce)
+  // A = a
+  // B = 0
+  // AARCH64-FASTMATH: [[C:%.*]] = extractvalue [2 x float] %b.coerce, 0
+  // AARCH64-FASTMATH: [[D:%.*]] = extractvalue [2 x float] %b.coerce, 1
+  //
+  // AARCH64-FASTMATH: [[AC:%.*]] = fmul fast float [[C]], %a
+  // BD = 0
+  // ACpBD = AC
+  //
+  // AARCH64-FASTMATH: [[CC:%.*]] = fmul fast float [[C]], [[C]]
+  // AARCH64-FASTMATH: [[DD:%.*]] = fmul fast float [[D]], [[D]]
+  // AARCH64-FASTMATH: [[CCpDD:%.*]] = fadd fast float [[CC]], [[DD]]
+  //
+  // BC = 0
+  // AARCH64-FASTMATH: [[AD:%.*]] = fmul fast float [[D]], %a
+  // AARCH64-FASTMATH: [[BCmAD:%.*]] = fsub fast float -0.000000e+00, [[AD]]
+  //
+  // AARCH64-FASTMATH: fdiv fast float [[AC]], [[CCpDD]]
+  // AARCH64-FASTMATH: fdiv fast float [[BCmAD]], [[CCpDD]]
+  // AARCH64-FASTMATH: ret
   return a / b;
 }
 float _Complex div_float_cc(float _Complex a, float _Complex b) {
   // X86-LABEL: @div_float_cc(
   // X86-NOT: fdiv
   // X86: call {{.*}} @__divsc3(
   // X86: ret
+
+  // a / b = (A+iB) / (C+iD) = ((AC+BD)/(CC+DD)) + i((BC-AD)/(CC+DD))
+  // AARCH64-FASTMATH-LABEL: @div_float_cc([2 x float] %a.coerce, [2 x float] %b.coerce)
+  // AARCH64-FASTMATH: [[A:%.*]] = extractvalue [2 x float] %a.coerce, 0
+  // AARCH64-FASTMATH: [[B:%.*]] = extractvalue [2 x float] %a.coerce, 1
+  // AARCH64-FASTMATH: [[C:%.*]] = extractvalue [2 x float] %b.coerce, 0
+  // AARCH64-FASTMATH: [[D:%.*]] = extractvalue [2 x float] %b.coerce, 1
+  //
+  // AARCH64-FASTMATH: [[AC:%.*]] = fmul fast float [[C]], [[A]]
+  // AARCH64-FASTMATH: [[BD:%.*]] = fmul fast float [[D]], [[B]]
+  // AARCH64-FASTMATH: [[ACpBD:%.*]] = fadd fast float [[AC]], [[BD]]
+  //
+  // AARCH64-FASTMATH: [[CC:%.*]] = fmul fast float [[C]], [[C]]
+  // AARCH64-FASTMATH: [[DD:%.*]] = fmul fast float [[D]], [[D]]
+  // AARCH64-FASTMATH: [[CCpDD:%.*]] = fadd fast float [[CC]], [[DD]]
+  //
+  // AARCH64-FASTMATH: [[BC:%.*]] = fmul fast float [[C]], [[B]]
+  // AARCH64-FASTMATH: [[AD:%.*]] = fmul fast float [[D]], [[A]]
+  // AARCH64-FASTMATH: [[BCmAD:%.*]] = fsub fast float [[BC]], [[AD]]
+  //
+  // AARCH64-FASTMATH: fdiv fast float [[ACpBD]], [[CCpDD]]
+  // AARCH64-FASTMATH: fdiv fast float [[BCmAD]], [[CCpDD]]
+  // AARCH64-FASTMATH: ret
   return a / b;
 }
 
@@ -260,13 +307,59 @@
   // X86-NOT: fdiv
   // X86: call {{.*}} @__divdc3(
   // X86: ret
+
+  // a / b = (A+iB) / (C+iD) = ((AC+BD)/(CC+DD)) + i((BC-AD)/(CC+DD))
+  // AARCH64-FASTMATH-LABEL: @div_double_rc(double %a, [2 x double] %b.coerce)
+  // A = a
+  // B = 0
+  // AARCH64-FASTMATH: [[C:%.*]] = extractvalue [2 x double] %b.coerce, 0
+  // AARCH64-FASTMATH: [[D:%.*]] = extractvalue [2 x double] %b.coerce, 1
+  //
+  // AARCH64-FASTMATH: [[AC:%.*]] = fmul fast double [[C]], %a
+  // BD = 0
+  // ACpBD = AC
+  //
+  // AARCH64-FASTMATH: [[CC:%.*]] = fmul fast double [[C]], [[C]]
+  // AARCH64-FASTMATH: [[DD:%.*]] = fmul fast double [[D]], [[D]]
+  // AARCH64-FASTMATH: [[CCpDD:%.*]] = fadd fast double [[CC]], [[DD]]
+  //
+  // BC = 0
+  // AARCH64-FASTMATH: [[AD:%.*]] = fmul fast double [[D]], %a
+  // AARCH64-FASTMATH: [[BCmAD:%.*]] = fsub fast double -0.000000e+00, [[AD]]
+  //
+  // AARCH64-FASTMATH: fdiv fast double [[AC]], [[CCpDD]]
+  // AARCH64-FASTMATH: fdiv fast double [[BCmAD]], [[CCpDD]]
+  // AARCH64-FASTMATH: ret
   return a / b;
 }
 double _Complex div_double_cc(double _Complex a, double _Complex b) {
   // X86-LABEL: @div_double_cc(
   // X86-NOT: fdiv
   // X86: call {{.*}} @__divdc3(
   // X86: ret
+
+  // a / b = (A+iB) / (C+iD) = ((AC+BD)/(CC+DD)) + i((BC-AD)/(CC+DD))
+  // AARCH64-FASTMATH-LABEL: @div_double_cc([2 x double] %a.coerce, [2 x double] %b.coerce)
+  // AARCH64-FASTMATH: [[A:%.*]] = extractvalue [2 x double] %a.coerce, 0
+  // AA

[PATCH] D40299: [Complex] Don't use __div?c3 when building with fast-math.

2017-12-19 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm marked 3 inline comments as done.
paulwalker-arm added inline comments.



Comment at: lib/CodeGen/CGExprComplex.cpp:773
 // supported imaginary types in addition to complex types.
-if (RHSi) {
+if (RHSi && !FMF.isFast()) {
   BinOpInfo LibCallOp = Op;

hfinkel wrote:
> fhahn wrote:
> > Would the following structure be slightly easier to read?
> > 
> > if (RHSi) {
> >   if (FMF.isFast()) { simplify } else {libcall}
> > }
> I'd use CGF.getLangOpts().FastMath (instead of interrogating the implicit 
> state stored in the IR builder).
Probably subjective, but in this instance I preferred the look with fewer nested 
conditionals.
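To make the two shapes concrete, here is a self-contained toy comparison (the names below are stand-ins for the real CodeGen state, not clang API; both shapes select the same lowering for every input):

  #include <cassert>

  enum class Lowering { LibCall, InlineExpansion };

  // Shape used by the patch: a single condition, no extra nesting.
  Lowering lowerFlat(bool RHSi, bool FastMath) {
    if (RHSi && !FastMath)
      return Lowering::LibCall;        // emit the __div?c3 libcall
    return Lowering::InlineExpansion;  // emit the division inline
  }

  // Shape suggested in review: nest the fast-math test inside the RHSi test.
  Lowering lowerNested(bool RHSi, bool FastMath) {
    if (RHSi)
      return FastMath ? Lowering::InlineExpansion : Lowering::LibCall;
    return Lowering::InlineExpansion;  // no imaginary RHS: divide by a real
  }

  int main() {
    for (bool RHSi : {false, true})
      for (bool FastMath : {false, true})
        assert(lowerFlat(RHSi, FastMath) == lowerNested(RHSi, FastMath));
    return 0;
  }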


https://reviews.llvm.org/D40299





[PATCH] D84021: [Driver] Add support for -msve-vector-bits=scalable.

2020-07-17 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm created this revision.
Herald added subscribers: cfe-commits, tschuett.
Herald added a project: clang.

No real action is taken for a value of "scalable", but it provides a
route to override an earlier specification and is effectively the
flag's default value when omitted.

The patch also removes an "unused variable" warning.
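A rough standalone sketch of the driver decision the Clang.cpp hunk below implements (simplified stand-in logic, not the actual clang::driver code):

  #include <iostream>
  #include <string>

  enum class Action { Forward, DropSilently, Diagnose };

  // Mirror of the -msve-vector-bits= handling: forward the known sizes,
  // silently drop "scalable" (it is the implied default), diagnose the rest.
  Action handleSveVectorBits(const std::string &Val) {
    if (Val == "128" || Val == "256" || Val == "512" || Val == "1024" ||
        Val == "2048")
      return Action::Forward;      // pass -msve-vector-bits=<Val> on to cc1
    if (Val == "scalable")
      return Action::DropSilently; // vector-length agnostic code is implied
    return Action::Diagnose;       // err_drv_unsupported_option_argument
  }

  int main() {
    for (const char *V : {"256", "scalable", "384"})
      std::cout << V << " -> " << static_cast<int>(handleSveVectorBits(V))
                << '\n';
    return 0;
  }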


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D84021

Files:
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/lib/Driver/ToolChains/Arch/AArch64.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/aarch64-sve-vector-bits.c


Index: clang/test/Driver/aarch64-sve-vector-bits.c
===
--- clang/test/Driver/aarch64-sve-vector-bits.c
+++ clang/test/Driver/aarch64-sve-vector-bits.c
@@ -12,12 +12,15 @@
 // RUN:  -msve-vector-bits=1024 2>&1 | FileCheck --check-prefix=CHECK-1024 %s
 // RUN: %clang -c %s -### -target aarch64-none-linux-gnu -march=armv8-a+sve \
 // RUN:  -msve-vector-bits=2048 2>&1 | FileCheck --check-prefix=CHECK-2048 %s
+// RUN: %clang -c %s -### -target aarch64-none-linux-gnu -march=armv8-a+sve \
+// RUN:  -msve-vector-bits=scalable 2>&1 | FileCheck --check-prefix=CHECK-SCALABLE %s
 
 // CHECK-128: "-msve-vector-bits=128"
 // CHECK-256: "-msve-vector-bits=256"
 // CHECK-512: "-msve-vector-bits=512"
 // CHECK-1024: "-msve-vector-bits=1024"
 // CHECK-2048: "-msve-vector-bits=2048"
+// CHECK-SCALABLE-NOT: "-msve-vector-bits=
 
 // Bail out if -msve-vector-bits is specified without SVE enabled
 // -
@@ -47,11 +50,13 @@
 // -
 // RUN: not %clang -c %s -o /dev/null -target aarch64-none-linux-gnu \
 // RUN:  -march=armv8-a+sve 2>&1 | FileCheck --check-prefix=CHECK-NO-FLAG-ERROR %s
+// RUN: not %clang -c %s -o /dev/null -target aarch64-none-linux-gnu \
+// RUN:  -march=armv8-a+sve -msve-vector-bits=scalable 2>&1 | FileCheck --check-prefix=CHECK-NO-FLAG-ERROR %s
 
 typedef __SVInt32_t svint32_t;
 typedef svint32_t noflag __attribute__((arm_sve_vector_bits(256)));
 
-// CHECK-NO-FLAG-ERROR: error: 'arm_sve_vector_bits' is not supported when '-msve-vector-bits=' is not specified
+// CHECK-NO-FLAG-ERROR: error: 'arm_sve_vector_bits' is only supported when '-msve-vector-bits=' is specified with a value of 128, 256, 512, 1024 or 2048
 
 // Error if attribute vector size != -msve-vector-bits
 // -
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -1720,15 +1720,15 @@
   if (Arg *A = Args.getLastArg(options::OPT_msve_vector_bits_EQ)) {
 StringRef Val = A->getValue();
 const Driver &D = getToolChain().getDriver();
-if (!Val.equals("128") && !Val.equals("256") && !Val.equals("512") &&
-!Val.equals("1024") && !Val.equals("2048")) {
+if (Val.equals("128") || Val.equals("256") || Val.equals("512") ||
+Val.equals("1024") || Val.equals("2048"))
+  CmdArgs.push_back(
+  Args.MakeArgString(llvm::Twine("-msve-vector-bits=") + Val));
+// Silently drop requests for vector-length agnostic code as it's implied.
+else if (!Val.equals("scalable"))
   // Handle the unsupported values passed to msve-vector-bits.
   D.Diag(diag::err_drv_unsupported_option_argument)
   << A->getOption().getName() << Val;
-} else if (A->getOption().matches(options::OPT_msve_vector_bits_EQ)) {
-  CmdArgs.push_back(
-  Args.MakeArgString(llvm::Twine("-msve-vector-bits=") + Val));
-}
   }
 }
 
Index: clang/lib/Driver/ToolChains/Arch/AArch64.cpp
===
--- clang/lib/Driver/ToolChains/Arch/AArch64.cpp
+++ clang/lib/Driver/ToolChains/Arch/AArch64.cpp
@@ -370,8 +370,8 @@
 V8_6Pos = Features.insert(std::next(V8_6Pos), {"+i8mm", "+bf16"});
 
   bool HasSve = llvm::is_contained(Features, "+sve");
-  // -msve_vector_bits= flag is valid only if SVE is enabled.
-  if (Arg *A = Args.getLastArg(options::OPT_msve_vector_bits_EQ))
+  // -msve-vector-bits= flag is valid only if SVE is enabled.
+  if (Args.hasArg(options::OPT_msve_vector_bits_EQ))
 if (!HasSve)
   D.Diag(diag::err_drv_invalid_sve_vector_bits);
 
Index: clang/include/clang/Basic/DiagnosticSemaKinds.td
===
--- clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -2816,7 +2816,8 @@
   "invalid SVE vector size '%0', must match value set by "
   "'-msve-vector-bits' ('%1')">;
 def err_attribute_arm_feature_sve_bits_unsupported : Error<
-  "%0 is not supported when '-msve-vector-bits=' is not specified">;
+  "%0 is only supported when

[PATCH] D84021: [Driver] Add support for -msve-vector-bits=scalable.

2020-07-17 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm updated this revision to Diff 278744.
paulwalker-arm added a comment.

Fixed typo to match existing wrapping style.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D84021/new/

https://reviews.llvm.org/D84021

Files:
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/lib/Driver/ToolChains/Arch/AArch64.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/aarch64-sve-vector-bits.c


Index: clang/test/Driver/aarch64-sve-vector-bits.c
===
--- clang/test/Driver/aarch64-sve-vector-bits.c
+++ clang/test/Driver/aarch64-sve-vector-bits.c
@@ -12,12 +12,15 @@
 // RUN:  -msve-vector-bits=1024 2>&1 | FileCheck --check-prefix=CHECK-1024 %s
 // RUN: %clang -c %s -### -target aarch64-none-linux-gnu -march=armv8-a+sve \
 // RUN:  -msve-vector-bits=2048 2>&1 | FileCheck --check-prefix=CHECK-2048 %s
+// RUN: %clang -c %s -### -target aarch64-none-linux-gnu -march=armv8-a+sve \
+// RUN:  -msve-vector-bits=scalable 2>&1 | FileCheck --check-prefix=CHECK-SCALABLE %s
 
 // CHECK-128: "-msve-vector-bits=128"
 // CHECK-256: "-msve-vector-bits=256"
 // CHECK-512: "-msve-vector-bits=512"
 // CHECK-1024: "-msve-vector-bits=1024"
 // CHECK-2048: "-msve-vector-bits=2048"
+// CHECK-SCALABLE-NOT: "-msve-vector-bits=
 
 // Bail out if -msve-vector-bits is specified without SVE enabled
 // -
@@ -47,11 +50,13 @@
 // -
 // RUN: not %clang -c %s -o /dev/null -target aarch64-none-linux-gnu \
 // RUN:  -march=armv8-a+sve 2>&1 | FileCheck --check-prefix=CHECK-NO-FLAG-ERROR %s
+// RUN: not %clang -c %s -o /dev/null -target aarch64-none-linux-gnu \
+// RUN:  -march=armv8-a+sve -msve-vector-bits=scalable 2>&1 | FileCheck --check-prefix=CHECK-NO-FLAG-ERROR %s
 
 typedef __SVInt32_t svint32_t;
 typedef svint32_t noflag __attribute__((arm_sve_vector_bits(256)));
 
-// CHECK-NO-FLAG-ERROR: error: 'arm_sve_vector_bits' is not supported when '-msve-vector-bits=' is not specified
+// CHECK-NO-FLAG-ERROR: error: 'arm_sve_vector_bits' is only supported when '-msve-vector-bits=' is specified with a value of 128, 256, 512, 1024 or 2048
 
 // Error if attribute vector size != -msve-vector-bits
 // -
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -1720,15 +1720,15 @@
   if (Arg *A = Args.getLastArg(options::OPT_msve_vector_bits_EQ)) {
 StringRef Val = A->getValue();
 const Driver &D = getToolChain().getDriver();
-if (!Val.equals("128") && !Val.equals("256") && !Val.equals("512") &&
-!Val.equals("1024") && !Val.equals("2048")) {
+if (Val.equals("128") || Val.equals("256") || Val.equals("512") ||
+Val.equals("1024") || Val.equals("2048"))
+  CmdArgs.push_back(
+  Args.MakeArgString(llvm::Twine("-msve-vector-bits=") + Val));
+// Silently drop requests for vector-length agnostic code as it's implied.
+else if (!Val.equals("scalable"))
   // Handle the unsupported values passed to msve-vector-bits.
   D.Diag(diag::err_drv_unsupported_option_argument)
   << A->getOption().getName() << Val;
-} else if (A->getOption().matches(options::OPT_msve_vector_bits_EQ)) {
-  CmdArgs.push_back(
-  Args.MakeArgString(llvm::Twine("-msve-vector-bits=") + Val));
-}
   }
 }
 
Index: clang/lib/Driver/ToolChains/Arch/AArch64.cpp
===
--- clang/lib/Driver/ToolChains/Arch/AArch64.cpp
+++ clang/lib/Driver/ToolChains/Arch/AArch64.cpp
@@ -370,8 +370,8 @@
 V8_6Pos = Features.insert(std::next(V8_6Pos), {"+i8mm", "+bf16"});
 
   bool HasSve = llvm::is_contained(Features, "+sve");
-  // -msve_vector_bits= flag is valid only if SVE is enabled.
-  if (Arg *A = Args.getLastArg(options::OPT_msve_vector_bits_EQ))
+  // -msve-vector-bits= flag is valid only if SVE is enabled.
+  if (Args.hasArg(options::OPT_msve_vector_bits_EQ))
 if (!HasSve)
   D.Diag(diag::err_drv_invalid_sve_vector_bits);
 
Index: clang/include/clang/Basic/DiagnosticSemaKinds.td
===
--- clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -2816,7 +2816,8 @@
   "invalid SVE vector size '%0', must match value set by "
   "'-msve-vector-bits' ('%1')">;
 def err_attribute_arm_feature_sve_bits_unsupported : Error<
-  "%0 is not supported when '-msve-vector-bits=' is not specified">;
+  "%0 is only supported when '-msve-vector-bits=' is specified with a "
+  "value of 128, 256, 512, 1024 or 2048.">;
 def err_attribute_requires_positive_integ

[PATCH] D84021: [Driver] Add support for -msve-vector-bits=scalable.

2020-07-17 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm updated this revision to Diff 278803.
paulwalker-arm added a comment.
Herald added a subscriber: dang.

Sorry for the post-acceptance change, but I spotted that the help text also needed an update.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D84021/new/

https://reviews.llvm.org/D84021

Files:
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/ToolChains/Arch/AArch64.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/aarch64-sve-vector-bits.c

Index: clang/test/Driver/aarch64-sve-vector-bits.c
===
--- clang/test/Driver/aarch64-sve-vector-bits.c
+++ clang/test/Driver/aarch64-sve-vector-bits.c
@@ -12,12 +12,15 @@
 // RUN:  -msve-vector-bits=1024 2>&1 | FileCheck --check-prefix=CHECK-1024 %s
 // RUN: %clang -c %s -### -target aarch64-none-linux-gnu -march=armv8-a+sve \
 // RUN:  -msve-vector-bits=2048 2>&1 | FileCheck --check-prefix=CHECK-2048 %s
+// RUN: %clang -c %s -### -target aarch64-none-linux-gnu -march=armv8-a+sve \
+// RUN:  -msve-vector-bits=scalable 2>&1 | FileCheck --check-prefix=CHECK-SCALABLE %s
 
 // CHECK-128: "-msve-vector-bits=128"
 // CHECK-256: "-msve-vector-bits=256"
 // CHECK-512: "-msve-vector-bits=512"
 // CHECK-1024: "-msve-vector-bits=1024"
 // CHECK-2048: "-msve-vector-bits=2048"
+// CHECK-SCALABLE-NOT: "-msve-vector-bits=
 
 // Bail out if -msve-vector-bits is specified without SVE enabled
 // -
@@ -47,11 +50,13 @@
 // -
 // RUN: not %clang -c %s -o /dev/null -target aarch64-none-linux-gnu \
 // RUN:  -march=armv8-a+sve 2>&1 | FileCheck --check-prefix=CHECK-NO-FLAG-ERROR %s
+// RUN: not %clang -c %s -o /dev/null -target aarch64-none-linux-gnu \
+// RUN:  -march=armv8-a+sve -msve-vector-bits=scalable 2>&1 | FileCheck --check-prefix=CHECK-NO-FLAG-ERROR %s
 
 typedef __SVInt32_t svint32_t;
 typedef svint32_t noflag __attribute__((arm_sve_vector_bits(256)));
 
-// CHECK-NO-FLAG-ERROR: error: 'arm_sve_vector_bits' is not supported when '-msve-vector-bits=' is not specified
+// CHECK-NO-FLAG-ERROR: error: 'arm_sve_vector_bits' is only supported when '-msve-vector-bits=' is specified with a value of 128, 256, 512, 1024 or 2048
 
 // Error if attribute vector size != -msve-vector-bits
 // -
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -1720,15 +1720,15 @@
   if (Arg *A = Args.getLastArg(options::OPT_msve_vector_bits_EQ)) {
 StringRef Val = A->getValue();
 const Driver &D = getToolChain().getDriver();
-if (!Val.equals("128") && !Val.equals("256") && !Val.equals("512") &&
-!Val.equals("1024") && !Val.equals("2048")) {
+if (Val.equals("128") || Val.equals("256") || Val.equals("512") ||
+Val.equals("1024") || Val.equals("2048"))
+  CmdArgs.push_back(
+  Args.MakeArgString(llvm::Twine("-msve-vector-bits=") + Val));
+// Silently drop requests for vector-length agnostic code as it's implied.
+else if (!Val.equals("scalable"))
   // Handle the unsupported values passed to msve-vector-bits.
   D.Diag(diag::err_drv_unsupported_option_argument)
   << A->getOption().getName() << Val;
-} else if (A->getOption().matches(options::OPT_msve_vector_bits_EQ)) {
-  CmdArgs.push_back(
-  Args.MakeArgString(llvm::Twine("-msve-vector-bits=") + Val));
-}
   }
 }
 
Index: clang/lib/Driver/ToolChains/Arch/AArch64.cpp
===
--- clang/lib/Driver/ToolChains/Arch/AArch64.cpp
+++ clang/lib/Driver/ToolChains/Arch/AArch64.cpp
@@ -370,8 +370,8 @@
 V8_6Pos = Features.insert(std::next(V8_6Pos), {"+i8mm", "+bf16"});
 
   bool HasSve = llvm::is_contained(Features, "+sve");
-  // -msve_vector_bits= flag is valid only if SVE is enabled.
-  if (Arg *A = Args.getLastArg(options::OPT_msve_vector_bits_EQ))
+  // -msve-vector-bits= flag is valid only if SVE is enabled.
+  if (Args.hasArg(options::OPT_msve_vector_bits_EQ))
 if (!HasSve)
   D.Diag(diag::err_drv_invalid_sve_vector_bits);
 
Index: clang/include/clang/Driver/Options.td
===
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -2346,8 +2346,9 @@
 
 def msve_vector_bits_EQ : Joined<["-"], "msve-vector-bits=">,
   Group, Flags<[DriverOption,CC1Option]>,
-  HelpText<"Set the size of fixed-length SVE vectors in bits.">,
-  Values<"128,256,512,1024,2048">;
+  HelpText<"Specify the size in bits of an SVE vector register. Defaults to the"
+   " vect

[PATCH] D84021: [Driver] Add support for -msve-vector-bits=scalable.

2020-07-20 Thread Paul Walker via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rGab7abd8bf41b: [Driver] Add support for 
-msve-vector-bits=scalable. (authored by paulwalker-arm).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D84021/new/

https://reviews.llvm.org/D84021

Files:
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/ToolChains/Arch/AArch64.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/aarch64-sve-vector-bits.c

Index: clang/test/Driver/aarch64-sve-vector-bits.c
===
--- clang/test/Driver/aarch64-sve-vector-bits.c
+++ clang/test/Driver/aarch64-sve-vector-bits.c
@@ -12,12 +12,15 @@
 // RUN:  -msve-vector-bits=1024 2>&1 | FileCheck --check-prefix=CHECK-1024 %s
 // RUN: %clang -c %s -### -target aarch64-none-linux-gnu -march=armv8-a+sve \
 // RUN:  -msve-vector-bits=2048 2>&1 | FileCheck --check-prefix=CHECK-2048 %s
+// RUN: %clang -c %s -### -target aarch64-none-linux-gnu -march=armv8-a+sve \
+// RUN:  -msve-vector-bits=scalable 2>&1 | FileCheck --check-prefix=CHECK-SCALABLE %s
 
 // CHECK-128: "-msve-vector-bits=128"
 // CHECK-256: "-msve-vector-bits=256"
 // CHECK-512: "-msve-vector-bits=512"
 // CHECK-1024: "-msve-vector-bits=1024"
 // CHECK-2048: "-msve-vector-bits=2048"
+// CHECK-SCALABLE-NOT: "-msve-vector-bits=
 
 // Bail out if -msve-vector-bits is specified without SVE enabled
 // -
@@ -47,11 +50,13 @@
 // -
 // RUN: not %clang -c %s -o /dev/null -target aarch64-none-linux-gnu \
 // RUN:  -march=armv8-a+sve 2>&1 | FileCheck --check-prefix=CHECK-NO-FLAG-ERROR %s
+// RUN: not %clang -c %s -o /dev/null -target aarch64-none-linux-gnu \
+// RUN:  -march=armv8-a+sve -msve-vector-bits=scalable 2>&1 | FileCheck --check-prefix=CHECK-NO-FLAG-ERROR %s
 
 typedef __SVInt32_t svint32_t;
 typedef svint32_t noflag __attribute__((arm_sve_vector_bits(256)));
 
-// CHECK-NO-FLAG-ERROR: error: 'arm_sve_vector_bits' is not supported when '-msve-vector-bits=' is not specified
+// CHECK-NO-FLAG-ERROR: error: 'arm_sve_vector_bits' is only supported when '-msve-vector-bits=' is specified with a value of 128, 256, 512, 1024 or 2048
 
 // Error if attribute vector size != -msve-vector-bits
 // -
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -1720,15 +1720,15 @@
   if (Arg *A = Args.getLastArg(options::OPT_msve_vector_bits_EQ)) {
 StringRef Val = A->getValue();
 const Driver &D = getToolChain().getDriver();
-if (!Val.equals("128") && !Val.equals("256") && !Val.equals("512") &&
-!Val.equals("1024") && !Val.equals("2048")) {
+if (Val.equals("128") || Val.equals("256") || Val.equals("512") ||
+Val.equals("1024") || Val.equals("2048"))
+  CmdArgs.push_back(
+  Args.MakeArgString(llvm::Twine("-msve-vector-bits=") + Val));
+// Silently drop requests for vector-length agnostic code as it's implied.
+else if (!Val.equals("scalable"))
   // Handle the unsupported values passed to msve-vector-bits.
   D.Diag(diag::err_drv_unsupported_option_argument)
   << A->getOption().getName() << Val;
-} else if (A->getOption().matches(options::OPT_msve_vector_bits_EQ)) {
-  CmdArgs.push_back(
-  Args.MakeArgString(llvm::Twine("-msve-vector-bits=") + Val));
-}
   }
 }
 
Index: clang/lib/Driver/ToolChains/Arch/AArch64.cpp
===
--- clang/lib/Driver/ToolChains/Arch/AArch64.cpp
+++ clang/lib/Driver/ToolChains/Arch/AArch64.cpp
@@ -370,8 +370,8 @@
 V8_6Pos = Features.insert(std::next(V8_6Pos), {"+i8mm", "+bf16"});
 
   bool HasSve = llvm::is_contained(Features, "+sve");
-  // -msve_vector_bits= flag is valid only if SVE is enabled.
-  if (Arg *A = Args.getLastArg(options::OPT_msve_vector_bits_EQ))
+  // -msve-vector-bits= flag is valid only if SVE is enabled.
+  if (Args.hasArg(options::OPT_msve_vector_bits_EQ))
 if (!HasSve)
   D.Diag(diag::err_drv_invalid_sve_vector_bits);
 
Index: clang/include/clang/Driver/Options.td
===
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -2346,8 +2346,9 @@
 
 def msve_vector_bits_EQ : Joined<["-"], "msve-vector-bits=">,
   Group, Flags<[DriverOption,CC1Option]>,
-  HelpText<"Set the size of fixed-length SVE vectors in bits.">,
-  Values<"128,256,512,1024,2048">;
+  HelpText<"Specify the size in bits of an SVE vector register. Defaults to the"
+   " vector length agnostic 

[PATCH] D71760: [POC][SVE] Allow code generation for fixed length vectorised loops [Patch 1/2].

2020-07-20 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm updated this revision to Diff 279217.
paulwalker-arm added a comment.

Rebase.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71760/new/

https://reviews.llvm.org/D71760

Files:
  clang/lib/Driver/ToolChains/Clang.cpp


Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -1721,9 +1721,16 @@
 StringRef Val = A->getValue();
 const Driver &D = getToolChain().getDriver();
 if (Val.equals("128") || Val.equals("256") || Val.equals("512") ||
-Val.equals("1024") || Val.equals("2048"))
+Val.equals("1024") || Val.equals("2048")) {
   CmdArgs.push_back(
   Args.MakeArgString(llvm::Twine("-msve-vector-bits=") + Val));
+  CmdArgs.push_back("-mllvm");
+  CmdArgs.push_back(
+  Args.MakeArgString("-aarch64-sve-vector-bits-min=" + Val));
+  // CmdArgs.push_back("-mllvm");
+  // CmdArgs.push_back(
+  //Args.MakeArgString("-aarch64-sve-vector-bits-max=" + Val));
+}
 // Silently drop requests for vector-length agnostic code as it's implied.
 else if (!Val.equals("scalable"))
   // Handle the unsupported values passed to msve-vector-bits.




[PATCH] D81252: [SVE ACLE] Remove redundant bool_t typedef.

2020-06-05 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm created this revision.
Herald added subscribers: cfe-commits, tschuett.
Herald added a project: clang.
paulwalker-arm added a reviewer: sdesmalen.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D81252

Files:
  clang/utils/TableGen/SveEmitter.cpp


Index: clang/utils/TableGen/SveEmitter.cpp
===
--- clang/utils/TableGen/SveEmitter.cpp
+++ clang/utils/TableGen/SveEmitter.cpp
@@ -1058,7 +1058,6 @@
   OS << "typedef __fp16 float16_t;\n";
   OS << "typedef float float32_t;\n";
   OS << "typedef double float64_t;\n";
-  OS << "typedef bool bool_t;\n\n";
 
   OS << "typedef __SVInt8_t svint8_t;\n";
   OS << "typedef __SVInt16_t svint16_t;\n";




[PATCH] D81252: [SVE ACLE] Remove redundant bool_t typedef.

2020-06-05 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm updated this revision to Diff 268795.
paulwalker-arm added a comment.

Stop SveEmitter from adding a _t suffix to the scalar boolean type.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D81252/new/

https://reviews.llvm.org/D81252

Files:
  clang/utils/TableGen/SveEmitter.cpp


Index: clang/utils/TableGen/SveEmitter.cpp
===
--- clang/utils/TableGen/SveEmitter.cpp
+++ clang/utils/TableGen/SveEmitter.cpp
@@ -437,7 +437,8 @@
   S += "x" + utostr(getNumElements());
 if (NumVectors > 1)
   S += "x" + utostr(NumVectors);
-S += "_t";
+if (!isScalarPredicate())
+  S += "_t";
   }
 
   if (Constant)
@@ -1058,7 +1059,6 @@
   OS << "typedef __fp16 float16_t;\n";
   OS << "typedef float float32_t;\n";
   OS << "typedef double float64_t;\n";
-  OS << "typedef bool bool_t;\n\n";
 
   OS << "typedef __SVInt8_t svint8_t;\n";
   OS << "typedef __SVInt16_t svint16_t;\n";




[PATCH] D81252: [SVE ACLE] Remove redundant bool_t typedef.

2020-06-06 Thread Paul Walker via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rGd6d2f78fe504: [SVE ACLE] Remove redundant bool_t typedef. 
(authored by paulwalker-arm).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D81252/new/

https://reviews.llvm.org/D81252

Files:
  clang/utils/TableGen/SveEmitter.cpp


Index: clang/utils/TableGen/SveEmitter.cpp
===
--- clang/utils/TableGen/SveEmitter.cpp
+++ clang/utils/TableGen/SveEmitter.cpp
@@ -437,7 +437,8 @@
   S += "x" + utostr(getNumElements());
 if (NumVectors > 1)
   S += "x" + utostr(NumVectors);
-S += "_t";
+if (!isScalarPredicate())
+  S += "_t";
   }
 
   if (Constant)
@@ -1058,7 +1059,6 @@
   OS << "typedef __fp16 float16_t;\n";
   OS << "typedef float float32_t;\n";
   OS << "typedef double float64_t;\n";
-  OS << "typedef bool bool_t;\n\n";
 
   OS << "typedef __SVInt8_t svint8_t;\n";
   OS << "typedef __SVInt16_t svint16_t;\n";




[PATCH] D71760: [POC][SVE] Allow code generation for fixed length vectorised loops [Patch 1/2].

2020-07-13 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm updated this revision to Diff 277452.
paulwalker-arm removed reviewers: rengolin, efriedma.
paulwalker-arm added a subscriber: efriedma.
paulwalker-arm added a comment.
Herald added a reviewer: rengolin.
Herald added a reviewer: efriedma.
Herald added subscribers: cfe-commits, dang.
Herald added a project: clang.

Rebasing to reflect that the majority of the functionality is now in master.  What 
remains is likely to be abandoned in favour of function attributes, but it's 
here for those who want to experiment.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71760/new/

https://reviews.llvm.org/D71760

Files:
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/ToolChains/Clang.cpp
  llvm/lib/Target/AArch64/AArch64FrameLowering.cpp


Index: llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
===
--- llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1819,7 +1819,7 @@
 if (FPOffsetFits && CanUseBP) // Both are ok. Pick the best.
   UseFP = PreferFP;
 else if (!CanUseBP) { // Can't use BP. Forced to use FP.
-  assert(!SVEStackSize && "Expected BP to be available");
+  // assert(!SVEStackSize && "Expected BP to be available");
   UseFP = true;
 }
 // else we can use BP and FP, but the offset from FP won't fit.
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -1715,6 +1715,22 @@
 if (IndirectBranches)
   CmdArgs.push_back("-mbranch-target-enforce");
   }
+
+  if (any_of(CmdArgs, [](const char *Arg) {
+return (strcmp(Arg, "+sve") == 0 || strcmp(Arg, "+sve2") == 0);
+  })) {
+if (Arg *A = Args.getLastArg(options::OPT_msve_vector_bits)) {
+  StringRef Bits = A->getValue();
+  if (Bits != "scalable") {
+CmdArgs.push_back("-mllvm");
+CmdArgs.push_back(
+Args.MakeArgString("-aarch64-sve-vector-bits-min=" + Bits));
+// CmdArgs.push_back("-mllvm");
+// CmdArgs.push_back(
+//Args.MakeArgString("-aarch64-sve-vector-bits-max=" + Bits));
+  }
+}
+  }
 }
 
 void Clang::AddMIPSTargetArgs(const ArgList &Args,
Index: clang/include/clang/Driver/Options.td
===
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -2351,6 +2351,10 @@
 
 def mharden_sls_EQ : Joined<["-"], "mharden-sls=">,
   HelpText<"Select straight-line speculation hardening scope">;
+def msve_vector_bits : Joined<["-"], "msve-vector-bits=">,
+  Group,
+  HelpText<"Specify the size in bits of an SVE vector register."
+   " Has no effect unless SVE is enabled. (Default is \"scalable\")">;
 
 def msimd128 : Flag<["-"], "msimd128">, Group;
 def munimplemented_simd128 : Flag<["-"], "munimplemented-simd128">, 
Group;



[PATCH] D71767: [POC][SVE] Allow code generation for fixed length vectorised loops [Patch 2/2].

2020-08-10 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm updated this revision to Diff 284380.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71767/new/

https://reviews.llvm.org/D71767

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.h
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-fixed-length-int-extends.ll

Index: llvm/test/CodeGen/AArch64/sve-fixed-length-int-extends.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-fixed-length-int-extends.ll
@@ -0,0 +1,373 @@
+; RUN: llc -aarch64-sve-vector-bits-min=128  -asm-verbose=0 < %s | FileCheck %s -check-prefix=NO_SVE
+; RUN: llc -aarch64-sve-vector-bits-min=256  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK
+; RUN: llc -aarch64-sve-vector-bits-min=384  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK
+; RUN: llc -aarch64-sve-vector-bits-min=512  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=640  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=768  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=896  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=1024 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1152 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1280 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1408 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1536 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1664 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1792 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1920 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=2048 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Don't use SVE when its registers are no bigger than NEON.
+; NO_SVE-NOT: z{0-9}
+
+;
+; sext i8 -> i16
+;
+
+define void @sext_v16i8_v16i16(<16 x i8> %a, <16 x i16>* %out) #0 {
+; CHECK-LABEL: sext_v16i8_v16i16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-NEXT: sunpklo [[A_HALFS:z[0-9]+]].h, z0.b
+; CHECK-NEXT: st1h { [[A_HALFS]].h }, [[PG]], [x0]
+; CHECK-NEXT: ret
+  %b = sext <16 x i8> %a to <16 x i16>
+  store <16 x i16>%b, <16 x i16>* %out
+  ret void
+}
+
+; NOTE: Extra 'add' is to prevent the extend being combined with the load.
+define void @sext_v32i8_v32i16(<32 x i8>* %in, <32 x i16>* %out) #0 {
+; CHECK-LABEL: sext_v32i8_v32i16:
+; VBITS_GE_512: add [[A_BYTES:z[0-9]+]].b, {{p[0-9]+}}/m, {{z[0-9]+}}.b, {{z[0-9]+}}.b
+; VBITS_GE_512-NEXT: sunpklo [[A_HALFS:z[0-9]+]].h, [[A_BYTES]].b
+; VBITS_GE_512-NEXT: ptrue [[PG:p[0-9]+]].h, vl32
+; VBITS_GE_512-NEXT: st1h { [[A_HALFS]].h }, [[PG]], [x1]
+; VBITS_GE_512-NEXT: ret
+  %a = load <32 x i8>, <32 x i8>* %in
+  %b = add <32 x i8> %a, %a
+  %c = sext <32 x i8> %b to <32 x i16>
+  store <32 x i16> %c, <32 x i16>* %out
+  ret void
+}
+
+define void @sext_v64i8_v64i16(<64 x i8>* %in, <64 x i16>* %out) #0 {
+; CHECK-LABEL: sext_v64i8_v64i16:
+; VBITS_GE_1024: add [[A_BYTES:z[0-9]+]].b, {{p[0-9]+}}/m, {{z[0-9]+}}.b, {{z[0-9]+}}.b
+; VBITS_GE_1024-NEXT: sunpklo [[A_HALFS:z[0-9]+]].h, [[A_BYTES]].b
+; VBITS_GE_1024-NEXT: ptrue [[PG:p[0-9]+]].h, vl64
+; VBITS_GE_1024-NEXT: st1h { [[A_HALFS]].h }, [[PG]], [x1]
+; VBITS_GE_1024-NEXT: ret
+  %a = load <64 x i8>, <64 x i8>* %in
+  %b = add <64 x i8> %a, %a
+  %c = sext <64 x i8> %b to <64 x i16>
+  store <64 x i16> %c, <64 x i16>* %out
+  ret void
+}
+
+define void @sext_v128i8_v128i16(<128 x i8>* %in, <128 x i16>* %out) #0 {
+; CHECK-LABEL: sext_v128i8_v128i16:
+; VBITS_GE_2048: add [[A_BYTES:z[0-9]+]].b, {{p[0-9]+}}/m, {{z[0-9]+}}.b, {{z[0-9]+}}.b
+; VBITS_GE_2048-NEXT: sunpklo [[A_HALFS:z[0-9]+]].h, [[A_BYTES]].b
+; VBITS_GE_2048-NEXT: ptrue [[PG:p[0-9]+]].h, vl128
+; VBITS_GE_2048-NEXT: st1h { [[A_HALFS]].h }, [[PG]], [x1]
+; VBITS_GE_2048-NEXT: ret
+  %a = load <128 x i8>, <128 x i8>* %in
+  %b = add <128 x i8> %a,

[PATCH] D71767: [POC][SVE] Allow code generation for fixed length vectorised loops [Patch 2/2].

2020-08-10 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added a comment.

@cameron.mcinally this is the patch I mentioned the other day; it contains the 
nodes for which I'll push separate patches once I've written suitable tests.  
Anything else is fair game.  This patch implements VSELECT, but that was just 
to investigate what we talked about during the previous sync call, so I'll 
ignore it if you're planning to push on with your original work?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71767/new/

https://reviews.llvm.org/D71767



[PATCH] D71767: [POC][SVE] Allow code generation for fixed length vectorised loops [Patch 2/2].

2020-08-10 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added a comment.

In D71767#2207158, @cameron.mcinally wrote:

> In D71767#2206947, @paulwalker-arm wrote:
>
>> @cameron.mcinally this is the patch I mentioned the other day, which 
>> contains the nodes where once I've written suitable tests I'll push separate 
>> patches for.
>
> Thanks, Paul. You mentioned that you would be focusing on another project for 
> a few weeks. Would it help if I attempted to cherry-pick some of this Diff 
> into individual patches (with new tests) for you? Or would I be stepping on 
> your toes too much?

That would be great, thanks.  I already have patches up for the extends and am 
currently focusing on setcc, sub and the shifts, which leaves min/max and 
divides.  That said, one area I've not looked at yet is the VECREDUCE_ nodes.  
I don't anticipate them to be that problematic but having proof of this would 
be nice.  Let me know what you decide.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71767/new/

https://reviews.llvm.org/D71767



[PATCH] D71767: [POC][SVE] Allow code generation for fixed length vectorised loops [Patch 2/2].

2020-08-13 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm updated this revision to Diff 285336.
paulwalker-arm added a comment.

rebase


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71767/new/

https://reviews.llvm.org/D71767

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.h

Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h
===
--- llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -878,6 +878,7 @@
   SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG,
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1110,13 +1110,24 @@
   setOperationAction(ISD::SHL, VT, Custom);
   setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
   setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
+  setOperationAction(ISD::SMAX, VT, Custom);
+  setOperationAction(ISD::SMIN, VT, Custom);
   setOperationAction(ISD::SRA, VT, Custom);
   setOperationAction(ISD::SRL, VT, Custom);
   setOperationAction(ISD::STORE, VT, Custom);
   setOperationAction(ISD::SUB, VT, Custom);
   setOperationAction(ISD::TRUNCATE, VT, Custom);
+  setOperationAction(ISD::UMAX, VT, Custom);
+  setOperationAction(ISD::UMIN, VT, Custom);
+  setOperationAction(ISD::VSELECT, VT, Custom);
   setOperationAction(ISD::XOR, VT, Custom);
   setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
+
+  if (VT.getVectorElementType() == MVT::i32 ||
+  VT.getVectorElementType() == MVT::i64) {
+setOperationAction(ISD::SDIV, VT, Custom);
+setOperationAction(ISD::UDIV, VT, Custom);
+  }
 }
 
 void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
@@ -3710,6 +3721,8 @@
 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAXNM_PRED);
   case ISD::FMINNUM:
 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMINNM_PRED);
+  case ISD::VSELECT:
+return LowerVSELECT(Op, DAG);
   }
 }
 
@@ -15491,3 +15504,24 @@
   auto Promote = DAG.getBoolExtOrTrunc(Cmp, DL, PromoteVT, InVT);
   return convertFromScalableVector(DAG, Op.getValueType(), Promote);
 }
+
+SDValue AArch64TargetLowering::LowerVSELECT(SDValue Op,
+SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+
+  EVT InVT = Op.getOperand(1).getValueType();
+  EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
+  auto Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1));
+  auto Op2 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(2));
+
+  // Convert the mask to a predicate (NOTE: We don't need to worry about
+  // inactive lanes since VSELECT is safe when given undefined elements).
+  EVT MaskVT = Op.getOperand(0).getValueType();
+  EVT MaskContainerVT = getContainerForFixedLengthVector(DAG, MaskVT);
+  auto Mask = convertToScalableVector(DAG, MaskContainerVT, Op.getOperand(0));
+  Mask = DAG.getNode(ISD::TRUNCATE, DL,
+ MaskContainerVT.changeVectorElementType(MVT::i1), Mask);
+
+  auto VSel = DAG.getNode(ISD::VSELECT, DL, ContainerVT, Mask, Op1, Op2);
+  return convertFromScalableVector(DAG, InVT, VSel);
+}
Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -20577,7 +20577,8 @@
 EVT N0SrcSVT = N0Src.getValueType().getScalarType();
 EVT N1SrcSVT = N1Src.getValueType().getScalarType();
 if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
-N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
+N0Src.getValueType().isFixedLengthVector() &&
+N1Src.getValueType().isFixedLengthVector()) {
   EVT NewVT;
   SDLoc DL(N);
   SDValue NewIdx;
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -1724,9 +1724,17 @@
 StringRef Val = A->getValue();
 const Driver &D = getToolChain().getDriver();
 if (Val.equals("128") || Val.equals("256") || Val.equals("512") ||
-Val.equals("1024") || Val.equals("2048"))
+Val.equals("1024") || Val.equals("2048")) {
   CmdArgs.push_back(
   Args.MakeArgString(llvm::Twine("-msve-

[PATCH] D122983: [C11/C2x] Change the behavior of the implicit function declaration warning

2022-04-04 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added a comment.

Please consider this "AArch64 folks speaking up".  What are your plans here 
exactly? I have no issue with adding `-std=c99` to the RUN lines, but "remove 
the // expected-warning comments" sounds like a significant loss of test 
coverage.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D122983/new/

https://reviews.llvm.org/D122983



[PATCH] D122983: [C11/C2x] Change the behavior of the implicit function declaration warning

2022-04-04 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added a comment.

The tests verify that a set of builtins do not exist when the associated feature 
flag is not present.  They sit within CodeGen because the tests were plentiful 
and it did not seem worth duplicating them.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D122983/new/

https://reviews.llvm.org/D122983



[PATCH] D122983: [C11/C2x] Change the behavior of the implicit function declaration warning

2022-04-04 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added a comment.

Thanks for this.  I can see about cleaning up the tests, but I'm still not sure 
what the advantage is.  The affected RUN lines are already `-fsyntax-only` 
tests.  Is it about where the test files live, or is there something else I 
should be considering?  The benefit of the current tests is that it's easy to 
spot holes, since a single function tests both requirements.  My fear is that 
separating the tests based on Sema/CodeGen could mean we'll miss something and 
never know.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D122983/new/

https://reviews.llvm.org/D122983



[PATCH] D122983: [C11/C2x] Change the behavior of the implicit function declaration warning

2022-04-04 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added a comment.

OK, message understood.  I'll try to get these cleaned up; I cannot say 
precisely when, but I will push for before we branch for 15 if that sounds 
sensible.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D122983/new/

https://reviews.llvm.org/D122983



[PATCH] D131580: [clang][SVE] Undefine preprocessor macro defined in

2022-08-10 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added inline comments.



Comment at: clang/utils/TableGen/SveEmitter.cpp:1285
   OS << "#endif\n\n";
+  OS << "#undef __ai\n\n";
   OS << "#endif /*__ARM_FEATURE_SVE */\n\n";

Can you also do this for `__aio`?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D131580/new/

https://reviews.llvm.org/D131580



[PATCH] D106277: [SVE] Remove the interface for getMaxVScale in favour of the IR attributes

2021-07-19 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added inline comments.



Comment at: clang/lib/CodeGen/CodeGenFunction.cpp:505-506
+  } else if (getContext().getTargetInfo().hasFeature("sve")) {
+CurFn->addFnAttr(
+llvm::Attribute::getWithVScaleRangeArgs(getLLVMContext(), 0, 16));
   }

bsmith wrote:
> Is this really what we want? Won't this enable fixed length codegen all of 
> the time?
Fixed-length codegen is tied to the minimum `vscale` value, so using `0` 
here means nothing is known about the minimum `vscale`, and thus fixed-length 
codegen will be restricted to 128 bits, as is the case when no attribute is 
specified.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106277/new/

https://reviews.llvm.org/D106277



[PATCH] D106333: [AArch64][SVE] Handle svbool_t VLST <-> VLAT/GNUT conversion

2021-07-20 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added a comment.

In D106333#2889168, @junparser wrote:

> @efriedma with this patch,  all of conversion between VLST and VLAT should 
> have same vector size(getElementType() * getElementCount()). The regression 
> in D105097  will be fixed by using bitcast 
> + vector.insert/extract directly

I hope I've not got the wrong end of the stick here but the above is our 
intention.  As in, Arm is looking at replacing the "via memory predicate 
casting" with a method that uses vector_of_i8s vector insert/extract with the 
necessary bitcasting.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106333/new/

https://reviews.llvm.org/D106333



[PATCH] D106277: [SVE] Remove the interface for getMaxVScale in favour of the IR attributes

2021-07-20 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added inline comments.



Comment at: llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp:119-131
-Optional RISCVTTIImpl::getMaxVScale() const {
-  // There is no assumption of the maximum vector length in V specification.
-  // We use the value specified by users as the maximum vector length.
-  // This function will use the assumed maximum vector length to get the
-  // maximum vscale for LoopVectorizer.
-  // If users do not specify the maximum vector length, we have no way to
-  // know whether the LoopVectorizer is safe to do or not.

bsmith wrote:
> I'm not sure that RISCV have made a commitment to use the vscale_range 
> attribute yet have they? In either case I think they should be involved in a 
> change like this.
Perhaps it's worth this patch not removing getMaxVScale just yet but rather 
just AArch64's implementation?  There would only need to be a minor change to 
LoopVectorize.cpp along the lines of `if (!MaxVScale && 
TheFunction->hasFnAttribute(Attribute::VScaleRange...`.  That way getMaxVScale 
can be removed if/when no one needs it, which I hope is not too far away.

If there is agreement to remove it then I imagine code similar to what you've 
done for SVE in CodeGenFunction.cpp will be needed for RISCV otherwise the 
patch will cause a regression in functionality.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106277/new/

https://reviews.llvm.org/D106277



[PATCH] D106333: [AArch64][SVE] Handle svbool_t VLST <-> VLAT/GNUT conversion

2021-07-21 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added inline comments.



Comment at: clang/lib/AST/ASTContext.cpp:8677
+  return Ty->getKind() == BuiltinType::SveBool
+ ? Context.getLangOpts().ArmSveVectorBits / Context.getCharWidth()
+ : Context.getLangOpts().ArmSveVectorBits;

Out of interest, is this indirection necessary? I mean, we know SVE predicates 
are exactly an eighth the size of SVE vectors, so why not just use `8`?
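For context, a compile-time sketch of the sizes in question (a minimal sketch, assuming getCharWidth() == CHAR_BIT == 8):

  #include <climits>

  constexpr unsigned ArmSveVectorBits = 512;                         // -msve-vector-bits=512
  constexpr unsigned DataVectorBits   = ArmSveVectorBits;            // e.g. svint32_t: 512 bits
  constexpr unsigned PredicateBits    = ArmSveVectorBits / CHAR_BIT; // svbool_t: 64 bits
  static_assert(PredicateBits == DataVectorBits / 8,
                "an SVE predicate is an eighth the size of an SVE vector");

  int main() { return 0; }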


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106333/new/

https://reviews.llvm.org/D106333



[PATCH] D106277: [SVE] Remove the interface for getMaxVScale in favour of the IR attributes

2021-07-29 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added a comment.

@craig.topper can you share RISCV plans around supporting vscale_range?  In 
essence we'd like to deprecate the TTI method and have LLVM IR contain all 
relevant information when it comes to interpreting vscale.

Currently the usage is minimal and so checking both interfaces is not too bad 
but there'll come a point when there's no TTI available and then only the side 
supporting vscale_range can be considered.  There's also the LTO side of things 
where relying on opt/llc flags to set register widths becomes fragile.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106277/new/

https://reviews.llvm.org/D106277



[PATCH] D121829: [clang][AArc64][SVE] Implement vector-scalar operators

2022-03-16 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added a comment.

It looks like the issue you fixed for the integer tests also needs fixing for 
the floating point variants.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D121829/new/

https://reviews.llvm.org/D121829

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D123605: Work in progress: [Sema][SVE] Move sema testing for SVE2-AES ACLE builtins

2022-04-12 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added inline comments.



Comment at: clang/test/Sema/aarch64-acle-sve2-aes.c:1
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 
-fallow-half-arguments-and-returns -fsyntax-only -std=c99 -verify 
-verify-ignore-unexpected=error %s
+

Relying on the `implicit declaration...` warning is potentially problematic and 
is what triggered this work in the first place.  Can you make the new Sema 
tests C++ ones instead?  Doing this means we can catch errors like `error: use 
of undeclared identifier 'svaesd_u8'` instead of a C99 warning.
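
For the avoidance of doubt, a minimal sketch of the kind of test I have in mind 
(the RUN line and include are illustrative; the diagnostic text is the error 
quoted above):
```
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -fsyntax-only -verify %s
// Compiled as C++ (e.g. a .cpp test file or -x c++).
#include <arm_sve.h>

svuint8_t test_svaesd_u8(svuint8_t op1, svuint8_t op2)
{
  // With +sve2 but without the AES feature the builtin is never declared,
  // so C++ gives a hard error rather than a C99 warning.
  // expected-error@+1 {{use of undeclared identifier 'svaesd_u8'}}
  return svaesd_u8(op1, op2);
}
```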



Comment at: clang/test/Sema/aarch64-acle-sve2-aes.c:5-9
+svuint8_t test_svaesd_u8(svuint8_t op1, svuint8_t op2)
+{
+  // expected-warning@+1 {{implicit declaration of function 'svaesd_u8'}}
+  return svaesd_u8(op1, op2);
+}

The original versions of these tests used the `SVE_ACLE_FUNC` macro so that the 
overloaded builtin names are also protected.  Can you do likewise for the new 
tests?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D123605/new/

https://reviews.llvm.org/D123605

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D123300: [Clang] Enable opaque pointers by default

2022-04-13 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added a comment.

We've spotted some breakages caused by this patch within the llvm test suite 
when built for AArch64-SVE.  I've got https://reviews.llvm.org/D123670 as a WIP 
fix.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D123300/new/

https://reviews.llvm.org/D123300

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D119130: [clangd] NFC: Move stdlib headers handling to Clang

2022-02-09 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added a comment.

After this commit I am seeing the link time error

  ld.lld: error: undefined symbol: clang::DeclContext::isInlineNamespace() const
  >>> referenced by StandardLibrary.cpp:118 
(/home/pmw/projects/upstream-llvm/llvm-project/clang/lib/Tooling/Inclusions/StandardLibrary.cpp:118)
  >>>   
tools/clang/lib/Tooling/Inclusions/CMakeFiles/obj.clangToolingInclusions.dir/StandardLibrary.cpp.o:(clang::tooling::stdlib::Recognizer::namespaceSymbols(clang::NamespaceDecl
 const*)::'lambda'()::operator()() const)
  
  ld.lld: error: undefined symbol: 
clang::Decl::castFromDeclContext(clang::DeclContext const*)
  >>> referenced by DeclBase.h:2562 
(/home/pmw/projects/upstream-llvm/llvm-project/clang/include/clang/AST/DeclBase.h:2562)
  >>>   
tools/clang/lib/Tooling/Inclusions/CMakeFiles/obj.clangToolingInclusions.dir/StandardLibrary.cpp.o:(clang::cast_convert_decl_context::doit(clang::DeclContext*))
  >>> referenced by DeclBase.h:2558 
(/home/pmw/projects/upstream-llvm/llvm-project/clang/include/clang/AST/DeclBase.h:2558)
  >>>   
tools/clang/lib/Tooling/Inclusions/CMakeFiles/obj.clangToolingInclusions.dir/StandardLibrary.cpp.o:(clang::cast_convert_decl_context::doit(clang::DeclContext const*))

If I revert this commit it goes away.  I've tried a fresh cmake and build and 
get the same result.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D119130/new/

https://reviews.llvm.org/D119130

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D119130: [clangd] NFC: Move stdlib headers handling to Clang

2022-02-09 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added a comment.

Yep, that did the trick. Many Thanks.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D119130/new/

https://reviews.llvm.org/D119130

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D119319: [AArch64] Emit TBAA metadata for SVE load/store intrinsics

2022-02-10 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm accepted this revision.
paulwalker-arm added a comment.
This revision is now accepted and ready to land.

I cannot say I fully understand all the connotations of this change but my gut 
feeling is that if failures occur it's likely the input program is malformed.  
Either way, we're early in the LLVM 15 development cycle so have plenty of time 
to react if necessary.




Comment at: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp:888
   if (isAllActivePredicate(Pred)) {
-Builder.CreateStore(VecOp, VecPtr);
+auto *Store = Builder.CreateStore(VecOp, VecPtr);
+Store->copyMetadata(II);

If I'm nit-picking: you use `auto` here but then choose the explicit return 
type (i.e. `CallInst`) for the MaskedStore local. Is there a reason for this or 
is it just muscle memory?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D119319/new/

https://reviews.llvm.org/D119319

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D119926: [Clang][AArch64] Enable _Float16 _Complex type

2022-02-16 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added inline comments.



Comment at: clang/include/clang/AST/ASTContext.h:1117
   CanQualType BFloat16Ty;
-  CanQualType Float16Ty; // C11 extension ISO/IEC TS 18661-3
+  CanQualType Float16Ty, Float16ComplexTy; // C11 extension ISO/IEC TS 18661-3
   CanQualType VoidPtrTy, NullPtrTy;

Is this necessary? I don't see any other `*ComplexTy` types in this file (i.e. 
there is no `DoubleComplexTy`).



Comment at: clang/lib/AST/ASTContext.cpp:1330
   InitBuiltinType(Float16Ty,   BuiltinType::Float16);
+  InitBuiltinType(Float16ComplexTy,BuiltinType::Float16);
 

As above.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D119926/new/

https://reviews.llvm.org/D119926

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D123605: [WIP][Sema][SVE] Move/simplify Sema testing for SVE ACLE builtins

2022-04-27 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added inline comments.



Comment at: clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_imm_n.cpp:25
+{
+  // expected-error-re@+1 3 {{argument value {{[0-9]+}} is outside the valid 
range [0, 7]}}
+  EXPAND_XZM_FUNC(SVE_ACLE_FUNC(svqshlu,_n_s8,,), pg, svundef_s8(), -1);

I've not seen this before; presumably it's shorthand to avoid needing to 
repeat multiple identical `expected-error` check lines?  If so, is it worth 
using this throughout the test files so that essentially only one 
`expected-error` is required per function, or does this only work here because 
the `EXPAND...` macro emits its three function calls on the same line?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D123605/new/

https://reviews.llvm.org/D123605

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D123605: [WIP][Sema][SVE] Move/simplify Sema testing for SVE ACLE builtins

2022-04-28 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added inline comments.



Comment at: clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_imm_n.cpp:25
+{
+  // expected-error-re@+1 3 {{argument value {{[0-9]+}} is outside the valid 
range [0, 7]}}
+  EXPAND_XZM_FUNC(SVE_ACLE_FUNC(svqshlu,_n_s8,,), pg, svundef_s8(), -1);

RosieSumpter wrote:
> paulwalker-arm wrote:
> > I've not seen this before, presumably it's short hand instead of needing to 
> > repeat multiple identical `expected-error` check lines?  If so, is it worth 
> > using this throughout the test files and essentially only require one 
> > `expected-error` per function or does this only work here because the 
> > `EXPAND...` macro emits its three function calls on the same line?
> Yes it lets you specify how many times you expect the diagnostic to appear, 
> but as you said it only works when the diagnostics are emitted on the same 
> line so I'm not sure there's a way to reduce the number of `expected-error` 
> lines any more than this
OK, thanks for checking.  To be honest I'm not sure why we need the 
`EXPAND_XZM_FUNC` macro given `SVE_ACLE_FUNC` worked fine before.  To my eye it 
kind of ruins the flow, but hey-ho, I'll not worry about it.

Assuming I've not screwed up, I think you're missing tests for 
`SVE_ACLE_FUNC(svrshrnb,_n_s16,,)` and `SVE_ACLE_FUNC(svrshrnt,_n_s16,,)`.



Comment at: 
clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_imm_rotation.cpp:17-40
+  // expected-error@+1 {{argument should be the value 90 or 270}}
+  SVE_ACLE_FUNC(svcadd,_s8,,)(svundef_s8(), svundef_s8(), 0);
+  // expected-error@+1 {{argument should be the value 90 or 270}}
+  SVE_ACLE_FUNC(svcadd,_u8,,)(svundef_u8(), svundef_u8(), 0);
+  // expected-error@+1 {{argument should be the value 90 or 270}}
+  SVE_ACLE_FUNC(svcadd,_s16,,)(svundef_s16(), svundef_s16(), 0);
+  // expected-error@+1 {{argument should be the value 90 or 270}}

I know we cannot test every number, but `180` seems like a reasonable mistake 
for people to make given the other complex-number instructions, so perhaps 
alternate between `0` and `180` to give a little more coverage without 
increasing the number of lines.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D123605/new/

https://reviews.llvm.org/D123605

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124998: [AArch64][SVE] Add aarch64_sve_pcs attribute to Clang

2022-05-05 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added a comment.

Just wanted to say this is not a new calling convention as such, but rather an 
existing one that is generally auto-detected based on function signature.  The 
problem we're trying to solve here is that we need a way to allow a user to 
force the calling convention when the function signature would not normally 
choose it.
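
To make the intent concrete, a purely illustrative sketch (the attribute 
spelling is taken from the patch title; the exact form is whatever this patch 
settles on):
```
// Force the SVE calling convention for a function whose signature alone
// (no SVE vector arguments or return value) would not otherwise select it.
__attribute__((aarch64_sve_pcs)) void process_block(float *data, int n);
```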


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124998/new/

https://reviews.llvm.org/D124998

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124998: [AArch64][SVE] Add aarch64_sve_pcs attribute to Clang

2022-05-05 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added a comment.

In D124998#3494127 , @aaron.ballman 
wrote:

> In D124998#3493791 , 
> @paulwalker-arm wrote:
>
>> Just wanted to say this is not a new calling convention as such, but rather 
>> an existing one that is generally auto-detected based on function signature. 
>>  The problem we're trying to solve here is that we need a way to allow a 
>> user to force the calling convention when the function signature would not 
>> normally choose it.
>
> Thanks for this information! It's still not clear to me whether there's 
> sufficient need for this extension. From this description, it sounds like 
> this will be rarely used because it's only necessary in one-off situations. 
> If that's correct, can those users make use of inline assembly instead of a 
> devoted named calling convention?

It's hard to say how often this will be used, but when it is used it will be 
fundamental to performance. I don't see inline assembly as a workable solution: 
it presents an unreasonable burden on a framework's users and will impact 
compiler optimisations.  The ACLE exists to stop developers from needing to use 
inline assembly.

You have to forgive my naivety here, but some of the calling conventions are 
target specific.  Is it possible for them to safely alias for the parts of the 
code where memory is constrained?  Or to put it another way, do the X86 and 
AArch64 calling conventions need to be uniquely identifiable?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124998/new/

https://reviews.llvm.org/D124998

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124998: [AArch64][SVE] Add aarch64_sve_pcs attribute to Clang

2022-05-10 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added a comment.

@aaron.ballman It looks like the conversation has reached a conclusion?  Given 
this is separate from what we're trying to add here, can this patch be 
unblocked?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124998/new/

https://reviews.llvm.org/D124998

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D86065: [SVE] Make ElementCount members private

2020-08-21 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added inline comments.



Comment at: llvm/include/llvm/Support/TypeSize.h:56
 
+  friend bool operator>(const ElementCount &LHS, const ElementCount &RHS) {
+assert(LHS.Scalable == RHS.Scalable &&

david-arm wrote:
> ctetreau wrote:
> > fpetrogalli wrote:
> > > I think that @ctetreau is right on 
> > > https://reviews.llvm.org/D85794#inline-793909. We should not overload a 
> > > comparison operator on this class because the set it represents cannot 
> > > be ordered.
> > > 
> > > Chris suggests an approach of writing a static function that can be used 
> > > as a comparison operator, so that we can make it explicit what kind 
> > > of comparison we are doing. 
> > In C++, it's common to overload the comparison operators for the purposes 
> > of being able to std::sort and use ordered sets. Normally, I would be OK 
> > with such usages. However, since `ElementCount` is basically a numeric 
> > type, and they only have a partial ordering, I think this is dangerous. I'm 
> > concerned that this will result in more bugs whereby somebody didn't 
> > remember that vectors can be scalable.
> > 
> > I don't have a strong opinion what the comparator function should be 
> > called, but I strongly prefer that it not be a comparison operator.
> Hi @ctetreau, yeah I understand. The reason I chose to use operators was 
> simply to be consistent with what we have already in TypeSize. Also, we have 
> existing "==" and "!=" operators in ElementCount too, although these are 
> essentially testing that two ElementCounts are identically the same or not, 
> i.e. for 2 given polynomials (a + bx) and (c + dx) we're essentially asking 
> if both a==c and b==d.
> 
> If I introduce a new comparison function, I'll probably keep the asserts in 
> for now, but in general we can do better than simply asserting if something 
> is scalable or not. For example, we know that (vscale * 4) is definitely >= 4 
> because vscale is at least 1. I'm just not sure if we have that need yet.
I think we should treat the non-equality comparison functions more like 
floating point.  What we don't want is somebody writing !GreaterThan when they 
actually mean LessThan.

Perhaps we should name the functions accordingly (i.e. ogt for 
OrderedAndGreaterThan).  We will also need matching less-than functions, since 
I can see those being useful when analysing constant insert/extract element 
indices, which stand a good chance of being a known comparison (with 0 being 
the most common index).
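
To sketch the naming idea (the function name and exact accessors are 
illustrative only):
```
#include "llvm/Support/TypeSize.h"
using namespace llvm;

// Ordered-and-greater-than, mirroring the floating-point "ogt" notion: only
// answers true when the relation is known to hold for every possible vscale.
static bool ogt(const ElementCount &LHS, const ElementCount &RHS) {
  if (LHS.isScalable() != RHS.isScalable())
    return false; // Treat mixed fixed/scalable as unordered in this sketch.
  return LHS.getKnownMinValue() > RHS.getKnownMinValue();
}
```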



CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D86065/new/

https://reviews.llvm.org/D86065

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D86065: [SVE] Make ElementCount members private

2020-08-27 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added inline comments.



Comment at: llvm/include/llvm/Support/TypeSize.h:108
+
+  bool isPowerOf2() const { return isPowerOf2_32(Min); }
 };

I don't believe this is safe.  For example, we know SVE-supported vector 
lengths only have to be a multiple of 128 bits, so for scalable vectors we 
cannot know the element count is a power of 2 unless we perform a runtime check 
(e.g. a 384-bit implementation gives vscale = 3, so `<vscale x 4 x i32>` holds 
12 elements).


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D86065/new/

https://reviews.llvm.org/D86065

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D86065: [SVE] Make ElementCount members private

2020-08-27 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added a comment.

I cannot say whether such questions make sense without a deeper investigation, 
but I can say for certain that EC.isPowerOf2 is a question we cannot answer at 
compile time.  Given this is a mechanical change I would just remove the member 
function and leave the code as is (well, change EC.Min to 
EC.getKnownMinValue()). We already know that we'll need to visit the places 
where getKnownMinValue() is used to ensure the question makes sense in the face 
of scalable vectors.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D86065/new/

https://reviews.llvm.org/D86065

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D86065: [SVE] Make ElementCount members private

2020-08-27 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm accepted this revision.
paulwalker-arm added a comment.
This revision is now accepted and ready to land.

There are probably a few .Min to .getKnownMinValue() conversions where the .Min 
could be dropped (calls to Builder.CreateVectorSplat, for example) but they can 
be tidied up as part of a proper activity to reduce the places where 
getKnownMinValue is called.  So other than my suggested update to 
EC::operator/, the patch looks good to my eye.  Please give other reviewers a 
little more time to provide further insights.




Comment at: llvm/include/llvm/Support/TypeSize.h:66
+
+  ElementCount &operator/=(unsigned RHS) {
+Min /= RHS;

If you add an assert that the divide is lossless (i.e. Min % RHS == 0) then 
asserts like:
```
assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
```
are no longer required.  Plus those places which are not checking for lossless 
division will be automatically protected.  This feels like a sensible default 
to me.  If somebody wants a truncated result, they can do the maths using 
getKnownMinValue().
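
i.e. something along these lines (sketch only, extending the snippet quoted 
above):
```
  ElementCount &operator/=(unsigned RHS) {
    assert(Min % RHS == 0 && "Element count division is not lossless!");
    Min /= RHS;
    return *this;
  }
```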


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D86065/new/

https://reviews.llvm.org/D86065

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D86065: [SVE] Make ElementCount members private

2020-08-28 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added a comment.

Can't say I agree, since people are already writing the ugly code because the 
result typically demands different handling, or they're asserting the divide 
doesn't truncate in the first place.  That said, I'm happy for there to be no 
assert as long as operator% is implemented so users can calculate the remainder 
in the expected way.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D86065/new/

https://reviews.llvm.org/D86065

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D86065: [SVE] Make ElementCount members private

2020-08-28 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added a comment.

I'm retracting my operator% request. After thinking about it and speaking with 
Dave, I just cannot see how allowing a total divide is safe for scalable 
vectors.  If you are relying on a truncating divide then special handling is 
required anyway, which is likely to be different between fixed-length and 
scalable vectors.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D86065/new/

https://reviews.llvm.org/D86065

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D86065: [SVE] Make ElementCount members private

2020-08-28 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added a comment.

To be clearer, I'm happy to defer the divide conversation until if/when we run 
into issues, so my previous acceptance still stands.  It'll be good to get the 
intent of the patch in (i.e. stopping access to internal class members) asap, 
plus any follow-up work will be a smaller, more manageable patch.  It's worth 
talking this through during the next sync call to see if we can get some 
consensus regarding what maths is and isn't allowed.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D86065/new/

https://reviews.llvm.org/D86065

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D71760: [POC][SVE] Allow code generation for fixed length vectorised loops [Patch 1/2].

2020-09-02 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm abandoned this revision.
paulwalker-arm added a comment.

The intent of this patch is now complete.  All work is available in master, 
with the exception of the hook into -msve-vector-bits, which is not necessarily 
the direction we'll take once function attributes are available.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71760/new/

https://reviews.llvm.org/D71760

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D71767: [POC][SVE] Allow code generation for fixed length vectorised loops [Patch 2/2].

2020-09-02 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm abandoned this revision.
paulwalker-arm added a comment.
Herald added a subscriber: ecnelises.

With the exception of VSELECT lowering, which is being worked under D85364 
, everything else is available in master.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71767/new/

https://reviews.llvm.org/D71767

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D89031: [SVE] Add support to vectorize_width loop pragma for scalable vectors

2020-11-18 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added a comment.

As I see it there are a bunch of pragmas that all enable vectorisation, with 
each pragma providing a unit of information.  One component of this information 
is the vectorisation factor hint provided by vectorize_width.

With the introduction of scalable vectors this hint is using the wrong datatype 
and thus needs to be updated to allow `vectorize_width(#num,[fixed|scalable])` 
and `vectorize_width([fixed|scalable])` alongside the existing 
`vectorize_width(#num)` representation, which effectively becomes an alias for 
`vectorize_width(#num, fixed)`.

Doing this means all existing usages work as expected and there's now extra 
power to better guide the chosen vectorisation factor.
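
For example (the spelling of the scalable form below follows the proposal 
above):
```
void saxpy(float *a, const float *b, int n) {
  // Existing usage, unchanged: effectively vectorize_width(4, fixed).
  #pragma clang loop vectorize_width(4)
  for (int i = 0; i < n; ++i)
    a[i] += b[i];
}

void saxpy_scalable(float *a, const float *b, int n) {
  // Proposed form: request a scalable vectorisation factor of vscale x 4.
  #pragma clang loop vectorize_width(4, scalable)
  for (int i = 0; i < n; ++i)
    a[i] += b[i];
}
```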


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D89031/new/

https://reviews.llvm.org/D89031

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D101986: [InstSimplify] Remove redundant {insert,extract}_vector intrinsic chains

2021-05-18 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added inline comments.



Comment at: llvm/lib/Analysis/InstructionSimplify.cpp:5741-5747
+// (insert_vector _, (extract_vector X, 0), 0) -> X
+unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
+Value *X = nullptr;
+if (match(SubVec, m_Intrinsic<Intrinsic::experimental_vector_extract>(
+  m_Value(X), m_Zero())) &&
+IdxN == 0 && X->getType() == ReturnType)
+  return X;

Are you sure this transform is correct?  I would have thought the following are 
safe:
```
(insert_vector X, (extract_vector X, 0), 0) -> X
(insert_vector undef, (extract_vector X, 0), 0) -> X
```
when `X->getType() == ReturnType` but nothing else.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D101986/new/

https://reviews.llvm.org/D101986

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D110258: [AArch64][Clang] Always add -tune-cpu argument to -cc1 driver

2021-09-23 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added inline comments.



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:1850
+  else
+TuneCPU = "generic";
+} else

What benefit does `-tune-cpu generic` provide?

I'm wondering if the patch can be restricted to only add `-tune-cpu` when 
`-mtune=` is specified with a real name, or with the name detected when 
"native" is specified.



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:1857-1860
+  else if (!Args.getLastArg(clang::driver::options::OPT_mcpu_EQ)) {
+CmdArgs.push_back("-tune-cpu");
+CmdArgs.push_back("generic");
+  }

See my comment above.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D110258/new/

https://reviews.llvm.org/D110258

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D96852: [clang][SVE] Remove inline keyword from arm_sve.h

2021-02-17 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added a comment.

In D96852#2568383 , @joechrisellis 
wrote:

> Speaking to @DavidTruby about this, it appears that this fix is insufficient 
> -- `inline` has important semantic meaning in C++ that means that we can't 
> simply omit the keyword here.
>
> The `inline` keyword bypasses the one-definition rule. If we have a function 
> defined in a header that isn't marked `inline`, and you include that header 
> in two different source files, then your program is ill formed because it 
> contains 2 definitions of that function. So we have to keep it for C++.

That makes sense and suggests we're missing some additional C++ testing?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D96852/new/

https://reviews.llvm.org/D96852

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D96852: [clang][SVE] Remove inline keyword from arm_sve.h

2021-02-17 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added a comment.

One observation is that arm_neon.h uses `__inline__`.  So perhaps we can just 
do likewise, and we'll also be consistent across the two ACLE headers.
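
For reference, arm_neon.h spells its helper macro roughly as follows (quoted 
from memory, so treat as approximate):
```
#define __ai static __inline__ __attribute__((__always_inline__, __nodebug__))
```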


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D96852/new/

https://reviews.llvm.org/D96852

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D103702: [AArch64][SVE] Wire up vscale_range attribute to SVE min/max vector queries

2021-06-14 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added inline comments.



Comment at: llvm/lib/Target/AArch64/AArch64Subtarget.h:298-299
+   bool LittleEndian,
+   unsigned MinSVEVectorSizeInBitsOverride = 0,
+   unsigned MaxSVEVectorSizeInBitsOverride = 0);
 

Out of interest are these defaults ever relied upon?



Comment at: llvm/lib/Target/AArch64/AArch64TargetMachine.cpp:380-386
+  assert(MinSVEVectorSize % 128 == 0 &&
+ "SVE requires vector length in multiples of 128!");
+  assert(MaxSVEVectorSize % 128 == 0 &&
+ "SVE requires vector length in multiples of 128!");
+  assert((MaxSVEVectorSize >= MinSVEVectorSize ||
+  MaxSVEVectorSize == 0) &&
+ "Minimum SVE vector size should not be larger than its maximum!");

These asserts are fine, but you'll see from the original implementations of 
`getM..SVEVectorSizeInBits` that I do not rely on the user passing the correct 
values.  Instead I always process the sizes to ensure the values of 
`MinSVEVectorSize` and `MaxSVEVectorSize` are sane.  Can you do likewise here?



Comment at: llvm/lib/Target/AArch64/AArch64TargetMachine.cpp:357
+  Attribute VScaleRangeAttr = F.getFnAttribute(Attribute::VScaleRange);
+  if (VScaleRangeAttr.isValid())
+std::tie(MinSVEVectorSize, MaxSVEVectorSize) =

bsmith wrote:
> paulwalker-arm wrote:
> > I don't know if this is possible but I feel we need a `HasSVE` like check 
> > here?
> I'm not sure this is really doable here without picking apart the feature 
> string, I think it makes more sense to just set the values and assert when 
> using the accessors without SVE enabled.
Fair enough.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D103702/new/

https://reviews.llvm.org/D103702

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D103702: [AArch64][SVE] Wire up vscale_range attribute to SVE min/max vector queries

2021-06-18 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm accepted this revision.
paulwalker-arm added inline comments.



Comment at: llvm/lib/Target/AArch64/AArch64Subtarget.cpp:350-352
 unsigned AArch64Subtarget::getMaxSVEVectorSizeInBits() const {
   assert(HasSVE && "Tried to get SVE vector length without SVE support!");
+  return MaxSVEVectorSizeInBits;

Up to you, but now that these are simple accessors is it worth having the 
implementations inlined into the header?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D103702/new/

https://reviews.llvm.org/D103702

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D106277: [SVE] Remove the interface for getMaxVScale in favour of the IR attributes

2021-08-03 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added inline comments.



Comment at: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h:134
+  unsigned getMaxNumElements(ElementCount VF,
+ const Instruction *I = nullptr) const {
 if (!VF.isScalable())

Can this parameter be a `Function*`, given there's no real link between this 
function and LLVM Instructions?





Comment at: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h:142-143
+ ->getFnAttribute(Attribute::VScaleRange)
+ .getVScaleRangeArgs()
+ .second;
+}

This can return `0`, implying there is no known maximum.  With the current code 
this means `0` will be returned instead of a sensible default.



Comment at: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp:5620
+TheFunction->getFnAttribute(Attribute::VScaleRange);
+MaxVScale = VScaleRangeAttr.getVScaleRangeArgs().second;
+  }

I think you only want to set `MaxVScale` when 
`VScaleRangeAttr.getVScaleRangeArgs().second` is non-zero.

Given this and the similar comment above, perhaps there's a need for extra 
tests that cover `vscale_range(2,0)`, for example.
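
Something along these lines is what I had in mind (sketch only, reusing the 
names from the snippet above):
```
  if (TheFunction->hasFnAttribute(Attribute::VScaleRange)) {
    unsigned AttrMax = TheFunction->getFnAttribute(Attribute::VScaleRange)
                           .getVScaleRangeArgs()
                           .second;
    if (AttrMax != 0) // A max of 0 means unbounded, so keep the default.
      MaxVScale = AttrMax;
  }
```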


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106277/new/

https://reviews.llvm.org/D106277

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D106277: [SVE] Remove the interface for getMaxVScale in favour of the IR attributes

2021-08-13 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added a comment.

Please remember to create a more representative commit message as the patch no 
longer removes getMaxVScale.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106277/new/

https://reviews.llvm.org/D106277

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D112406: [Driver][AArch64]Add driver support for neoverse-512tvb target

2021-10-26 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added a comment.

In D112406#3087191 , @dmgreen wrote:

> Thanks. If the cpu has a 512 bit total vector bandwidth, should the 
> VScaleForTuning be 1 or 2 (or higher)? llvm doesn't usually deal with total 
> bandwidth a lot, perhaps not as much as it should.
>
> @david-arm any thoughts?

The total vector bandwidth includes unrolling, so currently having 
`VScaleForTuning=1` and `MaxInterleaveFactor=4` implies 512 tvb (1 x 128-bit 
vectors x an interleave factor of 4).  If the target has >128-bit vectors then 
vector loops will likely have more work than they can handle in parallel, but 
as long as that does not negatively affect register pressure it shouldn't be a 
problem.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112406/new/

https://reviews.llvm.org/D112406

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D98030: [IR] Add vscale_range IR function attribute

2021-03-05 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added inline comments.



Comment at: llvm/docs/LangRef.rst:1998
+function. A value of 0 means unbounded. If the optional max value is 
omitted
+then max is set to the value of min.
 

sdesmalen wrote:
> Do you need to say anything about a default if the attribute is not set, e.g. 
> if `vscale_range` is not set, no assumptions are made about the range of 
> vscale and the compiler falls back on the architectural maximum (if 
> available).
When the attribute is omitted there is no implicit knowledge, and thus I'd stop 
at "... no assumptions are made about the range of vscale."

I would have thought such a condition is implicit across most attributes (i.e. 
if omitted then no extra information is available) but I guess it cannot hurt 
to be explicit.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D98030/new/

https://reviews.llvm.org/D98030

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D98487: [AArch64][SVE/NEON] Add support for FROUNDEVEN for both NEON and fixed length SVE

2021-03-15 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added a comment.

In D98487#2625673 , @bsmith wrote:

>> Why is this patch only changing int_aarch64_neon_frintn and not 
>> int_aarch64_sve_frintn?
>> Is there a particular reason to do so?
>
> Things are done slightly differently for SVE in this regard, in principle 
> yes, we could emit roundeven instead of frintn from the ACLE intrinsic, 
> however all of the other ACLE intrinsics also emit SVE specific LLVM 
> intrinsics rather than the arch-indep nodes. This patch doesn't change that 
> in order to stay consistent, if we did want to change that it should be done 
> as a separate patch that changes all of them.

@CarolineConcatto There are two levels at play here.  At the top level 
(C->LLVM) the SVE ACLE cannot use the roundeven intrinsic because that 
operation takes a single data operand, whereas for SVE the operation is 
predicated and thus also requires predicate and passthru operands (i.e. the two 
intrinsics are doing different things).  At the bottom level (CodeGen) we 
already lower scalable-vector variants of both intrinsics to 
ISD::FROUNDEVEN_MERGE_PASSTHRU, which is the "masked" version of 
ISD::FROUNDEVEN.
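
To illustrate at the source level (a minimal sketch; the ACLE spelling is the 
merging form from arm_sve.h and the lowering is as described above):
```
#include <arm_sve.h>

// The SVE operation is predicated and merging: inactive lanes take their value
// from 'inactive', which a single-operand roundeven intrinsic cannot express.
svfloat32_t round_to_even(svfloat32_t inactive, svbool_t pg, svfloat32_t op) {
  return svrintn_f32_m(inactive, pg, op);
}
```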


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D98487/new/

https://reviews.llvm.org/D98487

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D98030: [IR] Add vscale_range IR function attribute

2021-03-15 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added inline comments.



Comment at: clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c:10-14
+// CHECK-128: attributes #0 = { {{.*}} vscale_range(1,1) {{.*}} }
+// CHECK-256: attributes #0 = { {{.*}} vscale_range(2,2) {{.*}} }
+// CHECK-512: attributes #0 = { {{.*}} vscale_range(4,4) {{.*}} }
+// CHECK-1024: attributes #0 = { {{.*}} vscale_range(8,8) {{.*}} }
+// CHECK-2048: attributes #0 = { {{.*}} vscale_range(16,16) {{.*}} }

I'm happy with this, but for information you could use the same trick as in 
`llvm/test/Analysis/CostModel/AArch64/sve-fixed-length.ll` so that only a 
single CHECK and CHECK-NOT line is required.



Comment at: llvm/include/llvm/IR/Attributes.h:944
 
+  /// Add an vscale_range attribute, using the representation returned by
+  /// Attribute.getIntValue().

a



Comment at: llvm/test/Verifier/vscale_range.ll:3-4
+
+; CHECK-NOT: 'vscale_range' minimum cannot be greater than maximum
+declare i8* @a(i32) vscale_range(1, 0)
+

I think attributes.ll is a better place for this test, along with a CHECK for 
its expected output.

Kind of related: what is expected for `vscale_range(0,0)`, and does that need a 
test (positive or negative)?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D98030/new/

https://reviews.llvm.org/D98030

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D113779: [Clang] Add mfp16, mfp16fml and mdotprod flags for ARM target features.

2021-11-15 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added a comment.

Rather than adding convenience options after the fact, what about allowing 
`-march=` to be specified multiple times? The first must be in the usual 
format, with later ones required to start with `+`.  The defined parsing 
behaviour would be as if there were a single `-march` instance positioned at 
the first occurrence but containing the value of all instances combined from 
left to right.  For example `-march=armv8.4-a .. -march=+nofp16`, or perhaps 
`+=` syntax like `-march=armv8.4-a .. -march+=nofp16+nosve` is more intuitive?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113779/new/

https://reviews.llvm.org/D113779

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D113489: [AArch64][SVE] Instcombine SVE LD1/ST1 to stock LLVM IR

2021-11-15 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm accepted this revision.
paulwalker-arm added a comment.
This revision is now accepted and ready to land.

There's an issue with a `Value*` being named `VecPtrTy`, but otherwise this 
looks good to me.

I'll leave it up to you to decide whether it's worth breaking out the usage of 
update_cc_test_checks.py into a separate patch.  Normally this is a good thing, 
but given the patch is ready to go, and I imagine you wouldn't submit the "use 
update_cc_test_checks.py" patch for review, I'm not sure it's worth the effort.




Comment at: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp:736
+  Type *VecTy = II.getType();
+  Value *VecPtrTy = Builder.CreateBitCast(PtrOp, VecTy->getPointerTo());
+

The name here is wrong as this is not a type.  My guess is you meant `VecPtr`?



Comment at: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp:758
+  Value *PtrOp = II.getOperand(2);
+  Value *VecPtrTy =
+  Builder.CreateBitCast(PtrOp, VecOp->getType()->getPointerTo());

As above, I suspect this name is not what you intended.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113489/new/

https://reviews.llvm.org/D113489

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D114713: [AArch64][SVE][NEON] Add NEON-SVE-Bridge intrinsics

2021-11-29 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added inline comments.



Comment at: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp:1325
   setOperationAction(ISD::MLOAD, VT, Custom);
+  setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
 }

Can you extract this into its own patch as it's really not relevant to the rest 
of the patch and is currently missing tests.  Presumably 
`llvm/test/CodeGen/AArch64/sve-insert-vector.ll` needs updating?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D114713/new/

https://reviews.llvm.org/D114713

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D114713: [AArch64][SVE][NEON] Add NEON-SVE-Bridge intrinsics

2021-11-30 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added inline comments.



Comment at: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp:1325
   setOperationAction(ISD::MLOAD, VT, Custom);
+  setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
 }

MattDevereau wrote:
> paulwalker-arm wrote:
> > Can you extract this into its own patch as it's really not relevant to the 
> > rest of the patch and is currently missing tests.  Presumably 
> > `llvm/test/CodeGen/AArch64/sve-insert-vector.ll` needs updating?
> i've been adding some tests to assert this block of code. i've got tests for 
> `insert(vscale x n x bfloat, n x bfloat, idx)` and `insert(vscale x n x 
> bfloat, vscale x n x bfloat, idx)`.
> the n = 4 and n = 8 tests are fine, but n = 2 for `insert(vscale x 2 x 
> bfloat, 2 x bfloat, idx)`  fails an assertion. i've had a quick poke around 
> but haven't seen an obvious reason why its failing, should I worry about this 
> and spend more time on it or just submit the tests i've already got for 
> `4bf16` and `8bf16`?
Obviously it would be nice for all combinations to work, but that's not 
something you have to fix if it's not directly affecting what you need.

I've checked and it seems `2 x half` doesn't work out of the box either, so it 
sounds reasonable to me for your new `bfloat` handling to mirror only the 
existing supported `half` use cases.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D114713/new/

https://reviews.llvm.org/D114713

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D113294: [IR] Remove unbounded as possible value for vscale_range minimum

2021-12-03 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm requested changes to this revision.
paulwalker-arm added inline comments.
This revision now requires changes to proceed.



Comment at: clang/lib/Basic/Targets/AArch64.cpp:476-484
+  assert(LangOpts.VScaleMin && "vscale min must be greater than 0!");
+
+  if (LangOpts.VScaleMax)
 return std::pair(LangOpts.VScaleMin,
  LangOpts.VScaleMax);
+
   if (hasFeature("sve"))

This looks like a change of behaviour to me.  Previously the command-line flags 
would override the "sve" default, but now that only happens when the user 
specifies a maximum value.  That means the interface can no longer be used to 
force truly width-agnostic values.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113294/new/

https://reviews.llvm.org/D113294

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D113294: [IR] Remove unbounded as possible value for vscale_range minimum

2021-12-03 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added a comment.

I agree, it's the change to VScaleMin that has caused the issue.  If the 
LangOpts default can remain as 0 and you can still achieve what you're after, 
then that would be perfect.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113294/new/

https://reviews.llvm.org/D113294

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D113294: [IR] Remove unbounded as possible value for vscale_range minimum

2021-12-03 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added inline comments.



Comment at: clang/lib/Basic/Targets/AArch64.cpp:476-484
+  assert(LangOpts.VScaleMin && "vscale min must be greater than 0!");
+
+  if (LangOpts.VScaleMax)
 return std::pair(LangOpts.VScaleMin,
  LangOpts.VScaleMax);
+
   if (hasFeature("sve"))

c-rhodes wrote:
> paulwalker-arm wrote:
> > This looks like a change of behaviour to me.  Previously the command line 
> > flags would override the "sve" default but now that only happens when the 
> > user specifies a maximum value.  That means the interface can no longer be 
> > used to force truly width agnostic values.
> > This looks like a change of behaviour to me.  Previously the command line 
> > flags would override the "sve" default but now that only happens when the 
> > user specifies a maximum value.  That means the interface can no longer be 
> > used to force truly width agnostic values.
> 
> I think the issue here is the default of 1 for min would always trigger `if 
> (LangOpts.VScaleMin || LangOpts.VScaleMax)` overriding the SVE default. 
> Perhaps the default can be removed from the driver option and handled here, 
> i.e.
> 
> ```
> if (LangOpts.VScaleMin || LangOpts.VScaleMax)
> return std::pair(LangOpts.VScaleMin ? 
> LangOpts.VScaleMin : 1,
>  LangOpts.VScaleMax);
> ```
> 
> 
Is this enough?  I'm not sure it'll work, because `LangOpts.VScaleMin` defaults 
to 1 and thus you'll always end up passing the first check, unless the user 
specifically uses `-mvscale-min=0`, which they cannot because that'll result in 
`diag::err_cc1_unbounded_vscale_min`.

Do we need to link the LangOpt defaults to the attribute defaults?  I think 
that having the LangOpts default to zero is a good way to represent "value is 
unspecified" with regard to `LangOpts.VScaleMin`.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113294/new/

https://reviews.llvm.org/D113294

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D131580: [clang][SVE] Undefine preprocessor macro defined in

2022-08-12 Thread Paul Walker via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG48e1250a91d2: [clang][SVE] Undefine preprocessor macro 
defined in (authored by mgabka, committed by paulwalker-arm).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D131580/new/

https://reviews.llvm.org/D131580

Files:
  clang/utils/TableGen/SveEmitter.cpp


Index: clang/utils/TableGen/SveEmitter.cpp
===
--- clang/utils/TableGen/SveEmitter.cpp
+++ clang/utils/TableGen/SveEmitter.cpp
@@ -1282,6 +1282,8 @@
   OS << "#ifdef __cplusplus\n";
   OS << "} // extern \"C\"\n";
   OS << "#endif\n\n";
+  OS << "#undef __ai\n\n";
+  OS << "#undef __aio\n\n";
   OS << "#endif /*__ARM_FEATURE_SVE */\n\n";
   OS << "#endif /* __ARM_SVE_H */\n";
 }


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D134352: [AArch64] Add Neoverse V2 CPU support

2022-09-26 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm accepted this revision.
paulwalker-arm added inline comments.



Comment at: clang/docs/ReleaseNotes.rst:376
   them, which it cannot.
+- Add driver and tuning support for Neoverse V2 support via the flag
+  ``-mcpu=neoverse-v2``. Native detection is also supported via 
``-mcpu=native``.

`support for Neoverse V2 via the flag`?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D134352/new/

https://reviews.llvm.org/D134352

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D140983: [IRBuilder] Use canonical i64 type for insertelement index used by vector splats.

2023-01-04 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm created this revision.
Herald added subscribers: frasercrmck, luismarques, apazos, sameer.abuasal, 
s.egerton, dmgreen, Jim, jocewei, PkmX, arphaman, the_o, brucehoult, 
MartinMosbeck, rogfer01, edward-jones, zzheng, jrtc27, niosHD, sabuasal, 
simoncook, johnrusso, rbar, asb, kbarton, hiraditya, nemanjai.
Herald added a project: All.
paulwalker-arm requested review of this revision.
Herald added subscribers: llvm-commits, cfe-commits, pcwang-thead, MaskRay.
Herald added projects: clang, LLVM.

Instcombine prefers this canonical form (see getPreferredVectorIndex),
as does IRBuilder when passing the index as an integer, so we may as
well use the preferred form from creation.

NOTE: All test changes are mechanical with nothing else expected
beyond a change of index type from i32 to i64.
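
For illustration, the change amounts to the following (sketch; the exact IR 
depends on the vector type and insertion point):
```
// The IRBuilder call is unchanged; only the constant index type it emits
// for the initial insertelement of the splat differs.
Value *Splat = Builder.CreateVectorSplat(EC, Scalar);
// Before: insertelement <vscale x 4 x i32> poison, i32 %scalar, i32 0
// After:  insertelement <vscale x 4 x i32> poison, i32 %scalar, i64 0
```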


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D140983

Files:
  clang/test/CodeGen/PowerPC/builtins-ppc-p10vector.c
  clang/test/CodeGen/SystemZ/zvector.c
  clang/test/CodeGen/aarch64-sve-vector-arith-ops.c
  clang/test/CodeGen/aarch64-sve-vector-shift-ops.c
  clang/test/CodeGen/aarch64-sve-vls-arith-ops.c
  clang/test/CodeGen/aarch64-sve-vls-shift-ops.c
  clang/test/CodeGen/arm-mve-intrinsics/compare.c
  clang/test/CodeGen/arm-mve-intrinsics/cplusplus.cpp
  clang/test/CodeGen/arm-mve-intrinsics/dup.c
  clang/test/CodeGen/arm-mve-intrinsics/ternary.c
  clang/test/CodeGen/arm-mve-intrinsics/vaddq.c
  clang/test/CodeGen/arm-mve-intrinsics/vhaddq.c
  clang/test/CodeGen/arm-mve-intrinsics/vhsubq.c
  clang/test/CodeGen/arm-mve-intrinsics/vmulq.c
  clang/test/CodeGen/arm-mve-intrinsics/vqaddq.c
  clang/test/CodeGen/arm-mve-intrinsics/vqdmulhq.c
  clang/test/CodeGen/arm-mve-intrinsics/vqdmullbq.c
  clang/test/CodeGen/arm-mve-intrinsics/vqdmulltq.c
  clang/test/CodeGen/arm-mve-intrinsics/vqrdmulhq.c
  clang/test/CodeGen/arm-mve-intrinsics/vqsubq.c
  clang/test/CodeGen/arm-mve-intrinsics/vsubq.c
  clang/test/CodeGen/matrix-type-operators-fast-math.c
  clang/test/CodeGen/matrix-type-operators.c
  clang/test/CodeGen/vecshift.c
  clang/test/CodeGenCXX/aarch64-sve-vector-conditional-op.cpp
  clang/test/CodeGenCXX/ext-int.cpp
  clang/test/CodeGenCXX/ext-vector-type-conditional.cpp
  clang/test/CodeGenCXX/matrix-type-operators.cpp
  clang/test/CodeGenCXX/vector-size-conditional.cpp
  clang/test/CodeGenCXX/vector-splat-conversion.cpp
  clang/test/CodeGenOpenCL/bool_cast.cl
  llvm/lib/IR/Constants.cpp
  llvm/lib/IR/IRBuilder.cpp
  llvm/test/CodeGen/Generic/expand-vp-gather-scatter.ll
  llvm/test/CodeGen/Generic/expand-vp-load-store.ll
  llvm/test/CodeGen/Generic/expand-vp.ll
  llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll
  llvm/test/Transforms/CodeGenPrepare/AArch64/gather-scatter-opt-inseltpoison.ll
  llvm/test/Transforms/CodeGenPrepare/AArch64/gather-scatter-opt.ll
  llvm/test/Transforms/ConstraintElimination/geps-ptrvector.ll
  llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll
  llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmul-idempotency.ll
  llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-mul-idempotency.ll
  llvm/test/Transforms/InstCombine/div.ll
  llvm/test/Transforms/InstCombine/fdiv.ll
  llvm/test/Transforms/InstCombine/fmul.ll
  llvm/test/Transforms/InstCombine/select.ll
  llvm/test/Transforms/InstCombine/sub.ll
  llvm/test/Transforms/InstCombine/zext-ctlz-trunc-to-ctlz-add.ll
  llvm/test/Transforms/InstSimplify/ConstProp/vscale-inseltpoison.ll
  llvm/test/Transforms/InstSimplify/ConstProp/vscale.ll
  llvm/test/Transforms/InstSimplify/cmp-vec-fast-path.ll
  llvm/test/Transforms/InstSimplify/gep.ll
  llvm/test/Transforms/InstSimplify/vscale-inseltpoison.ll
  llvm/test/Transforms/InstSimplify/vscale.ll
  
llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
  
llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll
  llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
  
llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll
  
llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll
  
llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll
  llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll
  llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
  llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll
  llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
  llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll
  llvm/test/Transfo

[PATCH] D140983: [IRBuilder] Use canonical i64 type for insertelement index used by vector splats.

2023-01-04 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm updated this revision to Diff 486284.
paulwalker-arm added a comment.
Herald added subscribers: Moerafaat, zero9178, bzcheeseman, awarzynski, 
sdasgup3, wenzhicui, wrengr, cota, teijeong, rdzhabarov, tatianashp, msifontes, 
jurahul, Kayjukh, grosul1, Joonsoo, stephenneuendorffer, liufengdb, aartbik, 
mgester, arpith-jacob, nicolasvasilache, antiagainst, shauheen, rriddle, 
mehdi_amini.
Herald added a reviewer: aartbik.
Herald added a reviewer: ftynse.
Herald added a reviewer: dcaballe.
Herald added a project: MLIR.

Update MLIR test.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D140983/new/

https://reviews.llvm.org/D140983

Files:
  clang/test/CodeGen/PowerPC/builtins-ppc-p10vector.c
  clang/test/CodeGen/SystemZ/zvector.c
  clang/test/CodeGen/aarch64-sve-vector-arith-ops.c
  clang/test/CodeGen/aarch64-sve-vector-bitwise-ops.c
  clang/test/CodeGen/aarch64-sve-vector-shift-ops.c
  clang/test/CodeGen/aarch64-sve-vls-arith-ops.c
  clang/test/CodeGen/aarch64-sve-vls-shift-ops.c
  clang/test/CodeGen/arm-mve-intrinsics/compare.c
  clang/test/CodeGen/arm-mve-intrinsics/cplusplus.cpp
  clang/test/CodeGen/arm-mve-intrinsics/dup.c
  clang/test/CodeGen/arm-mve-intrinsics/ternary.c
  clang/test/CodeGen/arm-mve-intrinsics/vaddq.c
  clang/test/CodeGen/arm-mve-intrinsics/vhaddq.c
  clang/test/CodeGen/arm-mve-intrinsics/vhsubq.c
  clang/test/CodeGen/arm-mve-intrinsics/vmulq.c
  clang/test/CodeGen/arm-mve-intrinsics/vqaddq.c
  clang/test/CodeGen/arm-mve-intrinsics/vqdmulhq.c
  clang/test/CodeGen/arm-mve-intrinsics/vqdmullbq.c
  clang/test/CodeGen/arm-mve-intrinsics/vqdmulltq.c
  clang/test/CodeGen/arm-mve-intrinsics/vqrdmulhq.c
  clang/test/CodeGen/arm-mve-intrinsics/vqsubq.c
  clang/test/CodeGen/arm-mve-intrinsics/vsubq.c
  clang/test/CodeGen/matrix-type-operators-fast-math.c
  clang/test/CodeGen/matrix-type-operators.c
  clang/test/CodeGen/vecshift.c
  clang/test/CodeGenCXX/aarch64-sve-vector-conditional-op.cpp
  clang/test/CodeGenCXX/ext-int.cpp
  clang/test/CodeGenCXX/ext-vector-type-conditional.cpp
  clang/test/CodeGenCXX/matrix-type-operators.cpp
  clang/test/CodeGenCXX/vector-size-conditional.cpp
  clang/test/CodeGenCXX/vector-splat-conversion.cpp
  clang/test/CodeGenOpenCL/bool_cast.cl
  llvm/lib/IR/Constants.cpp
  llvm/lib/IR/IRBuilder.cpp
  llvm/test/CodeGen/Generic/expand-vp-gather-scatter.ll
  llvm/test/CodeGen/Generic/expand-vp-load-store.ll
  llvm/test/CodeGen/Generic/expand-vp.ll
  llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll
  llvm/test/Transforms/CodeGenPrepare/AArch64/gather-scatter-opt-inseltpoison.ll
  llvm/test/Transforms/CodeGenPrepare/AArch64/gather-scatter-opt.ll
  llvm/test/Transforms/ConstraintElimination/geps-ptrvector.ll
  llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll
  llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmul-idempotency.ll
  llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-mul-idempotency.ll
  llvm/test/Transforms/InstCombine/div.ll
  llvm/test/Transforms/InstCombine/fdiv.ll
  llvm/test/Transforms/InstCombine/fmul.ll
  llvm/test/Transforms/InstCombine/select.ll
  llvm/test/Transforms/InstCombine/sub.ll
  llvm/test/Transforms/InstCombine/zext-ctlz-trunc-to-ctlz-add.ll
  llvm/test/Transforms/InstSimplify/ConstProp/vscale-inseltpoison.ll
  llvm/test/Transforms/InstSimplify/ConstProp/vscale.ll
  llvm/test/Transforms/InstSimplify/cmp-vec-fast-path.ll
  llvm/test/Transforms/InstSimplify/gep.ll
  llvm/test/Transforms/InstSimplify/vscale-inseltpoison.ll
  llvm/test/Transforms/InstSimplify/vscale.ll
  llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
  llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll
  llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
  llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll
  llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll
  llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll
  llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll
  llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
  llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll
  llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
  llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll
  llvm/test/Transforms/

[PATCH] D140983: [IRBuilder] Use canonical i64 type for insertelement index used by vector splats.

2023-01-04 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm updated this revision to Diff 486332.
paulwalker-arm added a comment.
Herald added a reviewer: bollu.

Update poly tests.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D140983/new/

https://reviews.llvm.org/D140983

Files:
  clang/test/CodeGen/PowerPC/builtins-ppc-p10vector.c
  clang/test/CodeGen/SystemZ/zvector.c
  clang/test/CodeGen/aarch64-sve-vector-arith-ops.c
  clang/test/CodeGen/aarch64-sve-vector-bitwise-ops.c
  clang/test/CodeGen/aarch64-sve-vector-shift-ops.c
  clang/test/CodeGen/aarch64-sve-vls-arith-ops.c
  clang/test/CodeGen/aarch64-sve-vls-shift-ops.c
  clang/test/CodeGen/arm-mve-intrinsics/compare.c
  clang/test/CodeGen/arm-mve-intrinsics/cplusplus.cpp
  clang/test/CodeGen/arm-mve-intrinsics/dup.c
  clang/test/CodeGen/arm-mve-intrinsics/ternary.c
  clang/test/CodeGen/arm-mve-intrinsics/vaddq.c
  clang/test/CodeGen/arm-mve-intrinsics/vhaddq.c
  clang/test/CodeGen/arm-mve-intrinsics/vhsubq.c
  clang/test/CodeGen/arm-mve-intrinsics/vmulq.c
  clang/test/CodeGen/arm-mve-intrinsics/vqaddq.c
  clang/test/CodeGen/arm-mve-intrinsics/vqdmulhq.c
  clang/test/CodeGen/arm-mve-intrinsics/vqdmullbq.c
  clang/test/CodeGen/arm-mve-intrinsics/vqdmulltq.c
  clang/test/CodeGen/arm-mve-intrinsics/vqrdmulhq.c
  clang/test/CodeGen/arm-mve-intrinsics/vqsubq.c
  clang/test/CodeGen/arm-mve-intrinsics/vsubq.c
  clang/test/CodeGen/matrix-type-operators-fast-math.c
  clang/test/CodeGen/matrix-type-operators.c
  clang/test/CodeGen/vecshift.c
  clang/test/CodeGenCXX/aarch64-sve-vector-conditional-op.cpp
  clang/test/CodeGenCXX/ext-int.cpp
  clang/test/CodeGenCXX/ext-vector-type-conditional.cpp
  clang/test/CodeGenCXX/matrix-type-operators.cpp
  clang/test/CodeGenCXX/vector-size-conditional.cpp
  clang/test/CodeGenCXX/vector-splat-conversion.cpp
  clang/test/CodeGenOpenCL/bool_cast.cl
  llvm/lib/IR/Constants.cpp
  llvm/lib/IR/IRBuilder.cpp
  llvm/test/CodeGen/Generic/expand-vp-gather-scatter.ll
  llvm/test/CodeGen/Generic/expand-vp-load-store.ll
  llvm/test/CodeGen/Generic/expand-vp.ll
  llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll
  llvm/test/Transforms/CodeGenPrepare/AArch64/gather-scatter-opt-inseltpoison.ll
  llvm/test/Transforms/CodeGenPrepare/AArch64/gather-scatter-opt.ll
  llvm/test/Transforms/ConstraintElimination/geps-ptrvector.ll
  llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll
  llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmul-idempotency.ll
  llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-mul-idempotency.ll
  llvm/test/Transforms/InstCombine/div.ll
  llvm/test/Transforms/InstCombine/fdiv.ll
  llvm/test/Transforms/InstCombine/fmul.ll
  llvm/test/Transforms/InstCombine/select.ll
  llvm/test/Transforms/InstCombine/sub.ll
  llvm/test/Transforms/InstCombine/zext-ctlz-trunc-to-ctlz-add.ll
  llvm/test/Transforms/InstSimplify/ConstProp/vscale-inseltpoison.ll
  llvm/test/Transforms/InstSimplify/ConstProp/vscale.ll
  llvm/test/Transforms/InstSimplify/cmp-vec-fast-path.ll
  llvm/test/Transforms/InstSimplify/gep.ll
  llvm/test/Transforms/InstSimplify/vscale-inseltpoison.ll
  llvm/test/Transforms/InstSimplify/vscale.ll
  llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
  llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll
  llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
  llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll
  llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll
  llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll
  llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll
  llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
  llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll
  llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
  llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-option.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll
  llvm/test/Tra

[PATCH] D141056: [SVE][CGBuiltins] Remove need for instcombine from ACLE tests.

2023-01-05 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm created this revision.
Herald added subscribers: ctetreau, psnobl, arphaman, kristof.beyls, tschuett.
Herald added a reviewer: efriedma.
Herald added a project: All.
paulwalker-arm requested review of this revision.
Herald added subscribers: cfe-commits, alextsao1999.
Herald added a project: clang.

The SVE builtin tests rely on optimisations to remove clutter from
the resulting IR that is not relevant to the tests. However, with
the increasing number of target intrinsic combines the clang tests
are moving further away from verifying what is relevant to clang.

During early SVE (or rather scalable vector) bringup, we chose to
mitigate bugs by minimising our usage of LLVM IR instructions, and
then later implemented the combines to lower the calls to generic IR
once scalable vector support had matured. With the mitigations no
longer required and the combines mostly trivial, I have moved the
logic into CGBuiltins, which allows the existing tests to remain
unchanged once they stop using instcombine.

The optimisations include:

- Using shifts in place of multiplies by power-of-two values.
- Don't emit getelementptrs when offset is zero.
- Use IR based vector splats rather than calls to dup_x.
- Use IR based vector selects rather than calls to sel.
- Use i64 based indices for insertelement.
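
A minimal sketch of the IRBuilder idioms the first and last points map
onto (function names and structure here are illustrative, not the actual
CGBuiltin.cpp changes):

  #include "llvm/IR/IRBuilder.h"
  #include "llvm/Support/MathExtras.h"

  // Illustrative: scale an index by a power-of-two element size using a
  // shift rather than a multiply.
  llvm::Value *scaleIndex(llvm::IRBuilder<> &Builder, llvm::Value *Idx,
                          unsigned BytesPerElt) {
    if (BytesPerElt > 1)
      Idx = Builder.CreateShl(Idx, llvm::Log2_32(BytesPerElt));
    return Idx;
  }

  // Illustrative: emit an IR based splat; after D140983 the insertelement
  // produced by CreateVectorSplat uses a canonical i64 zero index.
  llvm::Value *splat(llvm::IRBuilder<> &Builder, llvm::ElementCount EC,
                     llvm::Value *Scalar) {
    return Builder.CreateVectorSplat(EC, Scalar);
  }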

The test changes are the result of "sed -i -e 's/instcombine,//'",
with the exception of acle_sve_dupq.c, which required regeneration
due to its previous reliance on a zext->trunc->zext combine.

The following tests still rely on instcombine because they require
changes beyond CGBuiltin.cpp:

  CodeGen/aarch64-sve-intrinsics/acle_sve_clasta.c
  CodeGen/aarch64-sve-intrinsics/acle_sve_clastb.c
  CodeGen/aarch64-sve-intrinsics/acle_sve_cntb.c
  CodeGen/aarch64-sve-intrinsics/acle_sve_cntd.c
  CodeGen/aarch64-sve-intrinsics/acle_sve_cnth.c
  CodeGen/aarch64-sve-intrinsics/acle_sve_cntw.c
  CodeGen/aarch64-sve-intrinsics/acle_sve_dup-bfloat.c
  CodeGen/aarch64-sve-intrinsics/acle_sve_dup.c
  CodeGen/aarch64-sve-intrinsics/acle_sve_ld1-bfloat.c
  CodeGen/aarch64-sve-intrinsics/acle_sve_ld1.c
  CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sb.c
  CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sh.c
  CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sw.c
  CodeGen/aarch64-sve-intrinsics/acle_sve_ld1ub.c
  CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uh.c
  CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uw.c
  CodeGen/aarch64-sve-intrinsics/acle_sve_len-bfloat.c
  CodeGen/aarch64-sve-intrinsics/acle_sve_len.c
  CodeGen/aarch64-sve-intrinsics/acle_sve_rdffr.c
  CodeGen/aarch64-sve-intrinsics/acle_sve_sel-bfloat.c
  CodeGen/aarch64-sve-intrinsics/acle_sve_sel.c
  CodeGen/aarch64-sve-intrinsics/acle_sve_st1-bfloat.c
  CodeGen/aarch64-sve-intrinsics/acle_sve_st1.c
  CodeGen/aarch64-sve-intrinsics/acle_sve_st1b.c
  CodeGen/aarch64-sve-intrinsics/acle_sve_st1h.c
  CodeGen/aarch64-sve-intrinsics/acle_sve_st1w.c

Tests within aarch64-sve2-intrinsics don't use opt but instead use
-O1 to clean up their output. These tests remain unchanged and will
be visited by a later patch.

Depends on D140983 


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D141056

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_abd.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_abs.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_acge.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_acgt.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_acle.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_aclt.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_add.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_adda.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_addv.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_adrb.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_adrd.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_adrh.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_adrw.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_and.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_andv.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_asr.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_asrd.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfdot.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalb.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalt.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmmla.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bic.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brka.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brkb.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brkn.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brkpa.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brkpb.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cadd.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clasta

[PATCH] D141240: [SVE][Builtins] Add metadata to intrinsic calls for builtins that don't define the result of inactive lanes.

2023-01-08 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm created this revision.
Herald added subscribers: ctetreau, psnobl, hiraditya, tschuett.
Herald added a reviewer: efriedma.
Herald added a project: All.
paulwalker-arm requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.

The ACLE for SVE defines a repeating set of builtins that allow the
result for inactive lanes to be zeroed (Z), copied from an input
operand (M) or left undefined (X).  When lowering these builtins we
lose the semantics of the undefined variants because, to keep the
intrinsic count down, we chose to treat them as M forms.

This largely makes sense because in the majority of instances only
the M form is backed by a real instruction.  This does mean we miss
out on some optimisation opportunities and so this patch introduces
metadata to the intrinsic calls that allow us to represent the cases
where an M form can be considered to be an X form.  This metadata is
freely ignorable because copying the inactive lanes from an input
operand is a valid option to represent an undefined value, and
matches the behaviour before this patch.

To demonstrate the metadata's usage this patch includes a trivial
optimisation so that svadd_x emits the unpredicated variant of ADD
as expected.
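
On the clang side this amounts to tagging the call emitted for an _x
builtin with an empty metadata node.  A minimal sketch, assuming the
llvm::CallInst for the builtin is held in a variable named Call (the
name is illustrative, not the exact CGBuiltin.cpp code):

  // Mark the intrinsic call as not requiring defined inactive lanes.
  Call->setMetadata("inactive_lanes_undefined",
                    llvm::MDNode::get(Call->getContext(), {}));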

NOTE: I did investigate representing the undefined lanes using a
select on the governing predicate but this proved a poor design
because optimisations became order sensitive, the extra IR made use
count protection harder to handle and the select instruction itself
has strict rules relating to poison that hampered the intent of this
change.



NOTE: All the existing tests pass without regeneration, so to keep
the reviewed patch small I only regenerated one of the tests to show
the effect.  If agreeable I'll regenerate all the other tests just
before landing the patch.

Depends on D141056 


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D141240

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_add.c
  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
  llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-unpredicate.ll

Index: llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-unpredicate.ll
===
--- /dev/null
+++ llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-unpredicate.ll
@@ -0,0 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define <vscale x 4 x i32> @unpredicate_add_x(<vscale x 4 x i1> %p, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: @unpredicate_add_x(
+; CHECK-NEXT:    [[OP1:%.*]] = add <vscale x 4 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[OP1]]
+;
+  %op = tail call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> %p, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b), !inactive_lanes_undefined !0
+  ret <vscale x 4 x i32> %op
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+
+attributes #0 = { "target-features"="+sve" }
+
+!0 = !{}
Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1203,6 +1203,16 @@
Intrinsic::aarch64_sve_mad>(
   IC, II, false))
 return MAD;
+
+  // The predicate is redundant if we don't care about inactive lanes.
+  if (II.getIntrinsicID() == Intrinsic::aarch64_sve_add &&
+      II.hasMetadata("inactive_lanes_undefined")) {
+    auto *UnpredShift =
+        BinaryOperator::Create(Instruction::Add, II.getArgOperand(1),
+                               II.getArgOperand(2), II.getName(), &II);
+    return IC.replaceInstUsesWith(II, UnpredShift);
+  }
+
   return instCombineSVEVectorBinOp(IC, II);
 }
 
Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_add.c
===
--- clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_add.c
+++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_add.c
@@ -296,12 +296,12 @@
 
 // CHECK-LABEL: @test_svadd_s8_x(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]]), !inactive_lanes_undefined !2
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z15test_svadd_s8_xu10__SVBool_tu10__SVInt8_tu10__SVInt8_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]]), !inactive_lanes_undefined !2
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 svint8_t test_svadd_s8_

[PATCH] D141240: [SVE][Builtins] Add metadata to intrinsic calls for builtins that don't define the result of inactive lanes.

2023-01-08 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm updated this revision to Diff 487247.
paulwalker-arm added a comment.

Fixed typo.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D141240/new/

https://reviews.llvm.org/D141240

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_add.c
  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
  llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-unpredicate.ll

Index: llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-unpredicate.ll
===
--- /dev/null
+++ llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-unpredicate.ll
@@ -0,0 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define <vscale x 4 x i32> @unpredicate_add_x(<vscale x 4 x i1> %p, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: @unpredicate_add_x(
+; CHECK-NEXT:    [[OP1:%.*]] = add <vscale x 4 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[OP1]]
+;
+  %op = tail call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> %p, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b), !inactive_lanes_undefined !0
+  ret <vscale x 4 x i32> %op
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+
+attributes #0 = { "target-features"="+sve" }
+
+!0 = !{}
Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1203,6 +1203,16 @@
Intrinsic::aarch64_sve_mad>(
   IC, II, false))
 return MAD;
+
+  // The predicate is redundant if we don't care about inactive lanes.
+  if (II.getIntrinsicID() == Intrinsic::aarch64_sve_add &&
+      II.hasMetadata("inactive_lanes_undefined")) {
+    auto *UnpredAdd =
+        BinaryOperator::Create(Instruction::Add, II.getArgOperand(1),
+                               II.getArgOperand(2), II.getName(), &II);
+    return IC.replaceInstUsesWith(II, UnpredAdd);
+  }
+
   return instCombineSVEVectorBinOp(IC, II);
 }
 
Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_add.c
===
--- clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_add.c
+++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_add.c
@@ -296,12 +296,12 @@
 
 // CHECK-LABEL: @test_svadd_s8_x(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]]), !inactive_lanes_undefined !2
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z15test_svadd_s8_xu10__SVBool_tu10__SVInt8_tu10__SVInt8_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]]), !inactive_lanes_undefined !2
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 svint8_t test_svadd_s8_x(svbool_t pg, svint8_t op1, svint8_t op2)
@@ -312,13 +312,13 @@
 // CHECK-LABEL: @test_svadd_s16_x(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]]), !inactive_lanes_undefined !2
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z16test_svadd_s16_xu10__SVBool_tu11__SVInt16_tu11__SVInt16_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]]), !inactive_lanes_undefined !2
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
 svint16_t test_svadd_s16_x(svbool_t pg, svint16_t op1, svint16_t op2)
@@ -329,13 +329,13 @@
 // CHECK-LABEL: @test_svadd_s32_x(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]]), !inactive_lanes_undefined !2
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z16test_svadd_s32_xu10__SVBool_tu11__SVInt32_tu11__SVInt32_t(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:[[TMP0:%.*]] = tail call  @llv

[PATCH] D140983: [IRBuilder] Use canonical i64 type for insertelement index used by vector splats.

2023-01-11 Thread Paul Walker via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGeae26b6640af: [IRBuilder] Use canonical i64 type for 
insertelement index used by vector… (authored by paulwalker-arm).

Changed prior to commit:
  https://reviews.llvm.org/D140983?vs=486332&id=488185#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D140983/new/

https://reviews.llvm.org/D140983

Files:
  clang/test/CodeGen/PowerPC/builtins-ppc-p10vector.c
  clang/test/CodeGen/SystemZ/zvector.c
  clang/test/CodeGen/aarch64-sve-vector-arith-ops.c
  clang/test/CodeGen/aarch64-sve-vector-bitwise-ops.c
  clang/test/CodeGen/aarch64-sve-vector-shift-ops.c
  clang/test/CodeGen/aarch64-sve-vls-arith-ops.c
  clang/test/CodeGen/aarch64-sve-vls-shift-ops.c
  clang/test/CodeGen/arm-mve-intrinsics/compare.c
  clang/test/CodeGen/arm-mve-intrinsics/cplusplus.cpp
  clang/test/CodeGen/arm-mve-intrinsics/dup.c
  clang/test/CodeGen/arm-mve-intrinsics/ternary.c
  clang/test/CodeGen/arm-mve-intrinsics/vaddq.c
  clang/test/CodeGen/arm-mve-intrinsics/vhaddq.c
  clang/test/CodeGen/arm-mve-intrinsics/vhsubq.c
  clang/test/CodeGen/arm-mve-intrinsics/vmulq.c
  clang/test/CodeGen/arm-mve-intrinsics/vqaddq.c
  clang/test/CodeGen/arm-mve-intrinsics/vqdmulhq.c
  clang/test/CodeGen/arm-mve-intrinsics/vqdmullbq.c
  clang/test/CodeGen/arm-mve-intrinsics/vqdmulltq.c
  clang/test/CodeGen/arm-mve-intrinsics/vqrdmulhq.c
  clang/test/CodeGen/arm-mve-intrinsics/vqsubq.c
  clang/test/CodeGen/arm-mve-intrinsics/vsubq.c
  clang/test/CodeGen/matrix-type-operators-fast-math.c
  clang/test/CodeGen/matrix-type-operators.c
  clang/test/CodeGen/vecshift.c
  clang/test/CodeGenCXX/aarch64-sve-vector-conditional-op.cpp
  clang/test/CodeGenCXX/ext-int.cpp
  clang/test/CodeGenCXX/ext-vector-type-conditional.cpp
  clang/test/CodeGenCXX/matrix-type-operators.cpp
  clang/test/CodeGenCXX/vector-size-conditional.cpp
  clang/test/CodeGenCXX/vector-splat-conversion.cpp
  clang/test/CodeGenOpenCL/bool_cast.cl
  llvm/lib/IR/Constants.cpp
  llvm/lib/IR/IRBuilder.cpp
  llvm/test/CodeGen/Generic/expand-vp-gather-scatter.ll
  llvm/test/CodeGen/Generic/expand-vp-load-store.ll
  llvm/test/CodeGen/Generic/expand-vp.ll
  llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll
  llvm/test/Transforms/CodeGenPrepare/AArch64/gather-scatter-opt-inseltpoison.ll
  llvm/test/Transforms/CodeGenPrepare/AArch64/gather-scatter-opt.ll
  llvm/test/Transforms/ConstraintElimination/geps-ptrvector.ll
  llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll
  llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmul-idempotency.ll
  llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-mul-idempotency.ll
  llvm/test/Transforms/InstCombine/div.ll
  llvm/test/Transforms/InstCombine/fdiv.ll
  llvm/test/Transforms/InstCombine/fmul.ll
  llvm/test/Transforms/InstCombine/select.ll
  llvm/test/Transforms/InstCombine/sub.ll
  llvm/test/Transforms/InstCombine/zext-ctlz-trunc-to-ctlz-add.ll
  llvm/test/Transforms/InstSimplify/ConstProp/vscale-inseltpoison.ll
  llvm/test/Transforms/InstSimplify/ConstProp/vscale.ll
  llvm/test/Transforms/InstSimplify/cmp-vec-fast-path.ll
  llvm/test/Transforms/InstSimplify/gep.ll
  llvm/test/Transforms/InstSimplify/vscale-inseltpoison.ll
  llvm/test/Transforms/InstSimplify/vscale.ll
  llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
  llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll
  llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
  llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll
  llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll
  llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll
  llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll
  llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
  llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll
  llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
  llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll
  llvm/test/Tran

[PATCH] D127762: [Clang][AArch64] Add ACLE attributes for SME.

2022-11-16 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added a comment.

Hi @rsandifo-arm , what are your thoughts on Aaron's observations? My
interpretation is that Arm originally figured the distinction between keywords
and gnu attributes was minimal and thus using our previous norms made most
sense. This is not my world so my understanding is somewhat naive, but it does
sound like Aaron has raised some good reasons why keywords are a better design
decision.  Do you agree? I wonder if it's worth having this conversation on
https://github.com/ARM-software/abi-aa/pull/123 (unless Richard has a better
link) so we can capture the rationale for any future design changes.
@aaron.ballman, are you ok with this or do you prefer to keep the design
conversation linked to this clang implementation?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D127762/new/

https://reviews.llvm.org/D127762

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D142065: [SVE] Fix incorrect lowering of predicate permute builtins.

2023-01-24 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added inline comments.



Comment at: llvm/lib/Target/AArch64/SVEInstrFormats.td:6361
+  def : SVE_2_Op_Pat(NAME # 
_S)>;
+  def : SVE_2_Op_Pat(NAME # 
_D)>;
 }

peterwaller-arm wrote:
> Out of interest, is there a good reason to handle the nxv16 pattern case 
> differently in the `I` multiclass args? Written this way at a glance it looks 
> like it is missing.
My reasoning was that the pattern within the instruction class is mandatory for
correct clang builtin support, so I figured it should take priority.  That
means extra patterns are only required for the unpacked cases, which are
optional and only added where they have value during code generation.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D142065/new/

https://reviews.llvm.org/D142065

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D142065: [SVE] Fix incorrect lowering of predicate permute builtins.

2023-01-26 Thread Paul Walker via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG26b79ca3fafc: [SVE] Fix incorrect lowering of predicate 
permute builtins. (authored by paulwalker-arm).

Changed prior to commit:
  https://reviews.llvm.org/D142065?vs=490332&id=492387#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D142065/new/

https://reviews.llvm.org/D142065

Files:
  clang/include/clang/Basic/arm_sve.td
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rev.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_trn1.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_trn2.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_uzp1.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_uzp2.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_zip1.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_zip2.c
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
===
--- llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
@@ -942,8 +942,8 @@
 ; REV
 ;
 
-define  @rev_b8(  %a) {
-; CHECK-LABEL: rev_b8:
+define  @rev_nxv16i1( %a) {
+; CHECK-LABEL: rev_nxv16i1:
 ; CHECK:   // %bb.0:
 ; CHECK-NEXT:rev p0.b, p0.b
 ; CHECK-NEXT:ret
@@ -951,8 +951,8 @@
   ret  %res
 }
 
-define  @rev_b16( %a) {
-; CHECK-LABEL: rev_b16:
+define  @rev_nxv8i1( %a) {
+; CHECK-LABEL: rev_nxv8i1:
 ; CHECK:   // %bb.0:
 ; CHECK-NEXT:rev p0.h, p0.h
 ; CHECK-NEXT:ret
@@ -960,8 +960,8 @@
   ret  %res
 }
 
-define  @rev_b32( %a) {
-; CHECK-LABEL: rev_b32:
+define  @rev_nxv4i1( %a) {
+; CHECK-LABEL: rev_nxv4i1:
 ; CHECK:   // %bb.0:
 ; CHECK-NEXT:rev p0.s, p0.s
 ; CHECK-NEXT:ret
@@ -969,8 +969,8 @@
   ret  %res
 }
 
-define  @rev_b64( %a) {
-; CHECK-LABEL: rev_b64:
+define  @rev_nxv2i1( %a) {
+; CHECK-LABEL: rev_nxv2i1:
 ; CHECK:   // %bb.0:
 ; CHECK-NEXT:rev p0.d, p0.d
 ; CHECK-NEXT:ret
@@ -978,7 +978,34 @@
   ret  %res
 }
 
-define  @rev_i8(  %a) {
+define  @rev_b16( %a) {
+; CHECK-LABEL: rev_b16:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:rev p0.h, p0.h
+; CHECK-NEXT:ret
+  %res = call  @llvm.aarch64.sve.rev.b16( %a)
+  ret  %res
+}
+
+define  @rev_b32( %a) {
+; CHECK-LABEL: rev_b32:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:rev p0.s, p0.s
+; CHECK-NEXT:ret
+  %res = call  @llvm.aarch64.sve.rev.b32( %a)
+  ret  %res
+}
+
+define  @rev_b64( %a) {
+; CHECK-LABEL: rev_b64:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:rev p0.d, p0.d
+; CHECK-NEXT:ret
+  %res = call  @llvm.aarch64.sve.rev.b64( %a)
+  ret  %res
+}
+
+define  @rev_i8( %a) {
 ; CHECK-LABEL: rev_i8:
 ; CHECK:   // %bb.0:
 ; CHECK-NEXT:rev z0.b, z0.b
@@ -1354,8 +1381,8 @@
 ; TRN1
 ;
 
-define  @trn1_b8( %a,  %b) {
-; CHECK-LABEL: trn1_b8:
+define  @trn1_nxv16i1( %a,  %b) {
+; CHECK-LABEL: trn1_nxv16i1:
 ; CHECK:   // %bb.0:
 ; CHECK-NEXT:trn1 p0.b, p0.b, p1.b
 ; CHECK-NEXT:ret
@@ -1364,8 +1391,8 @@
   ret  %out
 }
 
-define  @trn1_b16( %a,  %b) {
-; CHECK-LABEL: trn1_b16:
+define  @trn1_nxv8i1( %a,  %b) {
+; CHECK-LABEL: trn1_nxv8i1:
 ; CHECK:   // %bb.0:
 ; CHECK-NEXT:trn1 p0.h, p0.h, p1.h
 ; CHECK-NEXT:ret
@@ -1374,8 +1401,8 @@
   ret  %out
 }
 
-define  @trn1_b32( %a,  %b) {
-; CHECK-LABEL: trn1_b32:
+define  @trn1_nxv4i1( %a,  %b) {
+; CHECK-LABEL: trn1_nxv4i1:
 ; CHECK:   // %bb.0:
 ; CHECK-NEXT:trn1 p0.s, p0.s, p1.s
 ; CHECK-NEXT:ret
@@ -1384,8 +1411,8 @@
   ret  %out
 }
 
-define  @trn1_b64( %a,  %b) {
-; CHECK-LABEL: trn1_b64:
+define  @trn1_nxv2i1( %a,  %b) {
+; CHECK-LABEL: trn1_nxv2i1:
 ; CHECK:   // %bb.0:
 ; CHECK-NEXT:trn1 p0.d, p0.d, p1.d
 ; CHECK-NEXT:ret
@@ -1394,6 +1421,36 @@
   ret  %out
 }
 
+define  @trn1_b16( %a,  %b) {
+; CHECK-LABEL: trn1_b16:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:trn1 p0.h, p0.h, p1.h
+; CHECK-NEXT:ret
+  %out = call  @llvm.aarch64.sve.trn1.b16( %a,
+ %b)
+  ret  %out
+}
+
+define  @trn1_b32( %a,  %b) {
+; CHECK-LABEL: trn1_b32:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:trn1 p0.s, p0.s, p1.s
+; CHECK-NEXT:ret
+  %out = call  @llvm.aarch64.sve.trn1.b32( %a,
+ %b)
+  ret  %out
+}
+
+define  @trn1_b64( %a,  %b) {
+; CHECK-LABEL: trn1_b64:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:trn1 p0.d, p0.d, p1.d
+; CHECK-NEXT:ret
+  %out = call  @llvm.aarch64.sve.trn1.b64( %a,
+ %b)
+  ret  %out
+}
+
 define  @trn1_i8( %a,  %b) {
 ; CHECK-LABEL: trn1_i8:
 ; CHECK:   // %bb.0:
@@ -1508,8 +

[PATCH] D141939: [SVE][Builtins] Lower X forms of binop arithmetic builtins to dedicated intrinsics.

2023-02-01 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added a comment.

ping


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D141939/new/

https://reviews.llvm.org/D141939

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D141939: [SVE][Builtins] Lower X forms of binop arithmetic builtins to dedicated intrinsics.

2023-02-05 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added inline comments.



Comment at: clang/include/clang/Basic/arm_sve.td:762
 
-multiclass SInstZPZZ flags=[]> {
-  def _M   : SInst;
-  def _X   : SInst;
-  def _Z   : SInst;
-
-  def _N_M : SInst;
-  def _N_X : SInst;
-  def _N_Z : SInst;
-}
-
-defm SVABD_S  : SInstZPZZ<"svabd",  "csil", "aarch64_sve_sabd">;
-defm SVABD_U  : SInstZPZZ<"svabd",  "UcUsUiUl", "aarch64_sve_uabd">;
-defm SVADD: SInstZPZZ<"svadd",  "csilUcUsUiUl", "aarch64_sve_add">;
-defm SVDIV_S  : SInstZPZZ<"svdiv",  "il",   "aarch64_sve_sdiv">;
-defm SVDIV_U  : SInstZPZZ<"svdiv",  "UiUl", "aarch64_sve_udiv">;
-defm SVDIVR_S : SInstZPZZ<"svdivr", "il",   "aarch64_sve_sdivr">;
-defm SVDIVR_U : SInstZPZZ<"svdivr", "UiUl", "aarch64_sve_udivr">;
-defm SVMAX_S  : SInstZPZZ<"svmax",  "csil", "aarch64_sve_smax">;
-defm SVMAX_U  : SInstZPZZ<"svmax",  "UcUsUiUl", "aarch64_sve_umax">;
-defm SVMIN_S  : SInstZPZZ<"svmin",  "csil", "aarch64_sve_smin">;
-defm SVMIN_U  : SInstZPZZ<"svmin",  "UcUsUiUl", "aarch64_sve_umin">;
-defm SVMUL: SInstZPZZ<"svmul",  "csilUcUsUiUl", "aarch64_sve_mul">;
-defm SVMULH_S : SInstZPZZ<"svmulh", "csil", "aarch64_sve_smulh">;
-defm SVMULH_U : SInstZPZZ<"svmulh", "UcUsUiUl", "aarch64_sve_umulh">;
-defm SVSUB: SInstZPZZ<"svsub",  "csilUcUsUiUl", "aarch64_sve_sub">;
-defm SVSUBR   : SInstZPZZ<"svsubr", "csilUcUsUiUl", "aarch64_sve_subr">;
+multiclass SInstZPZZ flags=[]> {
+  def _M   : SInst;

sdesmalen wrote:
> nit: is it worth adding a `bit hasUndefVariant` and doing
> `!if(hasUndefVariant, intrinsic # "_u", intrinsic)`?
Thanks for the suggestion but this assumes the `m` and `x` intrinsics have the 
same name, which for the reversed instructions is not the case (see SVDIVR_S 
for an example). It seemed wrong to add intrinsics for both `div_u` and 
`divr_u` given they're essentially the same operation.
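
(Illustratively, because the _x forms leave inactive lanes undefined, a
reversed operation can be lowered through the single non-reversed _u
intrinsic by swapping its data operands.  A hypothetical sketch, where
DivU, Pred, Op1 and Op2 are assumed names rather than the actual code:)

  // Hypothetical: lower svdivr_x via div_u with the data operands swapped,
  // since svdivr computes op2 / op1.
  llvm::Value *Lowered = Builder.CreateCall(DivU, {Pred, Op2, Op1});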


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D141939/new/

https://reviews.llvm.org/D141939

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D141240: [SVE][Builtins] Add metadata to intrinsic calls for builtins that don't define the result of inactive lanes.

2023-02-06 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm abandoned this revision.
paulwalker-arm added a comment.

D141939  turned out to be the better approach.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D141240/new/

https://reviews.llvm.org/D141240

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D141056: [SVE][CGBuiltins] Remove need for instcombine from ACLE tests.

2023-01-11 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm updated this revision to Diff 488418.
paulwalker-arm added a comment.

Rebase and use simpler IRBuilder interface.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D141056/new/

https://reviews.llvm.org/D141056

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_abd.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_abs.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_acge.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_acgt.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_acle.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_aclt.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_add.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_adda.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_addv.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_adrb.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_adrd.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_adrh.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_adrw.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_and.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_andv.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_asr.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_asrd.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfdot.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalb.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalt.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmmla.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bic.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brka.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brkb.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brkn.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brkpa.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brkpb.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cadd.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clasta-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clastb-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cls.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clz.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmla.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmpeq.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmpge.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmpgt.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmple.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmplt.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmpne.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmpuo.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnot.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnt-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnt.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cntp.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_compact.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvt-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvt.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvtnt.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_div.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_divr.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dot.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_eor.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_eorv.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_expa.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ext-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ext.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_extb.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_exth.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_extw.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_index.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_insr-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_insr.c
  clang

[PATCH] D141056: [SVE][CGBuiltins] Remove need for instcombine from ACLE tests.

2023-01-11 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm marked an inline comment as done.
paulwalker-arm added inline comments.



Comment at: clang/lib/CodeGen/CGBuiltin.cpp:9092
+  if (BytesPerElt > 1) {
+Value *Scale = ConstantInt::get(Int64Ty, Log2_32(BytesPerElt));
+Ops[2] = Builder.CreateShl(Ops[2], Scale);

david-arm wrote:
> Given this seems a frequent idiom is it worth putting this into a helper 
> routine? i.e. something like
> 
>   Ops[2] = getScaledOffset(Ops[2], BytesPerElt);
> 
> where
> 
>   Value *getScaledOffset(Value *Offset, unsigned Bytes) {
>     Value *Scale = ConstantInt::get(Int64Ty, Log2_32(Bytes));
>     return Builder.CreateShl(Offset, Scale);
>   }
Thanks Dave. It turns out IRBuilder has such a helper (well almost) function so 
I've used that instead.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D141056/new/

https://reviews.llvm.org/D141056

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D141240: [SVE][Builtins] Add metadata to intrinsic calls for builtins that don't define the result of inactive lanes.

2023-01-11 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added a comment.

In D141240#4035438 , @sdesmalen wrote:

> Using metadata seems sensible, but did you also identify any downsides? I 
> could imagine that we'd need to manually propagate metadata to any nodes 
> after we do a combine (which can't be blindly copied?), e.g. add + mul -> 
> mla, this new intrinsic would also need the metadata.

I don't really see manual propagation as a downside because it's not
functionally required but rather advantageous for maximising optimisation
opportunities.  The downside is the opposite, in that any transformation that
wants to rely on the inactive lanes being defined as they were before this
patch will now need to check for the presence of (or rather the lack of) the
new metadata before blindly reusing the result of an existing SVE intrinsic
call.  The transformation can still reuse the call; it must just discard the
metadata first.
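
(A minimal sketch of such a guard, assuming the combine sees the call as an
llvm::IntrinsicInst &II; illustrative only:)

  // Only rely on the merging (M form) semantics of the inactive lanes when
  // the call is not tagged as leaving them undefined.
  if (!II.hasMetadata("inactive_lanes_undefined")) {
    // ... transformation that reuses the inactive-lane values ...
  }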

> For intrinsics that don't have a directly corresponding (unpredicated) LLVM 
> IR instruction, is there still a way to use this information in SelectionDAG?

Truth be told I'm not entirely sure how this will play out.  I'm not sure
whether it's better to use the information within the IR, as I'm doing in this
patch, or whether it should be used solely when lowering IR to DAG.  So it's
really an experiment to see what sticks while proving out a route to fix some
of the issues we've already observed with how we represent the X forms.

Predicated->unpredicated conversion aside, another use for encoding the
undefinedness is that it helps with things like the FMAs, where we can use
FMAD if that better suits register allocation, much like we do for stock IR.

>> the select instruction itself has strict rules relating to poison that 
>> hampered the intent of this change
>
> For my understanding, can you elaborate what these strict rules regarding 
> poison are that hamper such a change, and what it was that you tried?

The LangRef states the transformation "select P, A, undef ==> A" is only valid
when you can prove the inactive lanes of "A" do not contain poison. I'm unsure
whether this is a true blocker or a mere inconvenience because, to maintain the
maximum amount of information, we likely don't want to remove the selects
anyway.  I went down this path by creating an SVE undef intrinsic, which
nothing knows about and thus will be left alone.  The problem is that it
massively polluted the IR and I was worried it would make it harder to
spot/implement the typical combines. The existing combines will certainly need
to be changed because they won't know to look through the new selects.

There is the option to change the clang builtin lowering to provide finer
control over which builtins emit these selects, but that just means more
changes (updates to existing instcombines) each time we decide a builtin is
worth the extra select.

I'll keep experimenting, but as I mention within the in-code comment, the
likely best solution is to have dedicated intrinsics, with this patch being
the least intrusive hack.

Perhaps the key word there is "hack" :) I'll investigate the dedicated
intrinsics route because perhaps we only require a handful to get the majority
of the benefit.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D141240/new/

https://reviews.llvm.org/D141240

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D141240: [SVE][Builtins] Add metadata to intrinsic calls for builtins that don't define the result of inactive lanes.

2023-01-13 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added a comment.

Just a heads up that I'm likely to abandon this patch because, as predicted,
implementing dedicated intrinsics is looking like the better design.  Almost
all the code generation plumbing is already present, so even the
implementation is minimal.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D141240/new/

https://reviews.llvm.org/D141240

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D141056: [SVE][CGBuiltins] Remove need for instcombine from ACLE tests.

2023-01-15 Thread Paul Walker via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
paulwalker-arm marked an inline comment as done.
Closed by commit rG909ac0e97dcb: [SVE][CGBuiltins] Remove need for instcombine 
from ACLE tests. (authored by paulwalker-arm).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D141056/new/

https://reviews.llvm.org/D141056

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_abd.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_abs.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_acge.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_acgt.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_acle.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_aclt.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_add.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_adda.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_addv.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_adrb.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_adrd.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_adrh.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_adrw.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_and.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_andv.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_asr.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_asrd.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfdot.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalb.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalt.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmmla.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bic.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brka.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brkb.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brkn.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brkpa.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brkpb.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cadd.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clasta-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clastb-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cls.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clz.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmla.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmpeq.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmpge.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmpgt.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmple.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmplt.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmpne.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmpuo.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnot.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnt-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnt.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cntp.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_compact.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvt-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvt.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvtnt.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_div.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_divr.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dot.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_eor.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_eorv.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_expa.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ext-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ext.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_extb.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_exth.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_extw.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_index.c
  clang/test/Co

[PATCH] D142065: [SVE] Fix incorrect lowering of predicate permute builtins.

2023-01-18 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm created this revision.
Herald added subscribers: psnobl, hiraditya, tschuett.
Herald added a reviewer: efriedma.
Herald added a project: All.
paulwalker-arm requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.

When lowering predicate permute builtins we incorrectly assume that only
the typically "active" bits for the specified element type play a
role, with all other bits zeroed.  This is not the case because all
bits are significant, with the element type specifying how they
are grouped:

  b8  - permute using a block size of 1 bit
  b16 - permute using a block size of 2 bits
  b32 - permute using a block size of 4 bits
  b64 - permute using a block size of 8 bits

The affected builtins are svrev, svtrn1, svtrn2, svuzp1, svuzp2,
svzip1 and svzip2.

This patch adds new intrinsics to support these operations and
changes the builtin lowering code to emit them.  The b8 case remains
unchanged because for that operation the existing intrinsics work
as required.  Their support for the other predicate types has been
kept because it remains useful, if only as a way to test the
correctness of the matching ISD nodes that code generation relies on.
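
To illustrate the block grouping with a standalone scalar model (purely
illustrative, not LLVM code): for the b16 form, rev reverses 2-bit blocks
of the predicate rather than individual bits.

  #include <cstdint>

  // Model of rev.b16 acting on a 16-bit fragment of the predicate:
  // reverse the order of the eight 2-bit blocks while keeping the bits
  // within each block intact.
  uint16_t rev_b16_model(uint16_t pred) {
    uint16_t out = 0;
    for (int block = 0; block < 8; ++block) {
      uint16_t bits = (pred >> (2 * block)) & 0x3; // extract one 2-bit block
      out |= bits << (2 * (7 - block));            // emit in reversed order
    }
    return out;
  }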


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D142065

Files:
  clang/include/clang/Basic/arm_sve.td
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rev.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_trn1.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_trn2.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_uzp1.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_uzp2.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_zip1.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_zip2.c
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
===
--- llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
@@ -942,8 +942,8 @@
 ; REV
 ;
 
-define  @rev_b8(  %a) {
-; CHECK-LABEL: rev_b8:
+define  @rev_nxv16i1( %a) {
+; CHECK-LABEL: rev_nxv16i1:
 ; CHECK:   // %bb.0:
 ; CHECK-NEXT:rev p0.b, p0.b
 ; CHECK-NEXT:ret
@@ -951,8 +951,8 @@
   ret  %res
 }
 
-define  @rev_b16( %a) {
-; CHECK-LABEL: rev_b16:
+define  @rev_nxv8i1( %a) {
+; CHECK-LABEL: rev_nxv8i1:
 ; CHECK:   // %bb.0:
 ; CHECK-NEXT:rev p0.h, p0.h
 ; CHECK-NEXT:ret
@@ -960,8 +960,8 @@
   ret  %res
 }
 
-define  @rev_b32( %a) {
-; CHECK-LABEL: rev_b32:
+define  @rev_nxv4i1( %a) {
+; CHECK-LABEL: rev_nxv4i1:
 ; CHECK:   // %bb.0:
 ; CHECK-NEXT:rev p0.s, p0.s
 ; CHECK-NEXT:ret
@@ -969,8 +969,8 @@
   ret  %res
 }
 
-define  @rev_b64( %a) {
-; CHECK-LABEL: rev_b64:
+define  @rev_nxv2i1( %a) {
+; CHECK-LABEL: rev_nxv2i1:
 ; CHECK:   // %bb.0:
 ; CHECK-NEXT:rev p0.d, p0.d
 ; CHECK-NEXT:ret
@@ -978,7 +978,34 @@
   ret  %res
 }
 
-define  @rev_i8(  %a) {
+define  @rev_b16( %a) {
+; CHECK-LABEL: rev_b16:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:rev p0.h, p0.h
+; CHECK-NEXT:ret
+  %res = call  @llvm.aarch64.sve.rev.b16( %a)
+  ret  %res
+}
+
+define  @rev_b32( %a) {
+; CHECK-LABEL: rev_b32:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:rev p0.s, p0.s
+; CHECK-NEXT:ret
+  %res = call  @llvm.aarch64.sve.rev.b32( %a)
+  ret  %res
+}
+
+define  @rev_b64( %a) {
+; CHECK-LABEL: rev_b64:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:rev p0.d, p0.d
+; CHECK-NEXT:ret
+  %res = call  @llvm.aarch64.sve.rev.b64( %a)
+  ret  %res
+}
+
+define  @rev_i8( %a) {
 ; CHECK-LABEL: rev_i8:
 ; CHECK:   // %bb.0:
 ; CHECK-NEXT:rev z0.b, z0.b
@@ -1354,8 +1381,8 @@
 ; TRN1
 ;
 
-define  @trn1_b8( %a,  %b) {
-; CHECK-LABEL: trn1_b8:
+define  @trn1_nxv16i1( %a,  %b) {
+; CHECK-LABEL: trn1_nxv16i1:
 ; CHECK:   // %bb.0:
 ; CHECK-NEXT:trn1 p0.b, p0.b, p1.b
 ; CHECK-NEXT:ret
@@ -1364,8 +1391,8 @@
   ret  %out
 }
 
-define  @trn1_b16( %a,  %b) {
-; CHECK-LABEL: trn1_b16:
+define  @trn1_nxv8i1( %a,  %b) {
+; CHECK-LABEL: trn1_nxv8i1:
 ; CHECK:   // %bb.0:
 ; CHECK-NEXT:trn1 p0.h, p0.h, p1.h
 ; CHECK-NEXT:ret
@@ -1374,8 +1401,8 @@
   ret  %out
 }
 
-define  @trn1_b32( %a,  %b) {
-; CHECK-LABEL: trn1_b32:
+define  @trn1_nxv4i1( %a,  %b) {
+; CHECK-LABEL: trn1_nxv4i1:
 ; CHECK:   // %bb.0:
 ; CHECK-NEXT:trn1 p0.s, p0.s, p1.s
 ; CHECK-NEXT:ret
@@ -1384,8 +1411,8 @@
   ret  %out
 }
 
-define  @trn1_b64( %a,  %b) {
-; CHECK-LABEL: trn1_b64:
+define  @trn1_nxv2i1( %a,  %b) {
+; CHECK-LABEL: trn1_nxv2i1:
 ; CHECK:   // %bb.0:
 ; CHECK-NEXT:trn1 p0.d, p0.d, p1.d
 ; CHECK-NEXT:ret
@@ -1394,6 +1421,36 @@
   ret  %out
 }
 
+define  @trn1_b16( %a,  %b) {
+; CHECK-LABEL: trn1_b16:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:tr

[PATCH] D142065: [SVE] Fix incorrect lowering of predicate permute builtins.

2023-01-18 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added reviewers: david-arm, CarolineConcatto, peterwaller-arm.
paulwalker-arm added a comment.

This is a bug fix based on something spotted while reviewing D141469.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D142065/new/

https://reviews.llvm.org/D142065

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D127655: [AArch64] Define __FP_FAST_FMA[F]

2022-06-17 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added inline comments.



Comment at: clang/test/Preprocessor/aarch64-target-features.c:59-60
 // CHECK-NOT: __ARM_FEATURE_SVE_BITS 2048
+// CHECK: __FP_FAST_FMA 1
+// CHECK: __FP_FAST_FMAF 1
 

I don't think we need this change given `init-aarch64.c` provides enough 
coverage.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D127655/new/

https://reviews.llvm.org/D127655

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D127976: [IR] Move vector.insert/vector.extract out of experimental namespace

2022-06-23 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm accepted this revision.
paulwalker-arm added inline comments.



Comment at: llvm/docs/LangRef.rst:17282
+  declare  @llvm.vector.insert.nxv4f32.v4f32( %vec, <4 x float> %subvec, i64 %idx)
+  declare  @llvm.vector.insert.nxv2f64.v2f64( %vec, <2 x double> %subvec, i64 %idx)
+

For this and the other instances, please remove the `%`: idx must be a
literal (as documented below), but using `%idx` makes it look like an arbitrary
variable can be used.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D127976/new/

https://reviews.llvm.org/D127976

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155688: [PATCH] [llvm] [InstCombine] Reassociate loop invariant GEP index calculations.

2023-09-20 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added inline comments.



Comment at: llvm/lib/Transforms/InstCombine/InstructionCombining.cpp:2332
   }
-
+  if (GEP.getNumIndices() == 1 && !GEP.getType()->isVectorTy()) {
+auto *Idx = dyn_cast(GEP.getOperand(1));

Perhaps move this block after the `We do not handle pointer-vector geps here`
comment immediately below, so this test can be removed.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155688/new/

https://reviews.llvm.org/D155688

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D158045: [clang][SVE] Rename isVLSTBuiltinType, NFC

2023-08-16 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm accepted this revision.
paulwalker-arm added a comment.
This revision is now accepted and ready to land.

I'd hope there are common code paths where `isVLSTBuiltinType` would still be
useful, but I guess this step is required to make that happen anyway.

If you'll permit a minor request, can the new name please be `isSveVLSBuiltinType`?
That would match the case used by the related `getSveEltType` function, and the
extra `T` in the old name has always bugged me.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D158045/new/

https://reviews.llvm.org/D158045

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D157269: [Clang][AArch64] Diagnostics for SME attributes when target doesn't have 'sme'

2023-08-08 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added inline comments.



Comment at: clang/lib/Sema/SemaChecking.cpp:6762
+Context.getFunctionFeatureMap(CallerFeatureMap, CallerFD);
+if (!CallerFeatureMap.count("sme"))
+  Diag(Loc, diag::err_sme_call_in_non_sme_target);

`contains("sme")` seems more appropriate here?



Comment at: clang/lib/Sema/SemaDecl.cpp:12159
+  Context.getFunctionFeatureMap(FeatureMap, NewFD);
+  if (!FeatureMap.count("sme")) {
+if (UsesSM)

As above.



Comment at: clang/lib/Sema/SemaDecl.cpp:12163
+   diag::err_sme_definition_using_sm_in_non_sme_target);
+else if (UsesZA)
+  Diag(NewFD->getLocation(),

Can this be just `else`, given that by this point I believe you know `UsesZA`
has to be true?



Comment at: clang/test/Sema/aarch64-sme-func-attrs-without-target-feature.cpp:1
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fsyntax-only -verify %s
+

Does the test require SVE to be enabled?



Comment at: clang/test/Sema/aarch64-sme-func-attrs-without-target-feature.cpp:3
+
+// This test is testing the diagnostic that Clang emits when compiling without '+sme'.
+

diagnostics



Comment at: clang/test/Sema/aarch64-sme-func-attrs-without-target-feature.cpp:13-17
+__attribute__((target("sme"))) void streaming_compatible_def_sme_attr() __arm_streaming_compatible {} // OK
+__attribute__((target("sme"))) void streaming_def_sme_attr() __arm_streaming { } // OK
+__attribute__((target("sme"))) void shared_za_def_sme_attr() __arm_shared_za { } // OK
+__arm_new_za __attribute__((target("sme"))) void new_za_def_sme_attr() {} // OK
+__arm_locally_streaming __attribute__((target("sme"))) void locally_streaming_def_sme_attr() {} // OK

Is it worth including tests where "sme2" is used? Or are we already comfortable
that feature inheritance is well tested?



Comment at: llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td:144-163
 // It's tricky to use the existing pstate operand defined in
 // AArch64SystemOperands.td since it only encodes 5 bits including op1;op2,
 // when these fields are also encoded in CRm[3:1].
 def MSRpstatesvcrImm1
   : PstateWriteSimple<(ins svcr_op:$pstatefield, timm0_1:$imm), "msr",
   "\t$pstatefield, $imm">,
 Sched<[WriteSys]> {

Doesn't this class belong in SMEInstrFormats.td? Then you'll not need to
override `Predicates`.



Comment at: llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll:4
+
+; This that the following code can be compiled without +sme, because if the
+; call is not entered in streaming-SVE mode at runtime, the codepath leading

Verify the...


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D157269/new/

https://reviews.llvm.org/D157269

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D158479: [clang] Support elementwise builtin for sizeless vector type

2023-08-24 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added inline comments.



Comment at: clang/test/CodeGen/aarch64-sve-vector-elementwise-ops.c:128-131
+// CHECK-NEXT:[[ELT_MIN:%.*]] = call  @llvm.umin.nxv16i8( [[VI8:%.*]],  [[VI8]])
+// CHECK-NEXT:[[ELT_MIN1:%.*]] = call  @llvm.umin.nxv8i16( [[VI16:%.*]],  [[VI16]])
+// CHECK-NEXT:[[ELT_MIN2:%.*]] = call  @llvm.umin.nxv4i32( [[VI32:%.*]],  [[VI32]])
+// CHECK-NEXT:[[ELT_MIN3:%.*]] = call  @llvm.umin.nxv2i64( [[VI64:%.*]],  [[VI64]])

These should be `smin` rather than `umin`?



Comment at: clang/test/CodeGen/aarch64-sve-vector-elementwise-ops.c:208-210
+// CHECK-NEXT:[[ELT_COS:%.*]] = call  @llvm.cos.nxv8f16( [[VF16:%.*]])
+// CHECK-NEXT:[[ELT_COS1:%.*]] = call  @llvm.cos.nxv4f32( [[VF32:%.*]])
+// CHECK-NEXT:[[ELT_COS2:%.*]] = call  @llvm.cos.nxv2f64( [[VF64:%.*]])

This highlights a potential sticking point: can we code generate such
intrinsics for scalable vectors?  LoopVectorize will emit calls to a vector
math library rather than call this intrinsic directly.  There's also the
replace-with-veclib pass that will convert such intrinsics, but again only if
there's a suitable math library available.

Is it a good idea to allow builtins that we know will cause a code generation
failure, or that might result in a scalarised implementation?
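
For context only (not something this patch adds), a minimal sketch of the kind
of source that produces the calls quoted above, assuming this patch's support
for sizeless vector operands and a target with SVE enabled; the function name
is invented for the example:

  // Sketch only: assumes the elementwise builtins accept sizeless vector
  // operands, as proposed in this patch.
  #include <arm_sve.h>

  svfloat32_t elementwise_cos(svfloat32_t v) {
    // Emits a call to llvm.cos.nxv4f32, which (per the concern above) the
    // backend may not be able to lower without a suitable vector math library.
    return __builtin_elementwise_cos(v);
  }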


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D158479/new/

https://reviews.llvm.org/D158479

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D40299: [Complex] Don't use __div?c3 when building with fast-math.

2017-11-21 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm created this revision.

Plant an inline version of "((ac+bd)/(cc+dd)) + i((bc-ad)/(cc+dd))" instead.
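
For reference, the identity follows from multiplying the numerator and
denominator by the conjugate of the divisor (a standard derivation, not part
of the patch):

  (a+ib) / (c+id) = ((a+ib)(c-id)) / ((c+id)(c-id))
                  = ((ac+bd) + i(bc-ad)) / (cc+dd)

Unlike the __div?c3 libcalls, the inline expansion does nothing to limit
intermediate overflow/underflow, which is why it is only planted when
fast-math is enabled.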


https://reviews.llvm.org/D40299

Files:
  lib/CodeGen/CGExprComplex.cpp
  test/CodeGen/complex-math.c

Index: test/CodeGen/complex-math.c
===
--- test/CodeGen/complex-math.c
+++ test/CodeGen/complex-math.c
@@ -5,6 +5,7 @@
 // RUN %clang_cc1 %s -O1 -emit-llvm -triple armv7-none-linux-gnueabi -o - | FileCheck %s --check-prefix=ARM
 // RUN: %clang_cc1 %s -O1 -emit-llvm -triple armv7-none-linux-gnueabihf -o - | FileCheck %s --check-prefix=ARMHF
 // RUN: %clang_cc1 %s -O1 -emit-llvm -triple thumbv7k-apple-watchos2.0 -o - -target-abi aapcs16 | FileCheck %s --check-prefix=ARM7K
+// RUN %clang_cc1 %s -O1 -ffast-math -emit-llvm -triple x86_64-unknown-unknown | FileCheck %s --check-prefix=FASTMATH
 
 float _Complex add_float_rr(float a, float b) {
   // X86-LABEL: @add_float_rr(
@@ -128,13 +129,25 @@
   // X86-NOT: fdiv
   // X86: call {{.*}} @__divsc3(
   // X86: ret
+  // FASTMATH-LABEL: @div_float_rc(
+  // FASTMATH-NOT: @__divsc3
+  // FASTMATH: fdiv
+  // FASTMATH: fdiv
+  // FASTMATH-NOT: fdiv
+  // FASTMATH: ret
   return a / b;
 }
 float _Complex div_float_cc(float _Complex a, float _Complex b) {
   // X86-LABEL: @div_float_cc(
   // X86-NOT: fdiv
   // X86: call {{.*}} @__divsc3(
   // X86: ret
+  // FASTMATH-LABEL: @div_float_cc(
+  // FASTMATH-NOT: @__divsc3
+  // FASTMATH: fdiv
+  // FASTMATH: fdiv
+  // FASTMATH-NOT: fdiv
+  // FASTMATH: ret
   return a / b;
 }
 
@@ -260,13 +273,25 @@
   // X86-NOT: fdiv
   // X86: call {{.*}} @__divdc3(
   // X86: ret
+  // FASTMATH-LABEL: @div_double_rc(
+  // FASTMATH-NOT: @__divdc3
+  // FASTMATH: fdiv
+  // FASTMATH: fdiv
+  // FASTMATH-NOT: fdiv
+  // FASTMATH: ret
   return a / b;
 }
 double _Complex div_double_cc(double _Complex a, double _Complex b) {
   // X86-LABEL: @div_double_cc(
   // X86-NOT: fdiv
   // X86: call {{.*}} @__divdc3(
   // X86: ret
+  // FASTMATH-LABEL: @div_double_cc(
+  // FASTMATH-NOT: @__divdc3
+  // FASTMATH: fdiv
+  // FASTMATH: fdiv
+  // FASTMATH-NOT: fdiv
+  // FASTMATH: ret
   return a / b;
 }
 
@@ -410,6 +435,12 @@
   // PPC-NOT: fdiv
   // PPC: call {{.*}} @__divtc3(
   // PPC: ret
+  // FASTMATH-LABEL: @div_long_double_rc(
+  // FASTMATH-NOT: @__divxc3
+  // FASTMATH: fdiv
+  // FASTMATH: fdiv
+  // FASTMATH-NOT: fdiv
+  // FASTMATH: ret
   return a / b;
 }
 long double _Complex div_long_double_cc(long double _Complex a, long double _Complex b) {
@@ -421,6 +452,12 @@
   // PPC-NOT: fdiv
   // PPC: call {{.*}} @__divtc3(
   // PPC: ret
+  // FASTMATH-LABEL: @div_double_cc(
+  // FASTMATH-NOT: @__divxc3
+  // FASTMATH: fdiv
+  // FASTMATH: fdiv
+  // FASTMATH-NOT: fdiv
+  // FASTMATH: ret
   return a / b;
 }
 
Index: lib/CodeGen/CGExprComplex.cpp
===
--- lib/CodeGen/CGExprComplex.cpp
+++ lib/CodeGen/CGExprComplex.cpp
@@ -761,15 +761,16 @@
   llvm::Value *LHSr = Op.LHS.first, *LHSi = Op.LHS.second;
   llvm::Value *RHSr = Op.RHS.first, *RHSi = Op.RHS.second;
 
-
   llvm::Value *DSTr, *DSTi;
   if (LHSr->getType()->isFloatingPointTy()) {
+llvm::FastMathFlags FMF = Builder.getFastMathFlags();
+
 // If we have a complex operand on the RHS, we delegate to a libcall to
 // handle all of the complexities and minimize underflow/overflow cases.
 //
 // FIXME: We would be able to avoid the libcall in many places if we
 // supported imaginary types in addition to complex types.
-if (RHSi) {
+if (RHSi && !FMF.isFast()) {
   BinOpInfo LibCallOp = Op;
   // If LHS was a real, supply a null imaginary part.
   if (!LHSi)
@@ -791,11 +792,31 @@
   case llvm::Type::FP128TyID:
 return EmitComplexBinOpLibCall("__divtc3", LibCallOp);
   }
-}
-assert(LHSi && "Can have at most one non-complex operand!");
+} else if (RHSi) {
+  if (!LHSi)
+LHSi = llvm::Constant::getNullValue(RHSi->getType());
+
+  // (a+ib) / (c+id) = ((ac+bd)/(cc+dd)) + i((bc-ad)/(cc+dd))
+  llvm::Value *AC = Builder.CreateFMul(LHSr, RHSr); // a*c
+  llvm::Value *BD = Builder.CreateFMul(LHSi, RHSi); // b*d
+  llvm::Value *ACpBD = Builder.CreateFAdd(AC, BD); // ac+bd
 
-DSTr = Builder.CreateFDiv(LHSr, RHSr);
-DSTi = Builder.CreateFDiv(LHSi, RHSr);
+  llvm::Value *CC = Builder.CreateFMul(RHSr, RHSr); // c*c
+  llvm::Value *DD = Builder.CreateFMul(RHSi, RHSi); // d*d
+  llvm::Value *CCpDD = Builder.CreateFAdd(CC, DD); // cc+dd
+
+  llvm::Value *BC = Builder.CreateFMul(LHSi, RHSr); // b*c
+  llvm::Value *AD = Builder.CreateFMul(LHSr, RHSi); // a*d
+  llvm::Value *BCmAD = Builder.CreateFSub(BC, AD); // bc-ad
+
+  DSTr = Builder.CreateFDiv(ACpBD, CCpDD);
+  DSTi = Builder.CreateFDiv(BCmAD, CCpDD);
+} else {
+  assert(LHSi && "Can have at most one non-complex operand!");
+
+  DSTr = Builder.CreateFDiv(LHS

[PATCH] D146146: [Clang] Stop demoting ElementCount/TypeSize conversion errors to warnings.

2023-03-15 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm created this revision.
Herald added a project: All.
paulwalker-arm requested review of this revision.
Herald added subscribers: cfe-commits, MaskRay.
Herald added a project: clang.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D146146

Files:
  clang/lib/Driver/ToolChains/Clang.cpp


Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -5560,22 +5560,6 @@
 
   RenderTargetOptions(Triple, Args, KernelOrKext, CmdArgs);
 
-  // FIXME: For now we want to demote any errors to warnings, when they have
-  // been raised for asking the wrong question of scalable vectors, such as
-  // asking for the fixed number of elements. This may happen because code that
-  // is not yet ported to work for scalable vectors uses the wrong interfaces,
-  // whereas the behaviour is actually correct. Emitting a warning helps bring
-  // up scalable vector support in an incremental way. When scalable vector
-  // support is stable enough, all uses of wrong interfaces should be considered
-  // as errors, but until then, we can live with a warning being emitted by the
-  // compiler. This way, Clang can be used to compile code with scalable vectors
-  // and identify possible issues.
-  if (isa(JA) || isa(JA) ||
-  isa(JA)) {
-CmdArgs.push_back("-mllvm");
-CmdArgs.push_back("-treat-scalable-fixed-error-as-warning");
-  }
-
   // These two are potentially updated by AddClangCLArgs.
   codegenoptions::DebugInfoKind DebugInfoKind = codegenoptions::NoDebugInfo;
   bool EmitCodeView = false;


Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -5560,22 +5560,6 @@
 
   RenderTargetOptions(Triple, Args, KernelOrKext, CmdArgs);
 
-  // FIXME: For now we want to demote any errors to warnings, when they have
-  // been raised for asking the wrong question of scalable vectors, such as
-  // asking for the fixed number of elements. This may happen because code that
-  // is not yet ported to work for scalable vectors uses the wrong interfaces,
-  // whereas the behaviour is actually correct. Emitting a warning helps bring
-  // up scalable vector support in an incremental way. When scalable vector
-  // support is stable enough, all uses of wrong interfaces should be considered
-  // as errors, but until then, we can live with a warning being emitted by the
-  // compiler. This way, Clang can be used to compile code with scalable vectors
-  // and identify possible issues.
-  if (isa(JA) || isa(JA) ||
-  isa(JA)) {
-CmdArgs.push_back("-mllvm");
-CmdArgs.push_back("-treat-scalable-fixed-error-as-warning");
-  }
-
   // These two are potentially updated by AddClangCLArgs.
   codegenoptions::DebugInfoKind DebugInfoKind = codegenoptions::NoDebugInfo;
   bool EmitCodeView = false;
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D146146: [Clang] Stop demoting ElementCount/TypeSize conversion errors to warnings.

2023-03-15 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added reviewers: sdesmalen, david-arm, craig.topper, reames.
paulwalker-arm added a comment.

This option was our pragmatic way to ensure scalable-vector-based toolchains
remained useful whilst the kinks were worked out.  We've a few releases under
our belts now, and I feel enough now works that any remaining errors are
something we should not be hiding.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D146146/new/

https://reviews.llvm.org/D146146

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D146146: [Clang] Stop demoting ElementCount/TypeSize conversion errors to warnings.

2023-03-17 Thread Paul Walker via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG62f09d788f9f: [Clang] Stop demoting ElementCount/TypeSize 
conversion errors to warnings. (authored by paulwalker-arm).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D146146/new/

https://reviews.llvm.org/D146146

Files:
  clang/lib/Driver/ToolChains/Clang.cpp


Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -5560,22 +5560,6 @@
 
   RenderTargetOptions(Triple, Args, KernelOrKext, CmdArgs);
 
-  // FIXME: For now we want to demote any errors to warnings, when they have
-  // been raised for asking the wrong question of scalable vectors, such as
-  // asking for the fixed number of elements. This may happen because code that
-  // is not yet ported to work for scalable vectors uses the wrong interfaces,
-  // whereas the behaviour is actually correct. Emitting a warning helps bring
-  // up scalable vector support in an incremental way. When scalable vector
-  // support is stable enough, all uses of wrong interfaces should be considered
-  // as errors, but until then, we can live with a warning being emitted by the
-  // compiler. This way, Clang can be used to compile code with scalable vectors
-  // and identify possible issues.
-  if (isa(JA) || isa(JA) ||
-  isa(JA)) {
-CmdArgs.push_back("-mllvm");
-CmdArgs.push_back("-treat-scalable-fixed-error-as-warning");
-  }
-
   // These two are potentially updated by AddClangCLArgs.
   codegenoptions::DebugInfoKind DebugInfoKind = codegenoptions::NoDebugInfo;
   bool EmitCodeView = false;


Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -5560,22 +5560,6 @@
 
   RenderTargetOptions(Triple, Args, KernelOrKext, CmdArgs);
 
-  // FIXME: For now we want to demote any errors to warnings, when they have
-  // been raised for asking the wrong question of scalable vectors, such as
-  // asking for the fixed number of elements. This may happen because code that
-  // is not yet ported to work for scalable vectors uses the wrong interfaces,
-  // whereas the behaviour is actually correct. Emitting a warning helps bring
-  // up scalable vector support in an incremental way. When scalable vector
-  // support is stable enough, all uses of wrong interfaces should be considered
-  // as errors, but until then, we can live with a warning being emitted by the
-  // compiler. This way, Clang can be used to compile code with scalable vectors
-  // and identify possible issues.
-  if (isa(JA) || isa(JA) ||
-  isa(JA)) {
-CmdArgs.push_back("-mllvm");
-CmdArgs.push_back("-treat-scalable-fixed-error-as-warning");
-  }
-
   // These two are potentially updated by AddClangCLArgs.
   codegenoptions::DebugInfoKind DebugInfoKind = codegenoptions::NoDebugInfo;
   bool EmitCodeView = false;
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D136864: [Clang] Create opaque type for AArch64 SVE2p1/SME2 svcount_t.

2023-03-02 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm added inline comments.



Comment at: clang/lib/CodeGen/CGDebugInfo.cpp:730
   ASTContext::BuiltinVectorTypeInfo Info =
-  CGM.getContext().getBuiltinVectorTypeInfo(BT);
-  unsigned NumElemsPerVG = (Info.EC.getKnownMinValue() * Info.NumVectors) / 2;
+  // For svcount_t, only the lower 16 bits are relevant.
+  BT->getKind() == BuiltinType::SveCount

Given predicates are stored byte-wise, perhaps it's worth being more explicit
and saying "only the first 2 bytes are relevant"?



Comment at: clang/lib/CodeGen/CGDebugInfo.cpp:739
+  // bitpattern for predicates instead.
+  unsigned NumElems = Info.EC.getKnownMinValue() * Info.NumVectors;
   if (Info.ElementType == CGM.getContext().BoolTy) {

Do we need an assert that `Info.NumVectors` is 1 for `svcount_t`? I don't think
we plan on having `svcountx2_t`, but if we did then I think this code would be
incorrect, because `UpperBound` will not take the padding between the "live
16-bit values" into account. An assert would at least give us a modicum of
protection.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D136864/new/

https://reviews.llvm.org/D136864

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D153560: [Clang] Allow C++11 style initialisation of SVE types.

2023-06-22 Thread Paul Walker via Phabricator via cfe-commits
paulwalker-arm created this revision.
Herald added a project: All.
paulwalker-arm requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Fixes https://github.com/llvm/llvm-project/issues/63223
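
For illustration only, a minimal sketch of the initialisation style this change
accepts; it assumes a target with SVE enabled, the function name is invented
for the example, and it mirrors what the new test below checks:

  // Sketch only: C++11 empty-brace initialisation of sizeless SVE types.
  #include <arm_sve.h>

  void empty_init_locals() {
    svint32_t   zi{};  // value-initialised, lowered to a zeroinitializer store
    svfloat64_t zf{};
    svbool_t    zp{};
  }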


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D153560

Files:
  clang/lib/CodeGen/CGExprScalar.cpp
  clang/test/CodeGen/aarch64-sve.cpp

Index: clang/test/CodeGen/aarch64-sve.cpp
===
--- /dev/null
+++ clang/test/CodeGen/aarch64-sve.cpp
@@ -0,0 +1,167 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -emit-llvm -o - %s | FileCheck %s
+
+// CHECK-LABEL: define dso_local void @_Z11test_localsv
+// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[S8:%.*]] = alloca , align 16
+// CHECK-NEXT:[[S16:%.*]] = alloca , align 16
+// CHECK-NEXT:[[S32:%.*]] = alloca , align 16
+// CHECK-NEXT:[[S64:%.*]] = alloca , align 16
+// CHECK-NEXT:[[U8:%.*]] = alloca , align 16
+// CHECK-NEXT:[[U16:%.*]] = alloca , align 16
+// CHECK-NEXT:[[U32:%.*]] = alloca , align 16
+// CHECK-NEXT:[[U64:%.*]] = alloca , align 16
+// CHECK-NEXT:[[F16:%.*]] = alloca , align 16
+// CHECK-NEXT:[[F32:%.*]] = alloca , align 16
+// CHECK-NEXT:[[F64:%.*]] = alloca , align 16
+// CHECK-NEXT:[[BF16:%.*]] = alloca , align 16
+// CHECK-NEXT:[[S8X2:%.*]] = alloca , align 16
+// CHECK-NEXT:[[S16X2:%.*]] = alloca , align 16
+// CHECK-NEXT:[[S32X2:%.*]] = alloca , align 16
+// CHECK-NEXT:[[X64X2:%.*]] = alloca , align 16
+// CHECK-NEXT:[[U8X2:%.*]] = alloca , align 16
+// CHECK-NEXT:[[U16X2:%.*]] = alloca , align 16
+// CHECK-NEXT:[[U32X2:%.*]] = alloca , align 16
+// CHECK-NEXT:[[U64X2:%.*]] = alloca , align 16
+// CHECK-NEXT:[[F16X2:%.*]] = alloca , align 16
+// CHECK-NEXT:[[F32X2:%.*]] = alloca , align 16
+// CHECK-NEXT:[[F64X2:%.*]] = alloca , align 16
+// CHECK-NEXT:[[BF16X2:%.*]] = alloca , align 16
+// CHECK-NEXT:[[S8X3:%.*]] = alloca , align 16
+// CHECK-NEXT:[[S16X3:%.*]] = alloca , align 16
+// CHECK-NEXT:[[S32X3:%.*]] = alloca , align 16
+// CHECK-NEXT:[[X64X3:%.*]] = alloca , align 16
+// CHECK-NEXT:[[U8X3:%.*]] = alloca , align 16
+// CHECK-NEXT:[[U16X3:%.*]] = alloca , align 16
+// CHECK-NEXT:[[U32X3:%.*]] = alloca , align 16
+// CHECK-NEXT:[[U64X3:%.*]] = alloca , align 16
+// CHECK-NEXT:[[F16X3:%.*]] = alloca , align 16
+// CHECK-NEXT:[[F32X3:%.*]] = alloca , align 16
+// CHECK-NEXT:[[F64X3:%.*]] = alloca , align 16
+// CHECK-NEXT:[[BF16X3:%.*]] = alloca , align 16
+// CHECK-NEXT:[[S8X4:%.*]] = alloca , align 16
+// CHECK-NEXT:[[S16X4:%.*]] = alloca , align 16
+// CHECK-NEXT:[[S32X4:%.*]] = alloca , align 16
+// CHECK-NEXT:[[X64X4:%.*]] = alloca , align 16
+// CHECK-NEXT:[[U8X4:%.*]] = alloca , align 16
+// CHECK-NEXT:[[U16X4:%.*]] = alloca , align 16
+// CHECK-NEXT:[[U32X4:%.*]] = alloca , align 16
+// CHECK-NEXT:[[U64X4:%.*]] = alloca , align 16
+// CHECK-NEXT:[[F16X4:%.*]] = alloca , align 16
+// CHECK-NEXT:[[F32X4:%.*]] = alloca , align 16
+// CHECK-NEXT:[[F64X4:%.*]] = alloca , align 16
+// CHECK-NEXT:[[BF16X4:%.*]] = alloca , align 16
+// CHECK-NEXT:[[B8:%.*]] = alloca , align 2
+// CHECK-NEXT:[[B8X2:%.*]] = alloca , align 2
+// CHECK-NEXT:[[B8X4:%.*]] = alloca , align 2
+// CHECK-NEXT:store  zeroinitializer, ptr [[S8]], align 16
+// CHECK-NEXT:store  zeroinitializer, ptr [[S16]], align 16
+// CHECK-NEXT:store  zeroinitializer, ptr [[S32]], align 16
+// CHECK-NEXT:store  zeroinitializer, ptr [[S64]], align 16
+// CHECK-NEXT:store  zeroinitializer, ptr [[U8]], align 16
+// CHECK-NEXT:store  zeroinitializer, ptr [[U16]], align 16
+// CHECK-NEXT:store  zeroinitializer, ptr [[U32]], align 16
+// CHECK-NEXT:store  zeroinitializer, ptr [[U64]], align 16
+// CHECK-NEXT:store  zeroinitializer, ptr [[F16]], align 16
+// CHECK-NEXT:store  zeroinitializer, ptr [[F32]], align 16
+// CHECK-NEXT:store  zeroinitializer, ptr [[F64]], align 16
+// CHECK-NEXT:store  zeroinitializer, ptr [[BF16]], align 16
+// CHECK-NEXT:store  zeroinitializer, ptr [[S8X2]], align 16
+// CHECK-NEXT:store  zeroinitializer, ptr [[S16X2]], align 16
+// CHECK-NEXT:store  zeroinitializer, ptr [[S32X2]], align 16
+// CHECK-NEXT:store  zeroinitializer, ptr [[X64X2]], align 16
+// CHECK-NEXT:store  zeroinitializer, ptr [[U8X2]], align 16
+// CHECK-NEXT:store  zeroinitializer, ptr [[U16X2]], align 16
+// CHECK-NEXT:store  zeroinitializer, ptr [[U32X2]], align 16
+// CHECK-NEXT:store  zeroinitializer, ptr [[U64X2]], align 16
+// CHECK-NEXT:store  zeroinitializer, ptr [[F16X2]], align 16
+// CHECK-NEXT:store  zeroinitialize
