[llvm-branch-commits] [llvm] 8fbc143 - [AArch64] Merge [US]MULL with half adds and subs into [US]ML[AS]L

2021-01-25 Thread Andre Vieira via llvm-branch-commits

Author: Andre Vieira
Date: 2021-01-25T07:58:12Z
New Revision: 8fbc1437c605fe92c0fa286757e3b287d6b02f05

URL: 
https://github.com/llvm/llvm-project/commit/8fbc1437c605fe92c0fa286757e3b287d6b02f05
DIFF: 
https://github.com/llvm/llvm-project/commit/8fbc1437c605fe92c0fa286757e3b287d6b02f05.diff

LOG: [AArch64] Merge [US]MULL with half adds and subs into [US]ML[AS]L

This patch adds patterns to teach the AArch64 backend to merge [US]MULL
instructions and adds/subs of half the size into [US]ML[AS]L where we don't use
the top half of the result.

Differential Revision: https://reviews.llvm.org/D95218

Added: 
llvm/test/CodeGen/AArch64/mla_mls_merge.ll

Modified: 
llvm/lib/Target/AArch64/AArch64InstrInfo.td

Removed: 




diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td 
b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 00665bfe7c90..171d3dbaa814 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4792,6 +4792,44 @@ defm USUBL   : SIMDLongThreeVectorBHS<1, 0b0010, "usubl",
 defm USUBW   : SIMDWideThreeVectorBHS<   1, 0b0011, "usubw",
  BinOpFrag<(sub node:$LHS, (zanyext node:$RHS))>>;
 
+// Additional patterns for [SU]ML[AS]L
+multiclass Neon_mul_acc_widen_patterns<SDPatternOperator opnode, SDPatternOperator vecopnode,
+                                       Instruction INST8B, Instruction INST4H, Instruction INST2S> {
+  def : Pat<(v4i16 (opnode
+                    V64:$Ra,
+                    (v4i16 (extract_subvector
+                            (vecopnode (v8i8 V64:$Rn),(v8i8 V64:$Rm)),
+                            (i64 0))))),
+             (EXTRACT_SUBREG (v8i16 (INST8B
+                                     (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Ra, dsub),
+                                     V64:$Rn, V64:$Rm)), dsub)>;
+  def : Pat<(v2i32 (opnode
+                    V64:$Ra,
+                    (v2i32 (extract_subvector
+                            (vecopnode (v4i16 V64:$Rn),(v4i16 V64:$Rm)),
+                            (i64 0))))),
+             (EXTRACT_SUBREG (v4i32 (INST4H
+                                     (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Ra, dsub),
+                                     V64:$Rn, V64:$Rm)), dsub)>;
+  def : Pat<(v1i64 (opnode
+                    V64:$Ra,
+                    (v1i64 (extract_subvector
+                            (vecopnode (v2i32 V64:$Rn),(v2i32 V64:$Rm)),
+                            (i64 0))))),
+             (EXTRACT_SUBREG (v2i64 (INST2S
+                                     (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Ra, dsub),
+                                     V64:$Rn, V64:$Rm)), dsub)>;
+}
+
+defm : Neon_mul_acc_widen_patterns<add, int_aarch64_neon_umull,
+     UMLALv8i8_v8i16, UMLALv4i16_v4i32, UMLALv2i32_v2i64>;
+defm : Neon_mul_acc_widen_patterns<add, int_aarch64_neon_smull,
+     SMLALv8i8_v8i16, SMLALv4i16_v4i32, SMLALv2i32_v2i64>;
+defm : Neon_mul_acc_widen_patterns<sub, int_aarch64_neon_umull,
+     UMLSLv8i8_v8i16, UMLSLv4i16_v4i32, UMLSLv2i32_v2i64>;
+defm : Neon_mul_acc_widen_patterns<sub, int_aarch64_neon_smull,
+     SMLSLv8i8_v8i16, SMLSLv4i16_v4i32, SMLSLv2i32_v2i64>;
+
 // Additional patterns for SMULL and UMULL
 multiclass Neon_mul_widen_patterns {

diff  --git a/llvm/test/CodeGen/AArch64/mla_mls_merge.ll 
b/llvm/test/CodeGen/AArch64/mla_mls_merge.ll
new file mode 100644
index 000000000000..d3aa9673d8b3
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/mla_mls_merge.ll
@@ -0,0 +1,205 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-unknown-linux-gnu | 
FileCheck %s
+
+define <4 x i16> @test_mla0(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> 
%d) {
+; CHECK-LABEL: test_mla0:
+; CHECK:   // %bb.0: // %entry
+; CHECK-NEXT:umull v2.8h, v2.8b, v3.8b
+; CHECK-NEXT:umlal v2.8h, v0.8b, v1.8b
+; CHECK-NEXT:mov v0.16b, v2.16b
+; CHECK-NEXT:ret
+entry:
+  %vmull.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %a, 
<8 x i8> %b)
+  %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %c, 
<8 x i8> %d)
+  %add.i = add <8 x i16> %vmull.i.i, %vmull.i
+  %shuffle.i = shufflevector <8 x i16> %add.i, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i16> %shuffle.i
+}
+
+
+define <4 x i16> @test_mla1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> 
%d) {
+; CHECK-LABEL: test_mla1:
+; CHECK:   // %bb.0: // %entry
+; CHECK-NEXT:smull v2.8h, v2.8b, v3.8b
+; CHECK-NEXT:smlal v2.8h, v0.8b, v1.8b
+; CHECK-NEXT:mov v0.16b, v2.16b
+; CHECK-NEXT:ret
+entry:
+  %vmull.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %a, 
<8 x i8> %b)
+  %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %c, 
<8 x i8> %d)
+  %add.i = add <8 x i16> %vmull.i.i, %vmull.i
+  %shuffle.i = shufflevector <8 x i16> %add.i, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i16> %shuffle.i
+}
+
+
+define <2 x i32> @test_mla2(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x 
i16> %d) {
+; CHECK-LABEL: test_mla2:
+; CHECK:   // %bb.0: // %entry
+; CHECK-NEXT:umull v2.4s, v2.4h, v3.4h
+; CHECK-NEXT:umlal v2.4s, v0.4h, v1.4h
+; CHECK-NEXT:mov v0.16b, v2.16b
+; CHECK-NEXT:ret
+entry:
+  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, 
<4 x i16> %b)
+  %vmull2.i.i = tail call

[llvm-branch-commits] [clang] a4b80ef - [AArch64] Define __ARM_FEATURE_{CRC32, ATOMICS}

2020-11-27 Thread Andre Vieira via llvm-branch-commits

Author: Andre Vieira
Date: 2020-11-27T17:42:43Z
New Revision: a4b80efea98fc0b6421db40f9718c5c369fecec0

URL: 
https://github.com/llvm/llvm-project/commit/a4b80efea98fc0b6421db40f9718c5c369fecec0
DIFF: 
https://github.com/llvm/llvm-project/commit/a4b80efea98fc0b6421db40f9718c5c369fecec0.diff

LOG: [AArch64] Define __ARM_FEATURE_{CRC32,ATOMICS}

This patch implements the definition of __ARM_FEATURE_ATOMICS and fixes the
missing definition of __ARM_FEATURE_CRC32 for Armv8.1-A.

Differential Revision: https://reviews.llvm.org/D91438

Added: 


Modified: 
clang/lib/Basic/Targets/AArch64.cpp
clang/lib/Basic/Targets/AArch64.h
clang/test/Preprocessor/aarch64-target-features.c

Removed: 




diff  --git a/clang/lib/Basic/Targets/AArch64.cpp 
b/clang/lib/Basic/Targets/AArch64.cpp
index 37f0212b7001..6282abca1326 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -155,8 +155,9 @@ void AArch64TargetInfo::fillValidCPUList(
 
 void AArch64TargetInfo::getTargetDefinesARMV81A(const LangOptions &Opts,
 MacroBuilder &Builder) const {
-  // FIXME: Armv8.1 makes __ARM_FEATURE_CRC32 mandatory. Handle it here.
   Builder.defineMacro("__ARM_FEATURE_QRDMX", "1");
+  Builder.defineMacro("__ARM_FEATURE_ATOMICS", "1");
+  Builder.defineMacro("__ARM_FEATURE_CRC32", "1");
 }
 
 void AArch64TargetInfo::getTargetDefinesARMV82A(const LangOptions &Opts,
@@ -176,8 +177,6 @@ void AArch64TargetInfo::getTargetDefinesARMV83A(const 
LangOptions &Opts,
 void AArch64TargetInfo::getTargetDefinesARMV84A(const LangOptions &Opts,
 MacroBuilder &Builder) const {
   // Also include the Armv8.3 defines
-  // FIXME: Armv8.4 makes __ARM_FEATURE_ATOMICS, defined in GCC, mandatory.
-  // Add and handle it here.
   getTargetDefinesARMV83A(Opts, Builder);
 }
 
@@ -304,6 +303,9 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions 
&Opts,
   if (HasMatMul)
 Builder.defineMacro("__ARM_FEATURE_MATMUL_INT8", "1");
 
+  if (HasLSE)
+Builder.defineMacro("__ARM_FEATURE_ATOMICS", "1");
+
   if (HasBFloat16) {
 Builder.defineMacro("__ARM_FEATURE_BF16", "1");
 Builder.defineMacro("__ARM_FEATURE_BF16_VECTOR_ARITHMETIC", "1");
@@ -418,6 +420,7 @@ bool 
AArch64TargetInfo::handleTargetFeatures(std::vector &Features,
   HasSVE2BitPerm = false;
   HasMatmulFP64 = false;
   HasMatmulFP32 = false;
+  HasLSE = false;
 
   ArchKind = llvm::AArch64::ArchKind::ARMV8A;
 
@@ -499,6 +502,8 @@ bool 
AArch64TargetInfo::handleTargetFeatures(std::vector &Features,
   HasMatMul = true;
 if (Feature == "+bf16")
   HasBFloat16 = true;
+if (Feature == "+lse")
+  HasLSE = true;
   }
 
   setDataLayout();

diff  --git a/clang/lib/Basic/Targets/AArch64.h 
b/clang/lib/Basic/Targets/AArch64.h
index d1982897d84e..a70abb7bfd90 100644
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -44,6 +44,7 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public 
TargetInfo {
   bool HasSVE2BitPerm;
   bool HasMatmulFP64;
   bool HasMatmulFP32;
+  bool HasLSE;
 
   llvm::AArch64::ArchKind ArchKind;
 

diff  --git a/clang/test/Preprocessor/aarch64-target-features.c 
b/clang/test/Preprocessor/aarch64-target-features.c
index af4f6a1c0e0e..f0b01f519a85 100644
--- a/clang/test/Preprocessor/aarch64-target-features.c
+++ b/clang/test/Preprocessor/aarch64-target-features.c
@@ -62,6 +62,8 @@
 // RUN: %clang -target arm64-none-linux-gnu -mcrc -x c -E -dM %s -o - | 
FileCheck --check-prefix=CHECK-CRC32 %s
 // RUN: %clang -target aarch64-none-linux-gnu -march=armv8-a+crc -x c -E -dM 
%s -o - | FileCheck --check-prefix=CHECK-CRC32 %s
 // RUN: %clang -target arm64-none-linux-gnu -march=armv8-a+crc -x c -E -dM %s 
-o - | FileCheck --check-prefix=CHECK-CRC32 %s
+// RUN: %clang -target aarch64-none-linux-gnu -march=armv8.1-a -x c -E -dM %s 
-o - | FileCheck --check-prefix=CHECK-CRC32 %s
+// RUN: %clang -target arm64-none-linux-gnu -march=armv8.1-a -x c -E -dM %s -o 
- | FileCheck --check-prefix=CHECK-CRC32 %s
 // CHECK-CRC32: __ARM_FEATURE_CRC32 1
 
 // RUN: %clang -target aarch64-none-linux-gnu -fno-math-errno 
-fno-signed-zeros\
@@ -447,3 +449,10 @@
 // RUN: %clang -target aarch64-arm-none-eabi -march=armv8-a+sve 
-msve-vector-bits=2048 -x c -E -dM %s -o - 2>&1 | FileCheck 
-check-prefix=CHECK-SVE-VECTOR-BITS -D#VBITS=2048 %s
 // CHECK-SVE-VECTOR-BITS: __ARM_FEATURE_SVE_BITS [[#VBITS:]]
 // CHECK-SVE-VECTOR-BITS: __ARM_FEATURE_SVE_VECTOR_OPERATORS 1
+
+// == Check Large System Extensions (LSE)
+// RUN: %clang -target aarch64-none-linux-gnu -march=armv8-a+lse -x c -E -dM 
%s -o - | FileCheck --check-prefix=CHECK-LSE %s
+// RUN: %clang -target arm64-none-linux-gnu -march=armv8-a+lse -x c -E -dM %s 
-o - | FileCheck --check-prefix=CHECK-LSE %s
+// RUN: %clang -target aarch64-none-linux-gnu -march=