[llvm-branch-commits] [llvm] 8fbc143 - [AArch64] Merge [US]MULL with half adds and subs into [US]ML[AS]L
Author: Andre Vieira Date: 2021-01-25T07:58:12Z New Revision: 8fbc1437c605fe92c0fa286757e3b287d6b02f05 URL: https://github.com/llvm/llvm-project/commit/8fbc1437c605fe92c0fa286757e3b287d6b02f05 DIFF: https://github.com/llvm/llvm-project/commit/8fbc1437c605fe92c0fa286757e3b287d6b02f05.diff LOG: [AArch64] Merge [US]MULL with half adds and subs into [US]ML[AS]L This patch adds patterns to teach the AArch64 backend to merge [US]MULL instructions and adds/subs of half the size into [US]ML[AS]L where we don't use the top half of the result. Differential Revision: https://reviews.llvm.org/D95218 Added: llvm/test/CodeGen/AArch64/mla_mls_merge.ll Modified: llvm/lib/Target/AArch64/AArch64InstrInfo.td Removed: diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 00665bfe7c90..171d3dbaa814 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -4792,6 +4792,44 @@ defm USUBL : SIMDLongThreeVectorBHS<1, 0b0010, "usubl", defm USUBW : SIMDWideThreeVectorBHS< 1, 0b0011, "usubw", BinOpFrag<(sub node:$LHS, (zanyext node:$RHS))>>; +// Additional patterns for [SU]ML[AS]L +multiclass Neon_mul_acc_widen_patterns { + def : Pat<(v4i16 (opnode +V64:$Ra, +(v4i16 (extract_subvector +(vecopnode (v8i8 V64:$Rn),(v8i8 V64:$Rm)), +(i64 0), + (EXTRACT_SUBREG (v8i16 (INST8B + (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Ra, dsub), + V64:$Rn, V64:$Rm)), dsub)>; + def : Pat<(v2i32 (opnode +V64:$Ra, +(v2i32 (extract_subvector +(vecopnode (v4i16 V64:$Rn),(v4i16 V64:$Rm)), +(i64 0), + (EXTRACT_SUBREG (v4i32 (INST4H + (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Ra, dsub), + V64:$Rn, V64:$Rm)), dsub)>; + def : Pat<(v1i64 (opnode +V64:$Ra, +(v1i64 (extract_subvector +(vecopnode (v2i32 V64:$Rn),(v2i32 V64:$Rm)), +(i64 0), + (EXTRACT_SUBREG (v2i64 (INST2S + (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Ra, dsub), + V64:$Rn, V64:$Rm)), dsub)>; +} + +defm : Neon_mul_acc_widen_patterns; +defm : 
Neon_mul_acc_widen_patterns; +defm : Neon_mul_acc_widen_patterns; +defm : Neon_mul_acc_widen_patterns; + // Additional patterns for SMULL and UMULL multiclass Neon_mul_widen_patterns { diff --git a/llvm/test/CodeGen/AArch64/mla_mls_merge.ll b/llvm/test/CodeGen/AArch64/mla_mls_merge.ll new file mode 100644 index ..d3aa9673d8b3 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/mla_mls_merge.ll @@ -0,0 +1,205 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-unknown-linux-gnu | FileCheck %s + +define <4 x i16> @test_mla0(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) { +; CHECK-LABEL: test_mla0: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:umull v2.8h, v2.8b, v3.8b +; CHECK-NEXT:umlal v2.8h, v0.8b, v1.8b +; CHECK-NEXT:mov v0.16b, v2.16b +; CHECK-NEXT:ret +entry: + %vmull.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %a, <8 x i8> %b) + %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %c, <8 x i8> %d) + %add.i = add <8 x i16> %vmull.i.i, %vmull.i + %shuffle.i = shufflevector <8 x i16> %add.i, <8 x i16> undef, <4 x i32> + ret <4 x i16> %shuffle.i +} + + +define <4 x i16> @test_mla1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) { +; CHECK-LABEL: test_mla1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:smull v2.8h, v2.8b, v3.8b +; CHECK-NEXT:smlal v2.8h, v0.8b, v1.8b +; CHECK-NEXT:mov v0.16b, v2.16b +; CHECK-NEXT:ret +entry: + %vmull.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %a, <8 x i8> %b) + %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %c, <8 x i8> %d) + %add.i = add <8 x i16> %vmull.i.i, %vmull.i + %shuffle.i = shufflevector <8 x i16> %add.i, <8 x i16> undef, <4 x i32> + ret <4 x i16> %shuffle.i +} + + +define <2 x i32> @test_mla2(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) { +; CHECK-LABEL: test_mla2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:umull v2.4s, v2.4h, v3.4h +; 
CHECK-NEXT:umlal v2.4s, v0.4h, v1.4h +; CHECK-NEXT:mov v0.16b, v2.16b +; CHECK-NEXT:ret +entry: + %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %b) + %vmull2.i.i = tail call
[llvm-branch-commits] [clang] a4b80ef - [AArch64] Define __ARM_FEATURE_{CRC32, ATOMICS}
Author: Andre Vieira Date: 2020-11-27T17:42:43Z New Revision: a4b80efea98fc0b6421db40f9718c5c369fecec0 URL: https://github.com/llvm/llvm-project/commit/a4b80efea98fc0b6421db40f9718c5c369fecec0 DIFF: https://github.com/llvm/llvm-project/commit/a4b80efea98fc0b6421db40f9718c5c369fecec0.diff LOG: [AArch64] Define __ARM_FEATURE_{CRC32,ATOMICS} This patch implements the definition of __ARM_FEATURE_ATOMICS and fixes the missing definition of __ARM_FEATURE_CRC32 for Armv8.1-A. Differential Revision: https://reviews.llvm.org/D91438 Added: Modified: clang/lib/Basic/Targets/AArch64.cpp clang/lib/Basic/Targets/AArch64.h clang/test/Preprocessor/aarch64-target-features.c Removed: diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index 37f0212b7001..6282abca1326 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -155,8 +155,9 @@ void AArch64TargetInfo::fillValidCPUList( void AArch64TargetInfo::getTargetDefinesARMV81A(const LangOptions &Opts, MacroBuilder &Builder) const { - // FIXME: Armv8.1 makes __ARM_FEATURE_CRC32 mandatory. Handle it here. Builder.defineMacro("__ARM_FEATURE_QRDMX", "1"); + Builder.defineMacro("__ARM_FEATURE_ATOMICS", "1"); + Builder.defineMacro("__ARM_FEATURE_CRC32", "1"); } void AArch64TargetInfo::getTargetDefinesARMV82A(const LangOptions &Opts, @@ -176,8 +177,6 @@ void AArch64TargetInfo::getTargetDefinesARMV83A(const LangOptions &Opts, void AArch64TargetInfo::getTargetDefinesARMV84A(const LangOptions &Opts, MacroBuilder &Builder) const { // Also include the Armv8.3 defines - // FIXME: Armv8.4 makes __ARM_FEATURE_ATOMICS, defined in GCC, mandatory. - // Add and handle it here. 
getTargetDefinesARMV83A(Opts, Builder); } @@ -304,6 +303,9 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts, if (HasMatMul) Builder.defineMacro("__ARM_FEATURE_MATMUL_INT8", "1"); + if (HasLSE) +Builder.defineMacro("__ARM_FEATURE_ATOMICS", "1"); + if (HasBFloat16) { Builder.defineMacro("__ARM_FEATURE_BF16", "1"); Builder.defineMacro("__ARM_FEATURE_BF16_VECTOR_ARITHMETIC", "1"); @@ -418,6 +420,7 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector &Features, HasSVE2BitPerm = false; HasMatmulFP64 = false; HasMatmulFP32 = false; + HasLSE = false; ArchKind = llvm::AArch64::ArchKind::ARMV8A; @@ -499,6 +502,8 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector &Features, HasMatMul = true; if (Feature == "+bf16") HasBFloat16 = true; +if (Feature == "+lse") + HasLSE = true; } setDataLayout(); diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h index d1982897d84e..a70abb7bfd90 100644 --- a/clang/lib/Basic/Targets/AArch64.h +++ b/clang/lib/Basic/Targets/AArch64.h @@ -44,6 +44,7 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo { bool HasSVE2BitPerm; bool HasMatmulFP64; bool HasMatmulFP32; + bool HasLSE; llvm::AArch64::ArchKind ArchKind; diff --git a/clang/test/Preprocessor/aarch64-target-features.c b/clang/test/Preprocessor/aarch64-target-features.c index af4f6a1c0e0e..f0b01f519a85 100644 --- a/clang/test/Preprocessor/aarch64-target-features.c +++ b/clang/test/Preprocessor/aarch64-target-features.c @@ -62,6 +62,8 @@ // RUN: %clang -target arm64-none-linux-gnu -mcrc -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-CRC32 %s // RUN: %clang -target aarch64-none-linux-gnu -march=armv8-a+crc -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-CRC32 %s // RUN: %clang -target arm64-none-linux-gnu -march=armv8-a+crc -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-CRC32 %s +// RUN: %clang -target aarch64-none-linux-gnu -march=armv8.1-a -x c -E -dM %s -o - | FileCheck 
--check-prefix=CHECK-CRC32 %s +// RUN: %clang -target arm64-none-linux-gnu -march=armv8.1-a -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-CRC32 %s // CHECK-CRC32: __ARM_FEATURE_CRC32 1 // RUN: %clang -target aarch64-none-linux-gnu -fno-math-errno -fno-signed-zeros\ @@ -447,3 +449,10 @@ // RUN: %clang -target aarch64-arm-none-eabi -march=armv8-a+sve -msve-vector-bits=2048 -x c -E -dM %s -o - 2>&1 | FileCheck -check-prefix=CHECK-SVE-VECTOR-BITS -D#VBITS=2048 %s // CHECK-SVE-VECTOR-BITS: __ARM_FEATURE_SVE_BITS [[#VBITS:]] // CHECK-SVE-VECTOR-BITS: __ARM_FEATURE_SVE_VECTOR_OPERATORS 1 + +// == Check Large System Extensions (LSE) +// RUN: %clang -target aarch64-none-linux-gnu -march=armv8-a+lse -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-LSE %s +// RUN: %clang -target arm64-none-linux-gnu -march=armv8-a+lse -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-LSE %s +// RUN: %clang -target aarch64-none-linux-gnu -march=