Author: Peter Waller Date: 2021-05-20T16:22:50Z New Revision: 2d574a110440597eefe1b2a8b6144e4e89c21d05
URL: https://github.com/llvm/llvm-project/commit/2d574a110440597eefe1b2a8b6144e4e89c21d05 DIFF: https://github.com/llvm/llvm-project/commit/2d574a110440597eefe1b2a8b6144e4e89c21d05.diff LOG: [CodeGen][AArch64][SVE] Canonicalize intrinsic rdffr{ => _z} Follow up to D101357 / 3fa6510f6. Supersedes D102330. Goal: Use flags-setting rdffrs instead of rdffr + ptest. Problem: RDFFR_P doesn't have a flags-setting equivalent. Solution: in instcombine, canonicalize to RDFFR_PP at the IR level, and rely on RDFFR_PP+PTEST => RDFFRS_PP optimization in AArch64InstrInfo::optimizePTestInstr. While here: * Test that rdffr.z+ptest generates a rdffrs. * Use update_{test,llc}_checks.py on the tests. * Use sve attribute on functions. Differential Revision: https://reviews.llvm.org/D102623 Added: llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-rdffr-predication.ll Modified: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rdffr.c llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp llvm/test/CodeGen/AArch64/sve-intrinsics-ffr-manipulation.ll Removed: ################################################################################ diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rdffr.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rdffr.c index 9b871ee3a8dc..a85ac7bb5cef 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rdffr.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rdffr.c @@ -7,7 +7,8 @@ svbool_t test_svrdffr() { // CHECK-LABEL: test_svrdffr - // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr() + // CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> + // CHECK-NOT: rdffr // CHECK: ret <vscale x 16 x i1> %[[INTRINSIC]] return svrdffr(); } diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 90762052dc3a..846c07863467 100644 --- 
a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -470,6 +470,23 @@ static Optional<Instruction *> instCombineSVELast(InstCombiner &IC, return IC.replaceInstUsesWith(II, Extract); } +static Optional<Instruction *> instCombineRDFFR(InstCombiner &IC, + IntrinsicInst &II) { + LLVMContext &Ctx = II.getContext(); + IRBuilder<> Builder(Ctx); + Builder.SetInsertPoint(&II); + // Replace rdffr with predicated rdffr.z intrinsic, so that optimizePTestInstr + // can work with RDFFR_PP for ptest elimination. + auto *AllPat = + ConstantInt::get(Type::getInt32Ty(Ctx), AArch64SVEPredPattern::all); + auto *PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, + {II.getType()}, {AllPat}); + auto *RDFFR = + Builder.CreateIntrinsic(Intrinsic::aarch64_sve_rdffr_z, {}, {PTrue}); + RDFFR->takeName(&II); + return IC.replaceInstUsesWith(II, RDFFR); +} + Optional<Instruction *> AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { @@ -481,6 +498,8 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, return instCombineConvertFromSVBool(IC, II); case Intrinsic::aarch64_sve_dup: return instCombineSVEDup(IC, II); + case Intrinsic::aarch64_sve_rdffr: + return instCombineRDFFR(IC, II); case Intrinsic::aarch64_sve_lasta: case Intrinsic::aarch64_sve_lastb: return instCombineSVELast(IC, II); diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ffr-manipulation.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ffr-manipulation.ll index 7460037078d1..bc07c972e5fb 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ffr-manipulation.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ffr-manipulation.ll @@ -1,33 +1,51 @@ -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" ; ; RDFFR ; -define <vscale x 16 x i1> @rdffr() { 
+define <vscale x 16 x i1> @rdffr() #0 { ; CHECK-LABEL: rdffr: -; CHECK: rdffr p0.b -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: rdffr p0.b +; CHECK-NEXT: ret %out = call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr() ret <vscale x 16 x i1> %out } -define <vscale x 16 x i1> @rdffr_z(<vscale x 16 x i1> %pg) { +define <vscale x 16 x i1> @rdffr_z(<vscale x 16 x i1> %pg) #0 { ; CHECK-LABEL: rdffr_z: -; CHECK: rdffr p0.b, p0/z -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: rdffr p0.b, p0/z +; CHECK-NEXT: ret %out = call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> %pg) ret <vscale x 16 x i1> %out } +; Test that rdffr.z followed by ptest optimizes to flags-setting rdffrs. +define i1 @rdffr_z_ptest(<vscale x 16 x i1> %pg) #0 { +; CHECK-LABEL: rdffr_z_ptest: +; CHECK: // %bb.0: +; CHECK-NEXT: rdffrs p0.b, p0/z +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %rdffr = call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> %pg) + %out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %rdffr) + ret i1 %out +} + ; ; SETFFR ; -define void @set_ffr() { +define void @set_ffr() #0 { ; CHECK-LABEL: set_ffr: -; CHECK: setffr -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: setffr +; CHECK-NEXT: ret call void @llvm.aarch64.sve.setffr() ret void } @@ -36,10 +54,11 @@ define void @set_ffr() { ; WRFFR ; -define void @wrffr(<vscale x 16 x i1> %a) { +define void @wrffr(<vscale x 16 x i1> %a) #0 { ; CHECK-LABEL: wrffr: -; CHECK: wrffr p0.b -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: wrffr p0.b +; CHECK-NEXT: ret call void @llvm.aarch64.sve.wrffr(<vscale x 16 x i1> %a) ret void } @@ -48,3 +67,7 @@ declare <vscale x 16 x i1> @llvm.aarch64.sve.rdffr() declare <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1>) declare void @llvm.aarch64.sve.setffr() declare void @llvm.aarch64.sve.wrffr(<vscale x 16 x i1>) + +declare i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1>, <vscale 
x 16 x i1>) + +attributes #0 = { "target-features"="+sve" } diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-rdffr-predication.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-rdffr-predication.ll new file mode 100644 index 000000000000..4360d99bd03c --- /dev/null +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-rdffr-predication.ll @@ -0,0 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -instcombine < %s | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +; Test that rdffr is substituted with predicated form which enables ptest optimization later. +define <vscale x 16 x i1> @predicate_rdffr() #0 { +; CHECK-LABEL: @predicate_rdffr( +; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) +; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> [[TMP1]]) +; CHECK-NEXT: ret <vscale x 16 x i1> [[OUT]] +; + %out = call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr() + ret <vscale x 16 x i1> %out +} + +declare <vscale x 16 x i1> @llvm.aarch64.sve.rdffr() + +attributes #0 = { "target-features"="+sve" } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits