Author: Florian Hahn Date: 2021-01-04T15:49:47Z New Revision: ed936aad7814404b3cc767d4515096f078dfcbb9
URL: https://github.com/llvm/llvm-project/commit/ed936aad7814404b3cc767d4515096f078dfcbb9 DIFF: https://github.com/llvm/llvm-project/commit/ed936aad7814404b3cc767d4515096f078dfcbb9.diff LOG: [InterleavedAccess] Return correct 'modified' status. Both tryReplaceExtracts and replaceBinOpShuffles may modify the IR, even if no interleaved loads are generated, but currently the pass pretends no changes were made. This patch updates the pass to return true if either of the functions made any changes. In the case of tryReplaceExtracts, changes are made if there are any Extracts and the function returns true. `replaceBinOpShuffles` always makes changes if BinOpShuffles is not empty. It also always returned true, so I went ahead and renamed it to just `replaceBinOpShuffles`. Fixes PR48208. Reviewed By: SjoerdMeijer Differential Revision: https://reviews.llvm.org/D93997 Added: llvm/test/Transforms/InterleavedAccess/X86/interleave-load-extract-shuffle-changes.ll Modified: llvm/lib/CodeGen/InterleavedAccessPass.cpp Removed: ################################################################################ diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp index 73771609a792..6e1621450755 100644 --- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -123,10 +123,11 @@ class InterleavedAccess : public FunctionPass { /// Given a number of shuffles of the form shuffle(binop(x,y)), convert them /// to binop(shuffle(x), shuffle(y)) to allow the formation of an /// interleaving load. Any newly created shuffles that operate on \p LI will - /// be added to \p Shuffles. - bool tryReplaceBinOpShuffles(ArrayRef<ShuffleVectorInst *> BinOpShuffles, - SmallVectorImpl<ShuffleVectorInst *> &Shuffles, - LoadInst *LI); + /// be added to \p Shuffles. Returns true, if any changes to the IR have been + /// made. 
+ bool replaceBinOpShuffles(ArrayRef<ShuffleVectorInst *> BinOpShuffles, + SmallVectorImpl<ShuffleVectorInst *> &Shuffles, + LoadInst *LI); }; } // end anonymous namespace. @@ -369,14 +370,17 @@ bool InterleavedAccess::lowerInterleavedLoad( // use the shufflevector instructions instead of the load. if (!tryReplaceExtracts(Extracts, Shuffles)) return false; - if (!tryReplaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, LI)) - return false; + + bool BinOpShuffleChanged = + replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, LI); LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *LI << "\n"); // Try to create target specific intrinsics to replace the load and shuffles. - if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor)) - return false; + if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor)) { + // If Extracts is not empty, tryReplaceExtracts made changes earlier. + return !Extracts.empty() || BinOpShuffleChanged; + } for (auto SVI : Shuffles) DeadInsts.push_back(SVI); @@ -385,7 +389,7 @@ bool InterleavedAccess::lowerInterleavedLoad( return true; } -bool InterleavedAccess::tryReplaceBinOpShuffles( +bool InterleavedAccess::replaceBinOpShuffles( ArrayRef<ShuffleVectorInst *> BinOpShuffles, SmallVectorImpl<ShuffleVectorInst *> &Shuffles, LoadInst *LI) { for (auto *SVI : BinOpShuffles) { @@ -410,7 +414,8 @@ bool InterleavedAccess::tryReplaceBinOpShuffles( if (NewSVI2->getOperand(0) == LI) Shuffles.push_back(NewSVI2); } - return true; + + return !BinOpShuffles.empty(); } bool InterleavedAccess::tryReplaceExtracts( diff --git a/llvm/test/Transforms/InterleavedAccess/X86/interleave-load-extract-shuffle-changes.ll b/llvm/test/Transforms/InterleavedAccess/X86/interleave-load-extract-shuffle-changes.ll new file mode 100644 index 000000000000..80f3195699dc --- /dev/null +++ b/llvm/test/Transforms/InterleavedAccess/X86/interleave-load-extract-shuffle-changes.ll @@ -0,0 +1,58 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_test_checks.py +; RUN: opt -interleaved-access -S %s | FileCheck %s + +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.15.0" + +; No interleaved load instruction is generated, but the shuffle is moved just +; after the load. +define <2 x double> @shuffle_binop_fol(<4 x double>* %ptr) { +; CHECK-LABEL: @shuffle_binop_fol( +; CHECK-NEXT: vector.body.preheader: +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, <4 x double>* [[PTR:%.*]], align 8 +; CHECK-NEXT: [[EXTRACTED1:%.*]] = shufflevector <4 x double> [[WIDE_LOAD]], <4 x double> undef, <2 x i32> <i32 0, i32 2> +; CHECK-NEXT: [[EXTRACTED2:%.*]] = shufflevector <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, <4 x double> undef, <2 x i32> <i32 0, i32 2> +; CHECK-NEXT: [[FADD3:%.*]] = fadd <2 x double> [[EXTRACTED1]], [[EXTRACTED2]] +; CHECK-NEXT: ret <2 x double> [[FADD3]] +; +vector.body.preheader: + %wide.load = load <4 x double>, <4 x double>* %ptr, align 8 + %fadd = fadd <4 x double> %wide.load, <double 1.0, double 1.0, double 1.0, double 1.0> + %extracted = shufflevector <4 x double> %fadd, <4 x double> undef, <2 x i32> <i32 0, i32 2> + ret <2 x double> %extracted +} + +; No interleaved load instruction is generated, but the extractelement +; instructions are updated to use the shuffle instead of the load. 
+define void @shuffle_extract(<4 x double>* %ptr, i1 %c) { +; CHECK-LABEL: @shuffle_extract( +; CHECK-NEXT: vector.body.preheader: +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, <4 x double>* [[PTR:%.*]], align 8 +; CHECK-NEXT: [[EXTRACTED:%.*]] = shufflevector <4 x double> [[WIDE_LOAD]], <4 x double> undef, <2 x i32> <i32 0, i32 2> +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_MERGE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x double> [[EXTRACTED]], i64 0 +; CHECK-NEXT: call void @use(double [[TMP0]]) +; CHECK-NEXT: br label [[IF_MERGE]] +; CHECK: if.merge: +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[EXTRACTED]], i64 1 +; CHECK-NEXT: call void @use(double [[TMP1]]) +; CHECK-NEXT: ret void +; +vector.body.preheader: + %wide.load = load <4 x double>, <4 x double>* %ptr, align 8 + %extracted = shufflevector <4 x double> %wide.load, <4 x double> undef, <2 x i32> <i32 0, i32 2> + br i1 %c, label %if.then, label %if.merge + +if.then: + %e0 = extractelement <4 x double> %wide.load, i32 0 + call void @use(double %e0) + br label %if.merge + +if.merge: + %e1 = extractelement <4 x double> %wide.load, i32 2 + call void @use(double %e1) + ret void +} + +declare void @use(double) _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits