================
@@ -21962,6 +21962,35 @@ SDValue tryLowerPartialReductionToWideAdd(SDNode *N,
   return DAG.getNode(TopOpcode, DL, AccVT, BottomNode, ExtOp);
 }
 
+static SDValue foldRevInvolution(SDNode *N) {
+  SDValue InnerRev = N->getOperand(1);
+  if (!InnerRev.hasOneUse())
+    return SDValue();
+
+  unsigned OuterIId = getIntrinsicID(N);
+  unsigned InnerIId = getIntrinsicID(InnerRev.getNode());
+  if (OuterIId != InnerIId)
+    return SDValue();
+
+  switch (OuterIId) {
+  case Intrinsic::aarch64_sve_revb:
+  case Intrinsic::aarch64_sve_revd:
+  case Intrinsic::aarch64_sve_revh:
+  case Intrinsic::aarch64_sve_revw:
+    if (N->getOperand(2) != InnerRev.getOperand(2) ||
+        N->getOperand(3) != InnerRev.getOperand(3))
+      return SDValue();
+    [[fallthrough]];
+  case Intrinsic::aarch64_sve_rev:
+  case Intrinsic::aarch64_sve_rev_b16:
+  case Intrinsic::aarch64_sve_rev_b32:
+  case Intrinsic::aarch64_sve_rev_b64:
+    return InnerRev.getOperand(1);
----------------
sdesmalen-arm wrote:
I realise that I steered you to doing this in `performIntrinsicCombine`, but given my comment above, perhaps it's worth doing the optimization for `rev.b16/b32/b64` in `AArch64TTIImpl::instCombineIntrinsic` instead. That way the canonicalised input into the code-generator is already optimised.

https://github.com/llvm/llvm-project/pull/116422
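For reference, a minimal sketch of what the suggested IR-level fold might look like (the helper name and its exact placement are assumptions, not part of the patch): a `rev(rev(x)) -> x` fold for the unpredicated reversal intrinsics, written as one of the `instCombineSVE*` helpers that `AArch64TTIImpl::instCombineIntrinsic` dispatches to in `AArch64TargetTransformInfo.cpp`.

```cpp
// Hypothetical helper, not from the PR: fold a double reversal at the IR
// level so the canonicalised input reaching the code-generator is already
// optimised. Assumed to live alongside the other instCombineSVE* helpers.
static std::optional<Instruction *>
instCombineSVERevRev(InstCombiner &IC, IntrinsicInst &II) {
  // Only handle rev(rev(x)) where the inner call is the same intrinsic and
  // has no other users.
  auto *Inner = dyn_cast<IntrinsicInst>(II.getArgOperand(0));
  if (!Inner || !Inner->hasOneUse() ||
      Inner->getIntrinsicID() != II.getIntrinsicID())
    return std::nullopt;
  // rev is an involution: replace the outer call with the original vector.
  return IC.replaceInstUsesWith(II, Inner->getArgOperand(0));
}
```

Such a helper would presumably be wired in from the `switch (IID)` in `AArch64TTIImpl::instCombineIntrinsic` for `Intrinsic::aarch64_sve_rev` and the `rev_b16/b32/b64` intrinsics; the predicated `revb/revh/revw/revd` cases would still need the extra governing-predicate/passthru checks shown in the DAG version above.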