================ @@ -21962,6 +21962,35 @@ SDValue tryLowerPartialReductionToWideAdd(SDNode *N, return DAG.getNode(TopOpcode, DL, AccVT, BottomNode, ExtOp); } +static SDValue foldRevInvolution(SDNode *N) { + SDValue InnerRev = N->getOperand(1); + if (!InnerRev.hasOneUse()) + return SDValue(); + + unsigned OuterIId = getIntrinsicID(N); + unsigned InnerIId = getIntrinsicID(InnerRev.getNode()); + if (OuterIId != InnerIId) + return SDValue(); + + switch (OuterIId) { + case Intrinsic::aarch64_sve_revb: + case Intrinsic::aarch64_sve_revd: + case Intrinsic::aarch64_sve_revh: + case Intrinsic::aarch64_sve_revw: + if (N->getOperand(2) != InnerRev.getOperand(2) || + N->getOperand(3) != InnerRev.getOperand(3)) + return SDValue(); + [[fallthrough]]; + case Intrinsic::aarch64_sve_rev: + case Intrinsic::aarch64_sve_rev_b16: + case Intrinsic::aarch64_sve_rev_b32: + case Intrinsic::aarch64_sve_rev_b64: + return InnerRev.getOperand(1); ---------------- sdesmalen-arm wrote:
I don't think the combine on the predicated `rev[bhwd]` intrinsics is correct. It would be if the governing predicate is `all true`. Otherwise the `rev`s might be removed if the passthru is `poison` and the input is equal to the other rev result (rather than the source of the first `rev`, which is what the code currently checks). This is probably more of an optimisation to do in InstCombine (see `AArch64TTIImpl::instCombineIntrinsic`), but I'd suggest keeping this PR simple and splitting that work out into a separate PR. https://github.com/llvm/llvm-project/pull/116422 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits