================
@@ -21962,6 +21962,35 @@ SDValue tryLowerPartialReductionToWideAdd(SDNode *N,
   return DAG.getNode(TopOpcode, DL, AccVT, BottomNode, ExtOp);
 }
 
+static SDValue foldRevInvolution(SDNode *N) {
+  SDValue InnerRev = N->getOperand(1);
+  if (!InnerRev.hasOneUse())
+    return SDValue();
+
+  unsigned OuterIId = getIntrinsicID(N);
+  unsigned InnerIId = getIntrinsicID(InnerRev.getNode());
+  if (OuterIId != InnerIId)
+    return SDValue();
+
+  switch (OuterIId) {
+  case Intrinsic::aarch64_sve_revb:
+  case Intrinsic::aarch64_sve_revd:
+  case Intrinsic::aarch64_sve_revh:
+  case Intrinsic::aarch64_sve_revw:
+    if (N->getOperand(2) != InnerRev.getOperand(2) ||
+        N->getOperand(3) != InnerRev.getOperand(3))
+      return SDValue();
+    [[fallthrough]];
+  case Intrinsic::aarch64_sve_rev:
+  case Intrinsic::aarch64_sve_rev_b16:
+  case Intrinsic::aarch64_sve_rev_b32:
+  case Intrinsic::aarch64_sve_rev_b64:
+    return InnerRev.getOperand(1);
----------------
sdesmalen-arm wrote:

I don't think the combine on the predicated `rev[bhwd]` intrinsics is correct.
It would be if the governing predicate is `all true`. Otherwise the `rev`s
might be removed even when the passthru is `poison` and the result should be
equal to the other rev's result (rather than to the source of the first
`rev`, which is what the code currently checks).

This is probably more an optimisation to do in InstCombine (see 
`AArch64TTIImpl::instCombineIntrinsic`), but I'd suggest keeping this PR simple 
and splitting that work out into a separate PR.

https://github.com/llvm/llvm-project/pull/116422
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to