Author: Max Beck-Jones
Date: 2024-07-22T10:24:54+01:00
New Revision: ebf0fc9ae845af15baed663d79a5e4e88542f1e4
URL: https://github.com/llvm/llvm-project/commit/ebf0fc9ae845af15baed663d79a5e4e88542f1e4
DIFF: https://github.com/llvm/llvm-project/commit/ebf0fc9ae845af15baed663d79a5e4e88542f1e4.diff

LOG: Revert "[AArch64] Lower scalable i1 vector add reduction to cntp (#99031)"

This reverts commit 4db11c1f6cd6cd12b51a3220a54697b90e2e8821.

Added:


Modified:
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Removed:
    llvm/test/CodeGen/AArch64/sve-i1-add-reduce.ll


################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c11855da3fae0..bf205b1706a6c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -27640,20 +27640,6 @@ SDValue AArch64TargetLowering::LowerReductionToSVE(unsigned Opcode,
     VecOp = convertToScalableVector(DAG, ContainerVT, VecOp);
   }
 
-  // Lower VECREDUCE_ADD of nxv2i1-nxv16i1 to CNTP rather than UADDV.
-  if (ScalarOp.getOpcode() == ISD::VECREDUCE_ADD &&
-      VecOp.getOpcode() == ISD::ZERO_EXTEND) {
-    SDValue BoolVec = VecOp.getOperand(0);
-    if (BoolVec.getValueType().getVectorElementType() == MVT::i1) {
-      // CNTP(BoolVec & BoolVec) <=> CNTP(BoolVec & PTRUE)
-      SDValue CntpOp = DAG.getNode(
-          ISD::INTRINSIC_WO_CHAIN, DL, MVT::i64,
-          DAG.getTargetConstant(Intrinsic::aarch64_sve_cntp, DL, MVT::i64),
-          BoolVec, BoolVec);
-      return DAG.getAnyExtOrTrunc(CntpOp, DL, ScalarOp.getValueType());
-    }
-  }
-
   // UADDV always returns an i64 result.
   EVT ResVT = (Opcode == AArch64ISD::UADDV_PRED) ? MVT::i64
                                                  : SrcVT.getVectorElementType();

diff --git a/llvm/test/CodeGen/AArch64/sve-i1-add-reduce.ll b/llvm/test/CodeGen/AArch64/sve-i1-add-reduce.ll
deleted file mode 100644
index a748cf732e090..0000000000000
--- a/llvm/test/CodeGen/AArch64/sve-i1-add-reduce.ll
+++ /dev/null
@@ -1,132 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
-
-define i8 @uaddv_zexti8_nxv16i1(<vscale x 16 x i1> %v) {
-; CHECK-LABEL: uaddv_zexti8_nxv16i1:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cntp x0, p0, p0.b
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT:    ret
-entry:
-  %3 = zext <vscale x 16 x i1> %v to <vscale x 16 x i8>
-  %4 = tail call i8 @llvm.vector.reduce.add.nxv16i8(<vscale x 16 x i8> %3)
-  ret i8 %4
-}
-
-define i8 @uaddv_zexti8_nxv8i1(<vscale x 8 x i1> %v) {
-; CHECK-LABEL: uaddv_zexti8_nxv8i1:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cntp x0, p0, p0.h
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT:    ret
-entry:
-  %3 = zext <vscale x 8 x i1> %v to <vscale x 8 x i8>
-  %4 = tail call i8 @llvm.vector.reduce.add.nxv8i8(<vscale x 8 x i8> %3)
-  ret i8 %4
-}
-
-define i16 @uaddv_zexti16_nxv8i1(<vscale x 8 x i1> %v) {
-; CHECK-LABEL: uaddv_zexti16_nxv8i1:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cntp x0, p0, p0.h
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT:    ret
-entry:
-  %3 = zext <vscale x 8 x i1> %v to <vscale x 8 x i16>
-  %4 = tail call i16 @llvm.vector.reduce.add.nxv8i16(<vscale x 8 x i16> %3)
-  ret i16 %4
-}
-
-define i8 @uaddv_zexti8_nxv4i1(<vscale x 4 x i1> %v) {
-; CHECK-LABEL: uaddv_zexti8_nxv4i1:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cntp x0, p0, p0.s
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT:    ret
-entry:
-  %3 = zext <vscale x 4 x i1> %v to <vscale x 4 x i8>
-  %4 = tail call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> %3)
-  ret i8 %4
-}
-
-define i16 @uaddv_zexti16_nxv4i1(<vscale x 4 x i1> %v) {
-; CHECK-LABEL: uaddv_zexti16_nxv4i1:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cntp x0, p0, p0.s
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT:    ret
-entry:
-  %3 = zext <vscale x 4 x i1> %v to <vscale x 4 x i16>
-  %4 = tail call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %3)
-  ret i16 %4
-}
-
-define i32 @uaddv_zexti32_nxv4i1(<vscale x 4 x i1> %v) {
-; CHECK-LABEL: uaddv_zexti32_nxv4i1:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cntp x0, p0, p0.s
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT:    ret
-entry:
-  %3 = zext <vscale x 4 x i1> %v to <vscale x 4 x i32>
-  %4 = tail call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %3)
-  ret i32 %4
-}
-
-define i8 @uaddv_zexti8_nxv2i1(<vscale x 2 x i1> %v) {
-; CHECK-LABEL: uaddv_zexti8_nxv2i1:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cntp x0, p0, p0.d
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT:    ret
-entry:
-  %3 = zext <vscale x 2 x i1> %v to <vscale x 2 x i8>
-  %4 = tail call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> %3)
-  ret i8 %4
-}
-
-define i16 @uaddv_zexti16_nxv2i1(<vscale x 2 x i1> %v) {
-; CHECK-LABEL: uaddv_zexti16_nxv2i1:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cntp x0, p0, p0.d
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT:    ret
-entry:
-  %3 = zext <vscale x 2 x i1> %v to <vscale x 2 x i16>
-  %4 = tail call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %3)
-  ret i16 %4
-}
-
-define i32 @uaddv_zexti32_nxv2i1(<vscale x 2 x i1> %v) {
-; CHECK-LABEL: uaddv_zexti32_nxv2i1:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cntp x0, p0, p0.d
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT:    ret
-entry:
-  %3 = zext <vscale x 2 x i1> %v to <vscale x 2 x i32>
-  %4 = tail call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %3)
-  ret i32 %4
-}
-
-define i64 @uaddv_zexti64_nxv2i1(<vscale x 2 x i1> %v) {
-; CHECK-LABEL: uaddv_zexti64_nxv2i1:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cntp x0, p0, p0.d
-; CHECK-NEXT:    ret
-entry:
-  %3 = zext <vscale x 2 x i1> %v to <vscale x 2 x i64>
-  %4 = tail call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %3)
-  ret i64 %4
-}
-
-declare i8 @llvm.vector.reduce.add.nxv16i8(<vscale x 16 x i8>)
-declare i8 @llvm.vector.reduce.add.nxv8i8(<vscale x 8 x i8>)
-declare i16 @llvm.vector.reduce.add.nxv8i16(<vscale x 8 x i16>)
-declare i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8>)
-declare i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16>)
-declare i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32>)
-declare i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8>)
-declare i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16>)
-declare i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32>)
-declare i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64>)
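A note on what was reverted, for readers skimming the diff: the removed combine turned VECREDUCE_ADD of a zero-extended scalable i1 vector into CNTP(P, P), relying on the identity in the deleted comment: since P & P == P and P & PTRUE == P, counting the active lanes of P governed by P itself gives the same result as counting them under an all-true predicate. A minimal C sketch of that identity using the SVE ACLE intrinsics (illustrative only; the function name is made up and this code is not part of the commit):

    // Illustrative sketch, not part of the reverted patch.
    // Counting the set lanes of predicate p with p as its own governing
    // predicate equals counting them under an all-true predicate, because
    // p & p == p. Either form compiles to a single CNTP with -march=+sve.
    #include <arm_sve.h>
    #include <stdint.h>

    uint64_t count_active(svbool_t p) {
      return svcntp_b8(p, p);  // == svcntp_b8(svptrue_b8(), p)
    }

With the reverted patch in tree, llc emitted exactly this single cntp for the zext-then-reduce.add pattern in the deleted test file; after the revert, that pattern is lowered through the generic UADDV path again.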