Author: Vitaly Buka Date: 2024-12-11T07:44:50-08:00 New Revision: 44dd7ba443c9dde0ef854e87cea93a4da976bb60
URL: https://github.com/llvm/llvm-project/commit/44dd7ba443c9dde0ef854e87cea93a4da976bb60 DIFF: https://github.com/llvm/llvm-project/commit/44dd7ba443c9dde0ef854e87cea93a4da976bb60.diff LOG: Revert "[AArch64] Add cost model for @experimental.vector.match (#118512)" This reverts commit 2fe30bc6693c60d76c7e44d9fd6323c39125c19e. Added: Modified: llvm/include/llvm/CodeGen/BasicTTIImpl.h llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll Removed: ################################################################################ diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 8eef8ea665a26f..f46f07122329e7 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1935,8 +1935,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> { return Cost; } - case Intrinsic::experimental_vector_match: - return thisT()->getTypeBasedIntrinsicInstrCost(ICA, CostKind); } // Assume that we need to scalarize this intrinsic.) @@ -2192,35 +2190,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> { case Intrinsic::vector_reduce_fminimum: return thisT()->getMinMaxReductionCost(getMinMaxReductionIntrinsicOp(IID), VecOpTy, ICA.getFlags(), CostKind); - case Intrinsic::experimental_vector_match: { - auto *SearchTy = cast<VectorType>(ICA.getArgTypes()[0]); - auto *NeedleTy = cast<FixedVectorType>(ICA.getArgTypes()[1]); - unsigned SearchSize = NeedleTy->getNumElements(); - - // If we're not expanding the intrinsic then we assume this is cheap to - // implement. - EVT SearchVT = getTLI()->getValueType(DL, SearchTy); - if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize)) - return getTypeLegalizationCost(RetTy).first; - - // Approximate the cost based on the expansion code in - // SelectionDAGBuilder. - InstructionCost Cost = 0; - Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, NeedleTy, - CostKind, 1, nullptr, nullptr); - Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, SearchTy, - CostKind, 0, nullptr, nullptr); - Cost += thisT()->getShuffleCost(TTI::SK_Broadcast, SearchTy, std::nullopt, - CostKind, 0, nullptr); - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SearchTy, RetTy, - CmpInst::ICMP_EQ, CostKind); - Cost += - thisT()->getArithmeticInstrCost(BinaryOperator::Or, RetTy, CostKind); - Cost *= SearchSize; - Cost += - thisT()->getArithmeticInstrCost(BinaryOperator::And, RetTy, CostKind); - return Cost; - } case Intrinsic::abs: ISD = ISD::ABS; break; diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 850a1c3bfca43c..148f03bae28585 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -905,23 +905,6 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, } break; } - case Intrinsic::experimental_vector_match: { - auto *NeedleTy = cast<FixedVectorType>(ICA.getArgTypes()[1]); - EVT SearchVT = getTLI()->getValueType(DL, ICA.getArgTypes()[0]); - unsigned SearchSize = NeedleTy->getNumElements(); - if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize)) { - // Base cost for MATCH instructions. At least on the Neoverse V2 and - // Neoverse V3, these are cheap operations with the same latency as a - // vector ADD. In most cases, however, we also need to do an extra DUP. - // For fixed-length vectors we currently need an extra five--six - // instructions besides the MATCH. - InstructionCost Cost = 4; - if (isa<FixedVectorType>(RetTy)) - Cost += 10; - return Cost; - } - break; - } default: break; } diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll index dd3909ade53159..cca4ea73ee6628 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll @@ -1360,54 +1360,6 @@ define void @histogram_nxv4i64(<vscale x 4 x ptr> %buckets, <vscale x 4 x i1> %m ret void } -define void @match() #3 { -; CHECK-VSCALE-1-LABEL: 'match' -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %match_nxv16i8_v16i8 = call <vscale x 16 x i1> @llvm.experimental.vector.match.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> undef, <vscale x 16 x i1> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %match_nxv8i16_v8i16 = call <vscale x 8 x i1> @llvm.experimental.vector.match.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> undef, <vscale x 8 x i1> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %match_nxv4i32_v4i32 = call <vscale x 4 x i1> @llvm.experimental.vector.match.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> undef, <vscale x 4 x i1> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %match_nxv2i64_v2i64 = call <vscale x 2 x i1> @llvm.experimental.vector.match.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> undef, <vscale x 2 x i1> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %match_v16i8_v16i8 = call <16 x i1> @llvm.experimental.vector.match.v16i8.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %match_v8i16_v8i16 = call <8 x i1> @llvm.experimental.vector.match.v8i16.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %match_v4i32_v4i32 = call <4 x i1> @llvm.experimental.vector.match.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %match_v2i64_v2i64 = call <2 x i1> @llvm.experimental.vector.match.v2i64.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; CHECK-VSCALE-2-LABEL: 'match' -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %match_nxv16i8_v16i8 = call <vscale x 16 x i1> @llvm.experimental.vector.match.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> undef, <vscale x 16 x i1> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %match_nxv8i16_v8i16 = call <vscale x 8 x i1> @llvm.experimental.vector.match.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> undef, <vscale x 8 x i1> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %match_nxv4i32_v4i32 = call <vscale x 4 x i1> @llvm.experimental.vector.match.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> undef, <vscale x 4 x i1> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %match_nxv2i64_v2i64 = call <vscale x 2 x i1> @llvm.experimental.vector.match.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> undef, <vscale x 2 x i1> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %match_v16i8_v16i8 = call <16 x i1> @llvm.experimental.vector.match.v16i8.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %match_v8i16_v8i16 = call <8 x i1> @llvm.experimental.vector.match.v8i16.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %match_v4i32_v4i32 = call <4 x i1> @llvm.experimental.vector.match.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %match_v2i64_v2i64 = call <2 x i1> @llvm.experimental.vector.match.v2i64.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -; TYPE_BASED_ONLY-LABEL: 'match' -; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %match_nxv16i8_v16i8 = call <vscale x 16 x i1> @llvm.experimental.vector.match.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> undef, <vscale x 16 x i1> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %match_nxv8i16_v8i16 = call <vscale x 8 x i1> @llvm.experimental.vector.match.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> undef, <vscale x 8 x i1> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %match_nxv4i32_v4i32 = call <vscale x 4 x i1> @llvm.experimental.vector.match.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> undef, <vscale x 4 x i1> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %match_nxv2i64_v2i64 = call <vscale x 2 x i1> @llvm.experimental.vector.match.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> undef, <vscale x 2 x i1> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %match_v16i8_v16i8 = call <16 x i1> @llvm.experimental.vector.match.v16i8.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %match_v8i16_v8i16 = call <8 x i1> @llvm.experimental.vector.match.v8i16.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %match_v4i32_v4i32 = call <4 x i1> @llvm.experimental.vector.match.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %match_v2i64_v2i64 = call <2 x i1> @llvm.experimental.vector.match.v2i64.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; - - %match_nxv16i8_v16i8 = call <vscale x 16 x i1> @llvm.experimental.vector.match.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> undef, <vscale x 16 x i1> undef) - %match_nxv8i16_v8i16 = call <vscale x 8 x i1> @llvm.experimental.vector.match.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> undef, <vscale x 8 x i1> undef) - %match_nxv4i32_v4i32 = call <vscale x 4 x i1> @llvm.experimental.vector.match.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> undef, <vscale x 4 x i1> undef) - %match_nxv2i64_v2i64 = call <vscale x 2 x i1> @llvm.experimental.vector.match.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> undef, <vscale x 2 x i1> undef) - - %match_v16i8_v16i8 = call <16 x i1> @llvm.experimental.vector.match.v16i8.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef) - %match_v8i16_v8i16 = call <8 x i1> @llvm.experimental.vector.match.v8i16.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef) - %match_v4i32_v4i32 = call <4 x i1> @llvm.experimental.vector.match.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef) - %match_v2i64_v2i64 = call <2 x i1> @llvm.experimental.vector.match.v2i64.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef) - - ret void -} - declare <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64, i64) declare <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64, i64) declare <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64, i64) _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits