[llvm-branch-commits] [llvm] TTI: Check legalization cost of mul overflow ISD nodes (PR #100519)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100519 >From 411c9c8f9fff386807a4ff6317dbec8a3eb1cd1a Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 10:27:54 +0400 Subject: [PATCH] TTI: Check legalization cost of mul overflow ISD nodes --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 67 ++- .../Analysis/CostModel/X86/arith-overflow.ll | 8 +-- 2 files changed, 40 insertions(+), 35 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index a89d4fe467eb9..314390aee5085 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2192,37 +2192,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { ISD = ISD::USUBO; break; case Intrinsic::smul_with_overflow: -case Intrinsic::umul_with_overflow: { - Type *MulTy = RetTy->getContainedType(0); - Type *OverflowTy = RetTy->getContainedType(1); - unsigned ExtSize = MulTy->getScalarSizeInBits() * 2; - Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize); - bool IsSigned = IID == Intrinsic::smul_with_overflow; - - unsigned ExtOp = IsSigned ? Instruction::SExt : Instruction::ZExt; - TTI::CastContextHint CCH = TTI::CastContextHint::None; - - InstructionCost Cost = 0; - Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind); - Cost += - thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); - Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy, -CCH, CostKind); - Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, ExtTy, - CostKind, - {TTI::OK_AnyValue, TTI::OP_None}, - {TTI::OK_UniformConstantValue, TTI::OP_None}); - - if (IsSigned) -Cost += thisT()->getArithmeticInstrCost(Instruction::AShr, MulTy, -CostKind, -{TTI::OK_AnyValue, TTI::OP_None}, -{TTI::OK_UniformConstantValue, TTI::OP_None}); - - Cost += thisT()->getCmpSelInstrCost( - BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind); - return Cost; -} + ISD = ISD::SMULO; + break; +case Intrinsic::umul_with_overflow: + ISD = ISD::UMULO; + break; case Intrinsic::fptosi_sat: case Intrinsic::fptoui_sat: { if (Tys.empty()) @@ -2367,6 +2341,37 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { OverflowTy, Pred, CostKind); return Cost; } +case Intrinsic::smul_with_overflow: +case Intrinsic::umul_with_overflow: { + Type *MulTy = RetTy->getContainedType(0); + Type *OverflowTy = RetTy->getContainedType(1); + unsigned ExtSize = MulTy->getScalarSizeInBits() * 2; + Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize); + bool IsSigned = IID == Intrinsic::smul_with_overflow; + + unsigned ExtOp = IsSigned ? Instruction::SExt : Instruction::ZExt; + TTI::CastContextHint CCH = TTI::CastContextHint::None; + + InstructionCost Cost = 0; + Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind); + Cost += + thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); + Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy, +CCH, CostKind); + Cost += thisT()->getArithmeticInstrCost( + Instruction::LShr, ExtTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + + if (IsSigned) +Cost += thisT()->getArithmeticInstrCost( +Instruction::AShr, MulTy, CostKind, +{TTI::OK_AnyValue, TTI::OP_None}, +{TTI::OK_UniformConstantValue, TTI::OP_None}); + + Cost += thisT()->getCmpSelInstrCost( + BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind); + return Cost; +} case Intrinsic::sadd_sat: case Intrinsic::ssub_sat: { // Assume a default expansion. diff --git a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll index 963bb8a9d9fac..71bc6b5375c73 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll @@ -1080,7 +1080,7 @@ define i32 @smul(i32 %arg) { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call { <16 x i8>, <16
[llvm-branch-commits] [llvm] TTI: Check legalization cost of mulfix ISD nodes (PR #100520)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100520 >From fc18583308ccaaf60bd234af160888a669648fef Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 10:31:04 +0400 Subject: [PATCH] TTI: Check legalization cost of mulfix ISD nodes --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 53 +--- 1 file changed, 29 insertions(+), 24 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 314390aee5085..1a089a3fa9634 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2155,30 +2155,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { ISD = ISD::USUBSAT; break; case Intrinsic::smul_fix: -case Intrinsic::umul_fix: { - unsigned ExtSize = RetTy->getScalarSizeInBits() * 2; - Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize); - - unsigned ExtOp = - IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; - TTI::CastContextHint CCH = TTI::CastContextHint::None; - - InstructionCost Cost = 0; - Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, CostKind); - Cost += - thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); - Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy, -CCH, CostKind); - Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, RetTy, - CostKind, - {TTI::OK_AnyValue, TTI::OP_None}, - {TTI::OK_UniformConstantValue, TTI::OP_None}); - Cost += thisT()->getArithmeticInstrCost(Instruction::Shl, RetTy, CostKind, - {TTI::OK_AnyValue, TTI::OP_None}, - {TTI::OK_UniformConstantValue, TTI::OP_None}); - Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); - return Cost; -} + ISD = ISD::SMULFIX; + break; +case Intrinsic::umul_fix: + ISD = ISD::UMULFIX; + break; case Intrinsic::sadd_with_overflow: ISD = ISD::SADDO; break; @@ -2413,6 +2394,30 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { CmpInst::BAD_ICMP_PREDICATE, CostKind); return Cost; } +case Intrinsic::smul_fix: +case Intrinsic::umul_fix: { + unsigned ExtSize = RetTy->getScalarSizeInBits() * 2; + Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize); + + unsigned ExtOp = + IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; + TTI::CastContextHint CCH = TTI::CastContextHint::None; + + InstructionCost Cost = 0; + Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, CostKind); + Cost += + thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); + Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy, +CCH, CostKind); + Cost += thisT()->getArithmeticInstrCost( + Instruction::LShr, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + Cost += thisT()->getArithmeticInstrCost( + Instruction::Shl, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); + return Cost; +} default: break; } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of fptosi_sat/fptoui_sat nodes (PR #100521)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100521 >From 19f7331a579837b2657a5d0741c6633d6f8296da Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 10:33:23 +0400 Subject: [PATCH] TTI: Check legalization cost of fptosi_sat/fptoui_sat nodes --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 56 +-- llvm/test/Analysis/CostModel/ARM/fptoi_sat.ll | 116 ++--- llvm/test/Analysis/CostModel/X86/fptoi_sat.ll | 400 +- .../AggressiveInstCombine/ARM/fptosisat.ll| 49 ++- 4 files changed, 324 insertions(+), 297 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 1a089a3fa9634..ba70498bfb731 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2179,31 +2179,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { ISD = ISD::UMULO; break; case Intrinsic::fptosi_sat: -case Intrinsic::fptoui_sat: { - if (Tys.empty()) -break; - Type *FromTy = Tys[0]; - bool IsSigned = IID == Intrinsic::fptosi_sat; - - InstructionCost Cost = 0; - IntrinsicCostAttributes Attrs1(Intrinsic::minnum, FromTy, - {FromTy, FromTy}); - Cost += thisT()->getIntrinsicInstrCost(Attrs1, CostKind); - IntrinsicCostAttributes Attrs2(Intrinsic::maxnum, FromTy, - {FromTy, FromTy}); - Cost += thisT()->getIntrinsicInstrCost(Attrs2, CostKind); - Cost += thisT()->getCastInstrCost( - IsSigned ? Instruction::FPToSI : Instruction::FPToUI, RetTy, FromTy, - TTI::CastContextHint::None, CostKind); - if (IsSigned) { -Type *CondTy = RetTy->getWithNewBitWidth(1); -Cost += thisT()->getCmpSelInstrCost( -BinaryOperator::FCmp, FromTy, CondTy, CmpInst::FCMP_UNO, CostKind); -Cost += thisT()->getCmpSelInstrCost( -BinaryOperator::Select, RetTy, CondTy, CmpInst::FCMP_UNO, CostKind); - } - return Cost; -} + ISD = ISD::FP_TO_SINT_SAT; + break; +case Intrinsic::fptoui_sat: + ISD = ISD::FP_TO_UINT_SAT; + break; case Intrinsic::ctpop: ISD = ISD::CTPOP; // In case of legalization use TCC_Expensive. This is cheaper than a @@ -2418,6 +2398,32 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); return Cost; } +case Intrinsic::fptosi_sat: +case Intrinsic::fptoui_sat: { + if (Tys.empty()) +break; + Type *FromTy = Tys[0]; + bool IsSigned = IID == Intrinsic::fptosi_sat; + + InstructionCost Cost = 0; + IntrinsicCostAttributes Attrs1(Intrinsic::minnum, FromTy, + {FromTy, FromTy}); + Cost += thisT()->getIntrinsicInstrCost(Attrs1, CostKind); + IntrinsicCostAttributes Attrs2(Intrinsic::maxnum, FromTy, + {FromTy, FromTy}); + Cost += thisT()->getIntrinsicInstrCost(Attrs2, CostKind); + Cost += thisT()->getCastInstrCost( + IsSigned ? Instruction::FPToSI : Instruction::FPToUI, RetTy, FromTy, + TTI::CastContextHint::None, CostKind); + if (IsSigned) { +Type *CondTy = RetTy->getWithNewBitWidth(1); +Cost += thisT()->getCmpSelInstrCost( +BinaryOperator::FCmp, FromTy, CondTy, CmpInst::FCMP_UNO, CostKind); +Cost += thisT()->getCmpSelInstrCost( +BinaryOperator::Select, RetTy, CondTy, CmpInst::FCMP_UNO, CostKind); + } + return Cost; +} default: break; } diff --git a/llvm/test/Analysis/CostModel/ARM/fptoi_sat.ll b/llvm/test/Analysis/CostModel/ARM/fptoi_sat.ll index aff7b19a9c87a..29c86fc778a98 100644 --- a/llvm/test/Analysis/CostModel/ARM/fptoi_sat.ll +++ b/llvm/test/Analysis/CostModel/ARM/fptoi_sat.ll @@ -4,26 +4,26 @@ define void @casts() { ; CHECK-MVE-LABEL: 'casts' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %f32s1 = call i1 @llvm.fptosi.sat.i1.f32(float undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %f32u1 = call i1 @llvm.fptoui.sat.i1.f32(float undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %f32s8 = call i8 @llvm.fptosi.sat.i8.f32(float undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %f32u8 = call i8 @llvm.fptoui.sat.i8.f32(float undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %f32s16 = call i16 @llvm.fptosi.sat.i16.f32(float undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %f32u16 = call i16 @llvm.fptoui.sat.i16.f32(float undef) -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %f32s32 = call i32 @llvm.fptosi.sat.i32.f32(
[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100523 >From 6a7346484924acdfbd630096e3dbbb4b14474028 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 10:38:11 +0400 Subject: [PATCH] TTI: Check legalization cost of abs nodes Also adjust the AMDGPU cost. --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 32 +- .../AMDGPU/AMDGPUTargetTransformInfo.cpp | 9 +- llvm/test/Analysis/CostModel/AMDGPU/abs.ll| 368 +- .../Analysis/CostModel/AMDGPU/arith-ssat.ll | 32 +- .../Analysis/CostModel/AMDGPU/arith-usat.ll | 32 +- 5 files changed, 242 insertions(+), 231 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index ba70498bfb731..65f929369c1f0 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2116,20 +2116,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { case Intrinsic::vector_reduce_fminimum: return thisT()->getMinMaxReductionCost(getMinMaxReductionIntrinsicOp(IID), VecOpTy, ICA.getFlags(), CostKind); -case Intrinsic::abs: { - // abs(X) = select(icmp(X,0),X,sub(0,X)) - Type *CondTy = RetTy->getWithNewBitWidth(1); - CmpInst::Predicate Pred = CmpInst::ICMP_SGT; - InstructionCost Cost = 0; - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, - Pred, CostKind); - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, - Pred, CostKind); - // TODO: Should we add an OperandValueProperties::OP_Zero property? - Cost += thisT()->getArithmeticInstrCost( - BinaryOperator::Sub, RetTy, CostKind, {TTI::OK_UniformConstantValue, TTI::OP_None}); - return Cost; -} +case Intrinsic::abs: + ISD = ISD::ABS; + break; case Intrinsic::smax: ISD = ISD::SMAX; break; @@ -2398,6 +2387,21 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); return Cost; } +case Intrinsic::abs: { + // abs(X) = select(icmp(X,0),X,sub(0,X)) + Type *CondTy = RetTy->getWithNewBitWidth(1); + CmpInst::Predicate Pred = CmpInst::ICMP_SGT; + InstructionCost Cost = 0; + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, + Pred, CostKind); + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, + Pred, CostKind); + // TODO: Should we add an OperandValueProperties::OP_Zero property? + Cost += thisT()->getArithmeticInstrCost( + BinaryOperator::Sub, RetTy, CostKind, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + return Cost; +} case Intrinsic::fptosi_sat: case Intrinsic::fptoui_sat: { if (Tys.empty()) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 0b1ecc002ae25..8ae236850b982 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -693,6 +693,7 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID ID) { case Intrinsic::usub_sat: case Intrinsic::sadd_sat: case Intrinsic::ssub_sat: + case Intrinsic::abs: return true; default: return false; @@ -721,7 +722,7 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, if (SLT == MVT::f64) return LT.first * NElts * get64BitInstrCost(CostKind); - if ((ST->has16BitInsts() && SLT == MVT::f16) || + if ((ST->has16BitInsts() && (SLT == MVT::f16 || SLT == MVT::i16)) || (ST->hasPackedFP32Ops() && SLT == MVT::f32)) NElts = (NElts + 1) / 2; @@ -737,10 +738,16 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, case Intrinsic::usub_sat: case Intrinsic::sadd_sat: case Intrinsic::ssub_sat: +// TODO: Full rate for i32/i16 static const auto ValidSatTys = {MVT::v2i16, MVT::v4i16}; if (any_of(ValidSatTys, [<](MVT M) { return M == LT.second; })) NElts = 1; break; + case Intrinsic::abs: +// Expansion takes 2 instructions for VALU +if (SLT == MVT::i16 || SLT == MVT::i32) + InstRate = 2 * getFullRateInstrCost(); +break; } return LT.first * NElts * InstRate; diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll index f65615b07abc0..b86e99558377b 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll @@ -14,116 +14,116 @@ define void @abs_nonpoison() { ; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = c
[llvm-branch-commits] [lld] release/19.x: [lld][ELF][LoongArch] Support R_LARCH_TLS_{LD, GD, DESC}_PCREL_S2 (PR #100917)
https://github.com/MaskRay approved this pull request. https://github.com/llvm/llvm-project/pull/100917 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] release/19.x: [ELF] Use invokeOnRelocs. NFC (PR #100883)
https://github.com/MaskRay approved this pull request. https://github.com/llvm/llvm-project/pull/100883 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [LLVM][PassBuilder] Extend the function signature of callback for optimizer pipeline extension point (PR #100953)
https://github.com/shiltian created https://github.com/llvm/llvm-project/pull/100953 These callbacks can be invoked in multiple places when building an optimization pipeline, both in compile time and link time. However, there is no indicator on what pipeline it is currently building. In this patch, an extra argument is added to indicate its (Thin)LTO stage such that the callback can check it if needed. There is no test expected from this, and the benefit of this change will be demonstrated in https://github.com/llvm/llvm-project/pull/66488. >From 9c949107271a303b7961cb2a1bea3157008323d6 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Sun, 28 Jul 2024 15:28:09 -0400 Subject: [PATCH] [LLVM][PassBuilder] Extend the function signature of callback for optimizer pipeline extension point These callbacks can be invoked in multiple places when building an optimization pipeline, both in compile time and link time. However, there is no indicator on what pipeline it is currently building. In this patch, an extra argument is added to indicate its (Thin)LTO stage such that the callback can check it if needed. There is no test expected from this, and the benefit of this change will be demonstrated in https://github.com/llvm/llvm-project/pull/66488. --- clang/lib/CodeGen/BackendUtil.cpp | 19 ++- llvm/include/llvm/Passes/PassBuilder.h| 10 +++--- llvm/lib/Passes/PassBuilderPipelines.cpp | 12 +++- llvm/lib/Target/AMDGPU/AMDGPU.h | 7 ++- llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 11 +++ .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 15 +-- llvm/tools/opt/NewPMDriver.cpp| 2 +- 7 files changed, 47 insertions(+), 29 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index e765bbf637a66..64f0020a170aa 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -643,7 +643,7 @@ static void addKCFIPass(const Triple &TargetTriple, const LangOptions &LangOpts, // Ensure we lower KCFI operand bundles with -O0. PB.registerOptimizerLastEPCallback( - [&](ModulePassManager &MPM, OptimizationLevel Level) { + [&](ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase) { if (Level == OptimizationLevel::O0 && LangOpts.Sanitize.has(SanitizerKind::KCFI)) MPM.addPass(createModuleToFunctionPassAdaptor(KCFIPass())); @@ -662,8 +662,8 @@ static void addKCFIPass(const Triple &TargetTriple, const LangOptions &LangOpts, static void addSanitizers(const Triple &TargetTriple, const CodeGenOptions &CodeGenOpts, const LangOptions &LangOpts, PassBuilder &PB) { - auto SanitizersCallback = [&](ModulePassManager &MPM, -OptimizationLevel Level) { + auto SanitizersCallback = [&](ModulePassManager &MPM, OptimizationLevel Level, +ThinOrFullLTOPhase) { if (CodeGenOpts.hasSanitizeCoverage()) { auto SancovOpts = getSancovOptsFromCGOpts(CodeGenOpts); MPM.addPass(SanitizerCoveragePass( @@ -749,7 +749,7 @@ static void addSanitizers(const Triple &TargetTriple, PB.registerOptimizerEarlyEPCallback( [SanitizersCallback](ModulePassManager &MPM, OptimizationLevel Level) { ModulePassManager NewMPM; - SanitizersCallback(NewMPM, Level); + SanitizersCallback(NewMPM, Level, ThinOrFullLTOPhase::None); if (!NewMPM.isEmpty()) { // Sanitizers can abandon. NewMPM.addPass(RequireAnalysisPass()); @@ -1018,11 +1018,12 @@ void EmitAssemblyHelper::RunOptimizationPipeline( // TODO: Consider passing the MemoryProfileOutput to the pass builder via // the PGOOptions, and set this up there. if (!CodeGenOpts.MemoryProfileOutput.empty()) { - PB.registerOptimizerLastEPCallback( - [](ModulePassManager &MPM, OptimizationLevel Level) { -MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass())); -MPM.addPass(ModuleMemProfilerPass()); - }); + PB.registerOptimizerLastEPCallback([](ModulePassManager &MPM, +OptimizationLevel Level, +ThinOrFullLTOPhase) { +MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass())); +MPM.addPass(ModuleMemProfilerPass()); + }); } if (CodeGenOpts.FatLTO) { diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h index 474a19531ff5d..4c2763404ff05 100644 --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -497,7 +497,8 @@ class PassBuilder { /// This extension point allows adding optimizations at the very end of the /// function optimization pipeline. void registerOptimizerLastEPCallback( - const st
[llvm-branch-commits] [clang] [llvm] [LLVM][PassBuilder] Extend the function signature of callback for optimizer pipeline extension point (PR #100953)
https://github.com/shiltian ready_for_review https://github.com/llvm/llvm-project/pull/100953 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [LLVM][PassBuilder] Extend the function signature of callback for optimizer pipeline extension point (PR #100953)
shiltian wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.dev/github/pr/llvm/llvm-project/100953?utm_source=stack-comment-downstack-mergeability-warning"; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests";>Learn more * **#100953** https://app.graphite.dev/github/pr/llvm/llvm-project/100953?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 * **#100952** https://app.graphite.dev/github/pr/llvm/llvm-project/100952?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * `main` This stack of pull requests is managed by Graphite. https://stacking.dev/?utm_source=stack-comment";>Learn more about stacking. Join @shiltian and the rest of your teammates on https://graphite.dev?utm-source=stack-comment";>https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="11px" height="11px"/> Graphite https://github.com/llvm/llvm-project/pull/100953 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [LLVM][PassBuilder] Extend the function signature of callback for optimizer pipeline extension point (PR #100953)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Shilei Tian (shiltian) Changes These callbacks can be invoked in multiple places when building an optimization pipeline, both in compile time and link time. However, there is no indicator on what pipeline it is currently building. In this patch, an extra argument is added to indicate its (Thin)LTO stage such that the callback can check it if needed. There is no test expected from this, and the benefit of this change will be demonstrated in https://github.com/llvm/llvm-project/pull/66488. --- Full diff: https://github.com/llvm/llvm-project/pull/100953.diff 7 Files Affected: - (modified) clang/lib/CodeGen/BackendUtil.cpp (+10-9) - (modified) llvm/include/llvm/Passes/PassBuilder.h (+7-3) - (modified) llvm/lib/Passes/PassBuilderPipelines.cpp (+7-5) - (modified) llvm/lib/Target/AMDGPU/AMDGPU.h (+6-1) - (modified) llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp (+7-4) - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+9-6) - (modified) llvm/tools/opt/NewPMDriver.cpp (+1-1) ``diff diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index e765bbf637a66..64f0020a170aa 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -643,7 +643,7 @@ static void addKCFIPass(const Triple &TargetTriple, const LangOptions &LangOpts, // Ensure we lower KCFI operand bundles with -O0. PB.registerOptimizerLastEPCallback( - [&](ModulePassManager &MPM, OptimizationLevel Level) { + [&](ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase) { if (Level == OptimizationLevel::O0 && LangOpts.Sanitize.has(SanitizerKind::KCFI)) MPM.addPass(createModuleToFunctionPassAdaptor(KCFIPass())); @@ -662,8 +662,8 @@ static void addKCFIPass(const Triple &TargetTriple, const LangOptions &LangOpts, static void addSanitizers(const Triple &TargetTriple, const CodeGenOptions &CodeGenOpts, const LangOptions &LangOpts, PassBuilder &PB) { - auto SanitizersCallback = [&](ModulePassManager &MPM, -OptimizationLevel Level) { + auto SanitizersCallback = [&](ModulePassManager &MPM, OptimizationLevel Level, +ThinOrFullLTOPhase) { if (CodeGenOpts.hasSanitizeCoverage()) { auto SancovOpts = getSancovOptsFromCGOpts(CodeGenOpts); MPM.addPass(SanitizerCoveragePass( @@ -749,7 +749,7 @@ static void addSanitizers(const Triple &TargetTriple, PB.registerOptimizerEarlyEPCallback( [SanitizersCallback](ModulePassManager &MPM, OptimizationLevel Level) { ModulePassManager NewMPM; - SanitizersCallback(NewMPM, Level); + SanitizersCallback(NewMPM, Level, ThinOrFullLTOPhase::None); if (!NewMPM.isEmpty()) { // Sanitizers can abandon. NewMPM.addPass(RequireAnalysisPass()); @@ -1018,11 +1018,12 @@ void EmitAssemblyHelper::RunOptimizationPipeline( // TODO: Consider passing the MemoryProfileOutput to the pass builder via // the PGOOptions, and set this up there. if (!CodeGenOpts.MemoryProfileOutput.empty()) { - PB.registerOptimizerLastEPCallback( - [](ModulePassManager &MPM, OptimizationLevel Level) { -MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass())); -MPM.addPass(ModuleMemProfilerPass()); - }); + PB.registerOptimizerLastEPCallback([](ModulePassManager &MPM, +OptimizationLevel Level, +ThinOrFullLTOPhase) { +MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass())); +MPM.addPass(ModuleMemProfilerPass()); + }); } if (CodeGenOpts.FatLTO) { diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h index 474a19531ff5d..4c2763404ff05 100644 --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -497,7 +497,8 @@ class PassBuilder { /// This extension point allows adding optimizations at the very end of the /// function optimization pipeline. void registerOptimizerLastEPCallback( - const std::function &C) { + const std::function &C) { OptimizerLastEPCallbacks.push_back(C); } @@ -630,7 +631,8 @@ class PassBuilder { void invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level); void invokeOptimizerLastEPCallbacks(ModulePassManager &MPM, - OptimizationLevel Level); + OptimizationLevel Level, + ThinOrFullLTOPhase Phase); void invokeFullLinkTimeOptimizationEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel L
[llvm-branch-commits] [llvm] [Attributor][AMDGPU] Improve the handling of indirect calls (PR #100954)
https://github.com/shiltian created https://github.com/llvm/llvm-project/pull/100954 None >From 26e3c81b1488d32620f840d741966648e6d6c884 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Sun, 28 Jul 2024 19:24:31 -0400 Subject: [PATCH] [Attributor][AMDGPU] Improve the handling of indirect calls --- llvm/include/llvm/Transforms/IPO/Attributor.h | 9 + llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp| 18 ++ llvm/lib/Transforms/IPO/Attributor.cpp | 2 +- .../Transforms/IPO/AttributorAttributes.cpp| 3 ++- .../AMDGPU/amdgpu-attributor-no-agpr.ll| 16 +++- 5 files changed, 29 insertions(+), 19 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 34557238ecb23..596ee39c35a37 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -1448,7 +1448,7 @@ struct AttributorConfig { /// Callback function to determine if an indirect call targets should be made /// direct call targets (with an if-cascade). std::function + Function &AssummedCallee, bool IsSingleton)> IndirectCalleeSpecializationCallback = nullptr; /// Helper to update an underlying call graph and to delete functions. @@ -1718,10 +1718,11 @@ struct Attributor { /// Return true if we should specialize the call site \b CB for the potential /// callee \p Fn. bool shouldSpecializeCallSiteForCallee(const AbstractAttribute &AA, - CallBase &CB, Function &Callee) { + CallBase &CB, Function &Callee, + bool IsSingleton) { return Configuration.IndirectCalleeSpecializationCallback - ? Configuration.IndirectCalleeSpecializationCallback(*this, AA, -CB, Callee) + ? Configuration.IndirectCalleeSpecializationCallback( + *this, AA, CB, Callee, IsSingleton) : true; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index ab98da31b050f..b8ab11a7b420b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -14,6 +14,7 @@ #include "GCNSubtarget.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/Analysis/CycleAnalysis.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsR600.h" @@ -1041,11 +1042,28 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM, &AAPointerInfo::ID, &AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID, &AAIndirectCallInfo::ID}); + /// Helper to decide if we should specialize the indirect \p CB for \p Callee. + /// \p IsSingleton indicates whether the \p Callee is the only assumed callee. + auto IndirectCalleeSpecializationCallback = + [&](Attributor &A, const AbstractAttribute &AA, CallBase &CB, + Function &Callee, bool IsSingleton) { +if (AMDGPU::isEntryFunctionCC(Callee.getCallingConv())) + return false; +// Singleton functions should be specialized. +if (IsSingleton) + return true; +// Otherwise specialize uniform values. +const auto &TTI = TM.getTargetTransformInfo(*CB.getCaller()); +return TTI.isAlwaysUniform(CB.getCalledOperand()); + }; + AttributorConfig AC(CGUpdater); AC.IsClosedWorldModule = HasWholeProgramVisibility; AC.Allowed = &Allowed; AC.IsModulePass = true; AC.DefaultInitializeLiveInternals = false; + AC.IndirectCalleeSpecializationCallback = + IndirectCalleeSpecializationCallback; AC.IPOAmendableCB = [](const Function &F) { return F.getCallingConv() == CallingConv::AMDGPU_KERNEL; }; diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 910c0aeacc42e..879a26bcf328d 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -3836,7 +3836,7 @@ static bool runAttributorOnFunctions(InformationCache &InfoCache, if (MaxSpecializationPerCB.getNumOccurrences()) { AC.IndirectCalleeSpecializationCallback = [&](Attributor &, const AbstractAttribute &AA, CallBase &CB, -Function &Callee) { +Function &Callee, bool IsSingleton) { if (MaxSpecializationPerCB == 0) return false; auto &Set = IndirectCalleeTrackingMap[&CB]; diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 2816a85743faa..3f02ea1cbd6cb 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -12347,7 +12347,8 @@ struct AAIndi
[llvm-branch-commits] [llvm] [Attributor][AMDGPU] Improve the handling of indirect calls (PR #100954)
shiltian wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.dev/github/pr/llvm/llvm-project/100954?utm_source=stack-comment-downstack-mergeability-warning"; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests";>Learn more * **#100954** https://app.graphite.dev/github/pr/llvm/llvm-project/100954?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 * **#100953** https://app.graphite.dev/github/pr/llvm/llvm-project/100953?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#100952** https://app.graphite.dev/github/pr/llvm/llvm-project/100952?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * `main` This stack of pull requests is managed by Graphite. https://stacking.dev/?utm_source=stack-comment";>Learn more about stacking. Join @shiltian and the rest of your teammates on https://graphite.dev?utm-source=stack-comment";>https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="11px" height="11px"/> Graphite https://github.com/llvm/llvm-project/pull/100954 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [LLVM][PassBuilder] Extend the function signature of callback for optimizer pipeline extension point (PR #100953)
https://github.com/shiltian edited https://github.com/llvm/llvm-project/pull/100953 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [Attributor][AMDGPU] Improve the handling of indirect calls (PR #100954)
ssahasra wrote: The apparent change here is to simply reverse the effect of #100952 on the lit test. Would be good to have a test which shows what the improvement is. Also, I think #100952 merely enables AAIndirectCallInfo, and feels like an integral part of this change itself. I would lean towards squashing it into this change. https://github.com/llvm/llvm-project/pull/100954 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits