[llvm-branch-commits] [clang] [llvm] [SPARC] Prefer RDPC over CALL to implement GETPCX for 64-bit target (PR #77196)
https://github.com/koachan created https://github.com/llvm/llvm-project/pull/77196 On 64-bit target, prefer using RDPC over CALL to get the value of %pc. This is faster on modern processors (Niagara T1 and newer) and avoids polluting the processor's predictor state. The old behavior of using a fake CALL is still done when tuning for classic UltraSPARC processors, since RDPC is much slower there. A quick pgbench test on a SPARC T4 shows about 2% speedup on SELECT loads, and about 7% speedup on INSERT/UPDATE loads. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [SPARC] Prefer RDPC over CALL to implement GETPCX for 64-bit target (PR #77196)
llvmbot wrote: @llvm/pr-subscribers-backend-sparc Author: Koakuma (koachan) Changes On 64-bit target, prefer using RDPC over CALL to get the value of %pc. This is faster on modern processors (Niagara T1 and newer) and avoids polluting the processor's predictor state. The old behavior of using a fake CALL is still done when tuning for classic UltraSPARC processors, since RDPC is much slower there. A quick pgbench test on a SPARC T4 shows about 2% speedup on SELECT loads, and about 7% speedup on INSERT/UPDATE loads. --- Full diff: https://github.com/llvm/llvm-project/pull/77196.diff 3 Files Affected: - (modified) llvm/lib/Target/Sparc/Sparc.td (+14-4) - (modified) llvm/lib/Target/Sparc/SparcAsmPrinter.cpp (+19-2) - (added) llvm/test/CodeGen/SPARC/tune-getpcx.ll (+18) ``diff diff --git a/llvm/lib/Target/Sparc/Sparc.td b/llvm/lib/Target/Sparc/Sparc.td index 1a71cfed3128f0..7b103395652433 100644 --- a/llvm/lib/Target/Sparc/Sparc.td +++ b/llvm/lib/Target/Sparc/Sparc.td @@ -62,6 +62,13 @@ def UsePopc : SubtargetFeature<"popc", "UsePopc", "true", def FeatureSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true", "Use software emulation for floating point">; +//===--===// +// SPARC Subtarget tuning features. +// + +def TuneSlowRDPC : SubtargetFeature<"slow-rdpc", "HasSlowRDPC", "true", +"rd %pc, %XX is slow", [FeatureV9]>; + // Features added predmoninantly for LEON subtarget support include "LeonFeatures.td" @@ -89,8 +96,9 @@ def SparcAsmParserVariant : AsmParserVariant { // SPARC processors supported. 
//===--===// -class Proc Features> - : Processor; +class Proc Features, + list TuneFeatures = []> + : Processor; def : Proc<"generic", []>; def : Proc<"v7", [FeatureSoftMulDiv, FeatureNoFSMULD]>; @@ -118,9 +126,11 @@ def : Proc<"ma2480", [FeatureLeon, LeonCASA]>; def : Proc<"ma2485", [FeatureLeon, LeonCASA]>; def : Proc<"ma2x8x", [FeatureLeon, LeonCASA]>; def : Proc<"v9", [FeatureV9]>; -def : Proc<"ultrasparc", [FeatureV9, FeatureV8Deprecated, FeatureVIS]>; +def : Proc<"ultrasparc", [FeatureV9, FeatureV8Deprecated, FeatureVIS], + [TuneSlowRDPC]>; def : Proc<"ultrasparc3", [FeatureV9, FeatureV8Deprecated, FeatureVIS, - FeatureVIS2]>; + FeatureVIS2], + [TuneSlowRDPC]>; def : Proc<"niagara", [FeatureV9, FeatureV8Deprecated, FeatureVIS, FeatureVIS2]>; def : Proc<"niagara2",[FeatureV9, FeatureV8Deprecated, UsePopc, diff --git a/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp index cca624e0926796..97abf10b18540d 100644 --- a/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp +++ b/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp @@ -13,6 +13,7 @@ #include "MCTargetDesc/SparcInstPrinter.h" #include "MCTargetDesc/SparcMCExpr.h" +#include "MCTargetDesc/SparcMCTargetDesc.h" #include "MCTargetDesc/SparcTargetStreamer.h" #include "Sparc.h" #include "SparcInstrInfo.h" @@ -111,6 +112,15 @@ static void EmitCall(MCStreamer &OutStreamer, OutStreamer.emitInstruction(CallInst, STI); } +static void EmitRDPC(MCStreamer &OutStreamer, MCOperand &RD, + const MCSubtargetInfo &STI) { + MCInst RDPCInst; + RDPCInst.setOpcode(SP::RDASR); + RDPCInst.addOperand(RD); + RDPCInst.addOperand(MCOperand::createReg(SP::ASR5)); + OutStreamer.emitInstruction(RDPCInst, STI); +} + static void EmitSETHI(MCStreamer &OutStreamer, MCOperand &Imm, MCOperand &RD, const MCSubtargetInfo &STI) @@ -234,8 +244,15 @@ void SparcAsmPrinter::LowerGETPCXAndEmitMCInsts(const MachineInstr *MI, // add , %o7, OutStreamer->emitLabel(StartLabel); - MCOperand Callee = createPCXCallOP(EndLabel, OutContext); 
- EmitCall(*OutStreamer, Callee, STI); + if (!STI.getTargetTriple().isSPARC64() || + STI.hasFeature(Sparc::TuneSlowRDPC)) { +MCOperand Callee = createPCXCallOP(EndLabel, OutContext); +EmitCall(*OutStreamer, Callee, STI); + } else { +// TODO make it possible to store PC in other registers +// so that leaf function optimization becomes possible. +EmitRDPC(*OutStreamer, RegO7, STI); + } OutStreamer->emitLabel(SethiLabel); MCOperand hiImm = createPCXRelExprOp(SparcMCExpr::VK_Sparc_PC22, GOTLabel, StartLabel, SethiLabel, diff --git a/llvm/test/CodeGen/SPARC/tune-getpcx.ll b/llvm/test/CodeGen/SPARC/tune-getpcx.ll new file mode 100644 index 00..7454fea0e38d57 --- /dev/null +++ b/llvm/test/CodeGen/SPARC/tune-getpcx.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s -relocation-model=p
[llvm-branch-commits] [llvm] [compiler-rt] [asan] isInterestingAlloca: remove the isAllocaPromotable condition (PR #77221)
https://github.com/MaskRay created https://github.com/llvm/llvm-project/pull/77221 Commit 8ed1d8196bef89c3099be4ce4aa65f613ab819cc made an AllocaInst interesting only if `!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)`, which greatly removed memory operand instrumentation for -O0. However, this optimization is subsumed by StackSafetyAnalysis and therefore unnecessary when we enable StackSafetyAnalysis by default. Actually, having the `!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)` condition before `!(SSGI && SSGI->isSafe(AI)))` has an interesting false positive involving MemIntrinsic (see `hoist-argument-init-insts.ll`): * `isInterestingAlloca` is transitively called by `getInterestingMemoryOperands` and `FunctionStackPoisoner`. * If `getInterestingMemoryOperands` never calls `StackSafetyGlobalInfo::getInfo`, and a MemIntrinsic is converted to `__asan_memcpy` by `instrumentMemIntrinsic`, when `StackSafetyGlobalInfo::getInfo` is called, StackSafetyAnalysis will consider `__asan_memcpy` as unsafe, leading to an unnecessary alloca instrumentation ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [compiler-rt] [asan] isInterestingAlloca: remove the isAllocaPromotable condition (PR #77221)
llvmbot wrote: @llvm/pr-subscribers-compiler-rt-sanitizer Author: Fangrui Song (MaskRay) Changes Commit 8ed1d8196bef89c3099be4ce4aa65f613ab819cc made an AllocaInst interesting only if `!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)`, which greatly removed memory operand instrumentation for -O0. However, this optimization is subsumed by StackSafetyAnalysis and therefore unnecessary when we enable StackSafetyAnalysis by default. Actually, having the `!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)` condition before `!(SSGI && SSGI->isSafe(AI)))` has an interesting false positive involving MemIntrinsic (see `hoist-argument-init-insts.ll`): * `isInterestingAlloca` is transitively called by `getInterestingMemoryOperands` and `FunctionStackPoisoner`. * If `getInterestingMemoryOperands` never calls `StackSafetyGlobalInfo::getInfo`, and a MemIntrinsic is converted to `__asan_memcpy` by `instrumentMemIntrinsic`, when `StackSafetyGlobalInfo::getInfo` is called, StackSafetyAnalysis will consider `__asan_memcpy` as unsafe, leading to an unnecessary alloca instrumentation --- Full diff: https://github.com/llvm/llvm-project/pull/77221.diff 4 Files Affected: - (modified) llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp (+1-9) - (modified) llvm/test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca.ll (+3-3) - (modified) llvm/test/Instrumentation/AddressSanitizer/hoist-argument-init-insts.ll (+3-2) - (modified) llvm/test/Instrumentation/AddressSanitizer/local_stack_base.ll (+1-1) ``diff diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 5e7e08eaa9978d..4dfb85b70d77d6 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -345,11 +345,6 @@ static cl::opt cl::desc("instrument dynamic allocas"), cl::Hidden, cl::init(true)); -static cl::opt ClSkipPromotableAllocas( 
-"asan-skip-promotable-allocas", -cl::desc("Do not instrument promotable allocas"), cl::Hidden, -cl::init(true)); - static cl::opt ClConstructorKind( "asan-constructor-kind", cl::desc("Sets the ASan constructor kind"), @@ -1278,9 +1273,6 @@ bool AddressSanitizer::isInterestingAlloca(const AllocaInst &AI) { (AI.getAllocatedType()->isSized() && // alloca() may be called with 0 size, ignore it. ((!AI.isStaticAlloca()) || !getAllocaSizeInBytes(AI).isZero()) && - // We are only interested in allocas not promotable to registers. - // Promotable allocas are common under -O0. - (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)) && // inalloca allocas are not treated as static, and we don't want // dynamic alloca instrumentation for them as well. !AI.isUsedWithInAlloca() && @@ -1311,7 +1303,7 @@ bool AddressSanitizer::ignoreAccess(Instruction *Inst, Value *Ptr) { // will not cause memory violations. This greatly speeds up the instrumented // executable at -O0. if (auto AI = dyn_cast_or_null(Ptr)) -if (ClSkipPromotableAllocas && !isInterestingAlloca(*AI)) +if (!isInterestingAlloca(*AI)) return true; if (SSGI != nullptr && SSGI->stackAccessIsSafe(*Inst) && diff --git a/llvm/test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca.ll b/llvm/test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca.ll index 6237673921978f..1f1049d6f625ef 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca.ll @@ -10,19 +10,19 @@ target triple = "x86_64-apple-macosx10.10.0" define i32 @foo() sanitize_address { entry: - ; Won't be instrumented because of asan-skip-promotable-allocas. + ; Won't be instrumented because of asan-use-safety-analysis. %non_instrumented1 = alloca i32, align 4 ; Regular alloca, will get instrumented (forced by the ptrtoint below). 
%instrumented = alloca i32, align 4 - ; Won't be instrumented because of asan-skip-promotable-allocas. + ; Won't be instrumented because of asan-use-safety-analysis. %non_instrumented2 = alloca i32, align 4 br label %bb0 bb0: - ; Won't be instrumented because of asan-skip-promotable-allocas. + ; Won't be instrumented because of asan-use-safety-analysis. %non_instrumented3 = alloca i32, align 4 %ptr = ptrtoint ptr %instrumented to i32 diff --git a/llvm/test/Instrumentation/AddressSanitizer/hoist-argument-init-insts.ll b/llvm/test/Instrumentation/AddressSanitizer/hoist-argument-init-insts.ll index 5ecd4dc7fb9430..85bfe761aee6a9 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/hoist-argument-init-insts.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/hoist-argument-init-insts.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -
[llvm-branch-commits] [llvm] [compiler-rt] [asan] isInterestingAlloca: remove the isAllocaPromotable condition (PR #77221)
llvmbot wrote: @llvm/pr-subscribers-llvm-transforms Author: Fangrui Song (MaskRay) Changes Commit 8ed1d8196bef89c3099be4ce4aa65f613ab819cc made an AllocaInst interesting only if `!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)`, which greatly removed memory operand instrumentation for -O0. However, this optimization is subsumed by StackSafetyAnalysis and therefore unnecessary when we enable StackSafetyAnalysis by default. Actually, having the `!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)` condition before `!(SSGI && SSGI->isSafe(AI)))` has an interesting false positive involving MemIntrinsic (see `hoist-argument-init-insts.ll`): * `isInterestingAlloca` is transitively called by `getInterestingMemoryOperands` and `FunctionStackPoisoner`. * If `getInterestingMemoryOperands` never calls `StackSafetyGlobalInfo::getInfo`, and a MemIntrinsic is converted to `__asan_memcpy` by `instrumentMemIntrinsic`, when `StackSafetyGlobalInfo::getInfo` is called, StackSafetyAnalysis will consider `__asan_memcpy` as unsafe, leading to an unnecessary alloca instrumentation --- Full diff: https://github.com/llvm/llvm-project/pull/77221.diff 4 Files Affected: - (modified) llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp (+1-9) - (modified) llvm/test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca.ll (+3-3) - (modified) llvm/test/Instrumentation/AddressSanitizer/hoist-argument-init-insts.ll (+3-2) - (modified) llvm/test/Instrumentation/AddressSanitizer/local_stack_base.ll (+1-1) ``diff diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 5e7e08eaa9978d..4dfb85b70d77d6 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -345,11 +345,6 @@ static cl::opt cl::desc("instrument dynamic allocas"), cl::Hidden, cl::init(true)); -static cl::opt ClSkipPromotableAllocas( 
-"asan-skip-promotable-allocas", -cl::desc("Do not instrument promotable allocas"), cl::Hidden, -cl::init(true)); - static cl::opt ClConstructorKind( "asan-constructor-kind", cl::desc("Sets the ASan constructor kind"), @@ -1278,9 +1273,6 @@ bool AddressSanitizer::isInterestingAlloca(const AllocaInst &AI) { (AI.getAllocatedType()->isSized() && // alloca() may be called with 0 size, ignore it. ((!AI.isStaticAlloca()) || !getAllocaSizeInBytes(AI).isZero()) && - // We are only interested in allocas not promotable to registers. - // Promotable allocas are common under -O0. - (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)) && // inalloca allocas are not treated as static, and we don't want // dynamic alloca instrumentation for them as well. !AI.isUsedWithInAlloca() && @@ -1311,7 +1303,7 @@ bool AddressSanitizer::ignoreAccess(Instruction *Inst, Value *Ptr) { // will not cause memory violations. This greatly speeds up the instrumented // executable at -O0. if (auto AI = dyn_cast_or_null(Ptr)) -if (ClSkipPromotableAllocas && !isInterestingAlloca(*AI)) +if (!isInterestingAlloca(*AI)) return true; if (SSGI != nullptr && SSGI->stackAccessIsSafe(*Inst) && diff --git a/llvm/test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca.ll b/llvm/test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca.ll index 6237673921978f..1f1049d6f625ef 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca.ll @@ -10,19 +10,19 @@ target triple = "x86_64-apple-macosx10.10.0" define i32 @foo() sanitize_address { entry: - ; Won't be instrumented because of asan-skip-promotable-allocas. + ; Won't be instrumented because of asan-use-safety-analysis. %non_instrumented1 = alloca i32, align 4 ; Regular alloca, will get instrumented (forced by the ptrtoint below). 
%instrumented = alloca i32, align 4 - ; Won't be instrumented because of asan-skip-promotable-allocas. + ; Won't be instrumented because of asan-use-safety-analysis. %non_instrumented2 = alloca i32, align 4 br label %bb0 bb0: - ; Won't be instrumented because of asan-skip-promotable-allocas. + ; Won't be instrumented because of asan-use-safety-analysis. %non_instrumented3 = alloca i32, align 4 %ptr = ptrtoint ptr %instrumented to i32 diff --git a/llvm/test/Instrumentation/AddressSanitizer/hoist-argument-init-insts.ll b/llvm/test/Instrumentation/AddressSanitizer/hoist-argument-init-insts.ll index 5ecd4dc7fb9430..85bfe761aee6a9 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/hoist-argument-init-insts.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/hoist-argument-init-insts.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -passes