[llvm-branch-commits] [clang] [llvm] [SPARC] Prefer RDPC over CALL to implement GETPCX for 64-bit target (PR #77196)

2024-01-06 Thread via llvm-branch-commits

https://github.com/koachan created 
https://github.com/llvm/llvm-project/pull/77196

On 64-bit target, prefer using RDPC over CALL to get the value of %pc.
This is faster on modern processors (Niagara T1 and newer) and avoids polluting
the processor's predictor state.

The old behavior of using a fake CALL is still done when tuning for classic
UltraSPARC processors, since RDPC is much slower there.

A quick pgbench test on a SPARC T4 shows about 2% speedup on SELECT loads,
and about 7% speedup on INSERT/UPDATE loads.



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [SPARC] Prefer RDPC over CALL to implement GETPCX for 64-bit target (PR #77196)

2024-01-06 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-sparc

Author: Koakuma (koachan)


Changes

On 64-bit target, prefer using RDPC over CALL to get the value of %pc.
This is faster on modern processors (Niagara T1 and newer) and avoids polluting
the processor's predictor state.

The old behavior of using a fake CALL is still done when tuning for classic
UltraSPARC processors, since RDPC is much slower there.

A quick pgbench test on a SPARC T4 shows about 2% speedup on SELECT loads,
and about 7% speedup on INSERT/UPDATE loads.


---
Full diff: https://github.com/llvm/llvm-project/pull/77196.diff


3 Files Affected:

- (modified) llvm/lib/Target/Sparc/Sparc.td (+14-4) 
- (modified) llvm/lib/Target/Sparc/SparcAsmPrinter.cpp (+19-2) 
- (added) llvm/test/CodeGen/SPARC/tune-getpcx.ll (+18) 


``diff
diff --git a/llvm/lib/Target/Sparc/Sparc.td b/llvm/lib/Target/Sparc/Sparc.td
index 1a71cfed3128f0..7b103395652433 100644
--- a/llvm/lib/Target/Sparc/Sparc.td
+++ b/llvm/lib/Target/Sparc/Sparc.td
@@ -62,6 +62,13 @@ def UsePopc : SubtargetFeature<"popc", "UsePopc", "true",
 def FeatureSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
   "Use software emulation for floating point">;
 
+//===--===//
+// SPARC Subtarget tuning features.
+//
+
+def TuneSlowRDPC : SubtargetFeature<"slow-rdpc", "HasSlowRDPC", "true",
+"rd %pc, %XX is slow", [FeatureV9]>;
+
 // Features added predmoninantly for LEON subtarget support
 include "LeonFeatures.td"
 
@@ -89,8 +96,9 @@ def SparcAsmParserVariant : AsmParserVariant {
 // SPARC processors supported.
 
//===--===//
 
-class Proc Features>
- : Processor;
+class Proc Features,
+   list TuneFeatures = []>
+ : Processor;
 
 def : Proc<"generic", []>;
 def : Proc<"v7",  [FeatureSoftMulDiv, FeatureNoFSMULD]>;
@@ -118,9 +126,11 @@ def : Proc<"ma2480",  [FeatureLeon, LeonCASA]>;
 def : Proc<"ma2485",  [FeatureLeon, LeonCASA]>;
 def : Proc<"ma2x8x",  [FeatureLeon, LeonCASA]>;
 def : Proc<"v9",  [FeatureV9]>;
-def : Proc<"ultrasparc",  [FeatureV9, FeatureV8Deprecated, FeatureVIS]>;
+def : Proc<"ultrasparc",  [FeatureV9, FeatureV8Deprecated, FeatureVIS],
+  [TuneSlowRDPC]>;
 def : Proc<"ultrasparc3", [FeatureV9, FeatureV8Deprecated, FeatureVIS,
-   FeatureVIS2]>;
+   FeatureVIS2],
+  [TuneSlowRDPC]>;
 def : Proc<"niagara", [FeatureV9, FeatureV8Deprecated, FeatureVIS,
FeatureVIS2]>;
 def : Proc<"niagara2",[FeatureV9, FeatureV8Deprecated, UsePopc,
diff --git a/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp 
b/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
index cca624e0926796..97abf10b18540d 100644
--- a/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -13,6 +13,7 @@
 
 #include "MCTargetDesc/SparcInstPrinter.h"
 #include "MCTargetDesc/SparcMCExpr.h"
+#include "MCTargetDesc/SparcMCTargetDesc.h"
 #include "MCTargetDesc/SparcTargetStreamer.h"
 #include "Sparc.h"
 #include "SparcInstrInfo.h"
@@ -111,6 +112,15 @@ static void EmitCall(MCStreamer &OutStreamer,
   OutStreamer.emitInstruction(CallInst, STI);
 }
 
+static void EmitRDPC(MCStreamer &OutStreamer, MCOperand &RD,
+ const MCSubtargetInfo &STI) {
+  MCInst RDPCInst;
+  RDPCInst.setOpcode(SP::RDASR);
+  RDPCInst.addOperand(RD);
+  RDPCInst.addOperand(MCOperand::createReg(SP::ASR5));
+  OutStreamer.emitInstruction(RDPCInst, STI);
+}
+
 static void EmitSETHI(MCStreamer &OutStreamer,
   MCOperand &Imm, MCOperand &RD,
   const MCSubtargetInfo &STI)
@@ -234,8 +244,15 @@ void SparcAsmPrinter::LowerGETPCXAndEmitMCInsts(const 
MachineInstr *MI,
   //   add , %o7, 
 
   OutStreamer->emitLabel(StartLabel);
-  MCOperand Callee =  createPCXCallOP(EndLabel, OutContext);
-  EmitCall(*OutStreamer, Callee, STI);
+  if (!STI.getTargetTriple().isSPARC64() ||
+  STI.hasFeature(Sparc::TuneSlowRDPC)) {
+MCOperand Callee = createPCXCallOP(EndLabel, OutContext);
+EmitCall(*OutStreamer, Callee, STI);
+  } else {
+// TODO make it possible to store PC in other registers
+// so that leaf function optimization becomes possible.
+EmitRDPC(*OutStreamer, RegO7, STI);
+  }
   OutStreamer->emitLabel(SethiLabel);
   MCOperand hiImm = createPCXRelExprOp(SparcMCExpr::VK_Sparc_PC22,
GOTLabel, StartLabel, SethiLabel,
diff --git a/llvm/test/CodeGen/SPARC/tune-getpcx.ll 
b/llvm/test/CodeGen/SPARC/tune-getpcx.ll
new file mode 100644
index 00..7454fea0e38d57
--- /dev/null
+++ b/llvm/test/CodeGen/SPARC/tune-getpcx.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -relocation-model=p

[llvm-branch-commits] [llvm] [compiler-rt] [asan] isInterestingAlloca: remove the isAllocaPromotable condition (PR #77221)

2024-01-06 Thread Fangrui Song via llvm-branch-commits

https://github.com/MaskRay created 
https://github.com/llvm/llvm-project/pull/77221

Commit 8ed1d8196bef89c3099be4ce4aa65f613ab819cc made an AllocaInst
interesting only if
`!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)`, which greatly
removed memory operand instrumentation for -O0. However, this optimization
is subsumed by StackSafetyAnalysis and therefore unnecessary when we
enable StackSafetyAnalysis by default.

Actually, having the `!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)`
condition before `!(SSGI && SSGI->isSafe(AI)))` has an interesting false
positive involving MemIntrinsic (see `hoist-argument-init-insts.ll`):

* `isInterestingAlloca` is transitively called by
  `getInterestingMemoryOperands` and `FunctionStackPoisoner`.
* If `getInterestingMemoryOperands` never calls
  `StackSafetyGlobalInfo::getInfo`, and a MemIntrinsic is converted to
  `__asan_memcpy` by `instrumentMemIntrinsic`, when
  `StackSafetyGlobalInfo::getInfo` is called, StackSafetyAnalysis will
  consider `__asan_memcpy` as unsafe, leading to an unnecessary
  alloca instrumentation



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [compiler-rt] [asan] isInterestingAlloca: remove the isAllocaPromotable condition (PR #77221)

2024-01-06 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-compiler-rt-sanitizer

Author: Fangrui Song (MaskRay)


Changes

Commit 8ed1d8196bef89c3099be4ce4aa65f613ab819cc made an AllocaInst
interesting only if
`!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)`, which greatly
removed memory operand instrumentation for -O0. However, this optimization
is subsumed by StackSafetyAnalysis and therefore unnecessary when we
enable StackSafetyAnalysis by default.

Actually, having the `!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)`
condition before `!(SSGI && SSGI->isSafe(AI)))` has an interesting false
positive involving MemIntrinsic (see `hoist-argument-init-insts.ll`):

* `isInterestingAlloca` is transitively called by
  `getInterestingMemoryOperands` and `FunctionStackPoisoner`.
* If `getInterestingMemoryOperands` never calls
  `StackSafetyGlobalInfo::getInfo`, and a MemIntrinsic is converted to
  `__asan_memcpy` by `instrumentMemIntrinsic`, when
  `StackSafetyGlobalInfo::getInfo` is called, StackSafetyAnalysis will
  consider `__asan_memcpy` as unsafe, leading to an unnecessary
  alloca instrumentation


---
Full diff: https://github.com/llvm/llvm-project/pull/77221.diff


4 Files Affected:

- (modified) llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp (+1-9) 
- (modified) 
llvm/test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca.ll 
(+3-3) 
- (modified) 
llvm/test/Instrumentation/AddressSanitizer/hoist-argument-init-insts.ll (+3-2) 
- (modified) llvm/test/Instrumentation/AddressSanitizer/local_stack_base.ll 
(+1-1) 


``diff
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp 
b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 5e7e08eaa9978d..4dfb85b70d77d6 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -345,11 +345,6 @@ static cl::opt
cl::desc("instrument dynamic allocas"),
cl::Hidden, cl::init(true));
 
-static cl::opt ClSkipPromotableAllocas(
-"asan-skip-promotable-allocas",
-cl::desc("Do not instrument promotable allocas"), cl::Hidden,
-cl::init(true));
-
 static cl::opt ClConstructorKind(
 "asan-constructor-kind",
 cl::desc("Sets the ASan constructor kind"),
@@ -1278,9 +1273,6 @@ bool AddressSanitizer::isInterestingAlloca(const 
AllocaInst &AI) {
   (AI.getAllocatedType()->isSized() &&
// alloca() may be called with 0 size, ignore it.
((!AI.isStaticAlloca()) || !getAllocaSizeInBytes(AI).isZero()) &&
-   // We are only interested in allocas not promotable to registers.
-   // Promotable allocas are common under -O0.
-   (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)) &&
// inalloca allocas are not treated as static, and we don't want
// dynamic alloca instrumentation for them as well.
!AI.isUsedWithInAlloca() &&
@@ -1311,7 +1303,7 @@ bool AddressSanitizer::ignoreAccess(Instruction *Inst, 
Value *Ptr) {
   // will not cause memory violations. This greatly speeds up the instrumented
   // executable at -O0.
   if (auto AI = dyn_cast_or_null(Ptr))
-if (ClSkipPromotableAllocas && !isInterestingAlloca(*AI))
+if (!isInterestingAlloca(*AI))
   return true;
 
   if (SSGI != nullptr && SSGI->stackAccessIsSafe(*Inst) &&
diff --git 
a/llvm/test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca.ll
 
b/llvm/test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca.ll
index 6237673921978f..1f1049d6f625ef 100644
--- 
a/llvm/test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca.ll
+++ 
b/llvm/test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca.ll
@@ -10,19 +10,19 @@ target triple = "x86_64-apple-macosx10.10.0"
 
 define i32 @foo() sanitize_address {
 entry:
-  ; Won't be instrumented because of asan-skip-promotable-allocas.
+  ; Won't be instrumented because of asan-use-safety-analysis.
   %non_instrumented1 = alloca i32, align 4
 
   ; Regular alloca, will get instrumented (forced by the ptrtoint below).
   %instrumented = alloca i32, align 4
 
-  ; Won't be instrumented because of asan-skip-promotable-allocas.
+  ; Won't be instrumented because of asan-use-safety-analysis.
   %non_instrumented2 = alloca i32, align 4
 
   br label %bb0
 
 bb0:
-  ; Won't be instrumented because of asan-skip-promotable-allocas.
+  ; Won't be instrumented because of asan-use-safety-analysis.
   %non_instrumented3 = alloca i32, align 4
 
   %ptr = ptrtoint ptr %instrumented to i32
diff --git 
a/llvm/test/Instrumentation/AddressSanitizer/hoist-argument-init-insts.ll 
b/llvm/test/Instrumentation/AddressSanitizer/hoist-argument-init-insts.ll
index 5ecd4dc7fb9430..85bfe761aee6a9 100644
--- a/llvm/test/Instrumentation/AddressSanitizer/hoist-argument-init-insts.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/hoist-argument-init-insts.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -

[llvm-branch-commits] [llvm] [compiler-rt] [asan] isInterestingAlloca: remove the isAllocaPromotable condition (PR #77221)

2024-01-06 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-transforms

Author: Fangrui Song (MaskRay)


Changes

Commit 8ed1d8196bef89c3099be4ce4aa65f613ab819cc made an AllocaInst
interesting only if
`!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)`, which greatly
removed memory operand instrumentation for -O0. However, this optimization
is subsumed by StackSafetyAnalysis and therefore unnecessary when we
enable StackSafetyAnalysis by default.

Actually, having the `!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)`
condition before `!(SSGI && SSGI->isSafe(AI)))` has an interesting false
positive involving MemIntrinsic (see `hoist-argument-init-insts.ll`):

* `isInterestingAlloca` is transitively called by
  `getInterestingMemoryOperands` and `FunctionStackPoisoner`.
* If `getInterestingMemoryOperands` never calls
  `StackSafetyGlobalInfo::getInfo`, and a MemIntrinsic is converted to
  `__asan_memcpy` by `instrumentMemIntrinsic`, when
  `StackSafetyGlobalInfo::getInfo` is called, StackSafetyAnalysis will
  consider `__asan_memcpy` as unsafe, leading to an unnecessary
  alloca instrumentation


---
Full diff: https://github.com/llvm/llvm-project/pull/77221.diff


4 Files Affected:

- (modified) llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp (+1-9) 
- (modified) 
llvm/test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca.ll 
(+3-3) 
- (modified) 
llvm/test/Instrumentation/AddressSanitizer/hoist-argument-init-insts.ll (+3-2) 
- (modified) llvm/test/Instrumentation/AddressSanitizer/local_stack_base.ll 
(+1-1) 


``diff
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp 
b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 5e7e08eaa9978d..4dfb85b70d77d6 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -345,11 +345,6 @@ static cl::opt
cl::desc("instrument dynamic allocas"),
cl::Hidden, cl::init(true));
 
-static cl::opt ClSkipPromotableAllocas(
-"asan-skip-promotable-allocas",
-cl::desc("Do not instrument promotable allocas"), cl::Hidden,
-cl::init(true));
-
 static cl::opt ClConstructorKind(
 "asan-constructor-kind",
 cl::desc("Sets the ASan constructor kind"),
@@ -1278,9 +1273,6 @@ bool AddressSanitizer::isInterestingAlloca(const 
AllocaInst &AI) {
   (AI.getAllocatedType()->isSized() &&
// alloca() may be called with 0 size, ignore it.
((!AI.isStaticAlloca()) || !getAllocaSizeInBytes(AI).isZero()) &&
-   // We are only interested in allocas not promotable to registers.
-   // Promotable allocas are common under -O0.
-   (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)) &&
// inalloca allocas are not treated as static, and we don't want
// dynamic alloca instrumentation for them as well.
!AI.isUsedWithInAlloca() &&
@@ -1311,7 +1303,7 @@ bool AddressSanitizer::ignoreAccess(Instruction *Inst, 
Value *Ptr) {
   // will not cause memory violations. This greatly speeds up the instrumented
   // executable at -O0.
   if (auto AI = dyn_cast_or_null(Ptr))
-if (ClSkipPromotableAllocas && !isInterestingAlloca(*AI))
+if (!isInterestingAlloca(*AI))
   return true;
 
   if (SSGI != nullptr && SSGI->stackAccessIsSafe(*Inst) &&
diff --git 
a/llvm/test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca.ll
 
b/llvm/test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca.ll
index 6237673921978f..1f1049d6f625ef 100644
--- 
a/llvm/test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca.ll
+++ 
b/llvm/test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca.ll
@@ -10,19 +10,19 @@ target triple = "x86_64-apple-macosx10.10.0"
 
 define i32 @foo() sanitize_address {
 entry:
-  ; Won't be instrumented because of asan-skip-promotable-allocas.
+  ; Won't be instrumented because of asan-use-safety-analysis.
   %non_instrumented1 = alloca i32, align 4
 
   ; Regular alloca, will get instrumented (forced by the ptrtoint below).
   %instrumented = alloca i32, align 4
 
-  ; Won't be instrumented because of asan-skip-promotable-allocas.
+  ; Won't be instrumented because of asan-use-safety-analysis.
   %non_instrumented2 = alloca i32, align 4
 
   br label %bb0
 
 bb0:
-  ; Won't be instrumented because of asan-skip-promotable-allocas.
+  ; Won't be instrumented because of asan-use-safety-analysis.
   %non_instrumented3 = alloca i32, align 4
 
   %ptr = ptrtoint ptr %instrumented to i32
diff --git 
a/llvm/test/Instrumentation/AddressSanitizer/hoist-argument-init-insts.ll 
b/llvm/test/Instrumentation/AddressSanitizer/hoist-argument-init-insts.ll
index 5ecd4dc7fb9430..85bfe761aee6a9 100644
--- a/llvm/test/Instrumentation/AddressSanitizer/hoist-argument-init-insts.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/hoist-argument-init-insts.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -passes