date:20240816

[llvm-branch-commits] [llvm] [WIP] AMDGPU: Handle v_add* in eliminateFrameIndex (PR #102346)

2024-08-16 Thread Matt Arsenault via llvm-branch-commits


https://github.com/arsenm edited 
https://github.com/llvm/llvm-project/pull/102346
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [WIP] AMDGPU: Handle v_add* in eliminateFrameIndex (PR #102346)

2024-08-16 Thread Matt Arsenault via llvm-branch-commits


https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/102346

error: too big or took too long to generate
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw from flat_atomic_{f32|f64} builtins (PR #96874)

2024-08-16 Thread Matt Arsenault via llvm-branch-commits


https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/96874

>From e10cf564b3f902655d465a2a62f1d25d3ac82018 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 26 Jun 2024 19:15:26 +0200
Subject: [PATCH] clang/AMDGPU: Emit atomicrmw from flat_atomic_{f32|f64}
 builtins

---
 clang/lib/CodeGen/CGBuiltin.cpp | 17 ++---
 .../CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl |  6 --
 .../CodeGenOpenCL/builtins-fp-atomics-gfx940.cl |  3 ++-
 3 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 77c652573cae42..0b6e4f55502655 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18922,10 +18922,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
   }
   case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
   case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
-  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
   case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
-  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
-  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: {
+  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
 Intrinsic::ID IID;
 llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
 switch (BuiltinID) {
@@ -18935,19 +18933,12 @@ Value 
*CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
   IID = Intrinsic::amdgcn_global_atomic_fmax;
   break;
-case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
-  IID = Intrinsic::amdgcn_flat_atomic_fadd;
-  break;
 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
   IID = Intrinsic::amdgcn_flat_atomic_fmin;
   break;
 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
   IID = Intrinsic::amdgcn_flat_atomic_fmax;
   break;
-case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
-  ArgTy = llvm::Type::getFloatTy(getLLVMContext());
-  IID = Intrinsic::amdgcn_flat_atomic_fadd;
-  break;
 }
 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
 llvm::Value *Val = EmitScalarExpr(E->getArg(1));
@@ -19350,7 +19341,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
   case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
   case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
   case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
-  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: {
+  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
+  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
+  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: {
 llvm::AtomicRMWInst::BinOp BinOp;
 switch (BuiltinID) {
 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
@@ -19370,6 +19363,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
+case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
+case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
   BinOp = llvm::AtomicRMWInst::FAdd;
   break;
 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl 
b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl
index e6469c189ac91d..9381ce951df3e3 100644
--- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl
+++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl
@@ -45,7 +45,8 @@ void test_global_max_f64(__global double *addr, double x){
 }
 
 // CHECK-LABEL: test_flat_add_local_f64
-// CHECK: call double @llvm.amdgcn.flat.atomic.fadd.f64.p3.f64(ptr 
addrspace(3) %{{.*}}, double %{{.*}})
+// CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, double %{{.+}} 
syncscope("agent") monotonic, align 8{{$}}
+
 // GFX90A-LABEL:  test_flat_add_local_f64$local
 // GFX90A:  ds_add_rtn_f64
 void test_flat_add_local_f64(__local double *addr, double x){
@@ -54,7 +55,8 @@ void test_flat_add_local_f64(__local double *addr, double x){
 }
 
 // CHECK-LABEL: test_flat_global_add_f64
-// CHECK: call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr 
addrspace(1) %{{.*}}, double %{{.*}})
+// CHECK: = atomicrmw fadd ptr addrspace(1) {{.+}}, double %{{.+}} 
syncscope("agent") monotonic, align 8, !amdgpu.no.fine.grained.memory 
!{{[0-9]+$}}
+
 // GFX90A-LABEL:  test_flat_global_add_f64$local
 // GFX90A:  global_atomic_add_f64
 void test_flat_global_add_f64(__global double *addr, double x){
diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl 
b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl
index b07bdf524f230e..254e2814580ca6 100644
--- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl
+++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx940.cl
@@ -10,7 +10,8 @@ type

[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw for global/flat fadd v2bf16 builtins (PR #96875)

2024-08-16 Thread Matt Arsenault via llvm-branch-commits


https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/96875

>From f162998c081c4e6a6162fede2878d7a85add640e Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 26 Jun 2024 19:34:43 +0200
Subject: [PATCH] clang/AMDGPU: Emit atomicrmw for global/flat fadd v2bf16
 builtins

---
 clang/lib/CodeGen/CGBuiltin.cpp   | 26 ++-
 .../builtins-fp-atomics-gfx12.cl  | 18 ++---
 .../builtins-fp-atomics-gfx940.cl | 10 +--
 3 files changed, 30 insertions(+), 24 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 0b6e4f55502655..18efc0de2b90d6 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18946,22 +18946,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
 CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()});
 return Builder.CreateCall(F, {Addr, Val});
   }
-  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
-  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: {
-Intrinsic::ID IID;
-switch (BuiltinID) {
-case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
-  IID = Intrinsic::amdgcn_global_atomic_fadd_v2bf16;
-  break;
-case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
-  IID = Intrinsic::amdgcn_flat_atomic_fadd_v2bf16;
-  break;
-}
-llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
-llvm::Value *Val = EmitScalarExpr(E->getArg(1));
-llvm::Function *F = CGM.getIntrinsic(IID, {Addr->getType()});
-return Builder.CreateCall(F, {Addr, Val});
-  }
   case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
   case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
   case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
@@ -19343,7 +19327,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
   case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
   case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
   case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
-  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: {
+  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
+  case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
+  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: {
 llvm::AtomicRMWInst::BinOp BinOp;
 switch (BuiltinID) {
 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
@@ -19365,6 +19351,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
+case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
+case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
   BinOp = llvm::AtomicRMWInst::FAdd;
   break;
 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
@@ -19409,7 +19397,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
   AO = AtomicOrdering::Monotonic;
 
   // The v2bf16 builtin uses i16 instead of a natural bfloat type.
-  if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16) {
+  if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16 ||
+  BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16 ||
+  BuiltinID == AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16) {
 llvm::Type *V2BF16Ty = FixedVectorType::get(
 llvm::Type::getBFloatTy(Builder.getContext()), 2);
 Val = Builder.CreateBitCast(Val, V2BF16Ty);
diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl 
b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl
index df88c707a19882..e8b6eb57c38d7a 100644
--- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl
+++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl
@@ -11,7 +11,7 @@ typedef short __attribute__((ext_vector_type(2))) short2;
 
 // CHECK-LABEL: test_local_add_2bf16
 // CHECK: [[BC0:%.+]] = bitcast <2 x i16> {{.+}} to <2 x bfloat>
-// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x bfloat> 
[[BC0]] syncscope("agent") monotonic, align 4
+// CHECK-NEXT: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x 
bfloat> [[BC0]] syncscope("agent") monotonic, align 4
 // CHECK-NEXT: bitcast <2 x bfloat> [[RMW]] to <2 x i16>
 
 // GFX12-LABEL:  test_local_add_2bf16
@@ -57,7 +57,10 @@ half2 test_flat_add_2f16(__generic half2 *addr, half2 x) {
 }
 
 // CHECK-LABEL: test_flat_add_2bf16
-// CHECK: call <2 x i16> @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0(ptr %{{.*}}, 
<2 x i16> %{{.*}})
+// CHECK: [[BC:%.+]] = bitcast <2 x i16> %{{.+}} to <2 x bfloat>
+// CHECK: [[RMW:%.+]] = atomicrmw fadd ptr %{{.+}}, <2 x bfloat> [[BC]] 
syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory 
!{{[0-9]+$}}
+// CHECK: bitcast <2 x bfloat> [[RMW]] to <2 x i16>
+
 // GFX12-LABEL:  test_flat_add_

[llvm-branch-commits] [clang] clang/AMDGPU: Emit atomicrmw for flat/global atomic min/max f64 builtins (PR #96876)

2024-08-16 Thread Matt Arsenault via llvm-branch-commits


https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/96876

>From 7e0bed6a0511eeaaae318b9fe80bdd4ce06d527a Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 26 Jun 2024 23:18:32 +0200
Subject: [PATCH] clang/AMDGPU: Emit atomicrmw for flat/global atomic min/max
 f64 builtins

---
 clang/lib/CodeGen/CGBuiltin.cpp   | 36 +--
 .../builtins-fp-atomics-gfx90a.cl | 18 ++
 2 files changed, 21 insertions(+), 33 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 18efc0de2b90d6..b2d9a34d27e558 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18920,32 +18920,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
 Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
 return Builder.CreateCall(F, { Src0, Builder.getFalse() });
   }
-  case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
-  case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
-  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
-  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
-Intrinsic::ID IID;
-llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
-switch (BuiltinID) {
-case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
-  IID = Intrinsic::amdgcn_global_atomic_fmin;
-  break;
-case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
-  IID = Intrinsic::amdgcn_global_atomic_fmax;
-  break;
-case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
-  IID = Intrinsic::amdgcn_flat_atomic_fmin;
-  break;
-case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
-  IID = Intrinsic::amdgcn_flat_atomic_fmax;
-  break;
-}
-llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
-llvm::Value *Val = EmitScalarExpr(E->getArg(1));
-llvm::Function *F =
-CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()});
-return Builder.CreateCall(F, {Addr, Val});
-  }
   case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
   case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
   case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
@@ -19329,7 +19303,11 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
   case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
   case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
   case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
-  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: {
+  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
+  case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
+  case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
+  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
+  case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
 llvm::AtomicRMWInst::BinOp BinOp;
 switch (BuiltinID) {
 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
@@ -19356,8 +19334,12 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
   BinOp = llvm::AtomicRMWInst::FAdd;
   break;
 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
+case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
+case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
   BinOp = llvm::AtomicRMWInst::FMin;
   break;
+case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
+case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
   BinOp = llvm::AtomicRMWInst::FMax;
   break;
diff --git a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl 
b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl
index 9381ce951df3e3..556e553903d1a5 100644
--- a/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl
+++ b/clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx90a.cl
@@ -27,7 +27,8 @@ void test_global_add_half2(__global half2 *addr, half2 x) {
 }
 
 // CHECK-LABEL: test_global_global_min_f64
-// CHECK: call double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr 
addrspace(1) %{{.*}}, double %{{.*}})
+// CHECK: = atomicrmw fmin ptr addrspace(1) {{.+}}, double %{{.+}} 
syncscope("agent") monotonic, align 8, !amdgpu.no.fine.grained.memory 
!{{[0-9]+$}}
+
 // GFX90A-LABEL:  test_global_global_min_f64$local
 // GFX90A:  global_atomic_min_f64
 void test_global_global_min_f64(__global double *addr, double x){
@@ -36,7 +37,8 @@ void test_global_global_min_f64(__global double *addr, double 
x){
 }
 
 // CHECK-LABEL: test_global_max_f64
-// CHECK: call double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr 
addrspace(1) %{{.*}}, double %{{.*}})
+// CHECK: = atomicrmw fmax ptr addrspace(1) {{.+}}, double %{{.+}} 
syncscope("agent") monotonic, align 8, !amdgpu.no.fine.grained.memory 
!{{[0-9]+$}}
+
 // GFX90A-LABEL:  test_global_max_f64$local
 // GFX90A:  global_atomic_max_f64
 void test_global_max_f64(__global double *addr, double x){
@@ -65,7 +67,8 @@ void test_flat_global_add_f64(__global double *addr,

[llvm-branch-commits] [llvm] AMDGPU: Remove flat/global atomic fadd v2bf16 intrinsics (PR #97050)

2024-08-16 Thread Matt Arsenault via llvm-branch-commits


https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/97050

>From 04528075a6cd460a4db7f1e6412ce54cca123468 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 27 Jun 2024 16:32:48 +0200
Subject: [PATCH] AMDGPU: Remove flat/global atomic fadd v2bf16 intrinsics

These are now fully covered by atomicrmw.
---
 llvm/include/llvm/IR/IntrinsicsAMDGPU.td  |   4 -
 llvm/lib/IR/AutoUpgrade.cpp   |  14 +-
 llvm/lib/Target/AMDGPU/AMDGPUInstructions.td  |   2 -
 .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp  |   2 -
 .../Target/AMDGPU/AMDGPUSearchableTables.td   |   2 -
 llvm/lib/Target/AMDGPU/FLATInstructions.td|   2 -
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp |   6 +-
 llvm/test/Bitcode/amdgcn-atomic.ll|  22 ++
 .../AMDGPU/GlobalISel/fp-atomics-gfx940.ll| 106 -
 .../test/CodeGen/AMDGPU/fp-atomics-gfx1200.ll | 218 --
 llvm/test/CodeGen/AMDGPU/fp-atomics-gfx940.ll | 193 
 11 files changed, 33 insertions(+), 538 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td 
b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 8c25467cc5e4b1..e24571d8b184c8 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -2990,10 +2990,6 @@ multiclass AMDGPUMFp8SmfmacIntrinsic {
 def NAME#"_"#kind : AMDGPUMFp8SmfmacIntrinsic;
 }
 
-// bf16 atomics use v2i16 argument since there is no bf16 data type in the 
llvm.
-def int_amdgcn_global_atomic_fadd_v2bf16 : AMDGPUAtomicRtn;
-def int_amdgcn_flat_atomic_fadd_v2bf16   : AMDGPUAtomicRtn;
-
 defset list AMDGPUMFMAIntrinsics940 = {
 def int_amdgcn_mfma_i32_16x16x32_i8 : AMDGPUMfmaIntrinsic;
 def int_amdgcn_mfma_i32_32x32x16_i8 : AMDGPUMfmaIntrinsic;
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index e24d119b781628..c6963edf5288ae 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -1034,7 +1034,9 @@ static bool upgradeIntrinsicFunction1(Function *F, 
Function *&NewFn,
   }
 
   if (Name.starts_with("ds.fadd") || Name.starts_with("ds.fmin") ||
-  Name.starts_with("ds.fmax")) {
+  Name.starts_with("ds.fmax") ||
+  Name.starts_with("global.atomic.fadd.v2bf16") ||
+  Name.starts_with("flat.atomic.fadd.v2bf16")) {
 // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
 // declaration.
 NewFn = nullptr;
@@ -4042,7 +4044,9 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, 
CallBase *CI,
   .StartsWith("ds.fmin", AtomicRMWInst::FMin)
   .StartsWith("ds.fmax", AtomicRMWInst::FMax)
   .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
-  .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap);
+  .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
+  .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
+  .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd);
 
   unsigned NumOperands = CI->getNumOperands();
   if (NumOperands < 3) // Malformed bitcode.
@@ -4097,8 +4101,10 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, 
CallBase *CI,
   Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
 
   if (PtrTy->getAddressSpace() != 3) {
-RMW->setMetadata("amdgpu.no.fine.grained.memory",
- MDNode::get(F->getContext(), {}));
+MDNode *EmptyMD = MDNode::get(F->getContext(), {});
+RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
+if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
+  RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
   }
 
   if (IsVolatile)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td 
b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index c6dbc58395e48f..db8b44149cf47e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -620,12 +620,10 @@ multiclass local_addr_space_atomic_op {
 
 defm int_amdgcn_flat_atomic_fadd : noret_op;
 defm int_amdgcn_flat_atomic_fadd : flat_addr_space_atomic_op;
-defm int_amdgcn_flat_atomic_fadd_v2bf16 : noret_op;
 defm int_amdgcn_flat_atomic_fmin : noret_op;
 defm int_amdgcn_flat_atomic_fmax : noret_op;
 defm int_amdgcn_global_atomic_fadd : global_addr_space_atomic_op;
 defm int_amdgcn_flat_atomic_fadd : global_addr_space_atomic_op;
-defm int_amdgcn_global_atomic_fadd_v2bf16 : noret_op;
 defm int_amdgcn_global_atomic_fmin : noret_op;
 defm int_amdgcn_global_atomic_fmax : noret_op;
 defm int_amdgcn_global_atomic_csub : noret_op;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 17067ddd93ff08..00878da1bfc68a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -4907,8 +4907,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const 
MachineInstr &MI) const {
 case Intrinsic::amdgcn_flat_atomic_fmax:
 case I

[llvm-branch-commits] [clang] d39fff2 - Revert "[clang][driver] Fix -print-target-triple OS version for apple targets…"

2024-08-16 Thread via llvm-branch-commits


Author: Martin Storsjö
Date: 2024-08-16T11:34:18+03:00
New Revision: d39fff2a6c7c77d6046a075e6119a4126d955b1a

URL: 
https://github.com/llvm/llvm-project/commit/d39fff2a6c7c77d6046a075e6119a4126d955b1a
DIFF: 
https://github.com/llvm/llvm-project/commit/d39fff2a6c7c77d6046a075e6119a4126d955b1a.diff

LOG: Revert "[clang][driver] Fix -print-target-triple OS version for apple 
targets…"

This reverts commit 7227b44f928a87b5d7fb05bd1539fdfb6d4958dc.

Added: 


Modified: 
clang/lib/Driver/Driver.cpp

Removed: 
clang/test/Driver/darwin-print-target-triple.c



diff  --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 5b95019c25cab6..e12416e51f8d24 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -2271,7 +2271,8 @@ bool Driver::HandleImmediateArgs(Compilation &C) {
 return false;
   }
 
-  auto initializeTargets = [&]() {
+  if (C.getArgs().hasArg(options::OPT_print_libgcc_file_name)) {
+ToolChain::RuntimeLibType RLT = TC.GetRuntimeLibType(C.getArgs());
 const llvm::Triple Triple(TC.ComputeEffectiveClangTriple(C.getArgs()));
 // The 'Darwin' toolchain is initialized only when its arguments are
 // computed. Get the default arguments for OFK_None to ensure that
@@ -2281,12 +2282,6 @@ bool Driver::HandleImmediateArgs(Compilation &C) {
 // FIXME: For some more esoteric targets the default toolchain is not the
 //correct one.
 C.getArgsForToolChain(&TC, Triple.getArchName(), Action::OFK_None);
-return Triple;
-  };
-
-  if (C.getArgs().hasArg(options::OPT_print_libgcc_file_name)) {
-ToolChain::RuntimeLibType RLT = TC.GetRuntimeLibType(C.getArgs());
-const llvm::Triple Triple = initializeTargets();
 RegisterEffectiveTriple TripleRAII(TC, Triple);
 switch (RLT) {
 case ToolChain::RLT_CompilerRT:
@@ -2330,9 +2325,7 @@ bool Driver::HandleImmediateArgs(Compilation &C) {
   }
 
   if (C.getArgs().hasArg(options::OPT_print_target_triple)) {
-initializeTargets();
-llvm::Triple Triple(TC.ComputeEffectiveClangTriple(C.getArgs()));
-llvm::outs() << Triple.getTriple() << "\n";
+llvm::outs() << TC.getTripleString() << "\n";
 return false;
   }
 

diff  --git a/clang/test/Driver/darwin-print-target-triple.c 
b/clang/test/Driver/darwin-print-target-triple.c
deleted file mode 100644
index 4f5fdfe9d0db34..00
--- a/clang/test/Driver/darwin-print-target-triple.c
+++ /dev/null
@@ -1,42 +0,0 @@
-// Test the output of -print-target-triple on Darwin.
-// See https://github.com/llvm/llvm-project/issues/61762
-
-//
-// All platforms
-//
-
-// RUN: %clang -print-target-triple \
-// RUN: --target=x86_64-apple-macos -mmacos-version-min=15 \
-// RUN: -resource-dir=%S/Inputs/resource_dir 2>&1 \
-// RUN:   | FileCheck --check-prefix=CHECK-CLANGRT-MACOS %s
-// CHECK-CLANGRT-MACOS: x86_64-apple-macosx15.0.0
-
-// RUN: %clang -print-target-triple \
-// RUN: --target=arm64-apple-ios -mios-version-min=9 \
-// RUN: -resource-dir=%S/Inputs/resource_dir 2>&1 \
-// RUN:   | FileCheck --check-prefix=CHECK-CLANGRT-IOS %s
-// CHECK-CLANGRT-IOS: arm64-apple-ios9.0.0
-
-// RUN: %clang -print-target-triple \
-// RUN: --target=arm64-apple-watchos -mwatchos-version-min=3 \
-// RUN: -resource-dir=%S/Inputs/resource_dir 2>&1 \
-// RUN:   | FileCheck --check-prefix=CHECK-CLANGRT-WATCHOS %s
-// CHECK-CLANGRT-WATCHOS: arm64-apple-watchos3.0.0
-
-// RUN: %clang -print-target-triple \
-// RUN: --target=armv7k-apple-watchos -mwatchos-version-min=3 \
-// RUN: -resource-dir=%S/Inputs/resource_dir 2>&1 \
-// RUN:   | FileCheck --check-prefix=CHECK-CLANGRT-WATCHOS-ARMV7K %s
-// CHECK-CLANGRT-WATCHOS-ARMV7K: thumbv7-apple-watchos3.0.0
-
-// RUN: %clang -print-target-triple \
-// RUN: --target=arm64-apple-tvos -mtvos-version-min=1\
-// RUN: -resource-dir=%S/Inputs/resource_dir 2>&1 \
-// RUN:   | FileCheck --check-prefix=CHECK-CLANGRT-TVOS %s
-// CHECK-CLANGRT-TVOS: arm64-apple-tvos1.0.0
-
-// RUN: %clang -print-target-triple \
-// RUN: --target=arm64-apple-driverkit \
-// RUN: -resource-dir=%S/Inputs/resource_dir 2>&1 \
-// RUN:   | FileCheck --check-prefix=CHECK-CLANGRT-DRIVERKIT %s
-// CHECK-CLANGRT-DRIVERKIT: arm64-apple-driverkit19.0.0



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [MC][NFC] Reduce Address2ProbesMap size (PR #102904)

2024-08-16 Thread Amir Ayupov via llvm-branch-commits


https://github.com/aaupov updated 
https://github.com/llvm/llvm-project/pull/102904

>From 3ffb03f8e4bcb2fa235ae989320c466af4a3cda8 Mon Sep 17 00:00:00 2001
From: Amir Ayupov 
Date: Mon, 12 Aug 2024 14:40:57 -0700
Subject: [PATCH] stable_sort

Created using spr 1.3.4
---
 llvm/lib/MC/MCPseudoProbe.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/MC/MCPseudoProbe.cpp b/llvm/lib/MC/MCPseudoProbe.cpp
index 45fe95e176ff24..1c81630dda4dd9 100644
--- a/llvm/lib/MC/MCPseudoProbe.cpp
+++ b/llvm/lib/MC/MCPseudoProbe.cpp
@@ -638,7 +638,7 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
   std::vector> SortedA2P(ProbeCount);
   for (const auto &[I, Probe] : llvm::enumerate(PseudoProbeVec))
 SortedA2P[I] = {Probe.getAddress(), I};
-  llvm::sort(SortedA2P, llvm::less_first());
+  llvm::stable_sort(SortedA2P, llvm::less_first());
   Address2ProbesMap.reserve(ProbeCount);
   for (const uint32_t I : llvm::make_second_range(SortedA2P))
 Address2ProbesMap.emplace_back(PseudoProbeVec[I]);

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [MC][NFC] Reduce Address2ProbesMap size (PR #102904)

2024-08-16 Thread Amir Ayupov via llvm-branch-commits


https://github.com/aaupov updated 
https://github.com/llvm/llvm-project/pull/102904

>From 3ffb03f8e4bcb2fa235ae989320c466af4a3cda8 Mon Sep 17 00:00:00 2001
From: Amir Ayupov 
Date: Mon, 12 Aug 2024 14:40:57 -0700
Subject: [PATCH] stable_sort

Created using spr 1.3.4
---
 llvm/lib/MC/MCPseudoProbe.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/MC/MCPseudoProbe.cpp b/llvm/lib/MC/MCPseudoProbe.cpp
index 45fe95e176ff24..1c81630dda4dd9 100644
--- a/llvm/lib/MC/MCPseudoProbe.cpp
+++ b/llvm/lib/MC/MCPseudoProbe.cpp
@@ -638,7 +638,7 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
   std::vector> SortedA2P(ProbeCount);
   for (const auto &[I, Probe] : llvm::enumerate(PseudoProbeVec))
 SortedA2P[I] = {Probe.getAddress(), I};
-  llvm::sort(SortedA2P, llvm::less_first());
+  llvm::stable_sort(SortedA2P, llvm::less_first());
   Address2ProbesMap.reserve(ProbeCount);
   for (const uint32_t I : llvm::make_second_range(SortedA2P))
 Address2ProbesMap.emplace_back(PseudoProbeVec[I]);

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [MC][NFC] Use vector for GUIDProbeFunctionMap (PR #102905)

2024-08-16 Thread Amir Ayupov via llvm-branch-commits


https://github.com/aaupov updated 
https://github.com/llvm/llvm-project/pull/102905


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [MC][NFC] Use vector for GUIDProbeFunctionMap (PR #102905)

2024-08-16 Thread Amir Ayupov via llvm-branch-commits


https://github.com/aaupov updated 
https://github.com/llvm/llvm-project/pull/102905


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [MC][NFC] Use vector for GUIDProbeFunctionMap (PR #102905)

2024-08-16 Thread Amir Ayupov via llvm-branch-commits


https://github.com/aaupov updated 
https://github.com/llvm/llvm-project/pull/102905

>From 284c701cc57a613d11130a349aba522397946f12 Mon Sep 17 00:00:00 2001
From: Amir Ayupov 
Date: Fri, 16 Aug 2024 03:30:54 -0700
Subject: [PATCH] Assert func desc size

Created using spr 1.3.4
---
 llvm/lib/MC/MCPseudoProbe.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llvm/lib/MC/MCPseudoProbe.cpp b/llvm/lib/MC/MCPseudoProbe.cpp
index 10def15275fac..90d7588407068 100644
--- a/llvm/lib/MC/MCPseudoProbe.cpp
+++ b/llvm/lib/MC/MCPseudoProbe.cpp
@@ -425,6 +425,8 @@ bool MCPseudoProbeDecoder::buildGUID2FuncDescMap(const 
uint8_t *Start,
 GUID2FuncDescMap.emplace_back(GUID, Hash, Name.copy(FuncNameAllocator));
   }
   assert(Data == End && "Have unprocessed data in pseudo_probe_desc section");
+  assert(GUID2FuncDescMap.size() == FuncDescCount &&
+ "Mismatching function description count pre- and post-parsing");
   llvm::sort(GUID2FuncDescMap, [](const auto &LHS, const auto &RHS) {
 return LHS.FuncGUID < RHS.FuncGUID;
   });

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] AArch64: Use consistent atomicrmw expansion for FP operations (PR #103702)

2024-08-16 Thread Matt Arsenault via llvm-branch-commits


https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/103702

>From 3c1ef5cfbcc6c14215681c1d6aff3c8e2486bdc1 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 14 Aug 2024 00:43:03 +0400
Subject: [PATCH 1/3] AArch64: Use consistent atomicrmw expansion for FP
 operations

Use LLSC or cmpxchg in the same cases as for the unsupported
integer operations. This required some fixups to the LLSC
implementation to deal with the fp128 case.
---
 .../Target/AArch64/AArch64ISelLowering.cpp|  30 +-
 llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll   | 392 
 llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll   | 418 +-
 llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll   | 418 +-
 llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll   | 392 
 5 files changed, 420 insertions(+), 1230 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp 
b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 97fb2c5f552731..f059e79b9024a6 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -27067,9 +27067,6 @@ 
AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
   assert(Size <= 128 && "AtomicExpandPass should've handled larger sizes.");
 
-  if (AI->isFloatingPointOperation())
-return AtomicExpansionKind::CmpXChg;
-
   bool CanUseLSE128 = Subtarget->hasLSE128() && Size == 128 &&
   (AI->getOperation() == AtomicRMWInst::Xchg ||
AI->getOperation() == AtomicRMWInst::Or ||
@@ -27079,7 +27076,8 @@ 
AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
 
   // Nand is not supported in LSE.
   // Leave 128 bits to LLSC or CmpXChg.
-  if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) {
+  if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128 &&
+  !AI->isFloatingPointOperation()) {
 if (Subtarget->hasLSE())
   return AtomicExpansionKind::None;
 if (Subtarget->outlineAtomics()) {
@@ -27152,10 +27150,14 @@ Value 
*AArch64TargetLowering::emitLoadLinked(IRBuilderBase &Builder,
 
 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
-Lo = Builder.CreateZExt(Lo, ValueTy, "lo64");
-Hi = Builder.CreateZExt(Hi, ValueTy, "hi64");
-return Builder.CreateOr(
-Lo, Builder.CreateShl(Hi, ConstantInt::get(ValueTy, 64)), "val64");
+
+auto *Int128Ty = Type::getInt128Ty(Builder.getContext());
+Lo = Builder.CreateZExt(Lo, Int128Ty, "lo64");
+Hi = Builder.CreateZExt(Hi, Int128Ty, "hi64");
+
+Value *Or = Builder.CreateOr(
+Lo, Builder.CreateShl(Hi, ConstantInt::get(Int128Ty, 64)), "val64");
+return Builder.CreateBitCast(Or, ValueTy);
   }
 
   Type *Tys[] = { Addr->getType() };
@@ -27166,8 +27168,8 @@ Value 
*AArch64TargetLowering::emitLoadLinked(IRBuilderBase &Builder,
   const DataLayout &DL = M->getDataLayout();
   IntegerType *IntEltTy = Builder.getIntNTy(DL.getTypeSizeInBits(ValueTy));
   CallInst *CI = Builder.CreateCall(Ldxr, Addr);
-  CI->addParamAttr(
-  0, Attribute::get(Builder.getContext(), Attribute::ElementType, 
ValueTy));
+  CI->addParamAttr(0, Attribute::get(Builder.getContext(),
+ Attribute::ElementType, IntEltTy));
   Value *Trunc = Builder.CreateTrunc(CI, IntEltTy);
 
   return Builder.CreateBitCast(Trunc, ValueTy);
@@ -27193,9 +27195,13 @@ Value 
*AArch64TargetLowering::emitStoreConditional(IRBuilderBase &Builder,
 IsRelease ? Intrinsic::aarch64_stlxp : Intrinsic::aarch64_stxp;
 Function *Stxr = Intrinsic::getDeclaration(M, Int);
 Type *Int64Ty = Type::getInt64Ty(M->getContext());
+Type *Int128Ty = Type::getInt128Ty(M->getContext());
+
+Value *CastVal = Builder.CreateBitCast(Val, Int128Ty);
 
-Value *Lo = Builder.CreateTrunc(Val, Int64Ty, "lo");
-Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 64), Int64Ty, 
"hi");
+Value *Lo = Builder.CreateTrunc(CastVal, Int64Ty, "lo");
+Value *Hi =
+Builder.CreateTrunc(Builder.CreateLShr(CastVal, 64), Int64Ty, "hi");
 return Builder.CreateCall(Stxr, {Lo, Hi, Addr});
   }
 
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll 
b/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll
index 605c7d5e0a55ea..20a8a862506f70 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll
@@ -6,33 +6,17 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align2(ptr %ptr, 
half %value) #0 {
 ; NOLSE-LABEL: test_atomicrmw_fadd_f16_seq_cst_align2:
 ; NOLSE:   // %bb.0:
 ; NOLSE-NEXT:fcvt s1, h0
-; NOLSE-NEXT:ldr h0, [x0]
-; NOLSE-NEXT:b .LBB0_2
 ; NOLSE-NEXT:  .LBB0_1: // %atomicrmw.start
-; NOLSE-NEXT:// in Loop: Header=BB0_2 Depth=1
-; NOLSE-NEXT:fmov s0, w10
-; NOLSE-NEXT:cmp w10, w9, uxth
-; NOLSE-NEXT:

[llvm-branch-commits] [AArch64][GlobalISel] Disable fixed-point iteration in all Combiners (PR #102167)

2024-08-16 Thread Tobias Stadler via llvm-branch-commits


https://github.com/tobias-stadler updated 
https://github.com/llvm/llvm-project/pull/102167


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [AArch64][GlobalISel] Disable fixed-point iteration in all Combiners (PR #102167)

2024-08-16 Thread Tobias Stadler via llvm-branch-commits


https://github.com/tobias-stadler updated 
https://github.com/llvm/llvm-project/pull/102167


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [AMDGPU][R600] Move createMachineFunctionInfo into R600 TM. (PR #104038)

2024-08-16 Thread Matt Arsenault via llvm-branch-commits


https://github.com/arsenm approved this pull request.


https://github.com/llvm/llvm-project/pull/104038
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [AMDGPU][R600] Move createMachineFunctionInfo into R600 TM. (PR #104038)

2024-08-16 Thread Matt Arsenault via llvm-branch-commits


https://github.com/arsenm closed 
https://github.com/llvm/llvm-project/pull/104038
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [AMDGPU][R600] Move R600CodeGenPassBuilder into R600TargetMachine(NFC). (PR #103721)

2024-08-16 Thread Matt Arsenault via llvm-branch-commits


https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/103721

>From a10910597e6ee30e87dd09a4f77fcfa1729873f0 Mon Sep 17 00:00:00 2001
From: Christudasan Devadasan 
Date: Wed, 14 Aug 2024 14:18:59 +0530
Subject: [PATCH 1/3] [AMDGPU][R600] Move R600TargetMachine into
 R600CodeGenPassBuilder(NFC).

---
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |   2 +-
 llvm/lib/Target/AMDGPU/CMakeLists.txt |   1 -
 .../Target/AMDGPU/R600CodeGenPassBuilder.cpp  | 149 -
 .../Target/AMDGPU/R600CodeGenPassBuilder.h|  38 -
 llvm/lib/Target/AMDGPU/R600ISelLowering.cpp   |   3 +-
 llvm/lib/Target/AMDGPU/R600TargetMachine.cpp  | 154 --
 6 files changed, 187 insertions(+), 160 deletions(-)
 delete mode 100644 llvm/lib/Target/AMDGPU/R600TargetMachine.cpp

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index bcedc3623d3ed7..13024f45151899 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -32,8 +32,8 @@
 #include "GCNSchedStrategy.h"
 #include "GCNVOPDUtils.h"
 #include "R600.h"
+#include "R600CodeGenPassBuilder.h"
 #include "R600MachineFunctionInfo.h"
-#include "R600TargetMachine.h"
 #include "SIFixSGPRCopies.h"
 #include "SIMachineFunctionInfo.h"
 #include "SIMachineScheduler.h"
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt 
b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index f493076f5bb8a3..16186f1f1bbed0 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -137,7 +137,6 @@ add_llvm_target(AMDGPUCodeGen
   R600Packetizer.cpp
   R600RegisterInfo.cpp
   R600Subtarget.cpp
-  R600TargetMachine.cpp
   R600TargetTransformInfo.cpp
   SIAnnotateControlFlow.cpp
   SIFixSGPRCopies.cpp
diff --git a/llvm/lib/Target/AMDGPU/R600CodeGenPassBuilder.cpp 
b/llvm/lib/Target/AMDGPU/R600CodeGenPassBuilder.cpp
index a57b3aa0adb158..1b182e17add9c0 100644
--- a/llvm/lib/Target/AMDGPU/R600CodeGenPassBuilder.cpp
+++ b/llvm/lib/Target/AMDGPU/R600CodeGenPassBuilder.cpp
@@ -5,12 +5,159 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 
//===--===//
+//
+/// \file
+/// This file contains both AMDGPU-R600 target machine and the CodeGen pass
+/// builder. The target machine contains all of the hardware specific
+/// information needed to emit code for R600 GPUs and the CodeGen pass builder
+/// handles the same for new pass manager infrastructure.
+//
+//===--===//
 
 #include "R600CodeGenPassBuilder.h"
-#include "R600TargetMachine.h"
+#include "R600.h"
+#include "R600MachineScheduler.h"
+#include "R600TargetTransformInfo.h"
+#include "llvm/Transforms/Scalar.h"
+#include 
 
 using namespace llvm;
 
+static cl::opt
+EnableR600StructurizeCFG("r600-ir-structurize",
+ cl::desc("Use StructurizeCFG IR pass"),
+ cl::init(true));
+
+static cl::opt EnableR600IfConvert("r600-if-convert",
+ cl::desc("Use if conversion pass"),
+ cl::ReallyHidden, cl::init(true));
+
+static cl::opt EnableAMDGPUFunctionCallsOpt(
+"amdgpu-function-calls", cl::desc("Enable AMDGPU function call support"),
+cl::location(AMDGPUTargetMachine::EnableFunctionCalls), cl::init(true),
+cl::Hidden);
+
+static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
+  return new ScheduleDAGMILive(C, std::make_unique());
+}
+
+static MachineSchedRegistry R600SchedRegistry("r600",
+  "Run R600's custom scheduler",
+  createR600MachineScheduler);
+
+//===--===//
+// R600 Target Machine (R600 -> Cayman) - Legacy Pass Manager interface.
+//===--===//
+
+R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ std::optional RM,
+ std::optional CM,
+ CodeGenOptLevel OL, bool JIT)
+: AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
+  setRequiresStructuredCFG(true);
+
+  // Override the default since calls aren't supported for r600.
+  if (EnableFunctionCalls &&
+  EnableAMDGPUFunctionCallsOpt.getNumOccurrences() == 0)
+EnableFunctionCalls = false;
+}
+
+const TargetSubtargetInfo *
+R600TargetMachine::getSubtargetImpl(const Function &F) const {
+  StringRef GPU = getGPUName(F);
+  StringRef FS = getFeatureString(F);
+
+  SmallString<128> SubtargetKey(GPU);
+

[llvm-branch-commits] [openmp] [OpenMP][AArch64] Fix branch protection in microtasks (#102317) (PR #103491)

2024-08-16 Thread Tulio Magno Quites Machado Filho via llvm-branch-commits


https://github.com/tuliom updated 
https://github.com/llvm/llvm-project/pull/103491

>From 92587cf8f51323cfa2cdd5c00f692119b5d6c288 Mon Sep 17 00:00:00 2001
From: Tulio Magno Quites Machado Filho 
Date: Tue, 13 Aug 2024 15:34:41 -0300
Subject: [PATCH] [OpenMP][AArch64] Fix branch protection in microtasks
 (#102317)

Start __kmp_invoke_microtask with PACBTI in order to identify the
function as a valid branch target. Before returning, SP is
authenticated.
Also add the BTI and PAC markers to z_Linux_asm.S.

With this patch, libomp.so can now be generated with DT_AARCH64_BTI_PLT
when built with -mbranch-protection=standard.

The implementation is based on the code available in compiler-rt.

(cherry picked from commit 0aa22dcd2f6ec5f46b8ef18fee88066463734935)
---
 openmp/runtime/src/z_Linux_asm.S | 53 
 1 file changed, 53 insertions(+)

diff --git a/openmp/runtime/src/z_Linux_asm.S b/openmp/runtime/src/z_Linux_asm.S
index 5b614e26a8337e..223ad091030e77 100644
--- a/openmp/runtime/src/z_Linux_asm.S
+++ b/openmp/runtime/src/z_Linux_asm.S
@@ -176,6 +176,53 @@ KMP_PREFIX_UNDERSCORE(\proc):
 .endm
 # endif // KMP_OS_DARWIN
 
+# if KMP_OS_LINUX
+// BTI and PAC gnu property note
+#  define NT_GNU_PROPERTY_TYPE_0 5
+#  define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc000
+#  define GNU_PROPERTY_AARCH64_FEATURE_1_BTI 1
+#  define GNU_PROPERTY_AARCH64_FEATURE_1_PAC 2
+
+#  define GNU_PROPERTY(type, value)
\
+  .pushsection .note.gnu.property, "a";
\
+  .p2align 3;  
\
+  .word 4; 
\
+  .word 16;
\
+  .word NT_GNU_PROPERTY_TYPE_0;
\
+  .asciz "GNU";
\
+  .word type;  
\
+  .word 4; 
\
+  .word value; 
\
+  .word 0; 
\
+  .popsection
+# endif
+
+# if defined(__ARM_FEATURE_BTI_DEFAULT)
+#  define BTI_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_BTI
+# else
+#  define BTI_FLAG 0
+# endif
+# if __ARM_FEATURE_PAC_DEFAULT & 3
+#  define PAC_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_PAC
+# else
+#  define PAC_FLAG 0
+# endif
+
+# if (BTI_FLAG | PAC_FLAG) != 0
+#  if PAC_FLAG != 0
+#   define PACBTI_C hint #25
+#   define PACBTI_RET hint #29
+#  else
+#   define PACBTI_C hint #34
+#   define PACBTI_RET
+#  endif
+#  define GNU_PROPERTY_BTI_PAC \
+GNU_PROPERTY(GNU_PROPERTY_AARCH64_FEATURE_1_AND, BTI_FLAG | PAC_FLAG)
+# else
+#  define PACBTI_C
+#  define PACBTI_RET
+#  define GNU_PROPERTY_BTI_PAC
+# endif
 #endif // (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && 
(KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32 || KMP_ARCH_ARM)
 
 .macro COMMON name, size, align_power
@@ -1296,6 +1343,7 @@ __tid = 8
 // mark_begin;
.text
PROC __kmp_invoke_microtask
+   PACBTI_C
 
stp x29, x30, [sp, #-16]!
 # if OMPT_SUPPORT
@@ -1359,6 +1407,7 @@ KMP_LABEL(kmp_1):
ldp x19, x20, [sp], #16
 # endif
ldp x29, x30, [sp], #16
+   PACBTI_RET
ret
 
DEBUG_INFO __kmp_invoke_microtask
@@ -2472,3 +2521,7 @@ __kmp_unnamed_critical_addr:
 .4byte .gomp_critical_user_
 .size __kmp_unnamed_critical_addr, 4
 #endif
+
+#if KMP_OS_LINUX && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32)
+GNU_PROPERTY_BTI_PAC
+#endif

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [openmp] [OpenMP][AArch64] Fix branch protection in microtasks (#102317) (PR #103491)

2024-08-16 Thread Tulio Magno Quites Machado Filho via llvm-branch-commits


tuliom wrote:

@tru Could you take a look at this backport PR for `release/19.x`, please?

https://github.com/llvm/llvm-project/pull/103491
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] release/19.x: [Hexagon] Do not optimize address of another function's block (#101209) (PR #102179)

2024-08-16 Thread via llvm-branch-commits


https://github.com/SundeepKushwaha updated 
https://github.com/llvm/llvm-project/pull/102179

>From 56ed15517a94f797a0a71029280c9cf0c10e4bf3 Mon Sep 17 00:00:00 2001
From: yandalur 
Date: Thu, 1 Aug 2024 21:37:23 +0530
Subject: [PATCH] [Hexagon] Do not optimize address of another function's block
 (#101209)

When the constant extender optimization pass encounters an instruction
that uses an extended address pointing to another function's block,
avoid adding the instruction to the extender list for the current
machine function.

Fixes https://github.com/llvm/llvm-project/issues/99714

(cherry picked from commit 68df06a0b2998765cb0a41353fcf0919bbf57ddb)
---
 .../Target/Hexagon/HexagonConstExtenders.cpp  |   4 +
 .../CodeGen/Hexagon/cext-opt-block-addr.mir   | 173 ++
 2 files changed, 177 insertions(+)
 create mode 100644 llvm/test/CodeGen/Hexagon/cext-opt-block-addr.mir

diff --git a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp 
b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
index f0933765bbcbda..86ce6b4e05ed27 100644
--- a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
@@ -1223,6 +1223,10 @@ void HCE::recordExtender(MachineInstr &MI, unsigned 
OpNum) {
   if (ER.Kind == MachineOperand::MO_GlobalAddress)
 if (ER.V.GV->getName().empty())
   return;
+  // Ignore block address that points to block in another function
+  if (ER.Kind == MachineOperand::MO_BlockAddress)
+if (ER.V.BA->getFunction() != &(MI.getMF()->getFunction()))
+  return;
   Extenders.push_back(ED);
 }
 
diff --git a/llvm/test/CodeGen/Hexagon/cext-opt-block-addr.mir 
b/llvm/test/CodeGen/Hexagon/cext-opt-block-addr.mir
new file mode 100644
index 00..9f140132dcd6c3
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/cext-opt-block-addr.mir
@@ -0,0 +1,173 @@
+# REQUIRES: asserts
+# RUN: llc -march=hexagon -run-pass hexagon-cext-opt %s -o - | FileCheck %s
+
+# Check that the HexagonConstantExtenders pass does not assert when block
+# addresses from different functions are used
+# CHECK-LABEL: name: wibble
+# CHECK: A2_tfrsi blockaddress(@baz
+# CHECK: A2_tfrsi blockaddress(@wibble
+
+--- |
+  target triple = "hexagon"
+
+  define dso_local void @baz() {
+  bb:
+br label %bb1
+
+  bb1:  ; preds = %bb
+%call = tail call fastcc i32 @wibble(i32 poison)
+ret void
+  }
+
+  define internal fastcc i32 @wibble(i32 %arg) {
+  bb:
+%call = tail call i32 @eggs(i32 noundef ptrtoint (ptr blockaddress(@baz, 
%bb1) to i32))
+br label %bb1
+
+  bb1:  ; preds = %bb
+tail call void @baz.1(i32 noundef ptrtoint (ptr blockaddress(@wibble, 
%bb1) to i32))
+ret i32 %call
+  }
+
+  declare i32 @eggs(i32 noundef) local_unnamed_addr
+
+  declare void @baz.1(i32 noundef) local_unnamed_addr
+
+...
+---
+name:baz
+alignment:   16
+exposesReturnsTwice: false
+legalized:   false
+regBankSelected: false
+selected:false
+failedISel:  false
+tracksRegLiveness: true
+hasWinCFI:   false
+callsEHReturn:   false
+callsUnwindInit: false
+hasEHCatchret:   false
+hasEHScopes: false
+hasEHFunclets:   false
+isOutlined:  false
+debugInstrRef:   false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+  - { id: 0, class: intregs, preferred-register: '' }
+liveins: []
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap: false
+  hasPatchPoint:   false
+  stackSize:   0
+  offsetAdjustment: 0
+  maxAlignment:1
+  adjustsStack:false
+  hasCalls:false
+  stackProtector:  ''
+  functionContext: ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:  false
+  hasMustTailInVarArgFunc: false
+  hasTailCall: true
+  isCalleeSavedInfoValid: false
+  localFrameSize:  0
+  savePoint:   ''
+  restorePoint:''
+fixedStack:  []
+stack:   []
+entry_values:[]
+callSites:   []
+debugValueSubstitutions: []
+constants:   []
+machineFunctionInfo: {}
+body: |
+  bb.0.bb:
+successors: %bb.1(0x8000)
+
+  bb.1.bb1 (ir-block-address-taken %ir-block.bb1):
+%0:intregs = IMPLICIT_DEF
+$r0 = COPY %0
+PS_tailcall_i @wibble, hexagoncsr, implicit $r0
+
+...
+---
+name:wibble
+alignment:   16
+exposesReturnsTwice: false
+legalized:   false
+regBankSelected: false
+selected:false
+failedISel:  false
+tracksRegLiveness: true
+hasWinCFI:   false
+callsEHReturn:   false
+callsUnwindInit: false
+hasEHCatchret:   false
+hasEHScopes: false
+hasEHFunclets:   false
+isOutlined:  false
+debugInstrRef:   false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+  - { id: 0, class: intregs, preferred-register: '' }
+  - { id: 1, class: intregs, preferred-register: '' }
+  - { id: 2, class: intregs,

[llvm-branch-commits] [llvm] release/19.x: [Hexagon] Do not optimize address of another function's block (#101209) (PR #102179)

2024-08-16 Thread via llvm-branch-commits


SundeepKushwaha wrote:

LGTM. I don't think I have access to merge on 19.0 branch.

https://github.com/llvm/llvm-project/pull/102179
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [llvm] [AArch64] Adopt updated B16B16 target flags (PR #104602)

2024-08-16 Thread via llvm-branch-commits


https://github.com/SpencerAbson created 
https://github.com/llvm/llvm-project/pull/104602

The enablement of SVE/SME non-widening BFloat16 instructions was recently 
changed in response to an architecture update, in which:

-   FEAT_SVE_B16B16 was weakened
-   FEAT_SME_B16B16 was introduced 

New flags, 'sve-b16b16' and 'sme-b16b16' were introduced to replace the 
existing 'b16b16'. This was achieved in the below two patches.

- https://github.com/llvm/llvm-project/pull/101480
- https://github.com/llvm/llvm-project/pull/102501 

Ideally, the interface change introduced here will be valid in LLVM-19. We do 
not see it necessary to back-port the entire change, but just to add 
'sme-b16b16' and 'sve-b16b16' as aliases to the existing (and unchanged) 
'b16b16' and 'sme2' flags which together cover all of these features.

The predication of Bf16 variants of svmin/svminnm and svmax/svmaxnm is also 
fixed in this change.

>From f04b2e8eea2f0d542cc6eea219dcf667cd4f3f7d Mon Sep 17 00:00:00 2001
From: Spencer Abson 
Date: Fri, 16 Aug 2024 14:39:43 +
Subject: [PATCH] [AArch64] Adopt updated B16B16 target flags

The enablement of SVE/SME non-widening BFloat16 instructions was recently
changed in response to an architecture update, in which:
- FEAT_SVE_B16B16 was weakened
- FEAT_SME_B16B16 was introduced
New flags, 'sve-b16b16' and 'sme-b16b16' were introduced to replace the
existing 'b16b16'. This was achieved in the below two patches.
- https://github.com/llvm/llvm-project/pull/101480
- https://github.com/llvm/llvm-project/pull/102501
Ideally, the interface change introduced here will be valid in LLVM-19.
We do not see it necessary to back-port the entire change, but just to add
'sme-b16b16' and 'sve-b16b16' as aliases to the existing (and unchanged)
'b16b16' and 'sme2' flags which together cover all of these features.

The predication of Bf16 variants of svmin/svminnm and svmax/svmaxnm is also
fixed in this change.
---
 clang/include/clang/Basic/arm_sve.td  | 26 +++
 .../print-supported-extensions-aarch64.c  |  2 ++
 llvm/lib/Target/AArch64/AArch64Features.td|  9 +++
 .../TargetParser/TargetParserTest.cpp | 15 ++-
 4 files changed, 46 insertions(+), 6 deletions(-)

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index 94c093d8911562..fb11d743fd6479 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2116,7 +2116,7 @@ def SVFCLAMP_BF   : SInst<"svclamp[_{d}]", "", "b", 
MergeNone, "aarch64_sve_
 multiclass MinMaxIntr {
   def SVS # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "csil", 
MergeNone, "aarch64_sve_s" # i # zm # "_" # mul, [IsStreaming], []>;
   def SVU # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, 
"UcUsUiUl", MergeNone, "aarch64_sve_u" # i # zm # "_" # mul, [IsStreaming], []>;
-  def SVF # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "bhfd", 
 MergeNone, "aarch64_sve_f" # i # zm # "_" # mul, [IsStreaming], []>;
+  def SVF # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "hfd",  
MergeNone, "aarch64_sve_f" # i # zm # "_" # mul, [IsStreaming], []>;
 }
 
 let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in {
@@ -2134,11 +2134,11 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = 
"sme2" in {
 }
 
 multiclass SInstMinMaxByVector {
-  def NAME # _SINGLE_X2 : SInst<"sv" # name # "nm[_single_{d}_x2]", "22d", 
"bhfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x2", [IsStreaming], []>;
-  def NAME # _SINGLE_X4 : SInst<"sv" # name # "nm[_single_{d}_x4]", "44d", 
"bhfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x4", [IsStreaming], []>;
+  def NAME # _SINGLE_X2 : SInst<"sv" # name # "nm[_single_{d}_x2]", "22d", 
"hfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x2", [IsStreaming], []>;
+  def NAME # _SINGLE_X4 : SInst<"sv" # name # "nm[_single_{d}_x4]", "44d", 
"hfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x4", [IsStreaming], []>;
 
-  def NAME # _X2 : SInst<"sv" # name # "nm[_{d}_x2]", "222", "bhfd", 
MergeNone, "aarch64_sve_f" # name # "nm_x2", [IsStreaming], []>;
-  def NAME # _X4 : SInst<"sv" # name # "nm[_{d}_x4]", "444", "bhfd", 
MergeNone, "aarch64_sve_f" # name # "nm_x4", [IsStreaming], []>;
+  def NAME # _X2 : SInst<"sv" # name # "nm[_{d}_x2]", "222", "hfd", MergeNone, 
"aarch64_sve_f" # name # "nm_x2", [IsStreaming], []>;
+  def NAME # _X4 : SInst<"sv" # name # "nm[_{d}_x4]", "444", "hfd", MergeNone, 
"aarch64_sve_f" # name # "nm_x4", [IsStreaming], []>;
 }
 
 let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in {
@@ -2172,9 +2172,25 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = 
"sme2" in {
   def SVFCLAMP_X4 : SInst<"svclamp[_single_{d}_x4]",  "44dd",   "hfd",  
MergeNone, "aarch64_sve_fclamp_single_x4",  [IsStreaming], []>;
 }
 
+multiclass BfSingleMultiVector {
+  def NAME # _SINGLE_X2 : SInst<"sv" #

[llvm-branch-commits] [clang] [llvm] [AArch64] Adopt updated B16B16 target flags (PR #104602)

2024-08-16 Thread via llvm-branch-commits


https://github.com/SpencerAbson milestoned 
https://github.com/llvm/llvm-project/pull/104602
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [llvm] [AArch64] Adopt updated B16B16 target flags (PR #104602)

2024-08-16 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-backend-aarch64

Author: None (SpencerAbson)


Changes

The enablement of SVE/SME non-widening BFloat16 instructions was recently 
changed in response to an architecture update, in which:

-   FEAT_SVE_B16B16 was weakened
-   FEAT_SME_B16B16 was introduced 

New flags, 'sve-b16b16' and 'sme-b16b16' were introduced to replace the 
existing 'b16b16'. This was achieved in the below two patches.

- https://github.com/llvm/llvm-project/pull/101480
- https://github.com/llvm/llvm-project/pull/102501 

Ideally, the interface change introduced here will be valid in LLVM-19. We do 
not see it necessary to back-port the entire change, but just to add 
'sme-b16b16' and 'sve-b16b16' as aliases to the existing (and unchanged) 
'b16b16' and 'sme2' flags which together cover all of these features.

The predication of Bf16 variants of svmin/svminnm and svmax/svmaxnm is also 
fixed in this change.

---
Full diff: https://github.com/llvm/llvm-project/pull/104602.diff


4 Files Affected:

- (modified) clang/include/clang/Basic/arm_sve.td (+21-5) 
- (modified) clang/test/Driver/print-supported-extensions-aarch64.c (+2) 
- (modified) llvm/lib/Target/AArch64/AArch64Features.td (+9) 
- (modified) llvm/unittests/TargetParser/TargetParserTest.cpp (+14-1) 


``diff
diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index 94c093d8911562..fb11d743fd6479 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2116,7 +2116,7 @@ def SVFCLAMP_BF   : SInst<"svclamp[_{d}]", "", "b", 
MergeNone, "aarch64_sve_
 multiclass MinMaxIntr {
   def SVS # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "csil", 
MergeNone, "aarch64_sve_s" # i # zm # "_" # mul, [IsStreaming], []>;
   def SVU # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, 
"UcUsUiUl", MergeNone, "aarch64_sve_u" # i # zm # "_" # mul, [IsStreaming], []>;
-  def SVF # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "bhfd", 
 MergeNone, "aarch64_sve_f" # i # zm # "_" # mul, [IsStreaming], []>;
+  def SVF # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "hfd",  
MergeNone, "aarch64_sve_f" # i # zm # "_" # mul, [IsStreaming], []>;
 }
 
 let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in {
@@ -2134,11 +2134,11 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = 
"sme2" in {
 }
 
 multiclass SInstMinMaxByVector {
-  def NAME # _SINGLE_X2 : SInst<"sv" # name # "nm[_single_{d}_x2]", "22d", 
"bhfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x2", [IsStreaming], []>;
-  def NAME # _SINGLE_X4 : SInst<"sv" # name # "nm[_single_{d}_x4]", "44d", 
"bhfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x4", [IsStreaming], []>;
+  def NAME # _SINGLE_X2 : SInst<"sv" # name # "nm[_single_{d}_x2]", "22d", 
"hfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x2", [IsStreaming], []>;
+  def NAME # _SINGLE_X4 : SInst<"sv" # name # "nm[_single_{d}_x4]", "44d", 
"hfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x4", [IsStreaming], []>;
 
-  def NAME # _X2 : SInst<"sv" # name # "nm[_{d}_x2]", "222", "bhfd", 
MergeNone, "aarch64_sve_f" # name # "nm_x2", [IsStreaming], []>;
-  def NAME # _X4 : SInst<"sv" # name # "nm[_{d}_x4]", "444", "bhfd", 
MergeNone, "aarch64_sve_f" # name # "nm_x4", [IsStreaming], []>;
+  def NAME # _X2 : SInst<"sv" # name # "nm[_{d}_x2]", "222", "hfd", MergeNone, 
"aarch64_sve_f" # name # "nm_x2", [IsStreaming], []>;
+  def NAME # _X4 : SInst<"sv" # name # "nm[_{d}_x4]", "444", "hfd", MergeNone, 
"aarch64_sve_f" # name # "nm_x4", [IsStreaming], []>;
 }
 
 let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in {
@@ -2172,9 +2172,25 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = 
"sme2" in {
   def SVFCLAMP_X4 : SInst<"svclamp[_single_{d}_x4]",  "44dd",   "hfd",  
MergeNone, "aarch64_sve_fclamp_single_x4",  [IsStreaming], []>;
 }
 
+multiclass BfSingleMultiVector {
+  def NAME # _SINGLE_X2 : SInst<"sv" # name # "[_single_{d}_x2]", "22d", "b", 
MergeNone, "aarch64_sve_f" # name # "_single_x2", [IsStreaming], []>;
+  def NAME # _SINGLE_X4 : SInst<"sv" # name # "[_single_{d}_x4]", "44d", "b", 
MergeNone, "aarch64_sve_f" # name # "_single_x4", [IsStreaming], []>;
+
+  def NAME # _X2 : SInst<"sv" # name # "[_{d}_x2]", "222", "b", MergeNone, 
"aarch64_sve_f" # name # "_x2", [IsStreaming], []>;
+  def NAME # _X4 : SInst<"sv" # name # "[_{d}_x4]", "444", "b", MergeNone, 
"aarch64_sve_f" # name # "_x4", [IsStreaming], []>;
+}
+
 let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2,b16b16"in {
   def SVBFCLAMP_X2 : SInst<"svclamp[_single_{d}_x2]",  "22dd",   "b",  
MergeNone, "aarch64_sve_bfclamp_single_x2",  [IsStreaming], []>;
   def SVBFCLAMP_X4 : SInst<"svclamp[_single_{d}_x4]",  "44dd",   "b",  
MergeNone, "aarch64_sve_bfclamp_single_x4",  [IsStreaming], []>;
+
+  // bfmin, bfmax (single, multi)
+  defm SVBFMIN : BfSingleMultiVector<"min">;
+  defm SVBF

[llvm-branch-commits] [clang] [llvm] [AArch64] Adopt updated B16B16 target flags (PR #104602)

2024-08-16 Thread via llvm-branch-commits


https://github.com/SpencerAbson edited 
https://github.com/llvm/llvm-project/pull/104602
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [libcxx] release/19.x: [libc++] Fix rejects-valid in std::span copy construction (#104500) (PR #104603)

2024-08-16 Thread via llvm-branch-commits


https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/104603

Backport 99696b35bc8a0054e0b0c1a26e8dd5049fa8c41b

Requested by: @ldionne

>From 64b1a3eca8f783468d7ee5a38d48e649881b2de3 Mon Sep 17 00:00:00 2001
From: Louis Dionne 
Date: Fri, 16 Aug 2024 11:08:34 -0400
Subject: [PATCH] [libc++] Fix rejects-valid in std::span copy construction
 (#104500)

Trying to copy-construct a std::span from another std::span holding an
incomplete type would fail as we evaluate the SFINAE for the range-based
constructor. The problem was that we checked for __is_std_span after
checking for the range being a contiguous_range, which hard-errored
because of arithmetic on a pointer to incomplete type.

As a drive-by, refactor the whole test and format it.

Fixes #104496

(cherry picked from commit 99696b35bc8a0054e0b0c1a26e8dd5049fa8c41b)
---
 libcxx/include/span   |   2 +-
 .../views/views.span/span.cons/copy.pass.cpp  | 126 --
 2 files changed, 86 insertions(+), 42 deletions(-)

diff --git a/libcxx/include/span b/libcxx/include/span
index 60d76d830f0f31..da631cdc3f90e6 100644
--- a/libcxx/include/span
+++ b/libcxx/include/span
@@ -206,10 +206,10 @@ struct __is_std_span> : true_type {};
 
 template 
 concept __span_compatible_range =
+!__is_std_span>::value &&//
 ranges::contiguous_range<_Range> && //
 ranges::sized_range<_Range> &&  //
 (ranges::borrowed_range<_Range> || is_const_v<_ElementType>) && //
-!__is_std_span>::value &&//
 !__is_std_array>::value &&   //
 !is_array_v> &&  //
 is_convertible_v> 
(*)[], _ElementType (*)[]>;
diff --git 
a/libcxx/test/std/containers/views/views.span/span.cons/copy.pass.cpp 
b/libcxx/test/std/containers/views/views.span/span.cons/copy.pass.cpp
index 28f13e122ddc5e..d3990fd60a459a 100644
--- a/libcxx/test/std/containers/views/views.span/span.cons/copy.pass.cpp
+++ b/libcxx/test/std/containers/views/views.span/span.cons/copy.pass.cpp
@@ -5,6 +5,7 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 
//===--===//
+
 // UNSUPPORTED: c++03, c++11, c++14, c++17
 
 // 
@@ -14,58 +15,101 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "test_macros.h"
 
-template 
-constexpr bool doCopy(const T &rhs)
-{
-ASSERT_NOEXCEPT(T{rhs});
-T lhs{rhs};
-return lhs.data() == rhs.data()
- &&lhs.size() == rhs.size();
-}
+template 
+constexpr void test() {
+  ASSERT_NOEXCEPT(std::span(std::declval const&>()));
+  ASSERT_NOEXCEPT(std::span{std::declval const&>()});
 
-struct A{};
-
-template 
-void testCV ()
-{
-int  arr[] = {1,2,3};
-assert((doCopy(std::span  ()  )));
-assert((doCopy(std::span()  )));
-assert((doCopy(std::span  (&arr[0], 1;
-assert((doCopy(std::span(&arr[0], 1;
-assert((doCopy(std::span  (&arr[0], 2;
-assert((doCopy(std::span(&arr[0], 2;
+  // dynamic_extent
+  {
+std::span x;
+std::span copy(x);
+assert(copy.data() == x.data());
+assert(copy.size() == x.size());
+  }
+  {
+T array[3] = {};
+std::span x(array, 3);
+std::span copy(x);
+assert(copy.data() == array);
+assert(copy.size() == 3);
+  }
+  {
+T array[3] = {};
+std::span x(array, 2);
+std::span copy(x);
+assert(copy.data() == array);
+assert(copy.size() == 2);
+  }
+
+  // static extent
+  {
+std::span x;
+std::span copy(x);
+assert(copy.data() == x.data());
+assert(copy.size() == x.size());
+  }
+  {
+T array[3] = {};
+std::span x(array);
+std::span copy(x);
+assert(copy.data() == array);
+assert(copy.size() == 3);
+  }
+  {
+T array[2] = {};
+std::span x(array);
+std::span copy(x);
+assert(copy.data() == array);
+assert(copy.size() == 2);
+  }
 }
 
+struct Foo {};
+
+constexpr bool test_all() {
+  test();
+  test();
+  test();
+  test();
 
-int main(int, char**)
-{
-constexpr int carr[] = {1,2,3};
+  test();
+  test();
+  test();
+  test();
 
-static_assert(doCopy(std::span<  int>  ()),"");
-static_assert(doCopy(std::span<  int,0>()),"");
-static_assert(doCopy(std::span  (&carr[0], 1)), "");
-static_assert(doCopy(std::span(&carr[0], 1)), "");
-static_assert(doCopy(std::span  (&carr[0], 2)), "");
-static_assert(doCopy(std::span(&carr[0], 2)), "");
+  test();
+  test();
+  test();
+  test();
 
-static_assert(doCopy(std::span()),   "");
-static_assert(doCopy(std::span()), "");
-static_assert(doCopy(std::span()),  "");
+  // Note: Can't test non-fundamental types with volatile because we require 
`T*` to be indirectly_readable,
+  //   which isn't the case when T is volatile.
+  test();
+  test();
 
-std::string s;
-assert(doCopy(std::span   () ))

[llvm-branch-commits] [libcxx] release/19.x: [libc++] Fix rejects-valid in std::span copy construction (#104500) (PR #104603)

2024-08-16 Thread via llvm-branch-commits


https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/104603
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [libcxx] release/19.x: [libc++] Fix rejects-valid in std::span copy construction (#104500) (PR #104603)

2024-08-16 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-libcxx

Author: None (llvmbot)


Changes

Backport 99696b35bc8a0054e0b0c1a26e8dd5049fa8c41b

Requested by: @ldionne

---
Full diff: https://github.com/llvm/llvm-project/pull/104603.diff


2 Files Affected:

- (modified) libcxx/include/span (+1-1) 
- (modified) 
libcxx/test/std/containers/views/views.span/span.cons/copy.pass.cpp (+85-41) 


``diff
diff --git a/libcxx/include/span b/libcxx/include/span
index 60d76d830f0f31..da631cdc3f90e6 100644
--- a/libcxx/include/span
+++ b/libcxx/include/span
@@ -206,10 +206,10 @@ struct __is_std_span> : true_type {};
 
 template 
 concept __span_compatible_range =
+!__is_std_span>::value &&//
 ranges::contiguous_range<_Range> && //
 ranges::sized_range<_Range> &&  //
 (ranges::borrowed_range<_Range> || is_const_v<_ElementType>) && //
-!__is_std_span>::value &&//
 !__is_std_array>::value &&   //
 !is_array_v> &&  //
 is_convertible_v> 
(*)[], _ElementType (*)[]>;
diff --git 
a/libcxx/test/std/containers/views/views.span/span.cons/copy.pass.cpp 
b/libcxx/test/std/containers/views/views.span/span.cons/copy.pass.cpp
index 28f13e122ddc5e..d3990fd60a459a 100644
--- a/libcxx/test/std/containers/views/views.span/span.cons/copy.pass.cpp
+++ b/libcxx/test/std/containers/views/views.span/span.cons/copy.pass.cpp
@@ -5,6 +5,7 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 
//===----------------------------------------------------------------------===//
+
 // UNSUPPORTED: c++03, c++11, c++14, c++17
 
 // 
@@ -14,58 +15,101 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "test_macros.h"
 
-template 
-constexpr bool doCopy(const T &rhs)
-{
-ASSERT_NOEXCEPT(T{rhs});
-T lhs{rhs};
-return lhs.data() == rhs.data()
- &&lhs.size() == rhs.size();
-}
+template 
+constexpr void test() {
+  ASSERT_NOEXCEPT(std::span(std::declval const&>()));
+  ASSERT_NOEXCEPT(std::span{std::declval const&>()});
 
-struct A{};
-
-template 
-void testCV ()
-{
-int  arr[] = {1,2,3};
-assert((doCopy(std::span  ()  )));
-assert((doCopy(std::span()  )));
-assert((doCopy(std::span  (&arr[0], 1;
-assert((doCopy(std::span(&arr[0], 1;
-assert((doCopy(std::span  (&arr[0], 2;
-assert((doCopy(std::span(&arr[0], 2;
+  // dynamic_extent
+  {
+std::span x;
+std::span copy(x);
+assert(copy.data() == x.data());
+assert(copy.size() == x.size());
+  }
+  {
+T array[3] = {};
+std::span x(array, 3);
+std::span copy(x);
+assert(copy.data() == array);
+assert(copy.size() == 3);
+  }
+  {
+T array[3] = {};
+std::span x(array, 2);
+std::span copy(x);
+assert(copy.data() == array);
+assert(copy.size() == 2);
+  }
+
+  // static extent
+  {
+std::span x;
+std::span copy(x);
+assert(copy.data() == x.data());
+assert(copy.size() == x.size());
+  }
+  {
+T array[3] = {};
+std::span x(array);
+std::span copy(x);
+assert(copy.data() == array);
+assert(copy.size() == 3);
+  }
+  {
+T array[2] = {};
+std::span x(array);
+std::span copy(x);
+assert(copy.data() == array);
+assert(copy.size() == 2);
+  }
 }
 
+struct Foo {};
+
+constexpr bool test_all() {
+  test();
+  test();
+  test();
+  test();
 
-int main(int, char**)
-{
-constexpr int carr[] = {1,2,3};
+  test();
+  test();
+  test();
+  test();
 
-static_assert(doCopy(std::span<  int>  ()),"");
-static_assert(doCopy(std::span<  int,0>()),"");
-static_assert(doCopy(std::span  (&carr[0], 1)), "");
-static_assert(doCopy(std::span(&carr[0], 1)), "");
-static_assert(doCopy(std::span  (&carr[0], 2)), "");
-static_assert(doCopy(std::span(&carr[0], 2)), "");
+  test();
+  test();
+  test();
+  test();
 
-static_assert(doCopy(std::span()),   "");
-static_assert(doCopy(std::span()), "");
-static_assert(doCopy(std::span()),  "");
+  // Note: Can't test non-fundamental types with volatile because we require 
`T*` to be indirectly_readable,
+  //   which isn't the case when T is volatile.
+  test();
+  test();
 
-std::string s;
-assert(doCopy(std::span   () ));
-assert(doCopy(std::span() ));
-assert(doCopy(std::span   (&s, 1)));
-assert(doCopy(std::span(&s, 1)));
+  test();
+  test();
+
+  // Regression test for https://github.com/llvm/llvm-project/issues/104496
+  {
+struct Incomplete;
+std::span x;
+std::span copy(x);
+assert(copy.data() == x.data());
+assert(copy.size() == x.size());
+  }
+
+  return true;
+}
 
-testCV<   int>();
-testCV();
-testCV<  volatile int>();
-testCV();
+int main(int, char**) {
+  test_all();
+  static_assert(test_all());
 
   return 0;
 }

``




https://github.com/llvm/llvm-project/pull/104603

[llvm-branch-commits] [libcxx] release/19.x: [libc++] Fix rejects-valid in std::span copy construction (#104500) (PR #104603)

2024-08-16 Thread Louis Dionne via llvm-branch-commits


https://github.com/ldionne approved this pull request.


https://github.com/llvm/llvm-project/pull/104603
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] release/19.x: Revert "[CGData] llvm-cgdata (#89884)" (PR #103886)

2024-08-16 Thread Amy Kwan via llvm-branch-commits


amy-kwan wrote:

> > > So we should remove this tool from the 19.x release? Can someone confirm?
> > 
> > 
> > @kyulee-com @thevinster Are you two able to help confirm this?
> 
> Yeah. I think we should remove this from the release as it was reverted. We 
> plan to re-land it via #101461 once it gets approved.

Thanks for confirming! If we can get this PR reviewed so @tru can help merge 
this, that would be awesome.

https://github.com/llvm/llvm-project/pull/103886
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] release/19.x: Revert "[CGData] llvm-cgdata (#89884)" (PR #103886)

2024-08-16 Thread Kyungwoo Lee via llvm-branch-commits


https://github.com/kyulee-com approved this pull request.


https://github.com/llvm/llvm-project/pull/103886
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [llvm] [AArch64] Adopt updated B16B16 target flags (PR #104602)

2024-08-16 Thread Sander de Smalen via llvm-branch-commits


https://github.com/sdesmalen-arm approved this pull request.

Thanks for making this change! The Clang/driver behaviour looks correct to me 
now. The patch is a lot smaller than the patches that went into main, so 
hopefully this can still make it into LLVM 19!

https://github.com/llvm/llvm-project/pull/104602
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] release/19.x: [Hexagon] Do not optimize address of another function's block (#101209) (PR #102179)

2024-08-16 Thread Tobias Hieta via llvm-branch-commits


tru wrote:

No, I just need you to approve it and I'll merge it!

https://github.com/llvm/llvm-project/pull/102179
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [MC][NFC] Reduce Address2ProbesMap size (PR #102904)

2024-08-16 Thread Lei Wang via llvm-branch-commits



@@ -213,6 +208,31 @@ class MCDecodedPseudoProbe : public MCPseudoProbeBase {
  bool ShowName) const;
 };
 
+// Address to pseudo probes map.
+class AddressProbesMap
+: public std::vector> {
+  auto getIt(uint64_t Addr) const {
+auto CompareProbe = [](const MCDecodedPseudoProbe &Probe, uint64_t Addr) {
+  return Probe.getAddress() < Addr;
+};
+return llvm::lower_bound(*this, Addr, CompareProbe);
+  }
+
+public:
+  // Returns range of probes within [\p From, \p To) address range.
+  auto find(uint64_t From, uint64_t To) const {
+return llvm::make_range(getIt(From), getIt(To));
+  }
+  // Returns range of probes with given \p Address.
+  auto find(uint64_t Address) const {
+auto FromIt = getIt(Address);
+if (FromIt == end())

wlei-llvm wrote:

Should this be `if (FromIt->getAddress() != Address)`? IIUC, when the key
doesn't exist, `lower_bound` returns an iterator to the first greater element,
which is not always `end()`.

https://github.com/llvm/llvm-project/pull/102904
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [asan] Catch `initialization-order-fiasco` in modules without globals (PR #104621)

2024-08-16 Thread Vitaly Buka via llvm-branch-commits


https://github.com/vitalybuka created 
https://github.com/llvm/llvm-project/pull/104621

Those modules can still have global constructors and access
globals in other modules which are not initialized yet.



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [asan] Catch `initialization-order-fiasco` in modules without globals (PR #104621)

2024-08-16 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-compiler-rt-sanitizer

Author: Vitaly Buka (vitalybuka)


Changes

Those modules can still have global constructors and access
globals in other modules which are not initialized yet.


---
Full diff: https://github.com/llvm/llvm-project/pull/104621.diff


4 Files Affected:

- (modified) compiler-rt/test/asan/TestCases/initialization-bug-no-global.cpp 
(-3) 
- (modified) llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp (+3-11) 
- (modified) llvm/lib/Transforms/Instrumentation/CMakeLists.txt (+2) 
- (modified) 
llvm/test/Instrumentation/AddressSanitizer/instrument_initializer_without_global.ll
 (+2) 


``diff
diff --git a/compiler-rt/test/asan/TestCases/initialization-bug-no-global.cpp 
b/compiler-rt/test/asan/TestCases/initialization-bug-no-global.cpp
index 8249abf804324a..a8243016bdcf66 100644
--- a/compiler-rt/test/asan/TestCases/initialization-bug-no-global.cpp
+++ b/compiler-rt/test/asan/TestCases/initialization-bug-no-global.cpp
@@ -1,9 +1,6 @@
 // RUN: %clangxx_asan %min_macos_deployment_target=10.11 -O0 %s 
%p/Helpers/initialization-bug-extra.cpp -o %t
 // RUN: %env_asan_opts=check_initialization_order=true:strict_init_order=true 
not %run %t 2>&1 | FileCheck %s
 
-// Not implemented.
-// XFAIL: * 
-
 // Do not test with optimization -- the error may be optimized away.
 
 // FIXME: https://code.google.com/p/address-sanitizer/issues/detail?id=186
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp 
b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index d1bb1334aae6a3..34366b98aed7ae 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -2531,15 +2531,10 @@ void 
ModuleAddressSanitizer::instrumentGlobals(IRBuilder<> &IRB, Module &M,
   SmallVector NewGlobals(n);
   SmallVector Initializers(n);
 
-  bool HasDynamicallyInitializedGlobals = false;
-
   // We shouldn't merge same module names, as this string serves as unique
   // module ID in runtime.
-  GlobalVariable *ModuleName =
-  n != 0
-  ? createPrivateGlobalForString(M, M.getModuleIdentifier(),
- /*AllowMerging*/ false, 
kAsanGenPrefix)
-  : nullptr;
+  GlobalVariable *ModuleName = createPrivateGlobalForString(
+  M, M.getModuleIdentifier(), /*AllowMerging*/ false, kAsanGenPrefix);
 
   for (size_t i = 0; i < n; i++) {
 GlobalVariable *G = GlobalsToChange[i];
@@ -2646,9 +2641,6 @@ void 
ModuleAddressSanitizer::instrumentGlobals(IRBuilder<> &IRB, Module &M,
 Constant::getNullValue(IntptrTy),
 ConstantExpr::getPointerCast(ODRIndicator, IntptrTy));
 
-if (ClInitializers && MD.IsDynInit)
-  HasDynamicallyInitializedGlobals = true;
-
 LLVM_DEBUG(dbgs() << "NEW GLOBAL: " << *NewGlobal << "\n");
 
 Initializers[i] = Initializer;
@@ -2688,7 +2680,7 @@ void 
ModuleAddressSanitizer::instrumentGlobals(IRBuilder<> &IRB, Module &M,
   }
 
   // Create calls for poisoning before initializers run and unpoisoning after.
-  if (HasDynamicallyInitializedGlobals)
+  if (ClInitializers)
 createInitializerPoisonCalls(M, ModuleName);
 
   LLVM_DEBUG(dbgs() << M);
diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt 
b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
index deab37801ff1df..99efa37d11572f 100644
--- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -43,3 +43,5 @@ add_llvm_component_library(LLVMInstrumentation
   TransformUtils
   ProfileData
   )
+
+  set_property(TARGET LLVMInstrumentation APPEND_STRING PROPERTY COMPILE_FLAGS 
" -g -O0")
\ No newline at end of file
diff --git 
a/llvm/test/Instrumentation/AddressSanitizer/instrument_initializer_without_global.ll
 
b/llvm/test/Instrumentation/AddressSanitizer/instrument_initializer_without_global.ll
index c8a6541bacfdfa..b6ab4aca547a4f 100644
--- 
a/llvm/test/Instrumentation/AddressSanitizer/instrument_initializer_without_global.ll
+++ 
b/llvm/test/Instrumentation/AddressSanitizer/instrument_initializer_without_global.ll
@@ -18,7 +18,9 @@ define internal void @__late_ctor() sanitize_address section 
".text.startup" {
 ; CHECK-LABEL: define internal void @__late_ctor(
 ; CHECK-SAME: ) #[[ATTR1:[0-9]+]] section ".text.startup" {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:call void @__asan_before_dynamic_init(i64 ptrtoint (ptr 
@___asan_gen_ to i64))
 ; CHECK-NEXT:call void @initializer()
+; CHECK-NEXT:call void @__asan_after_dynamic_init()
 ; CHECK-NEXT:ret void
 ;
 ; NOINIT-LABEL: define internal void @__late_ctor(

``




https://github.com/llvm/llvm-project/pull/104621
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [asan] Catch `initialization-order-fiasco` in modules without globals (PR #104621)

2024-08-16 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-llvm-transforms

Author: Vitaly Buka (vitalybuka)


Changes

Those modules can still have global constructors and access
globals in other modules which are not initialized yet.


---
Full diff: https://github.com/llvm/llvm-project/pull/104621.diff


4 Files Affected:

- (modified) compiler-rt/test/asan/TestCases/initialization-bug-no-global.cpp 
(-3) 
- (modified) llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp (+3-11) 
- (modified) llvm/lib/Transforms/Instrumentation/CMakeLists.txt (+2) 
- (modified) 
llvm/test/Instrumentation/AddressSanitizer/instrument_initializer_without_global.ll
 (+2) 


``diff
diff --git a/compiler-rt/test/asan/TestCases/initialization-bug-no-global.cpp 
b/compiler-rt/test/asan/TestCases/initialization-bug-no-global.cpp
index 8249abf804324a..a8243016bdcf66 100644
--- a/compiler-rt/test/asan/TestCases/initialization-bug-no-global.cpp
+++ b/compiler-rt/test/asan/TestCases/initialization-bug-no-global.cpp
@@ -1,9 +1,6 @@
 // RUN: %clangxx_asan %min_macos_deployment_target=10.11 -O0 %s 
%p/Helpers/initialization-bug-extra.cpp -o %t
 // RUN: %env_asan_opts=check_initialization_order=true:strict_init_order=true 
not %run %t 2>&1 | FileCheck %s
 
-// Not implemented.
-// XFAIL: * 
-
 // Do not test with optimization -- the error may be optimized away.
 
 // FIXME: https://code.google.com/p/address-sanitizer/issues/detail?id=186
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp 
b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index d1bb1334aae6a3..34366b98aed7ae 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -2531,15 +2531,10 @@ void 
ModuleAddressSanitizer::instrumentGlobals(IRBuilder<> &IRB, Module &M,
   SmallVector NewGlobals(n);
   SmallVector Initializers(n);
 
-  bool HasDynamicallyInitializedGlobals = false;
-
   // We shouldn't merge same module names, as this string serves as unique
   // module ID in runtime.
-  GlobalVariable *ModuleName =
-  n != 0
-  ? createPrivateGlobalForString(M, M.getModuleIdentifier(),
- /*AllowMerging*/ false, 
kAsanGenPrefix)
-  : nullptr;
+  GlobalVariable *ModuleName = createPrivateGlobalForString(
+  M, M.getModuleIdentifier(), /*AllowMerging*/ false, kAsanGenPrefix);
 
   for (size_t i = 0; i < n; i++) {
 GlobalVariable *G = GlobalsToChange[i];
@@ -2646,9 +2641,6 @@ void 
ModuleAddressSanitizer::instrumentGlobals(IRBuilder<> &IRB, Module &M,
 Constant::getNullValue(IntptrTy),
 ConstantExpr::getPointerCast(ODRIndicator, IntptrTy));
 
-if (ClInitializers && MD.IsDynInit)
-  HasDynamicallyInitializedGlobals = true;
-
 LLVM_DEBUG(dbgs() << "NEW GLOBAL: " << *NewGlobal << "\n");
 
 Initializers[i] = Initializer;
@@ -2688,7 +2680,7 @@ void 
ModuleAddressSanitizer::instrumentGlobals(IRBuilder<> &IRB, Module &M,
   }
 
   // Create calls for poisoning before initializers run and unpoisoning after.
-  if (HasDynamicallyInitializedGlobals)
+  if (ClInitializers)
 createInitializerPoisonCalls(M, ModuleName);
 
   LLVM_DEBUG(dbgs() << M);
diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt 
b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
index deab37801ff1df..99efa37d11572f 100644
--- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -43,3 +43,5 @@ add_llvm_component_library(LLVMInstrumentation
   TransformUtils
   ProfileData
   )
+
+  set_property(TARGET LLVMInstrumentation APPEND_STRING PROPERTY COMPILE_FLAGS 
" -g -O0")
\ No newline at end of file
diff --git 
a/llvm/test/Instrumentation/AddressSanitizer/instrument_initializer_without_global.ll
 
b/llvm/test/Instrumentation/AddressSanitizer/instrument_initializer_without_global.ll
index c8a6541bacfdfa..b6ab4aca547a4f 100644
--- 
a/llvm/test/Instrumentation/AddressSanitizer/instrument_initializer_without_global.ll
+++ 
b/llvm/test/Instrumentation/AddressSanitizer/instrument_initializer_without_global.ll
@@ -18,7 +18,9 @@ define internal void @__late_ctor() sanitize_address section 
".text.startup" {
 ; CHECK-LABEL: define internal void @__late_ctor(
 ; CHECK-SAME: ) #[[ATTR1:[0-9]+]] section ".text.startup" {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:call void @__asan_before_dynamic_init(i64 ptrtoint (ptr 
@___asan_gen_ to i64))
 ; CHECK-NEXT:call void @initializer()
+; CHECK-NEXT:call void @__asan_after_dynamic_init()
 ; CHECK-NEXT:ret void
 ;
 ; NOINIT-LABEL: define internal void @__late_ctor(

``




https://github.com/llvm/llvm-project/pull/104621
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [llvm] [AArch64] Adopt updated B16B16 target flags (PR #104602)

2024-08-16 Thread via llvm-branch-commits


https://github.com/SpencerAbson updated 
https://github.com/llvm/llvm-project/pull/104602

>From f04b2e8eea2f0d542cc6eea219dcf667cd4f3f7d Mon Sep 17 00:00:00 2001
From: Spencer Abson 
Date: Fri, 16 Aug 2024 14:39:43 +
Subject: [PATCH 1/2] [AArch64] Adopt updated B16B16 target flags

The enablement of SVE/SME non-widening BFloat16 instructions was recently
changed in response to an architecture update, in which:
- FEAT_SVE_B16B16 was weakened
- FEAT_SME_B16B16 was introduced
New flags, 'sve-b16b16' and 'sme-b16b16' were introduced to replace the
existing 'b16b16'. This was achieved in the two patches below.
- https://github.com/llvm/llvm-project/pull/101480
- https://github.com/llvm/llvm-project/pull/102501
Ideally, the interface change introduced here will be valid in LLVM-19.
We do not consider it necessary to back-port the entire change, but just to add
'sme-b16b16' and 'sve-b16b16' as aliases to the existing (and unchanged)
'b16b16' and 'sme2' flags which together cover all of these features.

The predication of Bf16 variants of svmin/svminnm and svmax/svmaxnm is also
fixed in this change.
---
 clang/include/clang/Basic/arm_sve.td  | 26 +++
 .../print-supported-extensions-aarch64.c  |  2 ++
 llvm/lib/Target/AArch64/AArch64Features.td|  9 +++
 .../TargetParser/TargetParserTest.cpp | 15 ++-
 4 files changed, 46 insertions(+), 6 deletions(-)

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index 94c093d8911562..fb11d743fd6479 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2116,7 +2116,7 @@ def SVFCLAMP_BF   : SInst<"svclamp[_{d}]", "", "b", 
MergeNone, "aarch64_sve_
 multiclass MinMaxIntr {
   def SVS # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "csil", 
MergeNone, "aarch64_sve_s" # i # zm # "_" # mul, [IsStreaming], []>;
   def SVU # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, 
"UcUsUiUl", MergeNone, "aarch64_sve_u" # i # zm # "_" # mul, [IsStreaming], []>;
-  def SVF # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "bhfd", 
 MergeNone, "aarch64_sve_f" # i # zm # "_" # mul, [IsStreaming], []>;
+  def SVF # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "hfd",  
MergeNone, "aarch64_sve_f" # i # zm # "_" # mul, [IsStreaming], []>;
 }
 
 let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in {
@@ -2134,11 +2134,11 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = 
"sme2" in {
 }
 
 multiclass SInstMinMaxByVector {
-  def NAME # _SINGLE_X2 : SInst<"sv" # name # "nm[_single_{d}_x2]", "22d", 
"bhfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x2", [IsStreaming], []>;
-  def NAME # _SINGLE_X4 : SInst<"sv" # name # "nm[_single_{d}_x4]", "44d", 
"bhfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x4", [IsStreaming], []>;
+  def NAME # _SINGLE_X2 : SInst<"sv" # name # "nm[_single_{d}_x2]", "22d", 
"hfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x2", [IsStreaming], []>;
+  def NAME # _SINGLE_X4 : SInst<"sv" # name # "nm[_single_{d}_x4]", "44d", 
"hfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x4", [IsStreaming], []>;
 
-  def NAME # _X2 : SInst<"sv" # name # "nm[_{d}_x2]", "222", "bhfd", 
MergeNone, "aarch64_sve_f" # name # "nm_x2", [IsStreaming], []>;
-  def NAME # _X4 : SInst<"sv" # name # "nm[_{d}_x4]", "444", "bhfd", 
MergeNone, "aarch64_sve_f" # name # "nm_x4", [IsStreaming], []>;
+  def NAME # _X2 : SInst<"sv" # name # "nm[_{d}_x2]", "222", "hfd", MergeNone, 
"aarch64_sve_f" # name # "nm_x2", [IsStreaming], []>;
+  def NAME # _X4 : SInst<"sv" # name # "nm[_{d}_x4]", "444", "hfd", MergeNone, 
"aarch64_sve_f" # name # "nm_x4", [IsStreaming], []>;
 }
 
 let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in {
@@ -2172,9 +2172,25 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = 
"sme2" in {
   def SVFCLAMP_X4 : SInst<"svclamp[_single_{d}_x4]",  "44dd",   "hfd",  
MergeNone, "aarch64_sve_fclamp_single_x4",  [IsStreaming], []>;
 }
 
+multiclass BfSingleMultiVector {
+  def NAME # _SINGLE_X2 : SInst<"sv" # name # "[_single_{d}_x2]", "22d", "b", 
MergeNone, "aarch64_sve_f" # name # "_single_x2", [IsStreaming], []>;
+  def NAME # _SINGLE_X4 : SInst<"sv" # name # "[_single_{d}_x4]", "44d", "b", 
MergeNone, "aarch64_sve_f" # name # "_single_x4", [IsStreaming], []>;
+
+  def NAME # _X2 : SInst<"sv" # name # "[_{d}_x2]", "222", "b", MergeNone, 
"aarch64_sve_f" # name # "_x2", [IsStreaming], []>;
+  def NAME # _X4 : SInst<"sv" # name # "[_{d}_x4]", "444", "b", MergeNone, 
"aarch64_sve_f" # name # "_x4", [IsStreaming], []>;
+}
+
 let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2,b16b16"in {
   def SVBFCLAMP_X2 : SInst<"svclamp[_single_{d}_x2]",  "22dd",   "b",  
MergeNone, "aarch64_sve_bfclamp_single_x2",  [IsStreaming], []>;
   def SVBFCLAMP_X4 : SInst<"svclamp[_single_{d}_x4]",  "44dd",   "b",  
MergeNone, "aarch64_sve_bfc

[llvm-branch-commits] [llvm] [MC][NFC] Reduce Address2ProbesMap size (PR #102904)

2024-08-16 Thread Amir Ayupov via llvm-branch-commits


https://github.com/aaupov updated 
https://github.com/llvm/llvm-project/pull/102904

>From 3ffb03f8e4bcb2fa235ae989320c466af4a3cda8 Mon Sep 17 00:00:00 2001
From: Amir Ayupov 
Date: Mon, 12 Aug 2024 14:40:57 -0700
Subject: [PATCH 1/2] stable_sort

Created using spr 1.3.4
---
 llvm/lib/MC/MCPseudoProbe.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/MC/MCPseudoProbe.cpp b/llvm/lib/MC/MCPseudoProbe.cpp
index 45fe95e176ff24..1c81630dda4dd9 100644
--- a/llvm/lib/MC/MCPseudoProbe.cpp
+++ b/llvm/lib/MC/MCPseudoProbe.cpp
@@ -638,7 +638,7 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
   std::vector> SortedA2P(ProbeCount);
   for (const auto &[I, Probe] : llvm::enumerate(PseudoProbeVec))
 SortedA2P[I] = {Probe.getAddress(), I};
-  llvm::sort(SortedA2P, llvm::less_first());
+  llvm::stable_sort(SortedA2P, llvm::less_first());
   Address2ProbesMap.reserve(ProbeCount);
   for (const uint32_t I : llvm::make_second_range(SortedA2P))
 Address2ProbesMap.emplace_back(PseudoProbeVec[I]);

>From 445b646bbf9718f98689cae92d9bc71366d80e31 Mon Sep 17 00:00:00 2001
From: Amir Ayupov 
Date: Fri, 16 Aug 2024 10:34:40 -0700
Subject: [PATCH 2/2] Check find addr

Created using spr 1.3.4
---
 llvm/include/llvm/MC/MCPseudoProbe.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llvm/include/llvm/MC/MCPseudoProbe.h 
b/llvm/include/llvm/MC/MCPseudoProbe.h
index edac9b94e28f44..559e8ff9055df9 100644
--- a/llvm/include/llvm/MC/MCPseudoProbe.h
+++ b/llvm/include/llvm/MC/MCPseudoProbe.h
@@ -228,6 +228,8 @@ class AddressProbesMap
 auto FromIt = getIt(Address);
 if (FromIt == end())
   return llvm::make_range(end(), end());
+if (FromIt->get().getAddress() != Address)
+  return llvm::make_range(end(), end());
 auto ToIt = getIt(Address + 1);
 return llvm::make_range(FromIt, ToIt);
   }

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [asan] Catch `initialization-order-fiasco` in modules without globals (PR #104621)

2024-08-16 Thread Florian Mayer via llvm-branch-commits


https://github.com/fmayer approved this pull request.


https://github.com/llvm/llvm-project/pull/104621
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [asan] Catch `initialization-order-fiasco` in modules without globals (PR #104621)

2024-08-16 Thread Florian Mayer via llvm-branch-commits



@@ -43,3 +43,5 @@ add_llvm_component_library(LLVMInstrumentation
   TransformUtils
   ProfileData
   )
+
+  set_property(TARGET LLVMInstrumentation APPEND_STRING PROPERTY COMPILE_FLAGS 
" -g -O0")

fmayer wrote:

nit: missing EOL

https://github.com/llvm/llvm-project/pull/104621
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] release/19.x: [Hexagon] Do not optimize address of another function's block (#101209) (PR #102179)

2024-08-16 Thread via llvm-branch-commits


https://github.com/SundeepKushwaha approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/102179
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] release/19.x: [Hexagon] Do not optimize address of another function's block (#101209) (PR #102179)

2024-08-16 Thread via llvm-branch-commits


SundeepKushwaha wrote:

Thanks. Please let me know if you need any other info.

https://github.com/llvm/llvm-project/pull/102179
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [asan] Catch `initialization-order-fiasco` in modules without globals (PR #104621)

2024-08-16 Thread Vitaly Buka via llvm-branch-commits



@@ -43,3 +43,5 @@ add_llvm_component_library(LLVMInstrumentation
   TransformUtils
   ProfileData
   )
+
+  set_property(TARGET LLVMInstrumentation APPEND_STRING PROPERTY COMPILE_FLAGS 
" -g -O0")

vitalybuka wrote:

Sorry, this debug change is not intended for commit.

https://github.com/llvm/llvm-project/pull/104621
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [AMDGPU][R600] Move R600CodeGenPassBuilder into R600TargetMachine(NFC). (PR #103721)

2024-08-16 Thread Matt Arsenault via llvm-branch-commits


https://github.com/arsenm approved this pull request.


https://github.com/llvm/llvm-project/pull/103721
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] 829226d - Revert "[libc] Disable old headergen checks unless enabled (#104522)"

2024-08-16 Thread via llvm-branch-commits


Author: Michael Jones
Date: 2024-08-16T11:22:07-07:00
New Revision: 829226dfb39409a279707e0e492cf4379d0c7eab

URL: 
https://github.com/llvm/llvm-project/commit/829226dfb39409a279707e0e492cf4379d0c7eab
DIFF: 
https://github.com/llvm/llvm-project/commit/829226dfb39409a279707e0e492cf4379d0c7eab.diff

LOG: Revert "[libc] Disable old headergen checks unless enabled (#104522)"

This reverts commit f668708796b981733a5816f2efed0d5195af923d.

Added: 


Modified: 
libc/CMakeLists.txt
libc/include/CMakeLists.txt
llvm/runtimes/CMakeLists.txt

Removed: 




diff  --git a/libc/CMakeLists.txt b/libc/CMakeLists.txt
index 133686acf6fa62..dd45d6cc8cb6ab 100644
--- a/libc/CMakeLists.txt
+++ b/libc/CMakeLists.txt
@@ -50,32 +50,31 @@ set(LIBC_NAMESPACE ${default_namespace}
   CACHE STRING "The namespace to use to enclose internal implementations. Must 
start with '__llvm_libc'."
 )
 
-option(LIBC_USE_NEW_HEADER_GEN "Generate header files using new headergen 
instead of the old one" ON)
 
-if(LIBC_USE_NEW_HEADER_GEN)
-  add_subdirectory(newhdrgen)
-else()
-  if(LLVM_LIBC_FULL_BUILD OR LLVM_LIBC_GPU_BUILD)
-if(NOT LIBC_HDRGEN_EXE)
-  # We need to set up hdrgen first since other targets depend on it.
-  add_subdirectory(utils/LibcTableGenUtil)
-  add_subdirectory(utils/HdrGen)
-  # Calling add_tablegen sets variables like LIBC_TABLEGEN_EXE in
-  # PARENT_SCOPE which get lost until saved in the cache.
-  set(LIBC_TABLEGEN_EXE "${LIBC_TABLEGEN_EXE}" CACHE INTERNAL "")
-  set(LIBC_TABLEGEN_TARGET "${LIBC_TABLEGEN_TARGET}" CACHE INTERNAL "")
-else()
-  message(STATUS "Will use ${LIBC_HDRGEN_EXE} for libc header generation.")
-endif()
+add_subdirectory(newhdrgen)
+
+
+if(LLVM_LIBC_FULL_BUILD OR LLVM_LIBC_GPU_BUILD)
+  if(NOT LIBC_HDRGEN_EXE)
+# We need to set up hdrgen first since other targets depend on it.
+add_subdirectory(utils/LibcTableGenUtil)
+add_subdirectory(utils/HdrGen)
+# Calling add_tablegen sets variables like LIBC_TABLEGEN_EXE in
+# PARENT_SCOPE which get lost until saved in the cache.
+set(LIBC_TABLEGEN_EXE "${LIBC_TABLEGEN_EXE}" CACHE INTERNAL "")
+set(LIBC_TABLEGEN_TARGET "${LIBC_TABLEGEN_TARGET}" CACHE INTERNAL "")
+  else()
+message(STATUS "Will use ${LIBC_HDRGEN_EXE} for libc header generation.")
   endif()
 endif()
-
 # We will build the GPU utilities if we are not doing a runtimes build.
 option(LIBC_BUILD_GPU_LOADER "Always build the GPU loader utilities" OFF)
 if(LIBC_BUILD_GPU_LOADER OR (LLVM_LIBC_GPU_BUILD AND NOT LLVM_RUNTIMES_BUILD))
   add_subdirectory(utils/gpu)
 endif()
 
+option(LIBC_USE_NEW_HEADER_GEN "Generate header files using new headergen 
instead of the old one" ON)
+
 set(NEED_LIBC_HDRGEN FALSE)
 if(NOT LLVM_RUNTIMES_BUILD)
   if("libc" IN_LIST LLVM_ENABLE_RUNTIMES)

diff  --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt
index 4e3ae7f801f4a0..2b6eb61782a632 100644
--- a/libc/include/CMakeLists.txt
+++ b/libc/include/CMakeLists.txt
@@ -420,21 +420,19 @@ add_header_macro(
 .llvm-libc-types.posix_spawn_file_actions_t
 )
 
-add_header_macro(
+add_gen_header(
   link
-  ../libc/newhdrgen/yaml/link.yaml
-  link.h.def
-  link.h
+  DEF_FILE link.h.def
+  GEN_HDR link.h
   DEPENDS
 .llvm_libc_common_h
 .llvm-libc-macros.link_macros
 )
 
-add_header_macro(
+add_gen_header(
   elf
-  ../libc/newhdrgen/yaml/elf.yaml
-  elf.h.def
-  elf.h
+  DEF_FILE elf.h.def
+  GEN_HDR elf.h
   DEPENDS
 .llvm-libc-macros.elf_macros
 )

diff  --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt
index 76202b2cfe0421..187c44fb9d04d2 100644
--- a/llvm/runtimes/CMakeLists.txt
+++ b/llvm/runtimes/CMakeLists.txt
@@ -521,8 +521,7 @@ if(build_runtimes)
 endforeach()
   endif()
   if("libc" IN_LIST LLVM_ENABLE_PROJECTS AND
-  (LLVM_LIBC_FULL_BUILD OR LLVM_LIBC_GPU_BUILD) AND 
-  (NOT LIBC_USE_NEW_HEADER_GEN))
+  (LLVM_LIBC_FULL_BUILD OR LLVM_LIBC_GPU_BUILD))
 if(LIBC_HDRGEN_EXE)
   set(hdrgen_exe ${LIBC_HDRGEN_EXE})
 else()



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] cf8d13e - Revert "[flang][cuda][driver] Make sure flang does not switch to cc1 (#104613)"

2024-08-16 Thread via llvm-branch-commits


Author: Valentin Clement (バレンタイン クレメン)
Date: 2024-08-16T12:02:25-07:00
New Revision: cf8d13e27b647afaca66457d93b45a7da0e9cdfb

URL: 
https://github.com/llvm/llvm-project/commit/cf8d13e27b647afaca66457d93b45a7da0e9cdfb
DIFF: 
https://github.com/llvm/llvm-project/commit/cf8d13e27b647afaca66457d93b45a7da0e9cdfb.diff

LOG: Revert "[flang][cuda][driver] Make sure flang does not switch to cc1 
(#104613)"

This reverts commit e6b9f12b0ac0f1e6f7f7145719092c10731e4fe4.

Added: 


Modified: 
clang/lib/Driver/Types.cpp
flang/test/Driver/cuda-option.f90

Removed: 




diff  --git a/clang/lib/Driver/Types.cpp b/clang/lib/Driver/Types.cpp
index 3de45b00b4d00f..2b9b391c19c9fd 100644
--- a/clang/lib/Driver/Types.cpp
+++ b/clang/lib/Driver/Types.cpp
@@ -170,9 +170,6 @@ bool types::isAcceptedByFlang(ID Id) {
   case TY_LLVM_IR:
   case TY_LLVM_BC:
 return true;
-  case TY_PP_CUDA:
-  case TY_CUDA:
-return true;
   }
 }
 

diff  --git a/flang/test/Driver/cuda-option.f90 
b/flang/test/Driver/cuda-option.f90
index 591d7d070d3a85..562f8683b0ff7d 100644
--- a/flang/test/Driver/cuda-option.f90
+++ b/flang/test/Driver/cuda-option.f90
@@ -1,6 +1,5 @@
 ! Test -fcuda option
 ! RUN: %flang_fc1 -cpp -x cuda -fdebug-unparse %s -o - | FileCheck %s
-! RUN: not %flang -cpp -x cuda %s -o - 2>&1 | FileCheck %s 
--check-prefix=MLIRERROR
 ! RUN: not %flang_fc1 -cpp %s -o - 2>&1 | FileCheck %s --check-prefix=ERROR
 program main
 #if _CUDA
@@ -13,8 +12,4 @@ program main
 ! CHECK: INTEGER :: var = 1
 ! CHECK: INTEGER, DEVICE :: dvar
 
-! ERROR: cuda-option.f90:{{.*}}:{{.*}}: error: expected end of statement
-
-! The whole pipeline is not in place yet. It will currently fail at MLIR
-! translation level.
-! MLIRERROR: failed to legalize operation 'cuf.alloc'
+! ERROR: cuda-option.f90:8:19: error: expected end of statement



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [DirectX] Implement metadata lowering for resources (PR #104447)

2024-08-16 Thread Justin Bogner via llvm-branch-commits



@@ -13,27 +13,52 @@
 #include "DXILShaderFlags.h"
 #include "DirectX.h"
 #include "llvm/ADT/StringSet.h"
+#include "llvm/Analysis/DXILResource.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/TargetParser/Triple.h"
 
 using namespace llvm;
 using namespace llvm::dxil;
 
-static void emitResourceMetadata(Module &M,
+static void emitResourceMetadata(Module &M, const DXILResourceMap &DRM,
  const dxil::Resources &MDResources) {
-  Metadata *SRVMD = nullptr, *UAVMD = nullptr, *CBufMD = nullptr,
-   *SmpMD = nullptr;
-  bool HasResources = false;
+  LLVMContext &Context = M.getContext();
+
+  SmallVector SRVs, UAVs, CBufs, Smps;
+  for (auto [_, RI] : DRM) {
+switch (RI.getResourceClass()) {
+case dxil::ResourceClass::SRV:
+  SRVs.push_back(RI.getAsMetadata(Context));
+  break;
+case dxil::ResourceClass::UAV:
+  UAVs.push_back(RI.getAsMetadata(Context));
+  break;
+case dxil::ResourceClass::CBuffer:
+  CBufs.push_back(RI.getAsMetadata(Context));
+  break;
+case dxil::ResourceClass::Sampler:
+  Smps.push_back(RI.getAsMetadata(Context));
+  break;
+}
+  }
+  Metadata *SRVMD = SRVs.empty() ? nullptr : MDNode::get(Context, SRVs);
+  Metadata *UAVMD = UAVs.empty() ? nullptr : MDNode::get(Context, UAVs);
+  Metadata *CBufMD = CBufs.empty() ? nullptr : MDNode::get(Context, CBufs);
+  Metadata *SmpMD = Smps.empty() ? nullptr : MDNode::get(Context, Smps);
+  bool HasResources = !DRM.empty();
 
   if (MDResources.hasUAVs()) {
+assert(!UAVMD && "Old and new UAV representations can't coexist");

bogner wrote:

Yes. Once the frontend starts generating target extension types it will no 
longer generate the uav metadata node and we can remove all of that code. 
Similarly for the cbufs.

https://github.com/llvm/llvm-project/pull/104447
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [ctx_prof] Profile flattener (PR #104539)

2024-08-16 Thread Mircea Trofin via llvm-branch-commits


https://github.com/mtrofin edited 
https://github.com/llvm/llvm-project/pull/104539
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [DirectX] Differentiate between 0/1 overloads in the OpBuilder. NFC (PR #104246)

2024-08-16 Thread Damyan Pepper via llvm-branch-commits


https://github.com/damyanp approved this pull request.


https://github.com/llvm/llvm-project/pull/104246
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [DirectX] Disentangle DXIL.td's op types from LLVMType. NFC (PR #104247)

2024-08-16 Thread Damyan Pepper via llvm-branch-commits


https://github.com/damyanp approved this pull request.


https://github.com/llvm/llvm-project/pull/104247
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [DirectX] Encapsulate DXILOpLowering's state into a class. NFC (PR #104248)

2024-08-16 Thread Damyan Pepper via llvm-branch-commits


https://github.com/damyanp approved this pull request.


https://github.com/llvm/llvm-project/pull/104248
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [DirectX] Register a few DXIL passes with the new PM (PR #104250)

2024-08-16 Thread Damyan Pepper via llvm-branch-commits



@@ -46,14 +46,14 @@ define void @test_typedbuffer() {
   ; Buffer Buf[24] : register(t3, space5)
   %typed2 = call target("dx.TypedBuffer", <4 x i32>, 0, 0, 0)
   @llvm.dx.handle.fromBinding.tdx.TypedBuffer_i32_0_0t(
-  i32 2, i32 7, i32 24, i32 0, i1 false)
+  i32 5, i32 3, i32 24, i32 0, i1 false)

damyanp wrote:

What happened here?

https://github.com/llvm/llvm-project/pull/104250
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [DirectX] Register a few DXIL passes with the new PM (PR #104250)

2024-08-16 Thread Damyan Pepper via llvm-branch-commits


https://github.com/damyanp approved this pull request.


https://github.com/llvm/llvm-project/pull/104250
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [DirectX] Lower `@llvm.dx.handle.fromBinding` to DXIL ops (PR #104251)

2024-08-16 Thread Damyan Pepper via llvm-branch-commits


https://github.com/damyanp approved this pull request.


https://github.com/llvm/llvm-project/pull/104251
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [DirectX] Lower `@llvm.dx.typedBufferLoad` to DXIL ops (PR #104252)

2024-08-16 Thread Damyan Pepper via llvm-branch-commits


https://github.com/damyanp approved this pull request.


https://github.com/llvm/llvm-project/pull/104252
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [DirectX] Lower `@llvm.dx.typedBufferStore` to DXIL ops (PR #104253)

2024-08-16 Thread Damyan Pepper via llvm-branch-commits


https://github.com/damyanp approved this pull request.


https://github.com/llvm/llvm-project/pull/104253
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [DirectX] Move resource logic into PrettyPrinter and TranslateMetadata. NFC (PR #104446)

2024-08-16 Thread Damyan Pepper via llvm-branch-commits


https://github.com/damyanp approved this pull request.


https://github.com/llvm/llvm-project/pull/104446
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [DirectX] Move resource logic into PrettyPrinter and TranslateMetadata. NFC (PR #104446)

2024-08-16 Thread Damyan Pepper via llvm-branch-commits



@@ -12,13 +12,27 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Pass.h"
+#include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
 static void prettyPrintResources(raw_ostream &OS,
  const dxil::Resources &MDResources) {

damyanp wrote:

What does the "MD" in "MDResources" mean?

https://github.com/llvm/llvm-project/pull/104446
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [DirectX] Implement metadata lowering for resources (PR #104447)

2024-08-16 Thread Damyan Pepper via llvm-branch-commits


https://github.com/damyanp approved this pull request.


https://github.com/llvm/llvm-project/pull/104447
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [DirectX] Add resource handling to the DXIL pretty printer (PR #104448)

2024-08-16 Thread Damyan Pepper via llvm-branch-commits


https://github.com/damyanp approved this pull request.


https://github.com/llvm/llvm-project/pull/104448
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [llvm] Add some brief LLVM 19 release notes for Pointer Authentication ABI support (PR #104657)

2024-08-16 Thread Anton Korobeynikov via llvm-branch-commits


https://github.com/asl created https://github.com/llvm/llvm-project/pull/104657

None

>From 0232f080dce628f473d9ceae599109fe2be0fa11 Mon Sep 17 00:00:00 2001
From: Anton Korobeynikov 
Date: Fri, 16 Aug 2024 18:09:53 -0700
Subject: [PATCH] Add some brief LLVM 19 release notes for Pointer
 Authentication ABI support.

---
 clang/docs/ReleaseNotes.rst |  4 
 llvm/docs/ReleaseNotes.rst  | 14 ++
 2 files changed, 18 insertions(+)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 5cd398c22c946d..77a0f0ca300a8c 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1207,6 +1207,10 @@ Arm and AArch64 Support
 * Arm Neoverse-N3 (neoverse-n3).
 * Arm Neoverse-V3 (neoverse-v3).
 * Arm Neoverse-V3AE (neoverse-v3ae).
+ - Experimental support has been added for pointer authentication ABI for С/C++
+ - Pointer authentication ABI could be enabled for AArch64 Linux via
+   ``-mabi=pauthtest` option or via specifying `pauthtest` environment part of
+   target triple.
 
 Android Support
 ^^^
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index a81caa160883d8..60b6c6e786df89 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -80,6 +80,11 @@ Changes to the LLVM IR
 removed. The next argument has been changed from byte index to bit
 index.
 * Added ``llvm.experimental.vector.compress`` intrinsic.
+* Added special kind of `constant expressions
+  `_ to
+  represent pointers with signature embedded into it.
+* Added `pointer authentication operand bundles
+  
`_. 
 
 Changes to LLVM infrastructure
 --
@@ -125,6 +130,15 @@ Changes to the AArch64 Backend
   when specified via ``-march=`` or an ``-mcpu=`` that supports them.  The
   attribute ``"target-features"="+v9a"`` no longer implies ``"+sve"`` and
   ``"+sve2"`` respectively.
+* Added support for ELF pointer authentication relocations as specified in
+  `PAuth ABI Extension to ELF
+  
`_.
+* Added codegeneration, ELF object file and linker support for authenticated
+  call lowering, signed constants and emission of signing scheme details in
+  ``GNU_PROPERTY_AARCH64_FEATURE_PAUTH`` property of ``.note.gnu.property``
+  section.
+* Added codegeneration support for ``llvm.ptrauth.auth`` and
+  ``llvm.ptrauth.resign`` intrinsics.
 
 Changes to the AMDGPU Backend
 -

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [llvm] Add some brief LLVM 19 release notes for Pointer Authentication ABI support (PR #104657)

2024-08-16 Thread Anton Korobeynikov via llvm-branch-commits


asl wrote:

Feel free to add more, I decided to go brief as the documentation is not there 
yet.

Thanks!

https://github.com/llvm/llvm-project/pull/104657
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [llvm] Add some brief LLVM 19 release notes for Pointer Authentication ABI support (PR #104657)

2024-08-16 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-clang

Author: Anton Korobeynikov (asl)


Changes



---
Full diff: https://github.com/llvm/llvm-project/pull/104657.diff


2 Files Affected:

- (modified) clang/docs/ReleaseNotes.rst (+4) 
- (modified) llvm/docs/ReleaseNotes.rst (+14) 


``diff
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 5cd398c22c946d..77a0f0ca300a8c 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1207,6 +1207,10 @@ Arm and AArch64 Support
 * Arm Neoverse-N3 (neoverse-n3).
 * Arm Neoverse-V3 (neoverse-v3).
 * Arm Neoverse-V3AE (neoverse-v3ae).
+ - Experimental support has been added for pointer authentication ABI for С/C++
+ - Pointer authentication ABI could be enabled for AArch64 Linux via
+   ``-mabi=pauthtest` option or via specifying `pauthtest` environment part of
+   target triple.
 
 Android Support
 ^^^
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index a81caa160883d8..60b6c6e786df89 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -80,6 +80,11 @@ Changes to the LLVM IR
 removed. The next argument has been changed from byte index to bit
 index.
 * Added ``llvm.experimental.vector.compress`` intrinsic.
+* Added special kind of `constant expressions
+  `_ to
+  represent pointers with signature embedded into it.
+* Added `pointer authentication operand bundles
+  
`_. 
 
 Changes to LLVM infrastructure
 --
@@ -125,6 +130,15 @@ Changes to the AArch64 Backend
   when specified via ``-march=`` or an ``-mcpu=`` that supports them.  The
   attribute ``"target-features"="+v9a"`` no longer implies ``"+sve"`` and
   ``"+sve2"`` respectively.
+* Added support for ELF pointer authentication relocations as specified in
+  `PAuth ABI Extension to ELF
+  
`_.
+* Added codegeneration, ELF object file and linker support for authenticated
+  call lowering, signed constants and emission of signing scheme details in
+  ``GNU_PROPERTY_AARCH64_FEATURE_PAUTH`` property of ``.note.gnu.property``
+  section.
+* Added codegeneration support for ``llvm.ptrauth.auth`` and
+  ``llvm.ptrauth.resign`` intrinsics.
 
 Changes to the AMDGPU Backend
 -

``




https://github.com/llvm/llvm-project/pull/104657
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [llvm] Add some brief LLVM 19 release notes for Pointer Authentication ABI support (PR #104657)

2024-08-16 Thread Anton Korobeynikov via llvm-branch-commits


https://github.com/asl updated https://github.com/llvm/llvm-project/pull/104657

>From 92c76c04d8a3142ee569fcd705df08e566402b1d Mon Sep 17 00:00:00 2001
From: Anton Korobeynikov 
Date: Fri, 16 Aug 2024 18:09:53 -0700
Subject: [PATCH] Add some brief LLVM 19 release notes for Pointer
 Authentication ABI support.

---
 clang/docs/ReleaseNotes.rst |  4 
 llvm/docs/ReleaseNotes.rst  | 14 ++
 2 files changed, 18 insertions(+)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 5cd398c22c946d..fc9a141c7ec180 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1207,6 +1207,10 @@ Arm and AArch64 Support
 * Arm Neoverse-N3 (neoverse-n3).
 * Arm Neoverse-V3 (neoverse-v3).
 * Arm Neoverse-V3AE (neoverse-v3ae).
+ - Experimental support has been added for pointer authentication ABI for 
С/C++.
+ - Pointer authentication ABI could be enabled for AArch64 Linux via
+   ``-mabi=pauthtest` option or via specifying `pauthtest` environment part of
+   target triple.
 
 Android Support
 ^^^
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index a81caa160883d8..60b6c6e786df89 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -80,6 +80,11 @@ Changes to the LLVM IR
 removed. The next argument has been changed from byte index to bit
 index.
 * Added ``llvm.experimental.vector.compress`` intrinsic.
+* Added special kind of `constant expressions
+  `_ to
+  represent pointers with signature embedded into it.
+* Added `pointer authentication operand bundles
+  
`_. 
 
 Changes to LLVM infrastructure
 --
@@ -125,6 +130,15 @@ Changes to the AArch64 Backend
   when specified via ``-march=`` or an ``-mcpu=`` that supports them.  The
   attribute ``"target-features"="+v9a"`` no longer implies ``"+sve"`` and
   ``"+sve2"`` respectively.
+* Added support for ELF pointer authentication relocations as specified in
+  `PAuth ABI Extension to ELF
+  
`_.
+* Added codegeneration, ELF object file and linker support for authenticated
+  call lowering, signed constants and emission of signing scheme details in
+  ``GNU_PROPERTY_AARCH64_FEATURE_PAUTH`` property of ``.note.gnu.property``
+  section.
+* Added codegeneration support for ``llvm.ptrauth.auth`` and
+  ``llvm.ptrauth.resign`` intrinsics.
 
 Changes to the AMDGPU Backend
 -

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [llvm] Add some brief LLVM 19 release notes for Pointer Authentication ABI support (PR #104657)

2024-08-16 Thread Anton Korobeynikov via llvm-branch-commits


https://github.com/asl updated https://github.com/llvm/llvm-project/pull/104657

>From a01e5a8d95e48bd195b471ae2e5416531c37663d Mon Sep 17 00:00:00 2001
From: Anton Korobeynikov 
Date: Fri, 16 Aug 2024 18:09:53 -0700
Subject: [PATCH] Add some brief LLVM 19 release notes for Pointer
 Authentication ABI support.

---
 clang/docs/ReleaseNotes.rst |  8 
 llvm/docs/ReleaseNotes.rst  | 14 ++
 2 files changed, 22 insertions(+)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 5cd398c22c946d..809247a771926d 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1191,11 +1191,13 @@ Arm and AArch64 Support
   improvements for most targets. We have not changed the default behavior for
   ARMv6, but may revisit that decision in the future. Users can restore the old
   behavior with -m[no-]unaligned-access.
+
 - An alias identifier (rdma) has been added for targeting the AArch64
   Architecture Extension which uses Rounding Doubling Multiply Accumulate
   instructions (rdm). The identifier is available on the command line as
   a feature modifier for -march and -mcpu as well as via target attributes
   like ``target_version`` or ``target_clones``.
+
 - Support has been added for the following processors (-mcpu identifiers in 
parenthesis):
 * Arm Cortex-R52+ (cortex-r52plus).
 * Arm Cortex-R82AE (cortex-r82ae).
@@ -1208,6 +1210,12 @@ Arm and AArch64 Support
 * Arm Neoverse-V3 (neoverse-v3).
 * Arm Neoverse-V3AE (neoverse-v3ae).
 
+ - Experimental support has been added for pointer authentication ABI for 
С/C++.
+
+ - Pointer authentication ABI could be enabled for AArch64 Linux via
+   ``-mabi=pauthtest` option or via specifying `pauthtest` environment part of
+   target triple.
+
 Android Support
 ^^^
 
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index a81caa160883d8..60b6c6e786df89 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -80,6 +80,11 @@ Changes to the LLVM IR
 removed. The next argument has been changed from byte index to bit
 index.
 * Added ``llvm.experimental.vector.compress`` intrinsic.
+* Added special kind of `constant expressions
+  `_ to
+  represent pointers with signature embedded into it.
+* Added `pointer authentication operand bundles
+  
`_. 
 
 Changes to LLVM infrastructure
 --
@@ -125,6 +130,15 @@ Changes to the AArch64 Backend
   when specified via ``-march=`` or an ``-mcpu=`` that supports them.  The
   attribute ``"target-features"="+v9a"`` no longer implies ``"+sve"`` and
   ``"+sve2"`` respectively.
+* Added support for ELF pointer authentication relocations as specified in
+  `PAuth ABI Extension to ELF
+  
`_.
+* Added codegeneration, ELF object file and linker support for authenticated
+  call lowering, signed constants and emission of signing scheme details in
+  ``GNU_PROPERTY_AARCH64_FEATURE_PAUTH`` property of ``.note.gnu.property``
+  section.
+* Added codegeneration support for ``llvm.ptrauth.auth`` and
+  ``llvm.ptrauth.resign`` intrinsics.
 
 Changes to the AMDGPU Backend
 -

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [llvm] Add some brief LLVM 19 release notes for Pointer Authentication ABI support (PR #104657)

2024-08-16 Thread Anton Korobeynikov via llvm-branch-commits


https://github.com/asl updated https://github.com/llvm/llvm-project/pull/104657

>From 4b6c656ca145e15ee59b16c5adb974b4767ec585 Mon Sep 17 00:00:00 2001
From: Anton Korobeynikov 
Date: Fri, 16 Aug 2024 18:09:53 -0700
Subject: [PATCH] Add some brief LLVM 19 release notes for Pointer
 Authentication ABI support.

---
 clang/docs/ReleaseNotes.rst |  8 
 llvm/docs/ReleaseNotes.rst  | 14 ++
 2 files changed, 22 insertions(+)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 5cd398c22c946d..c5d776a228da0e 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1191,11 +1191,13 @@ Arm and AArch64 Support
   improvements for most targets. We have not changed the default behavior for
   ARMv6, but may revisit that decision in the future. Users can restore the old
   behavior with -m[no-]unaligned-access.
+
 - An alias identifier (rdma) has been added for targeting the AArch64
   Architecture Extension which uses Rounding Doubling Multiply Accumulate
   instructions (rdm). The identifier is available on the command line as
   a feature modifier for -march and -mcpu as well as via target attributes
   like ``target_version`` or ``target_clones``.
+
 - Support has been added for the following processors (-mcpu identifiers in 
parenthesis):
 * Arm Cortex-R52+ (cortex-r52plus).
 * Arm Cortex-R82AE (cortex-r82ae).
@@ -1208,6 +1210,12 @@ Arm and AArch64 Support
 * Arm Neoverse-V3 (neoverse-v3).
 * Arm Neoverse-V3AE (neoverse-v3ae).
 
+ - Experimental support has been added for pointer authentication ABI for 
С/C++.
+
+ - Pointer authentication ABI could be enabled for AArch64 Linux via
+   ``-mabi=pauthtest`` option or via specifying ``pauthtest`` environment part 
of
+   target triple.
+
 Android Support
 ^^^
 
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index a81caa160883d8..60b6c6e786df89 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -80,6 +80,11 @@ Changes to the LLVM IR
 removed. The next argument has been changed from byte index to bit
 index.
 * Added ``llvm.experimental.vector.compress`` intrinsic.
+* Added special kind of `constant expressions
+  `_ to
+  represent pointers with signature embedded into it.
+* Added `pointer authentication operand bundles
+  
`_. 
 
 Changes to LLVM infrastructure
 --
@@ -125,6 +130,15 @@ Changes to the AArch64 Backend
   when specified via ``-march=`` or an ``-mcpu=`` that supports them.  The
   attribute ``"target-features"="+v9a"`` no longer implies ``"+sve"`` and
   ``"+sve2"`` respectively.
+* Added support for ELF pointer authentication relocations as specified in
+  `PAuth ABI Extension to ELF
+  
`_.
+* Added codegeneration, ELF object file and linker support for authenticated
+  call lowering, signed constants and emission of signing scheme details in
+  ``GNU_PROPERTY_AARCH64_FEATURE_PAUTH`` property of ``.note.gnu.property``
+  section.
+* Added codegeneration support for ``llvm.ptrauth.auth`` and
+  ``llvm.ptrauth.resign`` intrinsics.
 
 Changes to the AMDGPU Backend
 -

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [compiler-rt] 30e7b89 - Revert "[asan] Catch `initialization-order-fiasco` in modules without globals…"

2024-08-16 Thread via llvm-branch-commits


Author: Vitaly Buka
Date: 2024-08-16T22:43:22-07:00
New Revision: 30e7b89bc9c26267ca28f937c79b07540baa8ce3

URL: 
https://github.com/llvm/llvm-project/commit/30e7b89bc9c26267ca28f937c79b07540baa8ce3
DIFF: 
https://github.com/llvm/llvm-project/commit/30e7b89bc9c26267ca28f937c79b07540baa8ce3.diff

LOG: Revert "[asan] Catch `initialization-order-fiasco` in modules without 
globals…"

This reverts commit f44f02629274ea67aad23553ebc11042537758ad.

Added: 


Modified: 
compiler-rt/test/asan/TestCases/initialization-bug-no-global.cpp
llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp

llvm/test/Instrumentation/AddressSanitizer/instrument_initializer_without_global.ll

Removed: 




diff  --git a/compiler-rt/test/asan/TestCases/initialization-bug-no-global.cpp 
b/compiler-rt/test/asan/TestCases/initialization-bug-no-global.cpp
index 369449303f3e08..50698b2a7d06fc 100644
--- a/compiler-rt/test/asan/TestCases/initialization-bug-no-global.cpp
+++ b/compiler-rt/test/asan/TestCases/initialization-bug-no-global.cpp
@@ -1,6 +1,9 @@
 // RUN: %clangxx_asan %min_macos_deployment_target=10.11 -O0 %s 
%p/Helpers/initialization-bug-extra.cpp -o %t
 // RUN: %env_asan_opts=check_initialization_order=true:strict_init_order=true 
not %run %t 2>&1 | FileCheck %s
 
+// Not implemented.
+// XFAIL: *
+
 // Do not test with optimization -- the error may be optimized away.
 
 // FIXME: https://code.google.com/p/address-sanitizer/issues/detail?id=186

diff  --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp 
b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 34366b98aed7ae..d1bb1334aae6a3 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -2531,10 +2531,15 @@ void 
ModuleAddressSanitizer::instrumentGlobals(IRBuilder<> &IRB, Module &M,
   SmallVector NewGlobals(n);
   SmallVector Initializers(n);
 
+  bool HasDynamicallyInitializedGlobals = false;
+
   // We shouldn't merge same module names, as this string serves as unique
   // module ID in runtime.
-  GlobalVariable *ModuleName = createPrivateGlobalForString(
-  M, M.getModuleIdentifier(), /*AllowMerging*/ false, kAsanGenPrefix);
+  GlobalVariable *ModuleName =
+  n != 0
+  ? createPrivateGlobalForString(M, M.getModuleIdentifier(),
+ /*AllowMerging*/ false, 
kAsanGenPrefix)
+  : nullptr;
 
   for (size_t i = 0; i < n; i++) {
 GlobalVariable *G = GlobalsToChange[i];
@@ -2641,6 +2646,9 @@ void 
ModuleAddressSanitizer::instrumentGlobals(IRBuilder<> &IRB, Module &M,
 Constant::getNullValue(IntptrTy),
 ConstantExpr::getPointerCast(ODRIndicator, IntptrTy));
 
+if (ClInitializers && MD.IsDynInit)
+  HasDynamicallyInitializedGlobals = true;
+
 LLVM_DEBUG(dbgs() << "NEW GLOBAL: " << *NewGlobal << "\n");
 
 Initializers[i] = Initializer;
@@ -2680,7 +2688,7 @@ void 
ModuleAddressSanitizer::instrumentGlobals(IRBuilder<> &IRB, Module &M,
   }
 
   // Create calls for poisoning before initializers run and unpoisoning after.
-  if (ClInitializers)
+  if (HasDynamicallyInitializedGlobals)
 createInitializerPoisonCalls(M, ModuleName);
 
   LLVM_DEBUG(dbgs() << M);

diff  --git 
a/llvm/test/Instrumentation/AddressSanitizer/instrument_initializer_without_global.ll
 
b/llvm/test/Instrumentation/AddressSanitizer/instrument_initializer_without_global.ll
index b6ab4aca547a4f..c8a6541bacfdfa 100644
--- 
a/llvm/test/Instrumentation/AddressSanitizer/instrument_initializer_without_global.ll
+++ 
b/llvm/test/Instrumentation/AddressSanitizer/instrument_initializer_without_global.ll
@@ -18,9 +18,7 @@ define internal void @__late_ctor() sanitize_address section 
".text.startup" {
 ; CHECK-LABEL: define internal void @__late_ctor(
 ; CHECK-SAME: ) #[[ATTR1:[0-9]+]] section ".text.startup" {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:call void @__asan_before_dynamic_init(i64 ptrtoint (ptr @___asan_gen_ to i64))
 ; CHECK-NEXT:call void @initializer()
-; CHECK-NEXT:call void @__asan_after_dynamic_init()
 ; CHECK-NEXT:ret void
 ;
 ; NOINIT-LABEL: define internal void @__late_ctor(



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [compiler-rt] c9f70c3 - Revert "Revert "[asan] Catch `initialization-order-fiasco` in modules without…"

2024-08-16 Thread via llvm-branch-commits


Author: Vitaly Buka
Date: 2024-08-16T22:44:02-07:00
New Revision: c9f70c3d7632b7d30da4e93d9d62565069a0b3cc

URL: https://github.com/llvm/llvm-project/commit/c9f70c3d7632b7d30da4e93d9d62565069a0b3cc
DIFF: https://github.com/llvm/llvm-project/commit/c9f70c3d7632b7d30da4e93d9d62565069a0b3cc.diff

LOG: Revert "Revert "[asan] Catch `initialization-order-fiasco` in modules without…"

This reverts commit 34f941a2f96b804dd24c2a25770d899b018339ff.

Added: 


Modified: 
compiler-rt/test/asan/TestCases/initialization-bug-no-global.cpp
llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp

llvm/test/Instrumentation/AddressSanitizer/instrument_initializer_without_global.ll

Removed: 




diff  --git a/compiler-rt/test/asan/TestCases/initialization-bug-no-global.cpp 
b/compiler-rt/test/asan/TestCases/initialization-bug-no-global.cpp
index 50698b2a7d06fc..369449303f3e08 100644
--- a/compiler-rt/test/asan/TestCases/initialization-bug-no-global.cpp
+++ b/compiler-rt/test/asan/TestCases/initialization-bug-no-global.cpp
@@ -1,9 +1,6 @@
 // RUN: %clangxx_asan %min_macos_deployment_target=10.11 -O0 %s %p/Helpers/initialization-bug-extra.cpp -o %t
 // RUN: %env_asan_opts=check_initialization_order=true:strict_init_order=true not %run %t 2>&1 | FileCheck %s
 
-// Not implemented.
-// XFAIL: *
-
 // Do not test with optimization -- the error may be optimized away.
 
 // FIXME: https://code.google.com/p/address-sanitizer/issues/detail?id=186

diff  --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp 
b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index d1bb1334aae6a3..34366b98aed7ae 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -2531,15 +2531,10 @@ void 
ModuleAddressSanitizer::instrumentGlobals(IRBuilder<> &IRB, Module &M,
   SmallVector NewGlobals(n);
   SmallVector Initializers(n);
 
-  bool HasDynamicallyInitializedGlobals = false;
-
   // We shouldn't merge same module names, as this string serves as unique
   // module ID in runtime.
-  GlobalVariable *ModuleName =
-  n != 0
-  ? createPrivateGlobalForString(M, M.getModuleIdentifier(),
- /*AllowMerging*/ false, 
kAsanGenPrefix)
-  : nullptr;
+  GlobalVariable *ModuleName = createPrivateGlobalForString(
+  M, M.getModuleIdentifier(), /*AllowMerging*/ false, kAsanGenPrefix);
 
   for (size_t i = 0; i < n; i++) {
 GlobalVariable *G = GlobalsToChange[i];
@@ -2646,9 +2641,6 @@ void 
ModuleAddressSanitizer::instrumentGlobals(IRBuilder<> &IRB, Module &M,
 Constant::getNullValue(IntptrTy),
 ConstantExpr::getPointerCast(ODRIndicator, IntptrTy));
 
-if (ClInitializers && MD.IsDynInit)
-  HasDynamicallyInitializedGlobals = true;
-
 LLVM_DEBUG(dbgs() << "NEW GLOBAL: " << *NewGlobal << "\n");
 
 Initializers[i] = Initializer;
@@ -2688,7 +2680,7 @@ void 
ModuleAddressSanitizer::instrumentGlobals(IRBuilder<> &IRB, Module &M,
   }
 
   // Create calls for poisoning before initializers run and unpoisoning after.
-  if (HasDynamicallyInitializedGlobals)
+  if (ClInitializers)
 createInitializerPoisonCalls(M, ModuleName);
 
   LLVM_DEBUG(dbgs() << M);

diff  --git 
a/llvm/test/Instrumentation/AddressSanitizer/instrument_initializer_without_global.ll
 
b/llvm/test/Instrumentation/AddressSanitizer/instrument_initializer_without_global.ll
index c8a6541bacfdfa..b6ab4aca547a4f 100644
--- 
a/llvm/test/Instrumentation/AddressSanitizer/instrument_initializer_without_global.ll
+++ 
b/llvm/test/Instrumentation/AddressSanitizer/instrument_initializer_without_global.ll
@@ -18,7 +18,9 @@ define internal void @__late_ctor() sanitize_address section 
".text.startup" {
 ; CHECK-LABEL: define internal void @__late_ctor(
 ; CHECK-SAME: ) #[[ATTR1:[0-9]+]] section ".text.startup" {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:call void @__asan_before_dynamic_init(i64 ptrtoint (ptr @___asan_gen_ to i64))
 ; CHECK-NEXT:call void @initializer()
+; CHECK-NEXT:call void @__asan_after_dynamic_init()
 ; CHECK-NEXT:ret void
 ;
 ; NOINIT-LABEL: define internal void @__late_ctor(



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

70 matches

Mail list logo