[llvm-branch-commits] [clang] release/19.x: [Clang] Correctly forward `--cuda-path` to the nvlink wrapper (#100170) (PR #100216)

2024-08-19 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru updated https://github.com/llvm/llvm-project/pull/100216

>From 1a5c4e5c4fded8293985dc1875a971f7783cfc45 Mon Sep 17 00:00:00 2001
From: Joseph Huber 
Date: Tue, 23 Jul 2024 14:41:57 -0500
Subject: [PATCH] [Clang] Correctly forward `--cuda-path` to the nvlink wrapper
 (#100170)

Summary:
This was not forwarded properly as it would try to pass it to `nvlink`.

Fixes https://github.com/llvm/llvm-project/issues/100168

(cherry picked from commit 7e1fcf5dd657d465c3fc846f56c6f9d3a4560b43)
---
 clang/lib/Driver/ToolChains/Cuda.cpp   | 4 
 clang/test/Driver/linker-wrapper-passes.c  | 1 -
 clang/test/Driver/nvlink-wrapper.c | 7 +++
 clang/tools/clang-nvlink-wrapper/NVLinkOpts.td | 4 ++--
 4 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp 
b/clang/lib/Driver/ToolChains/Cuda.cpp
index 59453c484ae4f4..61d12b10dfb62b 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -609,6 +609,10 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const 
JobAction &JA,
 CmdArgs.push_back(Args.MakeArgString(
 "--pxtas-path=" + Args.getLastArgValue(options::OPT_ptxas_path_EQ)));
 
+  if (Args.hasArg(options::OPT_cuda_path_EQ))
+CmdArgs.push_back(Args.MakeArgString(
+"--cuda-path=" + Args.getLastArgValue(options::OPT_cuda_path_EQ)));
+
   // Add paths specified in LIBRARY_PATH environment variable as -L options.
   addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
 
diff --git a/clang/test/Driver/linker-wrapper-passes.c 
b/clang/test/Driver/linker-wrapper-passes.c
index aadcf472e9b636..b257c942afa075 100644
--- a/clang/test/Driver/linker-wrapper-passes.c
+++ b/clang/test/Driver/linker-wrapper-passes.c
@@ -3,7 +3,6 @@
 // REQUIRES: llvm-plugins, llvm-examples
 // REQUIRES: x86-registered-target
 // REQUIRES: amdgpu-registered-target
-
 // Setup.
 // RUN: mkdir -p %t
 // RUN: %clang -cc1 -emit-llvm-bc -o %t/host-x86_64-unknown-linux-gnu.bc \
diff --git a/clang/test/Driver/nvlink-wrapper.c 
b/clang/test/Driver/nvlink-wrapper.c
index fdda93f1f9cdc1..318315ddaca340 100644
--- a/clang/test/Driver/nvlink-wrapper.c
+++ b/clang/test/Driver/nvlink-wrapper.c
@@ -63,3 +63,10 @@ int baz() { return y + x; }
 // RUN:   -arch sm_52 -o a.out 2>&1 | FileCheck %s --check-prefix=LTO
 // LTO: ptxas{{.*}} -m64 -c [[PTX:.+]].s -O3 -arch sm_52 -o [[CUBIN:.+]].cubin
 // LTO: nvlink{{.*}} -arch sm_52 -o a.out [[CUBIN]].cubin 
{{.*}}-u-{{.*}}.cubin {{.*}}-y-{{.*}}.cubin
+
+//
+// Check that we don't forward some arguments.
+//
+// RUN: clang-nvlink-wrapper --dry-run %t.o %t-u.o %t-y.a \
+// RUN:   -arch sm_52 --cuda-path/opt/cuda -o a.out 2>&1 | FileCheck %s 
--check-prefix=PATH
+// PATH-NOT: --cuda-path=/opt/cuda
diff --git a/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td 
b/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td
index e84b530f2787d3..8c80a51b12a44e 100644
--- a/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td
+++ b/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td
@@ -12,9 +12,9 @@ def verbose : Flag<["-"], "v">, HelpText<"Print verbose 
information">;
 def version : Flag<["--"], "version">,
   HelpText<"Display the version number and exit">;
 
-def cuda_path_EQ : Joined<["--"], "cuda-path=">,
+def cuda_path_EQ : Joined<["--"], "cuda-path=">, Flags<[WrapperOnlyOption]>,
   MetaVarName<"">, HelpText<"Set the system CUDA path">;
-def ptxas_path_EQ : Joined<["--"], "ptxas-path=">,
+def ptxas_path_EQ : Joined<["--"], "ptxas-path=">, Flags<[WrapperOnlyOption]>,
   MetaVarName<"">, HelpText<"Set the 'ptxas' path">;
 
 def o : JoinedOrSeparate<["-"], "o">, MetaVarName<"">,

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] 1a5c4e5 - [Clang] Correctly forward `--cuda-path` to the nvlink wrapper (#100170)

2024-08-19 Thread Tobias Hieta via llvm-branch-commits

Author: Joseph Huber
Date: 2024-08-19T09:02:52+02:00
New Revision: 1a5c4e5c4fded8293985dc1875a971f7783cfc45

URL: 
https://github.com/llvm/llvm-project/commit/1a5c4e5c4fded8293985dc1875a971f7783cfc45
DIFF: 
https://github.com/llvm/llvm-project/commit/1a5c4e5c4fded8293985dc1875a971f7783cfc45.diff

LOG: [Clang] Correctly forward `--cuda-path` to the nvlink wrapper (#100170)

Summary:
This was not forwarded properly as it would try to pass it to `nvlink`.

Fixes https://github.com/llvm/llvm-project/issues/100168

(cherry picked from commit 7e1fcf5dd657d465c3fc846f56c6f9d3a4560b43)

Added: 


Modified: 
clang/lib/Driver/ToolChains/Cuda.cpp
clang/test/Driver/linker-wrapper-passes.c
clang/test/Driver/nvlink-wrapper.c
clang/tools/clang-nvlink-wrapper/NVLinkOpts.td

Removed: 




diff  --git a/clang/lib/Driver/ToolChains/Cuda.cpp 
b/clang/lib/Driver/ToolChains/Cuda.cpp
index 59453c484ae4f4..61d12b10dfb62b 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -609,6 +609,10 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const 
JobAction &JA,
 CmdArgs.push_back(Args.MakeArgString(
 "--pxtas-path=" + Args.getLastArgValue(options::OPT_ptxas_path_EQ)));
 
+  if (Args.hasArg(options::OPT_cuda_path_EQ))
+CmdArgs.push_back(Args.MakeArgString(
+"--cuda-path=" + Args.getLastArgValue(options::OPT_cuda_path_EQ)));
+
   // Add paths specified in LIBRARY_PATH environment variable as -L options.
   addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
 

diff  --git a/clang/test/Driver/linker-wrapper-passes.c 
b/clang/test/Driver/linker-wrapper-passes.c
index aadcf472e9b636..b257c942afa075 100644
--- a/clang/test/Driver/linker-wrapper-passes.c
+++ b/clang/test/Driver/linker-wrapper-passes.c
@@ -3,7 +3,6 @@
 // REQUIRES: llvm-plugins, llvm-examples
 // REQUIRES: x86-registered-target
 // REQUIRES: amdgpu-registered-target
-
 // Setup.
 // RUN: mkdir -p %t
 // RUN: %clang -cc1 -emit-llvm-bc -o %t/host-x86_64-unknown-linux-gnu.bc \

diff  --git a/clang/test/Driver/nvlink-wrapper.c 
b/clang/test/Driver/nvlink-wrapper.c
index fdda93f1f9cdc1..318315ddaca340 100644
--- a/clang/test/Driver/nvlink-wrapper.c
+++ b/clang/test/Driver/nvlink-wrapper.c
@@ -63,3 +63,10 @@ int baz() { return y + x; }
 // RUN:   -arch sm_52 -o a.out 2>&1 | FileCheck %s --check-prefix=LTO
 // LTO: ptxas{{.*}} -m64 -c [[PTX:.+]].s -O3 -arch sm_52 -o [[CUBIN:.+]].cubin
 // LTO: nvlink{{.*}} -arch sm_52 -o a.out [[CUBIN]].cubin 
{{.*}}-u-{{.*}}.cubin {{.*}}-y-{{.*}}.cubin
+
+//
+// Check that we don't forward some arguments.
+//
+// RUN: clang-nvlink-wrapper --dry-run %t.o %t-u.o %t-y.a \
+// RUN:   -arch sm_52 --cuda-path/opt/cuda -o a.out 2>&1 | FileCheck %s 
--check-prefix=PATH
+// PATH-NOT: --cuda-path=/opt/cuda

diff  --git a/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td 
b/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td
index e84b530f2787d3..8c80a51b12a44e 100644
--- a/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td
+++ b/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td
@@ -12,9 +12,9 @@ def verbose : Flag<["-"], "v">, HelpText<"Print verbose 
information">;
 def version : Flag<["--"], "version">,
   HelpText<"Display the version number and exit">;
 
-def cuda_path_EQ : Joined<["--"], "cuda-path=">,
+def cuda_path_EQ : Joined<["--"], "cuda-path=">, Flags<[WrapperOnlyOption]>,
   MetaVarName<"">, HelpText<"Set the system CUDA path">;
-def ptxas_path_EQ : Joined<["--"], "ptxas-path=">,
+def ptxas_path_EQ : Joined<["--"], "ptxas-path=">, Flags<[WrapperOnlyOption]>,
   MetaVarName<"">, HelpText<"Set the 'ptxas' path">;
 
 def o : JoinedOrSeparate<["-"], "o">, MetaVarName<"">,



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/19.x: [Clang] Correctly forward `--cuda-path` to the nvlink wrapper (#100170) (PR #100216)

2024-08-19 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru closed https://github.com/llvm/llvm-project/pull/100216
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [RISCV] Use experimental.vp.splat to splat specific vector length elements. (#101329) (PR #101506)

2024-08-19 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru updated https://github.com/llvm/llvm-project/pull/101506

>From c45fc691a2a39318fc146ba1665a2fe2d9f43b2b Mon Sep 17 00:00:00 2001
From: Yeting Kuo <46629943+yeti...@users.noreply.github.com>
Date: Thu, 1 Aug 2024 09:37:42 +0800
Subject: [PATCH] [RISCV] Use experimental.vp.splat to splat specific vector
 length elements. (#101329)

Previously, it was hard to create a scalable vector splat with a specific
vector length in LLVM IR, so we used riscv.vmv.v.x and riscv.vmv.v.f to do
this work. But these two RVV intrinsics have strict type constraints and
cannot support fixed vector types or illegal vector types. Using vp.splat
preserves the old functionality and also generates more optimized code for
fixed vector types and illegal vector types.
This patch also fixes a crash caused by getEVT not handling ptr types.

(cherry picked from commit 87af9ee870ad7ca93abced0b09459c3760dec891)
---
 llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp | 19 +
 .../RISCV/rvv/fixed-vectors-strided-vpload.ll |  8 +-
 llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll | 80 +--
 3 files changed, 79 insertions(+), 28 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp 
b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
index 0a66a38f6d5abc..be2e880ecd3a98 100644
--- a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
@@ -187,25 +187,10 @@ bool 
RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst &II) {
   auto *VTy = cast(II.getType());
 
   IRBuilder<> Builder(&II);
-
-  // Extend VL from i32 to XLen if needed.
-  if (ST->is64Bit())
-VL = Builder.CreateZExt(VL, Builder.getInt64Ty());
-
   Type *STy = VTy->getElementType();
   Value *Val = Builder.CreateLoad(STy, BasePtr);
-  const auto &TLI = *ST->getTargetLowering();
-  Value *Res;
-
-  // TODO: Also support fixed/illegal vector types to splat with evl = vl.
-  if (isa(VTy) && TLI.isTypeLegal(EVT::getEVT(VTy))) {
-unsigned VMVOp = STy->isFloatingPointTy() ? Intrinsic::riscv_vfmv_v_f
-  : Intrinsic::riscv_vmv_v_x;
-Res = Builder.CreateIntrinsic(VMVOp, {VTy, VL->getType()},
-  {PoisonValue::get(VTy), Val, VL});
-  } else {
-Res = Builder.CreateVectorSplat(VTy->getElementCount(), Val);
-  }
+  Value *Res = Builder.CreateIntrinsic(Intrinsic::experimental_vp_splat, {VTy},
+   {Val, II.getOperand(2), VL});
 
   II.replaceAllUsesWith(Res);
   II.eraseFromParent();
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
index b8c7037580c46b..849f98c26f4593 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
@@ -638,14 +638,14 @@ declare <33 x double> 
@llvm.experimental.vp.strided.load.v33f64.p0.i64(ptr, i64,
 define <4 x i8> @zero_strided_unmasked_vpload_4i8_i8(ptr %ptr) {
 ; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_4i8_i8:
 ; CHECK-OPT:   # %bb.0:
-; CHECK-OPT-NEXT:vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-OPT-NEXT:vsetivli zero, 3, e8, mf4, ta, ma
 ; CHECK-OPT-NEXT:vlse8.v v8, (a0), zero
 ; CHECK-OPT-NEXT:ret
 ;
 ; CHECK-NO-OPT-LABEL: zero_strided_unmasked_vpload_4i8_i8:
 ; CHECK-NO-OPT:   # %bb.0:
 ; CHECK-NO-OPT-NEXT:lbu a0, 0(a0)
-; CHECK-NO-OPT-NEXT:vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NO-OPT-NEXT:vsetivli zero, 3, e8, mf4, ta, ma
 ; CHECK-NO-OPT-NEXT:vmv.v.x v8, a0
 ; CHECK-NO-OPT-NEXT:ret
   %load = call <4 x i8> @llvm.experimental.vp.strided.load.4i8.p0.i8(ptr %ptr, 
i8 0, <4 x i1> splat (i1 true), i32 3)
@@ -657,14 +657,14 @@ define <4 x i8> @zero_strided_unmasked_vpload_4i8_i8(ptr 
%ptr) {
 define <4 x half> @zero_strided_unmasked_vpload_4f16(ptr %ptr) {
 ; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_4f16:
 ; CHECK-OPT:   # %bb.0:
-; CHECK-OPT-NEXT:vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-OPT-NEXT:vsetivli zero, 3, e16, mf2, ta, ma
 ; CHECK-OPT-NEXT:vlse16.v v8, (a0), zero
 ; CHECK-OPT-NEXT:ret
 ;
 ; CHECK-NO-OPT-LABEL: zero_strided_unmasked_vpload_4f16:
 ; CHECK-NO-OPT:   # %bb.0:
 ; CHECK-NO-OPT-NEXT:flh fa5, 0(a0)
-; CHECK-NO-OPT-NEXT:vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NO-OPT-NEXT:vsetivli zero, 3, e16, mf2, ta, ma
 ; CHECK-NO-OPT-NEXT:vfmv.v.f v8, fa5
 ; CHECK-NO-OPT-NEXT:ret
   %load = call <4 x half> @llvm.experimental.vp.strided.load.4f16.p0.i32(ptr 
%ptr, i32 0, <4 x i1> splat (i1 true), i32 3)
diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll 
b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
index 0010f64a93fd62..14976f21b7dbba 100644
--- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
@@ -1,16 +1,16 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 
-mattr=+m,+d,+zfh,+v,+z

[llvm-branch-commits] [llvm] c45fc69 - [RISCV] Use experimental.vp.splat to splat specific vector length elements. (#101329)

2024-08-19 Thread Tobias Hieta via llvm-branch-commits

Author: Yeting Kuo
Date: 2024-08-19T09:04:45+02:00
New Revision: c45fc691a2a39318fc146ba1665a2fe2d9f43b2b

URL: 
https://github.com/llvm/llvm-project/commit/c45fc691a2a39318fc146ba1665a2fe2d9f43b2b
DIFF: 
https://github.com/llvm/llvm-project/commit/c45fc691a2a39318fc146ba1665a2fe2d9f43b2b.diff

LOG: [RISCV] Use experimental.vp.splat to splat specific vector length 
elements. (#101329)

Previously, it was hard to create a scalable vector splat with a specific
vector length in LLVM IR, so we used riscv.vmv.v.x and riscv.vmv.v.f to do
this work. But these two RVV intrinsics have strict type constraints and
cannot support fixed vector types or illegal vector types. Using vp.splat
preserves the old functionality and also generates more optimized code for
fixed vector types and illegal vector types.
This patch also fixes a crash caused by getEVT not handling ptr types.

(cherry picked from commit 87af9ee870ad7ca93abced0b09459c3760dec891)

Added: 


Modified: 
llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll

Removed: 




diff  --git a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp 
b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
index 0a66a38f6d5abc..be2e880ecd3a98 100644
--- a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
@@ -187,25 +187,10 @@ bool 
RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst &II) {
   auto *VTy = cast(II.getType());
 
   IRBuilder<> Builder(&II);
-
-  // Extend VL from i32 to XLen if needed.
-  if (ST->is64Bit())
-VL = Builder.CreateZExt(VL, Builder.getInt64Ty());
-
   Type *STy = VTy->getElementType();
   Value *Val = Builder.CreateLoad(STy, BasePtr);
-  const auto &TLI = *ST->getTargetLowering();
-  Value *Res;
-
-  // TODO: Also support fixed/illegal vector types to splat with evl = vl.
-  if (isa(VTy) && TLI.isTypeLegal(EVT::getEVT(VTy))) {
-unsigned VMVOp = STy->isFloatingPointTy() ? Intrinsic::riscv_vfmv_v_f
-  : Intrinsic::riscv_vmv_v_x;
-Res = Builder.CreateIntrinsic(VMVOp, {VTy, VL->getType()},
-  {PoisonValue::get(VTy), Val, VL});
-  } else {
-Res = Builder.CreateVectorSplat(VTy->getElementCount(), Val);
-  }
+  Value *Res = Builder.CreateIntrinsic(Intrinsic::experimental_vp_splat, {VTy},
+   {Val, II.getOperand(2), VL});
 
   II.replaceAllUsesWith(Res);
   II.eraseFromParent();

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
index b8c7037580c46b..849f98c26f4593 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
@@ -638,14 +638,14 @@ declare <33 x double> 
@llvm.experimental.vp.strided.load.v33f64.p0.i64(ptr, i64,
 define <4 x i8> @zero_strided_unmasked_vpload_4i8_i8(ptr %ptr) {
 ; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_4i8_i8:
 ; CHECK-OPT:   # %bb.0:
-; CHECK-OPT-NEXT:vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-OPT-NEXT:vsetivli zero, 3, e8, mf4, ta, ma
 ; CHECK-OPT-NEXT:vlse8.v v8, (a0), zero
 ; CHECK-OPT-NEXT:ret
 ;
 ; CHECK-NO-OPT-LABEL: zero_strided_unmasked_vpload_4i8_i8:
 ; CHECK-NO-OPT:   # %bb.0:
 ; CHECK-NO-OPT-NEXT:lbu a0, 0(a0)
-; CHECK-NO-OPT-NEXT:vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NO-OPT-NEXT:vsetivli zero, 3, e8, mf4, ta, ma
 ; CHECK-NO-OPT-NEXT:vmv.v.x v8, a0
 ; CHECK-NO-OPT-NEXT:ret
   %load = call <4 x i8> @llvm.experimental.vp.strided.load.4i8.p0.i8(ptr %ptr, 
i8 0, <4 x i1> splat (i1 true), i32 3)
@@ -657,14 +657,14 @@ define <4 x i8> @zero_strided_unmasked_vpload_4i8_i8(ptr 
%ptr) {
 define <4 x half> @zero_strided_unmasked_vpload_4f16(ptr %ptr) {
 ; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_4f16:
 ; CHECK-OPT:   # %bb.0:
-; CHECK-OPT-NEXT:vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-OPT-NEXT:vsetivli zero, 3, e16, mf2, ta, ma
 ; CHECK-OPT-NEXT:vlse16.v v8, (a0), zero
 ; CHECK-OPT-NEXT:ret
 ;
 ; CHECK-NO-OPT-LABEL: zero_strided_unmasked_vpload_4f16:
 ; CHECK-NO-OPT:   # %bb.0:
 ; CHECK-NO-OPT-NEXT:flh fa5, 0(a0)
-; CHECK-NO-OPT-NEXT:vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NO-OPT-NEXT:vsetivli zero, 3, e16, mf2, ta, ma
 ; CHECK-NO-OPT-NEXT:vfmv.v.f v8, fa5
 ; CHECK-NO-OPT-NEXT:ret
   %load = call <4 x half> @llvm.experimental.vp.strided.load.4f16.p0.i32(ptr 
%ptr, i32 0, <4 x i1> splat (i1 true), i32 3)

diff  --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll 
b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
index 0010f64a93fd62..14976f21b7dbba 100644
--- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
@@ -1,16 +1,16 @@
 ; NOTE: Assertions have been autogenerated by 

[llvm-branch-commits] [llvm] release/19.x: [RISCV] Use experimental.vp.splat to splat specific vector length elements. (#101329) (PR #101506)

2024-08-19 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru closed https://github.com/llvm/llvm-project/pull/101506
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/19.x: [Clang] Correctly forward `--cuda-path` to the nvlink wrapper (#100170) (PR #100216)

2024-08-19 Thread via llvm-branch-commits

github-actions[bot] wrote:

@jhuber6 (or anyone else): if you would like to add a note about this fix in
the release notes (completely optional), please reply to this comment with a
one- or two-sentence description of the fix. When you are done, please add the
release:note label to this PR.

https://github.com/llvm/llvm-project/pull/100216
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [RISCV] Use experimental.vp.splat to splat specific vector length elements. (#101329) (PR #101506)

2024-08-19 Thread via llvm-branch-commits

github-actions[bot] wrote:

@topperc (or anyone else): if you would like to add a note about this fix in
the release notes (completely optional), please reply to this comment with a
one- or two-sentence description of the fix. When you are done, please add the
release:note label to this PR.

https://github.com/llvm/llvm-project/pull/101506
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [Hexagon] Do not optimize address of another function's block (#101209) (PR #102179)

2024-08-19 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru updated https://github.com/llvm/llvm-project/pull/102179

>From 2ab8d93061581edad3501561722ebd5632d73892 Mon Sep 17 00:00:00 2001
From: yandalur 
Date: Thu, 1 Aug 2024 21:37:23 +0530
Subject: [PATCH] [Hexagon] Do not optimize address of another function's block
 (#101209)

When the constant extender optimization pass encounters an instruction
that uses an extended address pointing to another function's block,
avoid adding the instruction to the extender list for the current
machine function.

Fixes https://github.com/llvm/llvm-project/issues/99714

(cherry picked from commit 68df06a0b2998765cb0a41353fcf0919bbf57ddb)
---
 .../Target/Hexagon/HexagonConstExtenders.cpp  |   4 +
 .../CodeGen/Hexagon/cext-opt-block-addr.mir   | 173 ++
 2 files changed, 177 insertions(+)
 create mode 100644 llvm/test/CodeGen/Hexagon/cext-opt-block-addr.mir

diff --git a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp 
b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
index f0933765bbcbda..86ce6b4e05ed27 100644
--- a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
@@ -1223,6 +1223,10 @@ void HCE::recordExtender(MachineInstr &MI, unsigned 
OpNum) {
   if (ER.Kind == MachineOperand::MO_GlobalAddress)
 if (ER.V.GV->getName().empty())
   return;
+  // Ignore block address that points to block in another function
+  if (ER.Kind == MachineOperand::MO_BlockAddress)
+if (ER.V.BA->getFunction() != &(MI.getMF()->getFunction()))
+  return;
   Extenders.push_back(ED);
 }
 
diff --git a/llvm/test/CodeGen/Hexagon/cext-opt-block-addr.mir 
b/llvm/test/CodeGen/Hexagon/cext-opt-block-addr.mir
new file mode 100644
index 00..9f140132dcd6c3
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/cext-opt-block-addr.mir
@@ -0,0 +1,173 @@
+# REQUIRES: asserts
+# RUN: llc -march=hexagon -run-pass hexagon-cext-opt %s -o - | FileCheck %s
+
+# Check that the HexagonConstantExtenders pass does not assert when block
+# addresses from different functions are used
+# CHECK-LABEL: name: wibble
+# CHECK: A2_tfrsi blockaddress(@baz
+# CHECK: A2_tfrsi blockaddress(@wibble
+
+--- |
+  target triple = "hexagon"
+
+  define dso_local void @baz() {
+  bb:
+br label %bb1
+
+  bb1:  ; preds = %bb
+%call = tail call fastcc i32 @wibble(i32 poison)
+ret void
+  }
+
+  define internal fastcc i32 @wibble(i32 %arg) {
+  bb:
+%call = tail call i32 @eggs(i32 noundef ptrtoint (ptr blockaddress(@baz, 
%bb1) to i32))
+br label %bb1
+
+  bb1:  ; preds = %bb
+tail call void @baz.1(i32 noundef ptrtoint (ptr blockaddress(@wibble, 
%bb1) to i32))
+ret i32 %call
+  }
+
+  declare i32 @eggs(i32 noundef) local_unnamed_addr
+
+  declare void @baz.1(i32 noundef) local_unnamed_addr
+
+...
+---
+name:baz
+alignment:   16
+exposesReturnsTwice: false
+legalized:   false
+regBankSelected: false
+selected:false
+failedISel:  false
+tracksRegLiveness: true
+hasWinCFI:   false
+callsEHReturn:   false
+callsUnwindInit: false
+hasEHCatchret:   false
+hasEHScopes: false
+hasEHFunclets:   false
+isOutlined:  false
+debugInstrRef:   false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+  - { id: 0, class: intregs, preferred-register: '' }
+liveins: []
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap: false
+  hasPatchPoint:   false
+  stackSize:   0
+  offsetAdjustment: 0
+  maxAlignment:1
+  adjustsStack:false
+  hasCalls:false
+  stackProtector:  ''
+  functionContext: ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:  false
+  hasMustTailInVarArgFunc: false
+  hasTailCall: true
+  isCalleeSavedInfoValid: false
+  localFrameSize:  0
+  savePoint:   ''
+  restorePoint:''
+fixedStack:  []
+stack:   []
+entry_values:[]
+callSites:   []
+debugValueSubstitutions: []
+constants:   []
+machineFunctionInfo: {}
+body: |
+  bb.0.bb:
+successors: %bb.1(0x8000)
+
+  bb.1.bb1 (ir-block-address-taken %ir-block.bb1):
+%0:intregs = IMPLICIT_DEF
+$r0 = COPY %0
+PS_tailcall_i @wibble, hexagoncsr, implicit $r0
+
+...
+---
+name:wibble
+alignment:   16
+exposesReturnsTwice: false
+legalized:   false
+regBankSelected: false
+selected:false
+failedISel:  false
+tracksRegLiveness: true
+hasWinCFI:   false
+callsEHReturn:   false
+callsUnwindInit: false
+hasEHCatchret:   false
+hasEHScopes: false
+hasEHFunclets:   false
+isOutlined:  false
+debugInstrRef:   false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+  - { id: 0, class: intregs, preferred-register: '' }
+  - { id: 1, class: intregs, preferred-register: '' }
+  - { id: 2, class: intregs, preferred-reg

[llvm-branch-commits] [llvm] 2ab8d93 - [Hexagon] Do not optimize address of another function's block (#101209)

2024-08-19 Thread Tobias Hieta via llvm-branch-commits

Author: yandalur
Date: 2024-08-19T09:06:57+02:00
New Revision: 2ab8d93061581edad3501561722ebd5632d73892

URL: 
https://github.com/llvm/llvm-project/commit/2ab8d93061581edad3501561722ebd5632d73892
DIFF: 
https://github.com/llvm/llvm-project/commit/2ab8d93061581edad3501561722ebd5632d73892.diff

LOG: [Hexagon] Do not optimize address of another function's block (#101209)

When the constant extender optimization pass encounters an instruction
that uses an extended address pointing to another function's block,
avoid adding the instruction to the extender list for the current
machine function.

Fixes https://github.com/llvm/llvm-project/issues/99714

(cherry picked from commit 68df06a0b2998765cb0a41353fcf0919bbf57ddb)

Added: 
llvm/test/CodeGen/Hexagon/cext-opt-block-addr.mir

Modified: 
llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp

Removed: 




diff  --git a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp 
b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
index f0933765bbcbda..86ce6b4e05ed27 100644
--- a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
@@ -1223,6 +1223,10 @@ void HCE::recordExtender(MachineInstr &MI, unsigned 
OpNum) {
   if (ER.Kind == MachineOperand::MO_GlobalAddress)
 if (ER.V.GV->getName().empty())
   return;
+  // Ignore block address that points to block in another function
+  if (ER.Kind == MachineOperand::MO_BlockAddress)
+if (ER.V.BA->getFunction() != &(MI.getMF()->getFunction()))
+  return;
   Extenders.push_back(ED);
 }
 

diff  --git a/llvm/test/CodeGen/Hexagon/cext-opt-block-addr.mir 
b/llvm/test/CodeGen/Hexagon/cext-opt-block-addr.mir
new file mode 100644
index 00..9f140132dcd6c3
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/cext-opt-block-addr.mir
@@ -0,0 +1,173 @@
+# REQUIRES: asserts
+# RUN: llc -march=hexagon -run-pass hexagon-cext-opt %s -o - | FileCheck %s
+
+# Check that the HexagonConstantExtenders pass does not assert when block
+# addresses from different functions are used
+# CHECK-LABEL: name: wibble
+# CHECK: A2_tfrsi blockaddress(@baz
+# CHECK: A2_tfrsi blockaddress(@wibble
+
+--- |
+  target triple = "hexagon"
+
+  define dso_local void @baz() {
+  bb:
+br label %bb1
+
+  bb1:  ; preds = %bb
+%call = tail call fastcc i32 @wibble(i32 poison)
+ret void
+  }
+
+  define internal fastcc i32 @wibble(i32 %arg) {
+  bb:
+%call = tail call i32 @eggs(i32 noundef ptrtoint (ptr blockaddress(@baz, 
%bb1) to i32))
+br label %bb1
+
+  bb1:  ; preds = %bb
+tail call void @baz.1(i32 noundef ptrtoint (ptr blockaddress(@wibble, 
%bb1) to i32))
+ret i32 %call
+  }
+
+  declare i32 @eggs(i32 noundef) local_unnamed_addr
+
+  declare void @baz.1(i32 noundef) local_unnamed_addr
+
+...
+---
+name:baz
+alignment:   16
+exposesReturnsTwice: false
+legalized:   false
+regBankSelected: false
+selected:false
+failedISel:  false
+tracksRegLiveness: true
+hasWinCFI:   false
+callsEHReturn:   false
+callsUnwindInit: false
+hasEHCatchret:   false
+hasEHScopes: false
+hasEHFunclets:   false
+isOutlined:  false
+debugInstrRef:   false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+  - { id: 0, class: intregs, preferred-register: '' }
+liveins: []
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap: false
+  hasPatchPoint:   false
+  stackSize:   0
+  offsetAdjustment: 0
+  maxAlignment:1
+  adjustsStack:false
+  hasCalls:false
+  stackProtector:  ''
+  functionContext: ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:  false
+  hasMustTailInVarArgFunc: false
+  hasTailCall: true
+  isCalleeSavedInfoValid: false
+  localFrameSize:  0
+  savePoint:   ''
+  restorePoint:''
+fixedStack:  []
+stack:   []
+entry_values:[]
+callSites:   []
+debugValueSubstitutions: []
+constants:   []
+machineFunctionInfo: {}
+body: |
+  bb.0.bb:
+successors: %bb.1(0x8000)
+
+  bb.1.bb1 (ir-block-address-taken %ir-block.bb1):
+%0:intregs = IMPLICIT_DEF
+$r0 = COPY %0
+PS_tailcall_i @wibble, hexagoncsr, implicit $r0
+
+...
+---
+name:wibble
+alignment:   16
+exposesReturnsTwice: false
+legalized:   false
+regBankSelected: false
+selected:false
+failedISel:  false
+tracksRegLiveness: true
+hasWinCFI:   false
+callsEHReturn:   false
+callsUnwindInit: false
+hasEHCatchret:   false
+hasEHScopes: false
+hasEHFunclets:   false
+isOutlined:  false
+debugInstrRef:   false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+  - { id: 0, class: intregs, preferred-register: '' }
+  - { id: 1, class: intregs

[llvm-branch-commits] [llvm] release/19.x: [Hexagon] Do not optimize address of another function's block (#101209) (PR #102179)

2024-08-19 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru closed https://github.com/llvm/llvm-project/pull/102179
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [Hexagon] Do not optimize address of another function's block (#101209) (PR #102179)

2024-08-19 Thread via llvm-branch-commits

github-actions[bot] wrote:

@yandalur (or anyone else): if you would like to add a note about this fix in
the release notes (completely optional), please reply to this comment with a
one- or two-sentence description of the fix. When you are done, please add the
release:note label to this PR.

https://github.com/llvm/llvm-project/pull/102179
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [AArch64] Add GCS release notes (PR #103866)

2024-08-19 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru updated https://github.com/llvm/llvm-project/pull/103866

>From 1cfd6754454ba62fd0ad306c09a7b6d526f835dc Mon Sep 17 00:00:00 2001
From: John Brawn 
Date: Wed, 14 Aug 2024 12:22:51 +0100
Subject: [PATCH] [AArch64] Add GCS release notes

---
 clang/docs/ReleaseNotes.rst | 5 +
 1 file changed, 5 insertions(+)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 5cd398c22c946d..b56e7177846d99 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1207,6 +1207,11 @@ Arm and AArch64 Support
 * Arm Neoverse-N3 (neoverse-n3).
 * Arm Neoverse-V3 (neoverse-v3).
 * Arm Neoverse-V3AE (neoverse-v3ae).
+- ``-mbranch-protection=gcs`` has been added which enables support for the
+  Guarded Control Stack extension, and ``-mbranch-protection=standard`` also
+  enables this. Enabling GCS causes the GCS GNU property bit to be set on output
+  objects. It doesn't cause any code generation changes, as the code generated
+  by clang is already compatible with GCS.
 
 Android Support
 ^^^^^^^^^^^^^^^

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] 1cfd675 - [AArch64] Add GCS release notes

2024-08-19 Thread Tobias Hieta via llvm-branch-commits

Author: John Brawn
Date: 2024-08-19T09:10:20+02:00
New Revision: 1cfd6754454ba62fd0ad306c09a7b6d526f835dc

URL: 
https://github.com/llvm/llvm-project/commit/1cfd6754454ba62fd0ad306c09a7b6d526f835dc
DIFF: 
https://github.com/llvm/llvm-project/commit/1cfd6754454ba62fd0ad306c09a7b6d526f835dc.diff

LOG: [AArch64] Add GCS release notes

Added: 


Modified: 
clang/docs/ReleaseNotes.rst

Removed: 




diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 5cd398c22c946d..b56e7177846d99 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1207,6 +1207,11 @@ Arm and AArch64 Support
 * Arm Neoverse-N3 (neoverse-n3).
 * Arm Neoverse-V3 (neoverse-v3).
 * Arm Neoverse-V3AE (neoverse-v3ae).
+- ``-mbranch-protection=gcs`` has been added which enables support for the
+  Guarded Control Stack extension, and ``-mbranch-protection=standard`` also
+  enables this. Enabling GCS causes the GCS GNU property bit to be set on output
+  objects. It doesn't cause any code generation changes, as the code generated
+  by clang is already compatible with GCS.
 
 Android Support
 ^^^



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [AArch64] Add GCS release notes (PR #103866)

2024-08-19 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru closed https://github.com/llvm/llvm-project/pull/103866
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: Revert "[CGData] llvm-cgdata (#89884)" (PR #103886)

2024-08-19 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru updated https://github.com/llvm/llvm-project/pull/103886

>From 9e90c40564e21dc5f1a12e08cfdf29305aaf9f50 Mon Sep 17 00:00:00 2001
From: Gulfem Savrun Yeniceri 
Date: Tue, 23 Jul 2024 11:06:30 +
Subject: [PATCH] Revert "[CGData] llvm-cgdata (#89884)"

This reverts commit d3fb41dddc11b0ebc338a3b9e6a5ab7288ff7d1d
and forward fix patches because of the issue explained in:
https://github.com/llvm/llvm-project/pull/89884#issuecomment-2244348117.

Revert "Fix tests for https://github.com/llvm/llvm-project/pull/89884
(#100061)"

This reverts commit 67937a3f969aaf97a745a45281a0d22273bff713.

Revert "Fix build break for https://github.com/llvm/llvm-project/pull/89884 
(#100050)"

This reverts commit c33878c5787c128234d533ad19d672dc3eea19a8.

Revert "[CGData] Fix -Wpessimizing-move in CodeGenDataReader.cpp (NFC)"

This reverts commit 1f8b2b146141f3563085a1acb77deb50857a636d.

(cherry picked from commit 73d78973fe072438f0f73088f889c66845b2b51a)
---
 llvm/include/llvm/CodeGenData/CodeGenData.h   | 204 -
 llvm/include/llvm/CodeGenData/CodeGenData.inc |  46 ---
 .../llvm/CodeGenData/CodeGenDataReader.h  | 154 --
 .../llvm/CodeGenData/CodeGenDataWriter.h  |  68 -
 llvm/lib/CodeGenData/CMakeLists.txt   |   3 -
 llvm/lib/CodeGenData/CodeGenData.cpp  | 196 -
 llvm/lib/CodeGenData/CodeGenDataReader.cpp| 175 
 llvm/lib/CodeGenData/CodeGenDataWriter.cpp| 162 ---
 llvm/test/CMakeLists.txt  |   1 -
 llvm/test/lit.cfg.py  |   1 -
 llvm/test/tools/llvm-cgdata/dump.test |  32 ---
 llvm/test/tools/llvm-cgdata/empty.test|  35 ---
 llvm/test/tools/llvm-cgdata/error.test|  38 ---
 .../test/tools/llvm-cgdata/merge-archive.test |  90 --
 llvm/test/tools/llvm-cgdata/merge-concat.test |  83 --
 llvm/test/tools/llvm-cgdata/merge-double.test |  87 --
 llvm/test/tools/llvm-cgdata/merge-single.test |  49 
 llvm/test/tools/llvm-cgdata/show.test |  30 --
 llvm/tools/llvm-cgdata/CMakeLists.txt |  15 -
 llvm/tools/llvm-cgdata/llvm-cgdata.cpp| 268 --
 20 files changed, 1737 deletions(-)
 delete mode 100644 llvm/include/llvm/CodeGenData/CodeGenData.h
 delete mode 100644 llvm/include/llvm/CodeGenData/CodeGenData.inc
 delete mode 100644 llvm/include/llvm/CodeGenData/CodeGenDataReader.h
 delete mode 100644 llvm/include/llvm/CodeGenData/CodeGenDataWriter.h
 delete mode 100644 llvm/lib/CodeGenData/CodeGenData.cpp
 delete mode 100644 llvm/lib/CodeGenData/CodeGenDataReader.cpp
 delete mode 100644 llvm/lib/CodeGenData/CodeGenDataWriter.cpp
 delete mode 100644 llvm/test/tools/llvm-cgdata/dump.test
 delete mode 100644 llvm/test/tools/llvm-cgdata/empty.test
 delete mode 100644 llvm/test/tools/llvm-cgdata/error.test
 delete mode 100644 llvm/test/tools/llvm-cgdata/merge-archive.test
 delete mode 100644 llvm/test/tools/llvm-cgdata/merge-concat.test
 delete mode 100644 llvm/test/tools/llvm-cgdata/merge-double.test
 delete mode 100644 llvm/test/tools/llvm-cgdata/merge-single.test
 delete mode 100644 llvm/test/tools/llvm-cgdata/show.test
 delete mode 100644 llvm/tools/llvm-cgdata/CMakeLists.txt
 delete mode 100644 llvm/tools/llvm-cgdata/llvm-cgdata.cpp

diff --git a/llvm/include/llvm/CodeGenData/CodeGenData.h 
b/llvm/include/llvm/CodeGenData/CodeGenData.h
deleted file mode 100644
index 659008c78abd93..00
--- a/llvm/include/llvm/CodeGenData/CodeGenData.h
+++ /dev/null
@@ -1,204 +0,0 @@
-//===- CodeGenData.h *- C++ 
-*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===--===//
-//
-// This file contains support for codegen data that has stable summary which
-// can be used to optimize the code in the subsequent codegen.
-//
-//===--===//
-
-#ifndef LLVM_CODEGENDATA_CODEGENDATA_H
-#define LLVM_CODEGENDATA_CODEGENDATA_H
-
-#include "llvm/ADT/BitmaskEnum.h"
-#include "llvm/Bitcode/BitcodeReader.h"
-#include "llvm/CodeGenData/OutlinedHashTree.h"
-#include "llvm/CodeGenData/OutlinedHashTreeRecord.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Object/ObjectFile.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/TargetParser/Triple.h"
-#include 
-
-namespace llvm {
-
-enum CGDataSectKind {
-#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) Kind,
-#include "llvm/CodeGenData/CodeGenData.inc"
-};
-
-std::string getCodeGenDataSectionName(CGDataSectKind CGSK,
-  Triple::ObjectFormatType OF,
-  bool AddSegmentInfo = true);
-
-enum class CGDataKind {
-  Unknown = 0x0,
-  // A function ou

[llvm-branch-commits] [llvm] 9e90c40 - Revert "[CGData] llvm-cgdata (#89884)"

2024-08-19 Thread Tobias Hieta via llvm-branch-commits

Author: Gulfem Savrun Yeniceri
Date: 2024-08-19T09:10:52+02:00
New Revision: 9e90c40564e21dc5f1a12e08cfdf29305aaf9f50

URL: 
https://github.com/llvm/llvm-project/commit/9e90c40564e21dc5f1a12e08cfdf29305aaf9f50
DIFF: 
https://github.com/llvm/llvm-project/commit/9e90c40564e21dc5f1a12e08cfdf29305aaf9f50.diff

LOG: Revert "[CGData] llvm-cgdata (#89884)"

This reverts commit d3fb41dddc11b0ebc338a3b9e6a5ab7288ff7d1d
and forward fix patches because of the issue explained in:
https://github.com/llvm/llvm-project/pull/89884#issuecomment-2244348117.

Revert "Fix tests for https://github.com/llvm/llvm-project/pull/89884
(#100061)"

This reverts commit 67937a3f969aaf97a745a45281a0d22273bff713.

Revert "Fix build break for https://github.com/llvm/llvm-project/pull/89884 
(#100050)"

This reverts commit c33878c5787c128234d533ad19d672dc3eea19a8.

Revert "[CGData] Fix -Wpessimizing-move in CodeGenDataReader.cpp (NFC)"

This reverts commit 1f8b2b146141f3563085a1acb77deb50857a636d.

(cherry picked from commit 73d78973fe072438f0f73088f889c66845b2b51a)

Added: 


Modified: 
llvm/lib/CodeGenData/CMakeLists.txt
llvm/test/CMakeLists.txt
llvm/test/lit.cfg.py

Removed: 
llvm/include/llvm/CodeGenData/CodeGenData.h
llvm/include/llvm/CodeGenData/CodeGenData.inc
llvm/include/llvm/CodeGenData/CodeGenDataReader.h
llvm/include/llvm/CodeGenData/CodeGenDataWriter.h
llvm/lib/CodeGenData/CodeGenData.cpp
llvm/lib/CodeGenData/CodeGenDataReader.cpp
llvm/lib/CodeGenData/CodeGenDataWriter.cpp
llvm/test/tools/llvm-cgdata/dump.test
llvm/test/tools/llvm-cgdata/empty.test
llvm/test/tools/llvm-cgdata/error.test
llvm/test/tools/llvm-cgdata/merge-archive.test
llvm/test/tools/llvm-cgdata/merge-concat.test
llvm/test/tools/llvm-cgdata/merge-double.test
llvm/test/tools/llvm-cgdata/merge-single.test
llvm/test/tools/llvm-cgdata/show.test
llvm/tools/llvm-cgdata/CMakeLists.txt
llvm/tools/llvm-cgdata/llvm-cgdata.cpp



diff  --git a/llvm/include/llvm/CodeGenData/CodeGenData.h 
b/llvm/include/llvm/CodeGenData/CodeGenData.h
deleted file mode 100644
index 659008c78abd93..00
--- a/llvm/include/llvm/CodeGenData/CodeGenData.h
+++ /dev/null
@@ -1,204 +0,0 @@
-//===- CodeGenData.h *- C++ 
-*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===--===//
-//
-// This file contains support for codegen data that has stable summary which
-// can be used to optimize the code in the subsequent codegen.
-//
-//===--===//
-
-#ifndef LLVM_CODEGENDATA_CODEGENDATA_H
-#define LLVM_CODEGENDATA_CODEGENDATA_H
-
-#include "llvm/ADT/BitmaskEnum.h"
-#include "llvm/Bitcode/BitcodeReader.h"
-#include "llvm/CodeGenData/OutlinedHashTree.h"
-#include "llvm/CodeGenData/OutlinedHashTreeRecord.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Object/ObjectFile.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/TargetParser/Triple.h"
-#include 
-
-namespace llvm {
-
-enum CGDataSectKind {
-#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) Kind,
-#include "llvm/CodeGenData/CodeGenData.inc"
-};
-
-std::string getCodeGenDataSectionName(CGDataSectKind CGSK,
-  Triple::ObjectFormatType OF,
-  bool AddSegmentInfo = true);
-
-enum class CGDataKind {
-  Unknown = 0x0,
-  // A function outlining info.
-  FunctionOutlinedHashTree = 0x1,
-  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/FunctionOutlinedHashTree)
-};
-
-const std::error_category &cgdata_category();
-
-enum class cgdata_error {
-  success = 0,
-  eof,
-  bad_magic,
-  bad_header,
-  empty_cgdata,
-  malformed,
-  unsupported_version,
-};
-
-inline std::error_code make_error_code(cgdata_error E) {
-  return std::error_code(static_cast(E), cgdata_category());
-}
-
-class CGDataError : public ErrorInfo {
-public:
-  CGDataError(cgdata_error Err, const Twine &ErrStr = Twine())
-  : Err(Err), Msg(ErrStr.str()) {
-assert(Err != cgdata_error::success && "Not an error");
-  }
-
-  std::string message() const override;
-
-  void log(raw_ostream &OS) const override { OS << message(); }
-
-  std::error_code convertToErrorCode() const override {
-return make_error_code(Err);
-  }
-
-  cgdata_error get() const { return Err; }
-  const std::string &getMessage() const { return Msg; }
-
-  /// Consume an Error and return the raw enum value contained within it, and
-  /// the optional error message. The Error must either be a success value, or
-  /// contain a single CGDataError.
-  static std::pair take(Error E) {
-auto Err = cgdat

[llvm-branch-commits] [clang] [AArch64] Add GCS release notes (PR #103866)

2024-08-19 Thread via llvm-branch-commits

github-actions[bot] wrote:

@john-brawn-arm (or anyone else): if you would like to add a note about this 
fix in the release notes (completely optional), please reply to this comment 
with a one- or two-sentence description of the fix. When you are done, please 
add the release:note label to this PR.

https://github.com/llvm/llvm-project/pull/103866
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: Revert "[CGData] llvm-cgdata (#89884)" (PR #103886)

2024-08-19 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru closed https://github.com/llvm/llvm-project/pull/103886
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: Revert "[CGData] llvm-cgdata (#89884)" (PR #103886)

2024-08-19 Thread via llvm-branch-commits

github-actions[bot] wrote:

@amy-kwan (or anyone else): if you would like to add a note about this fix in 
the release notes (completely optional), please reply to this comment with a 
one- or two-sentence description of the fix. When you are done, please add the 
release:note label to this PR.

https://github.com/llvm/llvm-project/pull/103886
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang][driver][clang-cl] Fix unused argument warning for `/std:c++20` for precompiled module inputs to `clang-cl` (PR #102438)

2024-08-19 Thread Tobias Hieta via llvm-branch-commits

tru wrote:

This PR is pretty messy: it has several commits instead of a single one that can 
be cherry-picked, and a merge commit that makes it harder to cherry-pick and squash.

Please update the PR to contain a single commit that fixes the issue and 
doesn't contain any merge commits, then I can merge it easily.

Thanks.

https://github.com/llvm/llvm-project/pull/102438
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [openmp] [OpenMP][AArch64] Fix branch protection in microtasks (#102317) (PR #103491)

2024-08-19 Thread Daniel Kiss via llvm-branch-commits

https://github.com/DanielKristofKiss approved this pull request.

Some distros (e.g. Fedora) are built with branch-protection=standard by default.
Without this patch, every application and library that links libopenmp.a will 
not be protected with BTI (CFI protection against JOP attacks).
Also, libopenmp.so won't be protected, as not all of the files are BTI compatible.
So from a security point of view this is an issue.
It is not a regression, as this was the case in previous releases.
It is not a bug, as the code will work; it just won't be CFI protected.
There are systems with BTI out there today running such a distro.


@tuliom do you have a use case for this?
@tru Change is simple and straightforward. I support the backport but leave it 
up to you.

HTH

https://github.com/llvm/llvm-project/pull/103491
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [AArch64] Adopt updated B16B16 target flags (PR #104602)

2024-08-19 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru updated https://github.com/llvm/llvm-project/pull/104602

>From 02cafa895c917a4b1726e64a5870877c95826be4 Mon Sep 17 00:00:00 2001
From: Spencer Abson 
Date: Fri, 16 Aug 2024 14:39:43 +
Subject: [PATCH] [AArch64] Adopt updated B16B16 target flags

The enablement of SVE/SME non-widening BFloat16 instructions was recently
changed in response to an architecture update, in which:
- FEAT_SVE_B16B16 was weakened
- FEAT_SME_B16B16 was introduced
New flags, 'sve-b16b16' and 'sme-b16b16' were introduced to replace the
existing 'b16b16'. This was achieved in the two patches below.
- https://github.com/llvm/llvm-project/pull/101480
- https://github.com/llvm/llvm-project/pull/102501
Ideally, the interface change introduced here will be valid in LLVM-19.
We do not see it necessary to back-port the entire change, but just to add
'sme-b16b16' and 'sve-b16b16' as aliases to the existing (and unchanged)
'b16b16' and 'sme2' flags which together cover all of these features.

The predication of Bf16 variants of svmin/svminnm and svmax/svmaxnm is also
fixed in this change.
---
 clang/include/clang/Basic/arm_sve.td  | 26 +++
 .../print-supported-extensions-aarch64.c  |  2 ++
 llvm/lib/Target/AArch64/AArch64Features.td|  9 +++
 .../TargetParser/TargetParserTest.cpp | 15 ++-
 4 files changed, 46 insertions(+), 6 deletions(-)

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index 94c093d8911562..fb11d743fd6479 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2116,7 +2116,7 @@ def SVFCLAMP_BF   : SInst<"svclamp[_{d}]", "", "b", 
MergeNone, "aarch64_sve_
 multiclass MinMaxIntr {
   def SVS # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "csil", 
MergeNone, "aarch64_sve_s" # i # zm # "_" # mul, [IsStreaming], []>;
   def SVU # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, 
"UcUsUiUl", MergeNone, "aarch64_sve_u" # i # zm # "_" # mul, [IsStreaming], []>;
-  def SVF # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "bhfd", 
 MergeNone, "aarch64_sve_f" # i # zm # "_" # mul, [IsStreaming], []>;
+  def SVF # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "hfd",  
MergeNone, "aarch64_sve_f" # i # zm # "_" # mul, [IsStreaming], []>;
 }
 
 let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in {
@@ -2134,11 +2134,11 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = 
"sme2" in {
 }
 
 multiclass SInstMinMaxByVector {
-  def NAME # _SINGLE_X2 : SInst<"sv" # name # "nm[_single_{d}_x2]", "22d", 
"bhfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x2", [IsStreaming], []>;
-  def NAME # _SINGLE_X4 : SInst<"sv" # name # "nm[_single_{d}_x4]", "44d", 
"bhfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x4", [IsStreaming], []>;
+  def NAME # _SINGLE_X2 : SInst<"sv" # name # "nm[_single_{d}_x2]", "22d", 
"hfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x2", [IsStreaming], []>;
+  def NAME # _SINGLE_X4 : SInst<"sv" # name # "nm[_single_{d}_x4]", "44d", 
"hfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x4", [IsStreaming], []>;
 
-  def NAME # _X2 : SInst<"sv" # name # "nm[_{d}_x2]", "222", "bhfd", 
MergeNone, "aarch64_sve_f" # name # "nm_x2", [IsStreaming], []>;
-  def NAME # _X4 : SInst<"sv" # name # "nm[_{d}_x4]", "444", "bhfd", 
MergeNone, "aarch64_sve_f" # name # "nm_x4", [IsStreaming], []>;
+  def NAME # _X2 : SInst<"sv" # name # "nm[_{d}_x2]", "222", "hfd", MergeNone, 
"aarch64_sve_f" # name # "nm_x2", [IsStreaming], []>;
+  def NAME # _X4 : SInst<"sv" # name # "nm[_{d}_x4]", "444", "hfd", MergeNone, 
"aarch64_sve_f" # name # "nm_x4", [IsStreaming], []>;
 }
 
 let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in {
@@ -2172,9 +2172,25 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = 
"sme2" in {
   def SVFCLAMP_X4 : SInst<"svclamp[_single_{d}_x4]",  "44dd",   "hfd",  
MergeNone, "aarch64_sve_fclamp_single_x4",  [IsStreaming], []>;
 }
 
+multiclass BfSingleMultiVector {
+  def NAME # _SINGLE_X2 : SInst<"sv" # name # "[_single_{d}_x2]", "22d", "b", 
MergeNone, "aarch64_sve_f" # name # "_single_x2", [IsStreaming], []>;
+  def NAME # _SINGLE_X4 : SInst<"sv" # name # "[_single_{d}_x4]", "44d", "b", 
MergeNone, "aarch64_sve_f" # name # "_single_x4", [IsStreaming], []>;
+
+  def NAME # _X2 : SInst<"sv" # name # "[_{d}_x2]", "222", "b", MergeNone, 
"aarch64_sve_f" # name # "_x2", [IsStreaming], []>;
+  def NAME # _X4 : SInst<"sv" # name # "[_{d}_x4]", "444", "b", MergeNone, 
"aarch64_sve_f" # name # "_x4", [IsStreaming], []>;
+}
+
 let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2,b16b16"in {
   def SVBFCLAMP_X2 : SInst<"svclamp[_single_{d}_x2]",  "22dd",   "b",  
MergeNone, "aarch64_sve_bfclamp_single_x2",  [IsStreaming], []>;
   def SVBFCLAMP_X4 : SInst<"svclamp[_single_{d}_x4]",  "44dd",   "b",  
MergeNone, "aarch64_sve_bfclamp_single_x4

[llvm-branch-commits] [clang] 02cafa8 - [AArch64] Adopt updated B16B16 target flags

2024-08-19 Thread Tobias Hieta via llvm-branch-commits

Author: Spencer Abson
Date: 2024-08-19T09:15:08+02:00
New Revision: 02cafa895c917a4b1726e64a5870877c95826be4

URL: 
https://github.com/llvm/llvm-project/commit/02cafa895c917a4b1726e64a5870877c95826be4
DIFF: 
https://github.com/llvm/llvm-project/commit/02cafa895c917a4b1726e64a5870877c95826be4.diff

LOG: [AArch64] Adopt updated B16B16 target flags

The enablement of SVE/SME non-widening BFloat16 instructions was recently
changed in response to an architecture update, in which:
- FEAT_SVE_B16B16 was weakened
- FEAT_SME_B16B16 was introduced
New flags, 'sve-b16b16' and 'sme-b16b16' were introduced to replace the
existing 'b16b16'. This was achieved in the two patches below.
- https://github.com/llvm/llvm-project/pull/101480
- https://github.com/llvm/llvm-project/pull/102501
Ideally, the interface change introduced here will be valid in LLVM-19.
We do not see it necessary to back-port the entire change, but just to add
'sme-b16b16' and 'sve-b16b16' as aliases to the existing (and unchanged)
'b16b16' and 'sme2' flags which together cover all of these features.

The predication of Bf16 variants of svmin/svminnm and svmax/svmaxnm is also
fixed in this change.

Added: 


Modified: 
clang/include/clang/Basic/arm_sve.td
clang/test/Driver/print-supported-extensions-aarch64.c
llvm/lib/Target/AArch64/AArch64Features.td
llvm/unittests/TargetParser/TargetParserTest.cpp

Removed: 




diff  --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index 94c093d8911562..fb11d743fd6479 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2116,7 +2116,7 @@ def SVFCLAMP_BF   : SInst<"svclamp[_{d}]", "", "b", 
MergeNone, "aarch64_sve_
 multiclass MinMaxIntr {
   def SVS # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "csil", 
MergeNone, "aarch64_sve_s" # i # zm # "_" # mul, [IsStreaming], []>;
   def SVU # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, 
"UcUsUiUl", MergeNone, "aarch64_sve_u" # i # zm # "_" # mul, [IsStreaming], []>;
-  def SVF # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "bhfd", 
 MergeNone, "aarch64_sve_f" # i # zm # "_" # mul, [IsStreaming], []>;
+  def SVF # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "hfd",  
MergeNone, "aarch64_sve_f" # i # zm # "_" # mul, [IsStreaming], []>;
 }
 
 let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in {
@@ -2134,11 +2134,11 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = 
"sme2" in {
 }
 
 multiclass SInstMinMaxByVector {
-  def NAME # _SINGLE_X2 : SInst<"sv" # name # "nm[_single_{d}_x2]", "22d", 
"bhfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x2", [IsStreaming], []>;
-  def NAME # _SINGLE_X4 : SInst<"sv" # name # "nm[_single_{d}_x4]", "44d", 
"bhfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x4", [IsStreaming], []>;
+  def NAME # _SINGLE_X2 : SInst<"sv" # name # "nm[_single_{d}_x2]", "22d", 
"hfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x2", [IsStreaming], []>;
+  def NAME # _SINGLE_X4 : SInst<"sv" # name # "nm[_single_{d}_x4]", "44d", 
"hfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x4", [IsStreaming], []>;
 
-  def NAME # _X2 : SInst<"sv" # name # "nm[_{d}_x2]", "222", "bhfd", 
MergeNone, "aarch64_sve_f" # name # "nm_x2", [IsStreaming], []>;
-  def NAME # _X4 : SInst<"sv" # name # "nm[_{d}_x4]", "444", "bhfd", 
MergeNone, "aarch64_sve_f" # name # "nm_x4", [IsStreaming], []>;
+  def NAME # _X2 : SInst<"sv" # name # "nm[_{d}_x2]", "222", "hfd", MergeNone, 
"aarch64_sve_f" # name # "nm_x2", [IsStreaming], []>;
+  def NAME # _X4 : SInst<"sv" # name # "nm[_{d}_x4]", "444", "hfd", MergeNone, 
"aarch64_sve_f" # name # "nm_x4", [IsStreaming], []>;
 }
 
 let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in {
@@ -2172,9 +2172,25 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = 
"sme2" in {
   def SVFCLAMP_X4 : SInst<"svclamp[_single_{d}_x4]",  "44dd",   "hfd",  
MergeNone, "aarch64_sve_fclamp_single_x4",  [IsStreaming], []>;
 }
 
+multiclass BfSingleMultiVector {
+  def NAME # _SINGLE_X2 : SInst<"sv" # name # "[_single_{d}_x2]", "22d", "b", 
MergeNone, "aarch64_sve_f" # name # "_single_x2", [IsStreaming], []>;
+  def NAME # _SINGLE_X4 : SInst<"sv" # name # "[_single_{d}_x4]", "44d", "b", 
MergeNone, "aarch64_sve_f" # name # "_single_x4", [IsStreaming], []>;
+
+  def NAME # _X2 : SInst<"sv" # name # "[_{d}_x2]", "222", "b", MergeNone, 
"aarch64_sve_f" # name # "_x2", [IsStreaming], []>;
+  def NAME # _X4 : SInst<"sv" # name # "[_{d}_x4]", "444", "b", MergeNone, 
"aarch64_sve_f" # name # "_x4", [IsStreaming], []>;
+}
+
 let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2,b16b16"in {
   def SVBFCLAMP_X2 : SInst<"svclamp[_single_{d}_x2]",  "22dd",   "b",  
MergeNone, "aarch64_sve_bfclamp_single_x2",  [IsStreaming], []>;
   def SVBFCLAMP_X4 : SI

[llvm-branch-commits] [clang] [llvm] [AArch64] Adopt updated B16B16 target flags (PR #104602)

2024-08-19 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru closed https://github.com/llvm/llvm-project/pull/104602
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] release/19.x: [libc++] Fix rejects-valid in std::span copy construction (#104500) (PR #104603)

2024-08-19 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru updated https://github.com/llvm/llvm-project/pull/104603

>From 90f2d48965ca8a27f4b814ada987d169ca6a6f44 Mon Sep 17 00:00:00 2001
From: Louis Dionne 
Date: Fri, 16 Aug 2024 11:08:34 -0400
Subject: [PATCH] [libc++] Fix rejects-valid in std::span copy construction
 (#104500)

Trying to copy-construct a std::span from another std::span holding an
incomplete type would fail as we evaluate the SFINAE for the range-based
constructor. The problem was that we checked for __is_std_span after
checking for the range being a contiguous_range, which hard-errored
because of arithmetic on a pointer to incomplete type.

As a drive-by, refactor the whole test and format it.

Fixes #104496

(cherry picked from commit 99696b35bc8a0054e0b0c1a26e8dd5049fa8c41b)
---
 libcxx/include/span   |   2 +-
 .../views/views.span/span.cons/copy.pass.cpp  | 126 --
 2 files changed, 86 insertions(+), 42 deletions(-)

diff --git a/libcxx/include/span b/libcxx/include/span
index 60d76d830f0f31..da631cdc3f90e6 100644
--- a/libcxx/include/span
+++ b/libcxx/include/span
@@ -206,10 +206,10 @@ struct __is_std_span> : true_type {};
 
 template 
 concept __span_compatible_range =
+!__is_std_span>::value &&//
 ranges::contiguous_range<_Range> && //
 ranges::sized_range<_Range> &&  //
 (ranges::borrowed_range<_Range> || is_const_v<_ElementType>) && //
-!__is_std_span>::value &&//
 !__is_std_array>::value &&   //
 !is_array_v> &&  //
 is_convertible_v> 
(*)[], _ElementType (*)[]>;
diff --git 
a/libcxx/test/std/containers/views/views.span/span.cons/copy.pass.cpp 
b/libcxx/test/std/containers/views/views.span/span.cons/copy.pass.cpp
index 28f13e122ddc5e..d3990fd60a459a 100644
--- a/libcxx/test/std/containers/views/views.span/span.cons/copy.pass.cpp
+++ b/libcxx/test/std/containers/views/views.span/span.cons/copy.pass.cpp
@@ -5,6 +5,7 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 
//===--===//
+
 // UNSUPPORTED: c++03, c++11, c++14, c++17
 
 // 
@@ -14,58 +15,101 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "test_macros.h"
 
-template 
-constexpr bool doCopy(const T &rhs)
-{
-ASSERT_NOEXCEPT(T{rhs});
-T lhs{rhs};
-return lhs.data() == rhs.data()
- &&lhs.size() == rhs.size();
-}
+template 
+constexpr void test() {
+  ASSERT_NOEXCEPT(std::span(std::declval const&>()));
+  ASSERT_NOEXCEPT(std::span{std::declval const&>()});
 
-struct A{};
-
-template 
-void testCV ()
-{
-int  arr[] = {1,2,3};
-assert((doCopy(std::span  ()  )));
-assert((doCopy(std::span()  )));
-assert((doCopy(std::span  (&arr[0], 1;
-assert((doCopy(std::span(&arr[0], 1;
-assert((doCopy(std::span  (&arr[0], 2;
-assert((doCopy(std::span(&arr[0], 2;
+  // dynamic_extent
+  {
+std::span x;
+std::span copy(x);
+assert(copy.data() == x.data());
+assert(copy.size() == x.size());
+  }
+  {
+T array[3] = {};
+std::span x(array, 3);
+std::span copy(x);
+assert(copy.data() == array);
+assert(copy.size() == 3);
+  }
+  {
+T array[3] = {};
+std::span x(array, 2);
+std::span copy(x);
+assert(copy.data() == array);
+assert(copy.size() == 2);
+  }
+
+  // static extent
+  {
+std::span x;
+std::span copy(x);
+assert(copy.data() == x.data());
+assert(copy.size() == x.size());
+  }
+  {
+T array[3] = {};
+std::span x(array);
+std::span copy(x);
+assert(copy.data() == array);
+assert(copy.size() == 3);
+  }
+  {
+T array[2] = {};
+std::span x(array);
+std::span copy(x);
+assert(copy.data() == array);
+assert(copy.size() == 2);
+  }
 }
 
+struct Foo {};
+
+constexpr bool test_all() {
+  test();
+  test();
+  test();
+  test();
 
-int main(int, char**)
-{
-constexpr int carr[] = {1,2,3};
+  test();
+  test();
+  test();
+  test();
 
-static_assert(doCopy(std::span<  int>  ()),"");
-static_assert(doCopy(std::span<  int,0>()),"");
-static_assert(doCopy(std::span  (&carr[0], 1)), "");
-static_assert(doCopy(std::span(&carr[0], 1)), "");
-static_assert(doCopy(std::span  (&carr[0], 2)), "");
-static_assert(doCopy(std::span(&carr[0], 2)), "");
+  test();
+  test();
+  test();
+  test();
 
-static_assert(doCopy(std::span()),   "");
-static_assert(doCopy(std::span()), "");
-static_assert(doCopy(std::span()),  "");
+  // Note: Can't test non-fundamental types with volatile because we require 
`T*` to be indirectly_readable,
+  //   which isn't the case when T is volatile.
+  test();
+  test();
 
-std::string s;
-assert(doCopy(std::span   () ));
-assert(doCopy(std::span() ));
-assert(doCopy(std::span   (&s, 1))

[llvm-branch-commits] [libcxx] 90f2d48 - [libc++] Fix rejects-valid in std::span copy construction (#104500)

2024-08-19 Thread Tobias Hieta via llvm-branch-commits

Author: Louis Dionne
Date: 2024-08-19T09:17:27+02:00
New Revision: 90f2d48965ca8a27f4b814ada987d169ca6a6f44

URL: 
https://github.com/llvm/llvm-project/commit/90f2d48965ca8a27f4b814ada987d169ca6a6f44
DIFF: 
https://github.com/llvm/llvm-project/commit/90f2d48965ca8a27f4b814ada987d169ca6a6f44.diff

LOG: [libc++] Fix rejects-valid in std::span copy construction (#104500)

Trying to copy-construct a std::span from another std::span holding an
incomplete type would fail as we evaluate the SFINAE for the range-based
constructor. The problem was that we checked for __is_std_span after
checking for the range being a contiguous_range, which hard-errored
because of arithmetic on a pointer to incomplete type.

As a drive-by, refactor the whole test and format it.

Fixes #104496

(cherry picked from commit 99696b35bc8a0054e0b0c1a26e8dd5049fa8c41b)

Added: 


Modified: 
libcxx/include/span
libcxx/test/std/containers/views/views.span/span.cons/copy.pass.cpp

Removed: 




diff  --git a/libcxx/include/span b/libcxx/include/span
index 60d76d830f0f31..da631cdc3f90e6 100644
--- a/libcxx/include/span
+++ b/libcxx/include/span
@@ -206,10 +206,10 @@ struct __is_std_span> : true_type {};
 
 template 
 concept __span_compatible_range =
+!__is_std_span>::value &&//
 ranges::contiguous_range<_Range> && //
 ranges::sized_range<_Range> &&  //
 (ranges::borrowed_range<_Range> || is_const_v<_ElementType>) && //
-!__is_std_span>::value &&//
 !__is_std_array>::value &&   //
 !is_array_v> &&  //
 is_convertible_v> 
(*)[], _ElementType (*)[]>;

diff  --git 
a/libcxx/test/std/containers/views/views.span/span.cons/copy.pass.cpp 
b/libcxx/test/std/containers/views/views.span/span.cons/copy.pass.cpp
index 28f13e122ddc5e..d3990fd60a459a 100644
--- a/libcxx/test/std/containers/views/views.span/span.cons/copy.pass.cpp
+++ b/libcxx/test/std/containers/views/views.span/span.cons/copy.pass.cpp
@@ -5,6 +5,7 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 
//===--===//
+
 // UNSUPPORTED: c++03, c++11, c++14, c++17
 
 // 
@@ -14,58 +15,101 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "test_macros.h"
 
-template 
-constexpr bool doCopy(const T &rhs)
-{
-ASSERT_NOEXCEPT(T{rhs});
-T lhs{rhs};
-return lhs.data() == rhs.data()
- &&lhs.size() == rhs.size();
-}
+template 
+constexpr void test() {
+  ASSERT_NOEXCEPT(std::span(std::declval const&>()));
+  ASSERT_NOEXCEPT(std::span{std::declval const&>()});
 
-struct A{};
-
-template 
-void testCV ()
-{
-int  arr[] = {1,2,3};
-assert((doCopy(std::span  ()  )));
-assert((doCopy(std::span()  )));
-assert((doCopy(std::span  (&arr[0], 1;
-assert((doCopy(std::span(&arr[0], 1;
-assert((doCopy(std::span  (&arr[0], 2;
-assert((doCopy(std::span(&arr[0], 2;
+  // dynamic_extent
+  {
+std::span x;
+std::span copy(x);
+assert(copy.data() == x.data());
+assert(copy.size() == x.size());
+  }
+  {
+T array[3] = {};
+std::span x(array, 3);
+std::span copy(x);
+assert(copy.data() == array);
+assert(copy.size() == 3);
+  }
+  {
+T array[3] = {};
+std::span x(array, 2);
+std::span copy(x);
+assert(copy.data() == array);
+assert(copy.size() == 2);
+  }
+
+  // static extent
+  {
+std::span x;
+std::span copy(x);
+assert(copy.data() == x.data());
+assert(copy.size() == x.size());
+  }
+  {
+T array[3] = {};
+std::span x(array);
+std::span copy(x);
+assert(copy.data() == array);
+assert(copy.size() == 3);
+  }
+  {
+T array[2] = {};
+std::span x(array);
+std::span copy(x);
+assert(copy.data() == array);
+assert(copy.size() == 2);
+  }
 }
 
+struct Foo {};
+
+constexpr bool test_all() {
+  test();
+  test();
+  test();
+  test();
 
-int main(int, char**)
-{
-constexpr int carr[] = {1,2,3};
+  test();
+  test();
+  test();
+  test();
 
-static_assert(doCopy(std::span<  int>  ()),"");
-static_assert(doCopy(std::span<  int,0>()),"");
-static_assert(doCopy(std::span  (&carr[0], 1)), "");
-static_assert(doCopy(std::span(&carr[0], 1)), "");
-static_assert(doCopy(std::span  (&carr[0], 2)), "");
-static_assert(doCopy(std::span(&carr[0], 2)), "");
+  test();
+  test();
+  test();
+  test();
 
-static_assert(doCopy(std::span()),   "");
-static_assert(doCopy(std::span()), "");
-static_assert(doCopy(std::span()),  "");
+  // Note: Can't test non-fundamental types with volatile because we require 
`T*` to be indirectly_readable,
+  //   which isn't the case when T is volatile.
+  test();
+  test();
 
-std::string s;
-as

[llvm-branch-commits] [libcxx] release/19.x: [libc++] Fix rejects-valid in std::span copy construction (#104500) (PR #104603)

2024-08-19 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru closed https://github.com/llvm/llvm-project/pull/104603
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [AArch64] Adopt updated B16B16 target flags (PR #104602)

2024-08-19 Thread via llvm-branch-commits

github-actions[bot] wrote:

@SpencerAbson (or anyone else): if you would like to add a note about this fix
in the release notes (completely optional), please reply to this comment with a
one- or two-sentence description of the fix. When you are done, please add the
release:note label to this PR.

https://github.com/llvm/llvm-project/pull/104602
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] release/19.x: [libc++] Fix rejects-valid in std::span copy construction (#104500) (PR #104603)

2024-08-19 Thread via llvm-branch-commits

github-actions[bot] wrote:

@ldionne (or anyone else): if you would like to add a note about this fix in
the release notes (completely optional), please reply to this comment with a
one- or two-sentence description of the fix. When you are done, please add the
release:note label to this PR.

https://github.com/llvm/llvm-project/pull/104603
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang-tools-extra] release/19.x: [clang-tidy] Fix crash in C language in readability-non-const-parameter (#100461) (PR #101878)

2024-08-19 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru updated https://github.com/llvm/llvm-project/pull/101878

>From b45f75295e3038ef79dce4ac63fbf95b659eebe5 Mon Sep 17 00:00:00 2001
From: Piotr Zegar 
Date: Thu, 25 Jul 2024 17:26:01 +0200
Subject: [PATCH] [clang-tidy] Fix crash in C language in
 readability-non-const-parameter (#100461)

Fix a crash that happens when a redeclaration has a
different number of parameters than the definition.

Fixes #100340

(cherry picked from commit a27f816fe56af9cc7f4f296ad6c577f6ea64349f)
---
 .../clang-tidy/readability/NonConstParameterCheck.cpp |  5 -
 clang-tools-extra/docs/ReleaseNotes.rst   |  4 
 .../checkers/readability/non-const-parameter.c| 11 +++
 3 files changed, 19 insertions(+), 1 deletion(-)
 create mode 100644 
clang-tools-extra/test/clang-tidy/checkers/readability/non-const-parameter.c

diff --git 
a/clang-tools-extra/clang-tidy/readability/NonConstParameterCheck.cpp 
b/clang-tools-extra/clang-tidy/readability/NonConstParameterCheck.cpp
index 95a3a5165e2e82..43b69a24bdb16d 100644
--- a/clang-tools-extra/clang-tidy/readability/NonConstParameterCheck.cpp
+++ b/clang-tools-extra/clang-tidy/readability/NonConstParameterCheck.cpp
@@ -157,9 +157,12 @@ void NonConstParameterCheck::diagnoseNonConstParameters() {
 if (!Function)
   continue;
 unsigned Index = Par->getFunctionScopeIndex();
-for (FunctionDecl *FnDecl : Function->redecls())
+for (FunctionDecl *FnDecl : Function->redecls()) {
+  if (FnDecl->getNumParams() <= Index)
+continue;
   Fixes.push_back(FixItHint::CreateInsertion(
   FnDecl->getParamDecl(Index)->getBeginLoc(), "const "));
+}
 
 diag(Par->getLocation(), "pointer parameter '%0' can be pointer to const")
 << Par->getName() << Fixes;
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst 
b/clang-tools-extra/docs/ReleaseNotes.rst
index 083b098d05d4ae..71461968629868 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -496,6 +496,10 @@ Changes in existing checks
   ``static_cast``. Fixed false positives in C++20 spaceship operator by 
ignoring
   casts in implicit and defaulted functions.
 
+- Improved :doc:`readability-non-const-parameter
+  ` check to not crash when
+  redeclaration have fewer parameters than expected.
+
 - Improved :doc:`readability-redundant-inline-specifier
   ` check to properly
   emit warnings for static data member with an in-class initializer.
diff --git 
a/clang-tools-extra/test/clang-tidy/checkers/readability/non-const-parameter.c 
b/clang-tools-extra/test/clang-tidy/checkers/readability/non-const-parameter.c
new file mode 100644
index 00..db50467f3dd94e
--- /dev/null
+++ 
b/clang-tools-extra/test/clang-tidy/checkers/readability/non-const-parameter.c
@@ -0,0 +1,11 @@
+// RUN: %check_clang_tidy %s readability-non-const-parameter %t
+
+static int f();
+
+int f(p)
+  int *p;
+// CHECK-MESSAGES: :[[@LINE-1]]:8: warning: pointer parameter 'p' can be 
pointer to const [readability-non-const-parameter]
+// CHECK-FIXES: {{^}}  const int *p;{{$}}
+{
+return *p;
+}

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang-tools-extra] b45f752 - [clang-tidy] Fix crash in C language in readability-non-const-parameter (#100461)

2024-08-19 Thread Tobias Hieta via llvm-branch-commits

Author: Piotr Zegar
Date: 2024-08-19T09:18:56+02:00
New Revision: b45f75295e3038ef79dce4ac63fbf95b659eebe5

URL: 
https://github.com/llvm/llvm-project/commit/b45f75295e3038ef79dce4ac63fbf95b659eebe5
DIFF: 
https://github.com/llvm/llvm-project/commit/b45f75295e3038ef79dce4ac63fbf95b659eebe5.diff

LOG: [clang-tidy] Fix crash in C language in readability-non-const-parameter 
(#100461)

Fix a crash that happens when a redeclaration has a
different number of parameters than the definition.

Fixes #100340

(cherry picked from commit a27f816fe56af9cc7f4f296ad6c577f6ea64349f)

Added: 
clang-tools-extra/test/clang-tidy/checkers/readability/non-const-parameter.c

Modified: 
clang-tools-extra/clang-tidy/readability/NonConstParameterCheck.cpp
clang-tools-extra/docs/ReleaseNotes.rst

Removed: 




diff  --git 
a/clang-tools-extra/clang-tidy/readability/NonConstParameterCheck.cpp 
b/clang-tools-extra/clang-tidy/readability/NonConstParameterCheck.cpp
index 95a3a5165e2e82..43b69a24bdb16d 100644
--- a/clang-tools-extra/clang-tidy/readability/NonConstParameterCheck.cpp
+++ b/clang-tools-extra/clang-tidy/readability/NonConstParameterCheck.cpp
@@ -157,9 +157,12 @@ void NonConstParameterCheck::diagnoseNonConstParameters() {
 if (!Function)
   continue;
 unsigned Index = Par->getFunctionScopeIndex();
-for (FunctionDecl *FnDecl : Function->redecls())
+for (FunctionDecl *FnDecl : Function->redecls()) {
+  if (FnDecl->getNumParams() <= Index)
+continue;
   Fixes.push_back(FixItHint::CreateInsertion(
   FnDecl->getParamDecl(Index)->getBeginLoc(), "const "));
+}
 
 diag(Par->getLocation(), "pointer parameter '%0' can be pointer to const")
 << Par->getName() << Fixes;

diff  --git a/clang-tools-extra/docs/ReleaseNotes.rst 
b/clang-tools-extra/docs/ReleaseNotes.rst
index 083b098d05d4ae..71461968629868 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -496,6 +496,10 @@ Changes in existing checks
   ``static_cast``. Fixed false positives in C++20 spaceship operator by 
ignoring
   casts in implicit and defaulted functions.
 
+- Improved :doc:`readability-non-const-parameter
+  ` check to not crash when
+  redeclaration have fewer parameters than expected.
+
 - Improved :doc:`readability-redundant-inline-specifier
   ` check to properly
   emit warnings for static data member with an in-class initializer.

diff  --git 
a/clang-tools-extra/test/clang-tidy/checkers/readability/non-const-parameter.c 
b/clang-tools-extra/test/clang-tidy/checkers/readability/non-const-parameter.c
new file mode 100644
index 00..db50467f3dd94e
--- /dev/null
+++ 
b/clang-tools-extra/test/clang-tidy/checkers/readability/non-const-parameter.c
@@ -0,0 +1,11 @@
+// RUN: %check_clang_tidy %s readability-non-const-parameter %t
+
+static int f();
+
+int f(p)
+  int *p;
+// CHECK-MESSAGES: :[[@LINE-1]]:8: warning: pointer parameter 'p' can be 
pointer to const [readability-non-const-parameter]
+// CHECK-FIXES: {{^}}  const int *p;{{$}}
+{
+return *p;
+}



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang-tools-extra] release/19.x: [clang-tidy] Fix crash in C language in readability-non-const-parameter (#100461) (PR #101878)

2024-08-19 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru closed https://github.com/llvm/llvm-project/pull/101878
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang-tools-extra] release/19.x: [clang-tidy] Fix crash in C language in readability-non-const-parameter (#100461) (PR #101878)

2024-08-19 Thread via llvm-branch-commits

github-actions[bot] wrote:

@nikic (or anyone else): if you would like to add a note about this fix in the
release notes (completely optional), please reply to this comment with a one- or
two-sentence description of the fix. When you are done, please add the
release:note label to this PR.

https://github.com/llvm/llvm-project/pull/101878
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [openmp] [OpenMP][AArch64] Fix branch protection in microtasks (#102317) (PR #103491)

2024-08-19 Thread Tobias Hieta via llvm-branch-commits

tru wrote:

Ok - I am always open to accept things that improve security unless the risk is 
huge. But it sounds like this will only affect applications that are linking to 
openmp? Do you see any other risks of accepting this now?

https://github.com/llvm/llvm-project/pull/103491
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/19.x: Fix codegen of consteval functions returning an empty class, and related issues (#93115) (PR #102070)

2024-08-19 Thread Tobias Hieta via llvm-branch-commits

tru wrote:

Ping on this one. Can someone review this change?

https://github.com/llvm/llvm-project/pull/102070
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [AArch64] Add streaming-mode stack hazard optimization remarks (#101695) (PR #102168)

2024-08-19 Thread Tobias Hieta via llvm-branch-commits

tru wrote:

Thanks for the discussion. I am going to allow this since it's pretty contained
and has a big upside for certain types of users. I think this really skirts the
line; in the future, I hope things like this can land on the main branch before
the release branch is created.

https://github.com/llvm/llvm-project/pull/102168
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [AArch64] Add streaming-mode stack hazard optimization remarks (#101695) (PR #102168)

2024-08-19 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru updated https://github.com/llvm/llvm-project/pull/102168

>From 8fbe69a407b2784c7e9d91a3c69daa9786b14391 Mon Sep 17 00:00:00 2001
From: Hari Limaye 
Date: Tue, 6 Aug 2024 11:39:01 +0100
Subject: [PATCH] [AArch64] Add streaming-mode stack hazard optimization
 remarks (#101695)

Emit an optimization remark when objects in the stack frame may cause
hazards in a streaming mode function. The analysis requires either the
`aarch64-stack-hazard-size` or `aarch64-stack-hazard-remark-size` flag
to be set by the user, with the former flag taking precedence.

(cherry picked from commit a98a0dcf63f54c54c5601a34c9f8c10cde0162d6)
---
 .../llvm/CodeGen/TargetFrameLowering.h|   6 +
 llvm/lib/CodeGen/PrologEpilogInserter.cpp |   3 +
 .../Target/AArch64/AArch64FrameLowering.cpp   | 204 +-
 .../lib/Target/AArch64/AArch64FrameLowering.h |   6 +-
 .../AArch64/ssve-stack-hazard-remarks.ll  | 152 +
 .../CodeGen/AArch64/sve-stack-frame-layout.ll |   4 +-
 6 files changed, 364 insertions(+), 11 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/ssve-stack-hazard-remarks.ll

diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h 
b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
index 0656c0d739fdfa..d8c9d0a432ad8f 100644
--- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
@@ -15,6 +15,7 @@
 
 #include "llvm/ADT/BitVector.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
 #include "llvm/Support/TypeSize.h"
 #include 
 
@@ -473,6 +474,11 @@ class TargetFrameLowering {
   /// Return the frame base information to be encoded in the DWARF subprogram
   /// debug info.
   virtual DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const;
+
+  /// This method is called at the end of prolog/epilog code insertion, so
+  /// targets can emit remarks based on the final frame layout.
+  virtual void emitRemarks(const MachineFunction &MF,
+   MachineOptimizationRemarkEmitter *ORE) const {};
 };
 
 } // End llvm namespace
diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp 
b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index cd5d877e53d827..f4490873cfdcdb 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -341,6 +341,9 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
<< ore::NV("Function", MF.getFunction().getName()) << "'";
   });
 
+  // Emit any remarks implemented for the target, based on final frame layout.
+  TFI->emitRemarks(MF, ORE);
+
   delete RS;
   SaveBlocks.clear();
   RestoreBlocks.clear();
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp 
b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index bd530903bb664a..ba46ededc63a83 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -240,6 +240,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
@@ -275,6 +276,10 @@ cl::opt EnableHomogeneousPrologEpilog(
 // Stack hazard padding size. 0 = disabled.
 static cl::opt StackHazardSize("aarch64-stack-hazard-size",
  cl::init(0), cl::Hidden);
+// Stack hazard size for analysis remarks. StackHazardSize takes precedence.
+static cl::opt
+StackHazardRemarkSize("aarch64-stack-hazard-remark-size", cl::init(0),
+  cl::Hidden);
 // Whether to insert padding into non-streaming functions (for testing).
 static cl::opt
 StackHazardInNonStreaming("aarch64-stack-hazard-in-non-streaming",
@@ -2615,9 +2620,16 @@ AArch64FrameLowering::getFrameIndexReferenceFromSP(const 
MachineFunction &MF,
   const auto &MFI = MF.getFrameInfo();
 
   int64_t ObjectOffset = MFI.getObjectOffset(FI);
+  StackOffset SVEStackSize = getSVEStackSize(MF);
+
+  // For VLA-area objects, just emit an offset at the end of the stack frame.
+  // Whilst not quite correct, these objects do live at the end of the frame 
and
+  // so it is more useful for analysis for the offset to reflect this.
+  if (MFI.isVariableSizedObjectIndex(FI)) {
+return StackOffset::getFixed(-((int64_t)MFI.getStackSize())) - 
SVEStackSize;
+  }
 
   // This is correct in the absence of any SVE stack objects.
-  StackOffset SVEStackSize = getSVEStackSize(MF);
   if (!SVEStackSize)
 return StackOffset::getFixed(ObjectOffset - getOffsetOfLocalArea());
 
@@ -3528,13 +3540,9 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
   return true;
 }
 
-// Return the FrameID for a Load/Store instruction by looking at the MMO.
-static std::optional getLdStFrameID(const MachineInstr &MI,
- const MachineF

[llvm-branch-commits] [llvm] 8fbe69a - [AArch64] Add streaming-mode stack hazard optimization remarks (#101695)

2024-08-19 Thread Tobias Hieta via llvm-branch-commits

Author: Hari Limaye
Date: 2024-08-19T09:27:19+02:00
New Revision: 8fbe69a407b2784c7e9d91a3c69daa9786b14391

URL: 
https://github.com/llvm/llvm-project/commit/8fbe69a407b2784c7e9d91a3c69daa9786b14391
DIFF: 
https://github.com/llvm/llvm-project/commit/8fbe69a407b2784c7e9d91a3c69daa9786b14391.diff

LOG: [AArch64] Add streaming-mode stack hazard optimization remarks (#101695)

Emit an optimization remark when objects in the stack frame may cause
hazards in a streaming mode function. The analysis requires either the
`aarch64-stack-hazard-size` or `aarch64-stack-hazard-remark-size` flag
to be set by the user, with the former flag taking precedence.

(cherry picked from commit a98a0dcf63f54c54c5601a34c9f8c10cde0162d6)

Added: 
llvm/test/CodeGen/AArch64/ssve-stack-hazard-remarks.ll

Modified: 
llvm/include/llvm/CodeGen/TargetFrameLowering.h
llvm/lib/CodeGen/PrologEpilogInserter.cpp
llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
llvm/lib/Target/AArch64/AArch64FrameLowering.h
llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll

Removed: 




diff  --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h 
b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
index 0656c0d739fdfa..d8c9d0a432ad8f 100644
--- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
@@ -15,6 +15,7 @@
 
 #include "llvm/ADT/BitVector.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
 #include "llvm/Support/TypeSize.h"
 #include 
 
@@ -473,6 +474,11 @@ class TargetFrameLowering {
   /// Return the frame base information to be encoded in the DWARF subprogram
   /// debug info.
   virtual DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const;
+
+  /// This method is called at the end of prolog/epilog code insertion, so
+  /// targets can emit remarks based on the final frame layout.
+  virtual void emitRemarks(const MachineFunction &MF,
+   MachineOptimizationRemarkEmitter *ORE) const {};
 };
 
 } // End llvm namespace

diff  --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp 
b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index cd5d877e53d827..f4490873cfdcdb 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -341,6 +341,9 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
<< ore::NV("Function", MF.getFunction().getName()) << "'";
   });
 
+  // Emit any remarks implemented for the target, based on final frame layout.
+  TFI->emitRemarks(MF, ORE);
+
   delete RS;
   SaveBlocks.clear();
   RestoreBlocks.clear();

diff  --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp 
b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index bd530903bb664a..ba46ededc63a83 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -240,6 +240,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
@@ -275,6 +276,10 @@ cl::opt EnableHomogeneousPrologEpilog(
 // Stack hazard padding size. 0 = disabled.
 static cl::opt StackHazardSize("aarch64-stack-hazard-size",
  cl::init(0), cl::Hidden);
+// Stack hazard size for analysis remarks. StackHazardSize takes precedence.
+static cl::opt
+StackHazardRemarkSize("aarch64-stack-hazard-remark-size", cl::init(0),
+  cl::Hidden);
 // Whether to insert padding into non-streaming functions (for testing).
 static cl::opt
 StackHazardInNonStreaming("aarch64-stack-hazard-in-non-streaming",
@@ -2615,9 +2620,16 @@ AArch64FrameLowering::getFrameIndexReferenceFromSP(const 
MachineFunction &MF,
   const auto &MFI = MF.getFrameInfo();
 
   int64_t ObjectOffset = MFI.getObjectOffset(FI);
+  StackOffset SVEStackSize = getSVEStackSize(MF);
+
+  // For VLA-area objects, just emit an offset at the end of the stack frame.
+  // Whilst not quite correct, these objects do live at the end of the frame 
and
+  // so it is more useful for analysis for the offset to reflect this.
+  if (MFI.isVariableSizedObjectIndex(FI)) {
+return StackOffset::getFixed(-((int64_t)MFI.getStackSize())) - 
SVEStackSize;
+  }
 
   // This is correct in the absence of any SVE stack objects.
-  StackOffset SVEStackSize = getSVEStackSize(MF);
   if (!SVEStackSize)
 return StackOffset::getFixed(ObjectOffset - getOffsetOfLocalArea());
 
@@ -3528,13 +3540,9 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
   return true;
 }
 
-// Return the FrameID for a Load/Store instruction by looking at the MMO.
-static std::optional getLdStFrameID(const MachineInstr &MI,
-

[llvm-branch-commits] [llvm] release/19.x: [AArch64] Add streaming-mode stack hazard optimization remarks (#101695) (PR #102168)

2024-08-19 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru closed https://github.com/llvm/llvm-project/pull/102168
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/19.x: Reland [C++20] [Modules] [Itanium ABI] Generate the vtable in the mod… (#102287) (PR #102561)

2024-08-19 Thread Tobias Hieta via llvm-branch-commits

tru wrote:

Any update on this one?

https://github.com/llvm/llvm-project/pull/102561
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [AArch64] Add streaming-mode stack hazard optimization remarks (#101695) (PR #102168)

2024-08-19 Thread via llvm-branch-commits

github-actions[bot] wrote:

@hazzlim (or anyone else): if you would like to add a note about this fix in
the release notes (completely optional), please reply to this comment with a
one- or two-sentence description of the fix. When you are done, please add the
release:note label to this PR.

https://github.com/llvm/llvm-project/pull/102168
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/19.x: [clang] Avoid triggering vtable instantiation for C++23 constexpr dtor (#102605) (PR #102924)

2024-08-19 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru updated https://github.com/llvm/llvm-project/pull/102924

>From 6e3026883d77124e32a2a7be72c3361fba3e7457 Mon Sep 17 00:00:00 2001
From: Mariya Podchishchaeva 
Date: Mon, 12 Aug 2024 09:08:46 +0200
Subject: [PATCH] [clang] Avoid triggering vtable instantiation for C++23
 constexpr dtor (#102605)

In C++23 anything can be constexpr, including a dtor of a class whose
members and bases don't have constexpr dtors. Avoid early triggering of
vtable instantiation in this case.

Fixes https://github.com/llvm/llvm-project/issues/102293

(cherry picked from commit d469794d0cdfd2fea50a6ce0c0e33abb242d744c)
---
 clang/lib/Sema/SemaDeclCXX.cpp  | 29 -
 clang/test/SemaCXX/gh102293.cpp | 22 ++
 2 files changed, 50 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/SemaCXX/gh102293.cpp

diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index 66ca62f5d7c4cd..ecf8754143a49e 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -7042,11 +7042,38 @@ void Sema::CheckCompletedCXXClass(Scope *S, 
CXXRecordDecl *Record) {
   }
 }
 
+bool EffectivelyConstexprDestructor = true;
+// Avoid triggering vtable instantiation due to a dtor that is not
+// "effectively constexpr" for better compatibility.
+// See https://github.com/llvm/llvm-project/issues/102293 for more info.
+if (isa(M)) {
+  auto Check = [](QualType T, auto &&Check) -> bool {
+const CXXRecordDecl *RD =
+T->getBaseElementTypeUnsafe()->getAsCXXRecordDecl();
+if (!RD || !RD->isCompleteDefinition())
+  return true;
+
+if (!RD->hasConstexprDestructor())
+  return false;
+
+for (const CXXBaseSpecifier &B : RD->bases())
+  if (!Check(B.getType(), Check))
+return false;
+for (const FieldDecl *FD : RD->fields())
+  if (!Check(FD->getType(), Check))
+return false;
+return true;
+  };
+  EffectivelyConstexprDestructor =
+  Check(QualType(Record->getTypeForDecl(), 0), Check);
+}
+
 // Define defaulted constexpr virtual functions that override a base class
 // function right away.
 // FIXME: We can defer doing this until the vtable is marked as used.
 if (CSM != CXXSpecialMemberKind::Invalid && !M->isDeleted() &&
-M->isDefaulted() && M->isConstexpr() && M->size_overridden_methods())
+M->isDefaulted() && M->isConstexpr() && M->size_overridden_methods() &&
+EffectivelyConstexprDestructor)
   DefineDefaultedFunction(*this, M, M->getLocation());
 
 if (!Incomplete)
diff --git a/clang/test/SemaCXX/gh102293.cpp b/clang/test/SemaCXX/gh102293.cpp
new file mode 100644
index 00..30629fc03bf6a9
--- /dev/null
+++ b/clang/test/SemaCXX/gh102293.cpp
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -std=c++23 -fsyntax-only -verify %s
+// expected-no-diagnostics
+
+template  static void destroy() {
+T t;
+++t;
+}
+
+struct Incomplete;
+
+template  struct HasD {
+  ~HasD() { destroy(); }
+};
+
+struct HasVT {
+  virtual ~HasVT();
+};
+
+struct S : HasVT {
+  HasD<> v;
+};
+

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] 6e30268 - [clang] Avoid triggering vtable instantiation for C++23 constexpr dtor (#102605)

2024-08-19 Thread Tobias Hieta via llvm-branch-commits

Author: Mariya Podchishchaeva
Date: 2024-08-19T09:28:25+02:00
New Revision: 6e3026883d77124e32a2a7be72c3361fba3e7457

URL: 
https://github.com/llvm/llvm-project/commit/6e3026883d77124e32a2a7be72c3361fba3e7457
DIFF: 
https://github.com/llvm/llvm-project/commit/6e3026883d77124e32a2a7be72c3361fba3e7457.diff

LOG: [clang] Avoid triggering vtable instantiation for C++23 constexpr dtor 
(#102605)

In C++23 anything can be constexpr, including a dtor of a class whose
members and bases don't have constexpr dtors. Avoid early triggering of
vtable instantiation in this case.

Fixes https://github.com/llvm/llvm-project/issues/102293

(cherry picked from commit d469794d0cdfd2fea50a6ce0c0e33abb242d744c)

Added: 
clang/test/SemaCXX/gh102293.cpp

Modified: 
clang/lib/Sema/SemaDeclCXX.cpp

Removed: 




diff  --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index 66ca62f5d7c4cd..ecf8754143a49e 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -7042,11 +7042,38 @@ void Sema::CheckCompletedCXXClass(Scope *S, 
CXXRecordDecl *Record) {
   }
 }
 
+bool EffectivelyConstexprDestructor = true;
+// Avoid triggering vtable instantiation due to a dtor that is not
+// "effectively constexpr" for better compatibility.
+// See https://github.com/llvm/llvm-project/issues/102293 for more info.
+if (isa(M)) {
+  auto Check = [](QualType T, auto &&Check) -> bool {
+const CXXRecordDecl *RD =
+T->getBaseElementTypeUnsafe()->getAsCXXRecordDecl();
+if (!RD || !RD->isCompleteDefinition())
+  return true;
+
+if (!RD->hasConstexprDestructor())
+  return false;
+
+for (const CXXBaseSpecifier &B : RD->bases())
+  if (!Check(B.getType(), Check))
+return false;
+for (const FieldDecl *FD : RD->fields())
+  if (!Check(FD->getType(), Check))
+return false;
+return true;
+  };
+  EffectivelyConstexprDestructor =
+  Check(QualType(Record->getTypeForDecl(), 0), Check);
+}
+
 // Define defaulted constexpr virtual functions that override a base class
 // function right away.
 // FIXME: We can defer doing this until the vtable is marked as used.
 if (CSM != CXXSpecialMemberKind::Invalid && !M->isDeleted() &&
-M->isDefaulted() && M->isConstexpr() && M->size_overridden_methods())
+M->isDefaulted() && M->isConstexpr() && M->size_overridden_methods() &&
+EffectivelyConstexprDestructor)
   DefineDefaultedFunction(*this, M, M->getLocation());
 
 if (!Incomplete)

diff  --git a/clang/test/SemaCXX/gh102293.cpp b/clang/test/SemaCXX/gh102293.cpp
new file mode 100644
index 00..30629fc03bf6a9
--- /dev/null
+++ b/clang/test/SemaCXX/gh102293.cpp
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -std=c++23 -fsyntax-only -verify %s
+// expected-no-diagnostics
+
+template  static void destroy() {
+T t;
+++t;
+}
+
+struct Incomplete;
+
+template  struct HasD {
+  ~HasD() { destroy(); }
+};
+
+struct HasVT {
+  virtual ~HasVT();
+};
+
+struct S : HasVT {
+  HasD<> v;
+};
+



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/19.x: [clang] Avoid triggering vtable instantiation for C++23 constexpr dtor (#102605) (PR #102924)

2024-08-19 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru closed https://github.com/llvm/llvm-project/pull/102924
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/19.x: [clang] Avoid triggering vtable instantiation for C++23 constexpr dtor (#102605) (PR #102924)

2024-08-19 Thread via llvm-branch-commits

github-actions[bot] wrote:

@llvmbot (or anyone else): if you would like to add a note about this fix in 
the release notes (completely optional), please reply to this comment with a 
one- or two-sentence description of the fix. When you are done, please add the 
release:note label to this PR. 

https://github.com/llvm/llvm-project/pull/102924
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang] Introduce custom HLFIR lowering for loops in workshare construct (PR #101445)

2024-08-19 Thread Ivan R. Ivanov via llvm-branch-commits

https://github.com/ivanradanov edited 
https://github.com/llvm/llvm-project/pull/101445
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [Mips] Fix fast isel for i16 bswap. (#103398) (PR #104745)

2024-08-19 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/104745

Backport ebe7265b142f370f0a563fece5db22f57383ba2d

Requested by: @nikic

>From 9263d00e6bcbd1408b4c8c5b98b61332460911b5 Mon Sep 17 00:00:00 2001
From: Craig Topper 
Date: Fri, 16 Aug 2024 14:54:51 -0700
Subject: [PATCH] [Mips] Fix fast isel for i16 bswap. (#103398)

We need to mask the SRL result to 8 bits before ORing in the SLL. This
is needed in case bits 23:16 of the input aren't zero. They will have
been shifted into bits 15:8.

We don't need to AND the result with 0xFFFF. It's ok if the upper 16
bits of the register are garbage.

Fixes #103035.

(cherry picked from commit ebe7265b142f370f0a563fece5db22f57383ba2d)
---
 llvm/lib/Target/Mips/MipsFastISel.cpp  | 4 ++--
 llvm/test/CodeGen/Mips/Fast-ISel/bswap1.ll | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/Mips/MipsFastISel.cpp 
b/llvm/lib/Target/Mips/MipsFastISel.cpp
index bd8ef43da625c3..64a0e9321598ff 100644
--- a/llvm/lib/Target/Mips/MipsFastISel.cpp
+++ b/llvm/lib/Target/Mips/MipsFastISel.cpp
@@ -1608,8 +1608,8 @@ bool MipsFastISel::fastLowerIntrinsicCall(const 
IntrinsicInst *II) {
 }
 emitInst(Mips::SLL, TempReg[0]).addReg(SrcReg).addImm(8);
 emitInst(Mips::SRL, TempReg[1]).addReg(SrcReg).addImm(8);
-emitInst(Mips::OR, TempReg[2]).addReg(TempReg[0]).addReg(TempReg[1]);
-emitInst(Mips::ANDi, DestReg).addReg(TempReg[2]).addImm(0xFFFF);
+emitInst(Mips::ANDi, TempReg[2]).addReg(TempReg[1]).addImm(0xFF);
+emitInst(Mips::OR, DestReg).addReg(TempReg[0]).addReg(TempReg[2]);
 updateValueMap(II, DestReg);
 return true;
   }
diff --git a/llvm/test/CodeGen/Mips/Fast-ISel/bswap1.ll 
b/llvm/test/CodeGen/Mips/Fast-ISel/bswap1.ll
index bd762a0e1d741f..ce664c78e86c2a 100644
--- a/llvm/test/CodeGen/Mips/Fast-ISel/bswap1.ll
+++ b/llvm/test/CodeGen/Mips/Fast-ISel/bswap1.ll
@@ -21,8 +21,8 @@ define void @b16() {
 
   ; 32R1:   sll   $[[TMP1:[0-9]+]], $[[A_VAL]], 8
   ; 32R1:   srl   $[[TMP2:[0-9]+]], $[[A_VAL]], 8
-  ; 32R1:   or$[[TMP3:[0-9]+]], $[[TMP1]], $[[TMP2]]
-  ; 32R1:   andi  $[[TMP4:[0-9]+]], $[[TMP3]], 65535
+  ; 32R1:   andi  $[[TMP3:[0-9]+]], $[[TMP2]], 255
+  ; 32R1:   or$[[RESULT:[0-9]+]], $[[TMP1]], $[[TMP3]]
 
   ; 32R2:   wsbh  $[[RESULT:[0-9]+]], $[[A_VAL]]
 

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [Mips] Fix fast isel for i16 bswap. (#103398) (PR #104745)

2024-08-19 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/104745
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [Mips] Fix fast isel for i16 bswap. (#103398) (PR #104745)

2024-08-19 Thread via llvm-branch-commits

llvmbot wrote:

@dtcxzyw What do you think about merging this PR to the release branch?

https://github.com/llvm/llvm-project/pull/104745
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang] Introduce custom HLFIR lowering for loops in workshare construct (PR #101445)

2024-08-19 Thread Ivan R. Ivanov via llvm-branch-commits

https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101445

>From 38c581c8defc81105160a69bb46a9e489b56f10e Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:12:34 +0900
Subject: [PATCH 1/4] [flang] Introduce ws loop nest generation for HLFIR
 lowering

---
 .../flang/Optimizer/Builder/HLFIRTools.h  | 12 +++--
 flang/lib/Lower/ConvertCall.cpp   |  2 +-
 flang/lib/Lower/OpenMP/ReductionProcessor.cpp |  4 +-
 flang/lib/Optimizer/Builder/HLFIRTools.cpp| 52 ++-
 .../HLFIR/Transforms/BufferizeHLFIR.cpp   |  3 +-
 .../LowerHLFIROrderedAssignments.cpp  | 30 +--
 .../Transforms/OptimizedBufferization.cpp |  6 +--
 7 files changed, 69 insertions(+), 40 deletions(-)

diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h 
b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
index 6b41025eea0780..14e42c6f358e46 100644
--- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h
+++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
@@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp(
 
 /// Structure to describe a loop nest.
 struct LoopNest {
-  fir::DoLoopOp outerLoop;
-  fir::DoLoopOp innerLoop;
+  mlir::Operation *outerOp;
+  mlir::Block *body;
   llvm::SmallVector oneBasedIndices;
 };
 
@@ -366,11 +366,13 @@ struct LoopNest {
 /// \p isUnordered specifies whether the loops in the loop nest
 /// are unordered.
 LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
- mlir::ValueRange extents, bool isUnordered = false);
+ mlir::ValueRange extents, bool isUnordered = false,
+ bool emitWsLoop = false);
 inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
-mlir::Value shape, bool isUnordered = false) {
+mlir::Value shape, bool isUnordered = false,
+bool emitWsLoop = false) {
   return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape),
- isUnordered);
+ isUnordered, emitWsLoop);
 }
 
 /// Inline the body of an hlfir.elemental at the current insertion point
diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp
index fd873f55dd844e..0689d6e033dd9c 100644
--- a/flang/lib/Lower/ConvertCall.cpp
+++ b/flang/lib/Lower/ConvertCall.cpp
@@ -2128,7 +2128,7 @@ class ElementalCallBuilder {
   hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered);
   mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices;
   auto insPt = builder.saveInsertionPoint();
-  builder.setInsertionPointToStart(loopNest.innerLoop.getBody());
+  builder.setInsertionPointToStart(loopNest.body);
   callContext.stmtCtx.pushScope();
   for (auto &preparedActual : loweredActuals)
 if (preparedActual)
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp 
b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
index c3c1f363033c27..72a90dd0d6f29d 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
@@ -375,7 +375,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, 
mlir::Location loc,
   // know this won't miss any opportuinties for clever elemental inlining
   hlfir::LoopNest nest = hlfir::genLoopNest(
   loc, builder, shapeShift.getExtents(), /*isUnordered=*/true);
-  builder.setInsertionPointToStart(nest.innerLoop.getBody());
+  builder.setInsertionPointToStart(nest.body);
   mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy());
   auto lhsEleAddr = builder.create(
   loc, refTy, lhs, shapeShift, /*slice=*/mlir::Value{},
@@ -389,7 +389,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, 
mlir::Location loc,
   builder, loc, redId, refTy, lhsEle, rhsEle);
   builder.create(loc, scalarReduction, lhsEleAddr);
 
-  builder.setInsertionPointAfter(nest.outerLoop);
+  builder.setInsertionPointAfter(nest.outerOp);
   builder.create(loc, lhsAddr);
 }
 
diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp 
b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
index 8d0ae2f195178c..cd07cb741eb4bb 100644
--- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp
+++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
@@ -20,6 +20,7 @@
 #include "mlir/IR/IRMapping.h"
 #include "mlir/Support/LLVM.h"
 #include "llvm/ADT/TypeSwitch.h"
+#include 
 #include 
 
 // Return explicit extents. If the base is a fir.box, this won't read it to
@@ -855,26 +856,51 @@ mlir::Value hlfir::inlineElementalOp(
 
 hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc,
fir::FirOpBuilder &builder,
-   mlir::ValueRange extents, bool isUnordered) 
{
+   mlir::ValueRange extents, bool isUnordered,
+   bool emitWsLoop) {
   hlfir::LoopNest loopNest;
   assert(!extents.empty() && 

[llvm-branch-commits] [llvm] release/19.x: [SLP]Fix PR104422: Wrong value truncation (PR #104747)

2024-08-19 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/104747

Backport 65ac12d3c9877ecf5b97552364e7eead887d94eb 
56140a8258a3498cfcd9f0f05c182457d43cbfd2

Requested by: @nikic

>From 92cec47736ec4ba1ef33649660a4fc21bdd8e57e Mon Sep 17 00:00:00 2001
From: Alexey Bataev 
Date: Thu, 15 Aug 2024 07:21:10 -0700
Subject: [PATCH 1/2] [SLP][NFC]Add a test with incorrect minbitwidth analysis
 for reduced operands

(cherry picked from commit 65ac12d3c9877ecf5b97552364e7eead887d94eb)
---
 .../X86/operand-is-reduced-val.ll | 46 +++
 1 file changed, 46 insertions(+)
 create mode 100644 
llvm/test/Transforms/SLPVectorizer/X86/operand-is-reduced-val.ll

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/operand-is-reduced-val.ll 
b/llvm/test/Transforms/SLPVectorizer/X86/operand-is-reduced-val.ll
new file mode 100644
index 00..5fb93e27539d8e
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/operand-is-reduced-val.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux < %s 
-slp-threshold=-10 | FileCheck %s
+
+define i64 @src(i32 %a) {
+; CHECK-LABEL: define i64 @src(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:[[TMP17:%.*]] = sext i32 [[A]] to i64
+; CHECK-NEXT:[[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0
+; CHECK-NEXT:[[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> 
poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:[[TMP3:%.*]] = add <4 x i32> [[TMP2]], 
+; CHECK-NEXT:[[TMP4:%.*]] = sext <4 x i32> [[TMP3]] to <4 x i64>
+; CHECK-NEXT:[[TMP5:%.*]] = and <4 x i32> [[TMP3]], 
+; CHECK-NEXT:[[TMP6:%.*]] = zext <4 x i32> [[TMP5]] to <4 x i64>
+; CHECK-NEXT:[[TMP18:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x 
i64> [[TMP6]])
+; CHECK-NEXT:[[TMP16:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x 
i64> [[TMP4]])
+; CHECK-NEXT:[[TMP19:%.*]] = add i64 [[TMP18]], [[TMP16]]
+; CHECK-NEXT:[[OP_RDX1:%.*]] = add i64 [[TMP19]], 4294967297
+; CHECK-NEXT:[[TMP21:%.*]] = add i64 [[OP_RDX1]], [[TMP17]]
+; CHECK-NEXT:ret i64 [[TMP21]]
+;
+entry:
+  %0 = sext i32 %a to i64
+  %1 = add nsw i64 %0, 4294967297
+  %2 = sext i32 %a to i64
+  %3 = add nsw i64 %2, 4294967297
+  %4 = add i64 %3, %1
+  %5 = and i64 %3, 1
+  %6 = add i64 %4, %5
+  %7 = sext i32 %a to i64
+  %8 = add nsw i64 %7, 4294967297
+  %9 = add i64 %8, %6
+  %10 = and i64 %8, 1
+  %11 = add i64 %9, %10
+  %12 = sext i32 %a to i64
+  %13 = add nsw i64 %12, 4294967297
+  %14 = add i64 %13, %11
+  %15 = and i64 %13, 1
+  %16 = add i64 %14, %15
+  %17 = sext i32 %a to i64
+  %18 = add nsw i64 %17, 4294967297
+  %19 = add i64 %18, %16
+  %20 = and i64 %18, 1
+  %21 = add i64 %19, %20
+  ret i64 %21
+}

>From 60b6cb6403168fbb62f1dd79083ac768d747edb9 Mon Sep 17 00:00:00 2001
From: Alexey Bataev 
Date: Thu, 15 Aug 2024 07:57:37 -0700
Subject: [PATCH 2/2] [SLP]Fix PR104422: Wrong value truncation

The minbitwidth restrictions can be skipped only for immediate reduced
values; for other nodes we still need to check whether external users
allow bitwidth reduction.

Fixes https://github.com/llvm/llvm-project/issues/104422

(cherry picked from commit 56140a8258a3498cfcd9f0f05c182457d43cbfd2)
---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp |  3 ++-
 .../SLPVectorizer/X86/operand-is-reduced-val.ll | 17 ++---
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 
b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index cca9eeebaa53f0..0cddc510d36dac 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -15211,7 +15211,8 @@ bool BoUpSLP::collectValuesToDemote(
   if (any_of(E.Scalars, [&](Value *V) {
 return !all_of(V->users(), [=](User *U) {
   return getTreeEntry(U) ||
- (UserIgnoreList && UserIgnoreList->contains(U)) ||
+ (E.Idx == 0 && UserIgnoreList &&
+  UserIgnoreList->contains(U)) ||
  (!isa(U) && U->getType()->isSized() &&
   !U->getType()->isScalableTy() &&
   DL->getTypeSizeInBits(U->getType()) <= BitWidth);
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/operand-is-reduced-val.ll 
b/llvm/test/Transforms/SLPVectorizer/X86/operand-is-reduced-val.ll
index 5fb93e27539d8e..5fcac3fbf3bafe 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/operand-is-reduced-val.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/operand-is-reduced-val.ll
@@ -8,15 +8,18 @@ define i64 @src(i32 %a) {
 ; CHECK-NEXT:[[TMP17:%.*]] = sext i32 [[A]] to i64
 ; CHECK-NEXT:[[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0
 ; CHECK-NEXT:[[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> 
poison, <4 x i32> zeroinitializer
-; C

[llvm-branch-commits] [llvm] release/19.x: [SLP]Fix PR104422: Wrong value truncation (PR #104747)

2024-08-19 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/104747
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [SLP]Fix PR104422: Wrong value truncation (PR #104747)

2024-08-19 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-transforms

Author: None (llvmbot)


Changes

Backport 65ac12d3c9877ecf5b97552364e7eead887d94eb 
56140a8258a3498cfcd9f0f05c182457d43cbfd2

Requested by: @nikic

---
Full diff: https://github.com/llvm/llvm-project/pull/104747.diff


2 Files Affected:

- (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+2-1) 
- (added) llvm/test/Transforms/SLPVectorizer/X86/operand-is-reduced-val.ll 
(+49) 


```diff
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 
b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index cca9eeebaa53f0..0cddc510d36dac 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -15211,7 +15211,8 @@ bool BoUpSLP::collectValuesToDemote(
   if (any_of(E.Scalars, [&](Value *V) {
 return !all_of(V->users(), [=](User *U) {
   return getTreeEntry(U) ||
- (UserIgnoreList && UserIgnoreList->contains(U)) ||
+ (E.Idx == 0 && UserIgnoreList &&
+  UserIgnoreList->contains(U)) ||
  (!isa(U) && U->getType()->isSized() &&
   !U->getType()->isScalableTy() &&
   DL->getTypeSizeInBits(U->getType()) <= BitWidth);
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/operand-is-reduced-val.ll 
b/llvm/test/Transforms/SLPVectorizer/X86/operand-is-reduced-val.ll
new file mode 100644
index 00..5fcac3fbf3bafe
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/operand-is-reduced-val.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux < %s 
-slp-threshold=-10 | FileCheck %s
+
+define i64 @src(i32 %a) {
+; CHECK-LABEL: define i64 @src(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:[[TMP17:%.*]] = sext i32 [[A]] to i64
+; CHECK-NEXT:[[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0
+; CHECK-NEXT:[[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> 
poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:[[TMP3:%.*]] = sext <4 x i32> [[TMP2]] to <4 x i64>
+; CHECK-NEXT:[[TMP4:%.*]] = add nsw <4 x i64> [[TMP3]], 
+; CHECK-NEXT:[[TMP6:%.*]] = and <4 x i64> [[TMP4]], 
+; CHECK-NEXT:[[TMP18:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x 
i64> [[TMP6]])
+; CHECK-NEXT:[[TMP16:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x 
i64> [[TMP4]])
+; CHECK-NEXT:[[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP16]], 
i32 0
+; CHECK-NEXT:[[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 
[[TMP18]], i32 1
+; CHECK-NEXT:[[TMP10:%.*]] = insertelement <2 x i64> , i64 [[TMP17]], i32 0
+; CHECK-NEXT:[[TMP11:%.*]] = add <2 x i64> [[TMP9]], [[TMP10]]
+; CHECK-NEXT:[[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
+; CHECK-NEXT:[[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
+; CHECK-NEXT:[[TMP21:%.*]] = add i64 [[TMP12]], [[TMP13]]
+; CHECK-NEXT:ret i64 [[TMP21]]
+;
+entry:
+  %0 = sext i32 %a to i64
+  %1 = add nsw i64 %0, 4294967297
+  %2 = sext i32 %a to i64
+  %3 = add nsw i64 %2, 4294967297
+  %4 = add i64 %3, %1
+  %5 = and i64 %3, 1
+  %6 = add i64 %4, %5
+  %7 = sext i32 %a to i64
+  %8 = add nsw i64 %7, 4294967297
+  %9 = add i64 %8, %6
+  %10 = and i64 %8, 1
+  %11 = add i64 %9, %10
+  %12 = sext i32 %a to i64
+  %13 = add nsw i64 %12, 4294967297
+  %14 = add i64 %13, %11
+  %15 = and i64 %13, 1
+  %16 = add i64 %14, %15
+  %17 = sext i32 %a to i64
+  %18 = add nsw i64 %17, 4294967297
+  %19 = add i64 %18, %16
+  %20 = and i64 %18, 1
+  %21 = add i64 %19, %20
+  ret i64 %21
+}

```




https://github.com/llvm/llvm-project/pull/104747
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] Add some brief LLVM 19 release notes for Pointer Authentication ABI support (PR #104657)

2024-08-19 Thread Nikita Popov via llvm-branch-commits

https://github.com/nikic milestoned 
https://github.com/llvm/llvm-project/pull/104657
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [llvm][CodeGen] Address the issue discovered In window scheduling (#101665) (PR #102881)

2024-08-19 Thread Nikita Popov via llvm-branch-commits

https://github.com/nikic milestoned 
https://github.com/llvm/llvm-project/pull/102881
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [BOLT] Fix relocations handling (PR #102741)

2024-08-19 Thread Nikita Popov via llvm-branch-commits

https://github.com/nikic milestoned 
https://github.com/llvm/llvm-project/pull/102741
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [AArch64][ARM] Add a release note about _BitInt (PR #101521)

2024-08-19 Thread Nikita Popov via llvm-branch-commits

https://github.com/nikic milestoned 
https://github.com/llvm/llvm-project/pull/101521
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [WIP][flang] Introduce HLFIR lowerings to omp.workshare_loop_nest (PR #104748)

2024-08-19 Thread Ivan R. Ivanov via llvm-branch-commits

https://github.com/ivanradanov created 
https://github.com/llvm/llvm-project/pull/104748

WIP: I will be adding unit tests, and I am considering whether we should have 
integration tests for the entire omp.workshare pipeline.

>From 793ae50dd00c4347bea78ca6ecd33783c69de354 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Sun, 4 Aug 2024 17:33:52 +0900
Subject: [PATCH 1/5] Add workshare loop wrapper lowerings

---
 .../lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp  |  6 --
 .../HLFIR/Transforms/OptimizedBufferization.cpp| 10 +++---
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp 
b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
index b608677c526310..1848dbe2c7a2c2 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
@@ -26,12 +26,13 @@
 #include "flang/Optimizer/HLFIR/HLFIRDialect.h"
 #include "flang/Optimizer/HLFIR/HLFIROps.h"
 #include "flang/Optimizer/HLFIR/Passes.h"
+#include "flang/Optimizer/OpenMP/Passes.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "mlir/IR/Dominance.h"
 #include "mlir/IR/PatternMatch.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Pass/PassManager.h"
 #include "mlir/Transforms/DialectConversion.h"
-#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "llvm/ADT/TypeSwitch.h"
 
 namespace hlfir {
@@ -792,7 +793,8 @@ struct ElementalOpConversion
 // Generate a loop nest looping around the fir.elemental shape and clone
 // fir.elemental region inside the inner loop.
 hlfir::LoopNest loopNest =
-hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered());
+hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(),
+   flangomp::shouldUseWorkshareLowering(elemental));
 auto insPt = builder.saveInsertionPoint();
 builder.setInsertionPointToStart(loopNest.body);
 auto yield = hlfir::inlineElementalOp(loc, builder, elemental,
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp 
b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
index c4aed6b79df923..150e3e91197241 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
@@ -20,6 +20,7 @@
 #include "flang/Optimizer/HLFIR/HLFIRDialect.h"
 #include "flang/Optimizer/HLFIR/HLFIROps.h"
 #include "flang/Optimizer/HLFIR/Passes.h"
+#include "flang/Optimizer/OpenMP/Passes.h"
 #include "flang/Optimizer/Transforms/Utils.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/IR/Dominance.h"
@@ -482,7 +483,8 @@ llvm::LogicalResult 
ElementalAssignBufferization::matchAndRewrite(
   // Generate a loop nest looping around the hlfir.elemental shape and clone
   // hlfir.elemental region inside the inner loop
   hlfir::LoopNest loopNest =
-  hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered());
+  hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(),
+ flangomp::shouldUseWorkshareLowering(elemental));
   builder.setInsertionPointToStart(loopNest.body);
   auto yield = hlfir::inlineElementalOp(loc, builder, elemental,
 loopNest.oneBasedIndices);
@@ -553,7 +555,8 @@ llvm::LogicalResult 
BroadcastAssignBufferization::matchAndRewrite(
   llvm::SmallVector extents =
   hlfir::getIndexExtents(loc, builder, shape);
   hlfir::LoopNest loopNest =
-  hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true);
+  hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true,
+ flangomp::shouldUseWorkshareLowering(assign));
   builder.setInsertionPointToStart(loopNest.body);
   auto arrayElement =
   hlfir::getElementAt(loc, builder, lhs, loopNest.oneBasedIndices);
@@ -648,7 +651,8 @@ llvm::LogicalResult 
VariableAssignBufferization::matchAndRewrite(
   llvm::SmallVector extents =
   hlfir::getIndexExtents(loc, builder, shape);
   hlfir::LoopNest loopNest =
-  hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true);
+  hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true,
+ flangomp::shouldUseWorkshareLowering(assign));
   builder.setInsertionPointToStart(loopNest.body);
   auto rhsArrayElement =
   hlfir::getElementAt(loc, builder, rhs, loopNest.oneBasedIndices);

>From d7ba8a1598f517a5a3c8401d22b81b50114112f1 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Mon, 19 Aug 2024 15:01:31 +0900
Subject: [PATCH 2/5] Bufferize test

---
 flang/test/HLFIR/bufferize-workshare.fir | 58 
 1 file changed, 58 insertions(+)
 create mode 100644 flang/test/HLFIR/bufferize-workshare.fir

diff --git a/flang/test/HLFIR/bufferize-workshare.fir 
b/flang/test/HLFIR/bufferize-workshare.fir
new file mode 100644
index 00..86a2f031478dd7
--- /dev

[llvm-branch-commits] [llvm] release/19.x: [Mips] Fix fast isel for i16 bswap. (#103398) (PR #104745)

2024-08-19 Thread Yingwei Zheng via llvm-branch-commits

https://github.com/dtcxzyw approved this pull request.


https://github.com/llvm/llvm-project/pull/104745
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [WIP][flang] Introduce HLFIR lowerings to omp.workshare_loop_nest (PR #104748)

2024-08-19 Thread Ivan R. Ivanov via llvm-branch-commits

https://github.com/ivanradanov edited 
https://github.com/llvm/llvm-project/pull/104748
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-08-19 Thread Ivan R. Ivanov via llvm-branch-commits

https://github.com/ivanradanov edited 
https://github.com/llvm/llvm-project/pull/101446
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [MLIR][omp] Add omp.workshare op (PR #101443)

2024-08-19 Thread Ivan R. Ivanov via llvm-branch-commits

https://github.com/ivanradanov ready_for_review 
https://github.com/llvm/llvm-project/pull/101443
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][omp] Emit omp.workshare in frontend (PR #101444)

2024-08-19 Thread Ivan R. Ivanov via llvm-branch-commits

https://github.com/ivanradanov ready_for_review 
https://github.com/llvm/llvm-project/pull/101444
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang] Introduce custom HLFIR lowering for loops in workshare construct (PR #101445)

2024-08-19 Thread Ivan R. Ivanov via llvm-branch-commits

https://github.com/ivanradanov ready_for_review 
https://github.com/llvm/llvm-project/pull/101445
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [MLIR][omp] Add omp.workshare op (PR #101443)

2024-08-19 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-flang-openmp

Author: Ivan R. Ivanov (ivanradanov)


Changes

1/4 in stack for workshare implementation

1/4 https://github.com/llvm/llvm-project/pull/101443
2/4 https://github.com/llvm/llvm-project/pull/101444
3/4 https://github.com/llvm/llvm-project/pull/101445
4/4 https://github.com/llvm/llvm-project/pull/101446



---
Full diff: https://github.com/llvm/llvm-project/pull/101443.diff


5 Files Affected:

- (modified) mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h (+2) 
- (modified) mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td (+43) 
- (modified) mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp (+23) 
- (modified) mlir/test/Dialect/OpenMP/invalid.mlir (+42) 
- (modified) mlir/test/Dialect/OpenMP/ops.mlir (+69) 


```diff
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h 
b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h
index 38e4d8f245e4fa..896ca9581c3fc8 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h
@@ -316,6 +316,8 @@ using TeamsOperands =
 detail::Clauses;
 
+using WorkshareOperands = detail::Clauses;
+
 using WsloopOperands =
 detail::Clauses {
+  let summary = "workshare directive";
+  let description = [{
+The workshare construct divides the execution of the enclosed structured
+block into separate units of work, and causes the threads of the team to
+share the work such that each unit is executed only once by one thread, in
+the context of its implicit task
+
+This operation is used for the intermediate representation of the workshare
+block before the work gets divided between the threads. See the flang
+LowerWorkshare pass for details.
+  }] # clausesDescription;
+
+  let builders = [
+OpBuilder<(ins CArg<"const WorkshareOperands &">:$clauses)>
+  ];
+}
+
+def WorkshareLoopWrapperOp : OpenMP_Op<"workshare_loop_wrapper", traits = [
+DeclareOpInterfaceMethods,
+RecursiveMemoryEffects, SingleBlock
+  ], singleRegion = true> {
+  let summary = "contains loop nests to be parallelized by workshare";
+  let description = [{
+This operation wraps a loop nest that is marked for dividing into units of
+work by an encompassing omp.workshare operation.
+  }];
+
+  let builders = [
+OpBuilder<(ins), [{ build($_builder, $_state, {}); }]>
+  ];
+  let assemblyFormat = "$region attr-dict";
+  let hasVerifier = 1;
+}
+
 
//===--===//
 // Loop Nest
 
//===--===//
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp 
b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 11780f84697b15..90f9a19ebe32b5 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -1683,6 +1683,29 @@ LogicalResult SingleOp::verify() {
   getCopyprivateSyms());
 }
 
+//===--===//
+// WorkshareOp
+//===--===//
+
+void WorkshareOp::build(OpBuilder &builder, OperationState &state,
+const WorkshareOperands &clauses) {
+  WorkshareOp::build(builder, state, clauses.nowait);
+}
+
+//===--===//
+// WorkshareLoopWrapperOp
+//===--===//
+
+LogicalResult WorkshareLoopWrapperOp::verify() {
+  if (!isWrapper())
+return emitOpError() << "must be a loop wrapper";
+  if (getNestedWrapper())
+return emitError() << "nested wrappers not supported";
+  if (!(*this)->getParentOfType())
+return emitError() << "must be nested in an omp.workshare";
+  return success();
+}
+
 
//===--===//
 // WsloopOp
 
//===--===//
diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir 
b/mlir/test/Dialect/OpenMP/invalid.mlir
index 1d1d93f0977588..ee7c448c467cf5 100644
--- a/mlir/test/Dialect/OpenMP/invalid.mlir
+++ b/mlir/test/Dialect/OpenMP/invalid.mlir
@@ -2383,3 +2383,45 @@ func.func @masked_arg_count_mismatch(%arg0: i32, %arg1: 
i32) {
 }) : (i32, i32) -> ()
   return
 }
+
+// -
+func.func @nested_wrapper(%idx : index) {
+  omp.workshare {
+// expected-error @below {{nested wrappers not supported}}
+omp.workshare_loop_wrapper {
+  omp.simd {
+omp.loop_nest (%iv) : index = (%idx) to (%idx) step (%idx) {
+  omp.yield
+}
+omp.terminator
+  }
+  omp.terminator
+}
+omp.terminator
+  }
+  return
+}
+
+// -
+func.func @not_wrapper() {
+  omp.workshare {
+// expected-error @below {{must be a loop wrapper}}
+omp.workshare_loop_w

[llvm-branch-commits] [mlir] [MLIR][omp] Add omp.workshare op (PR #101443)

2024-08-19 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-mlir-openmp

Author: Ivan R. Ivanov (ivanradanov)


Changes

1/4 in stack for workshare implementation

1/4 https://github.com/llvm/llvm-project/pull/101443
2/4 https://github.com/llvm/llvm-project/pull/101444
3/4 https://github.com/llvm/llvm-project/pull/101445
4/4 https://github.com/llvm/llvm-project/pull/101446



---
Full diff: https://github.com/llvm/llvm-project/pull/101443.diff


5 Files Affected:

- (modified) mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h (+2) 
- (modified) mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td (+43) 
- (modified) mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp (+23) 
- (modified) mlir/test/Dialect/OpenMP/invalid.mlir (+42) 
- (modified) mlir/test/Dialect/OpenMP/ops.mlir (+69) 


```diff
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h 
b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h
index 38e4d8f245e4fa..896ca9581c3fc8 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h
@@ -316,6 +316,8 @@ using TeamsOperands =
 detail::Clauses;
 
+using WorkshareOperands = detail::Clauses;
+
 using WsloopOperands =
 detail::Clauses {
+  let summary = "workshare directive";
+  let description = [{
+The workshare construct divides the execution of the enclosed structured
+block into separate units of work, and causes the threads of the team to
+share the work such that each unit is executed only once by one thread, in
+the context of its implicit task
+
+This operation is used for the intermediate representation of the workshare
+block before the work gets divided between the threads. See the flang
+LowerWorkshare pass for details.
+  }] # clausesDescription;
+
+  let builders = [
+OpBuilder<(ins CArg<"const WorkshareOperands &">:$clauses)>
+  ];
+}
+
+def WorkshareLoopWrapperOp : OpenMP_Op<"workshare_loop_wrapper", traits = [
+DeclareOpInterfaceMethods,
+RecursiveMemoryEffects, SingleBlock
+  ], singleRegion = true> {
+  let summary = "contains loop nests to be parallelized by workshare";
+  let description = [{
+This operation wraps a loop nest that is marked for dividing into units of
+work by an encompassing omp.workshare operation.
+  }];
+
+  let builders = [
+OpBuilder<(ins), [{ build($_builder, $_state, {}); }]>
+  ];
+  let assemblyFormat = "$region attr-dict";
+  let hasVerifier = 1;
+}
+
 
//===--===//
 // Loop Nest
 
//===--===//
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp 
b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 11780f84697b15..90f9a19ebe32b5 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -1683,6 +1683,29 @@ LogicalResult SingleOp::verify() {
   getCopyprivateSyms());
 }
 
+//===--===//
+// WorkshareOp
+//===--===//
+
+void WorkshareOp::build(OpBuilder &builder, OperationState &state,
+const WorkshareOperands &clauses) {
+  WorkshareOp::build(builder, state, clauses.nowait);
+}
+
+//===--===//
+// WorkshareLoopWrapperOp
+//===--===//
+
+LogicalResult WorkshareLoopWrapperOp::verify() {
+  if (!isWrapper())
+return emitOpError() << "must be a loop wrapper";
+  if (getNestedWrapper())
+return emitError() << "nested wrappers not supported";
+  if (!(*this)->getParentOfType())
+return emitError() << "must be nested in an omp.workshare";
+  return success();
+}
+
 
//===--===//
 // WsloopOp
 
//===--===//
diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir 
b/mlir/test/Dialect/OpenMP/invalid.mlir
index 1d1d93f0977588..ee7c448c467cf5 100644
--- a/mlir/test/Dialect/OpenMP/invalid.mlir
+++ b/mlir/test/Dialect/OpenMP/invalid.mlir
@@ -2383,3 +2383,45 @@ func.func @masked_arg_count_mismatch(%arg0: i32, %arg1: 
i32) {
 }) : (i32, i32) -> ()
   return
 }
+
+// -
+func.func @nested_wrapper(%idx : index) {
+  omp.workshare {
+// expected-error @below {{nested wrappers not supported}}
+omp.workshare_loop_wrapper {
+  omp.simd {
+omp.loop_nest (%iv) : index = (%idx) to (%idx) step (%idx) {
+  omp.yield
+}
+omp.terminator
+  }
+  omp.terminator
+}
+omp.terminator
+  }
+  return
+}
+
+// -
+func.func @not_wrapper() {
+  omp.workshare {
+// expected-error @below {{must be a loop wrapper}}
+omp.workshare_loop_wr

[llvm-branch-commits] [flang] [flang][omp] Emit omp.workshare in frontend (PR #101444)

2024-08-19 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-flang-openmp

Author: Ivan R. Ivanov (ivanradanov)


Changes

2/4

1/4 https://github.com/llvm/llvm-project/pull/101443
2/4 https://github.com/llvm/llvm-project/pull/101444
3/4 https://github.com/llvm/llvm-project/pull/101445
4/4 https://github.com/llvm/llvm-project/pull/101446



---
Full diff: https://github.com/llvm/llvm-project/pull/101444.diff


2 Files Affected:

- (modified) flang/lib/Lower/OpenMP/OpenMP.cpp (+26-4) 
- (modified) flang/test/Lower/OpenMP/workshare.f90 (+3-3) 


``diff
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp 
b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 2b1839b5270d4f..f7bc565ea8cbc1 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1270,6 +1270,15 @@ static void genTaskwaitClauses(lower::AbstractConverter 
&converter,
   loc, llvm::omp::Directive::OMPD_taskwait);
 }
 
+static void genWorkshareClauses(lower::AbstractConverter &converter,
+semantics::SemanticsContext &semaCtx,
+lower::StatementContext &stmtCtx,
+const List &clauses, mlir::Location 
loc,
+mlir::omp::WorkshareOperands &clauseOps) {
+  ClauseProcessor cp(converter, semaCtx, clauses);
+  cp.processNowait(clauseOps);
+}
+
 static void genTeamsClauses(lower::AbstractConverter &converter,
 semantics::SemanticsContext &semaCtx,
 lower::StatementContext &stmtCtx,
@@ -1890,6 +1899,22 @@ genTaskyieldOp(lower::AbstractConverter &converter, 
lower::SymMap &symTable,
   return converter.getFirOpBuilder().create(loc);
 }
 
+static mlir::omp::WorkshareOp
+genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
+   semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
+   mlir::Location loc, const ConstructQueue &queue,
+   ConstructQueue::iterator item) {
+  lower::StatementContext stmtCtx;
+  mlir::omp::WorkshareOperands clauseOps;
+  genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc, 
clauseOps);
+
+  return genOpWithBody(
+  OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
+llvm::omp::Directive::OMPD_workshare)
+  .setClauses(&item->clauses),
+  queue, item, clauseOps);
+}
+
 static mlir::omp::TeamsOp
 genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
@@ -2249,10 +2274,7 @@ static void genOMPDispatch(lower::AbstractConverter 
&converter,
   llvm::omp::getOpenMPDirectiveName(dir) + ")");
   // case llvm::omp::Directive::OMPD_workdistribute:
   case llvm::omp::Directive::OMPD_workshare:
-// FIXME: Workshare is not a commonly used OpenMP construct, an
-// implementation for this feature will come later. For the codes
-// that use this construct, add a single construct for now.
-genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item);
+genWorkshareOp(converter, symTable, semaCtx, eval, loc, queue, item);
 break;
 
   // Composite constructs
diff --git a/flang/test/Lower/OpenMP/workshare.f90 
b/flang/test/Lower/OpenMP/workshare.f90
index 1e11677a15e1f0..8e771952f5b6da 100644
--- a/flang/test/Lower/OpenMP/workshare.f90
+++ b/flang/test/Lower/OpenMP/workshare.f90
@@ -6,7 +6,7 @@ subroutine sb1(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
   !$omp parallel
-!CHECK: omp.single  {
+!CHECK: omp.workshare {
   !$omp workshare
 arr = 0
   !$omp end workshare
@@ -20,7 +20,7 @@ subroutine sb2(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
   !$omp parallel
-!CHECK: omp.single nowait {
+!CHECK: omp.workshare nowait {
   !$omp workshare
 arr = 0
   !$omp end workshare nowait
@@ -33,7 +33,7 @@ subroutine sb2(arr)
 subroutine sb3(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
-!CHECK: omp.single  {
+!CHECK: omp.workshare  {
   !$omp parallel workshare
 arr = 0
   !$omp end parallel workshare

``




https://github.com/llvm/llvm-project/pull/101444
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang] Introduce custom loop nest generation for loops in workshare construct (PR #101445)

2024-08-19 Thread Ivan R. Ivanov via llvm-branch-commits

https://github.com/ivanradanov edited 
https://github.com/llvm/llvm-project/pull/101445
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang] Introduce custom loop nest generation for loops in workshare construct (PR #101445)

2024-08-19 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-flang-fir-hlfir

Author: Ivan R. Ivanov (ivanradanov)


Changes

3/4

1/4 https://github.com/llvm/llvm-project/pull/101443
2/4 https://github.com/llvm/llvm-project/pull/101444
3/4 https://github.com/llvm/llvm-project/pull/101445
4/4 https://github.com/llvm/llvm-project/pull/101446

This alternative loop nest generation is used in 4/4 for the workshare lowering.



---
Full diff: https://github.com/llvm/llvm-project/pull/101445.diff


7 Files Affected:

- (modified) flang/include/flang/Optimizer/Builder/HLFIRTools.h (+7-5) 
- (modified) flang/lib/Lower/ConvertCall.cpp (+1-1) 
- (modified) flang/lib/Lower/OpenMP/ReductionProcessor.cpp (+2-2) 
- (modified) flang/lib/Optimizer/Builder/HLFIRTools.cpp (+39-13) 
- (modified) flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp (+2-1) 
- (modified) 
flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp (+15-15) 
- (modified) flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp 
(+3-3) 


``diff
diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h 
b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
index 6b41025eea0780..f073f494b3fb21 100644
--- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h
+++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
@@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp(
 
 /// Structure to describe a loop nest.
 struct LoopNest {
-  fir::DoLoopOp outerLoop;
-  fir::DoLoopOp innerLoop;
+  mlir::Operation *outerOp = nullptr;
+  mlir::Block *body = nullptr;
   llvm::SmallVector oneBasedIndices;
 };
 
@@ -366,11 +366,13 @@ struct LoopNest {
 /// \p isUnordered specifies whether the loops in the loop nest
 /// are unordered.
 LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
- mlir::ValueRange extents, bool isUnordered = false);
+ mlir::ValueRange extents, bool isUnordered = false,
+ bool emitWorkshareLoop = false);
 inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
-mlir::Value shape, bool isUnordered = false) {
+mlir::Value shape, bool isUnordered = false,
+bool emitWorkshareLoop = false) {
   return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape),
- isUnordered);
+ isUnordered, emitWorkshareLoop);
 }
 
 /// Inline the body of an hlfir.elemental at the current insertion point
diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp
index fd873f55dd844e..0689d6e033dd9c 100644
--- a/flang/lib/Lower/ConvertCall.cpp
+++ b/flang/lib/Lower/ConvertCall.cpp
@@ -2128,7 +2128,7 @@ class ElementalCallBuilder {
   hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered);
   mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices;
   auto insPt = builder.saveInsertionPoint();
-  builder.setInsertionPointToStart(loopNest.innerLoop.getBody());
+  builder.setInsertionPointToStart(loopNest.body);
   callContext.stmtCtx.pushScope();
   for (auto &preparedActual : loweredActuals)
 if (preparedActual)
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp 
b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
index c3c1f363033c27..72a90dd0d6f29d 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
@@ -375,7 +375,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, 
mlir::Location loc,
   // know this won't miss any opportuinties for clever elemental inlining
   hlfir::LoopNest nest = hlfir::genLoopNest(
   loc, builder, shapeShift.getExtents(), /*isUnordered=*/true);
-  builder.setInsertionPointToStart(nest.innerLoop.getBody());
+  builder.setInsertionPointToStart(nest.body);
   mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy());
   auto lhsEleAddr = builder.create(
   loc, refTy, lhs, shapeShift, /*slice=*/mlir::Value{},
@@ -389,7 +389,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, 
mlir::Location loc,
   builder, loc, redId, refTy, lhsEle, rhsEle);
   builder.create(loc, scalarReduction, lhsEleAddr);
 
-  builder.setInsertionPointAfter(nest.outerLoop);
+  builder.setInsertionPointAfter(nest.outerOp);
   builder.create(loc, lhsAddr);
 }
 
diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp 
b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
index 8d0ae2f195178c..31378841ed 100644
--- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp
+++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
@@ -20,6 +20,7 @@
 #include "mlir/IR/IRMapping.h"
 #include "mlir/Support/LLVM.h"
 #include "llvm/ADT/TypeSwitch.h"
+#include 
 #include 
 
 // Return explicit extents. If the base is a fir.box, this won't read it to
@@ -855,26 +856,51 @@ mlir::Value hlfir::inlineElementalOp(
 
 hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc,
fir::FirOpBuilder 

[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-08-19 Thread Ivan R. Ivanov via llvm-branch-commits

https://github.com/ivanradanov ready_for_review 
https://github.com/llvm/llvm-project/pull/101446
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-08-19 Thread Ivan R. Ivanov via llvm-branch-commits


@@ -2,3 +2,4 @@ add_subdirectory(CodeGen)
 add_subdirectory(Dialect)
 add_subdirectory(HLFIR)
 add_subdirectory(Transforms)
+add_subdirectory(OpenMP)

ivanradanov wrote:

PR for this up here https://github.com/llvm/llvm-project/pull/104732

https://github.com/llvm/llvm-project/pull/101446
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang] Introduce custom loop nest generation for loops in workshare construct (PR #101445)

2024-08-19 Thread Ivan R. Ivanov via llvm-branch-commits

https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101445

>From 5e470922405b735d63b4aded76450cc52e94e003 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:12:34 +0900
Subject: [PATCH 1/4] [flang] Introduce ws loop nest generation for HLFIR
 lowering

---
 .../flang/Optimizer/Builder/HLFIRTools.h  | 12 +++--
 flang/lib/Lower/ConvertCall.cpp   |  2 +-
 flang/lib/Lower/OpenMP/ReductionProcessor.cpp |  4 +-
 flang/lib/Optimizer/Builder/HLFIRTools.cpp| 52 ++-
 .../HLFIR/Transforms/BufferizeHLFIR.cpp   |  3 +-
 .../LowerHLFIROrderedAssignments.cpp  | 30 +--
 .../Transforms/OptimizedBufferization.cpp |  6 +--
 7 files changed, 69 insertions(+), 40 deletions(-)

diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h 
b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
index 6b41025eea0780..14e42c6f358e46 100644
--- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h
+++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
@@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp(
 
 /// Structure to describe a loop nest.
 struct LoopNest {
-  fir::DoLoopOp outerLoop;
-  fir::DoLoopOp innerLoop;
+  mlir::Operation *outerOp;
+  mlir::Block *body;
   llvm::SmallVector oneBasedIndices;
 };
 
@@ -366,11 +366,13 @@ struct LoopNest {
 /// \p isUnordered specifies whether the loops in the loop nest
 /// are unordered.
 LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
- mlir::ValueRange extents, bool isUnordered = false);
+ mlir::ValueRange extents, bool isUnordered = false,
+ bool emitWsLoop = false);
 inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
-mlir::Value shape, bool isUnordered = false) {
+mlir::Value shape, bool isUnordered = false,
+bool emitWsLoop = false) {
   return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape),
- isUnordered);
+ isUnordered, emitWsLoop);
 }
 
 /// Inline the body of an hlfir.elemental at the current insertion point
diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp
index fd873f55dd844e..0689d6e033dd9c 100644
--- a/flang/lib/Lower/ConvertCall.cpp
+++ b/flang/lib/Lower/ConvertCall.cpp
@@ -2128,7 +2128,7 @@ class ElementalCallBuilder {
   hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered);
   mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices;
   auto insPt = builder.saveInsertionPoint();
-  builder.setInsertionPointToStart(loopNest.innerLoop.getBody());
+  builder.setInsertionPointToStart(loopNest.body);
   callContext.stmtCtx.pushScope();
   for (auto &preparedActual : loweredActuals)
 if (preparedActual)
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp 
b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
index c3c1f363033c27..72a90dd0d6f29d 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
@@ -375,7 +375,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, 
mlir::Location loc,
   // know this won't miss any opportuinties for clever elemental inlining
   hlfir::LoopNest nest = hlfir::genLoopNest(
   loc, builder, shapeShift.getExtents(), /*isUnordered=*/true);
-  builder.setInsertionPointToStart(nest.innerLoop.getBody());
+  builder.setInsertionPointToStart(nest.body);
   mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy());
   auto lhsEleAddr = builder.create(
   loc, refTy, lhs, shapeShift, /*slice=*/mlir::Value{},
@@ -389,7 +389,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, 
mlir::Location loc,
   builder, loc, redId, refTy, lhsEle, rhsEle);
   builder.create(loc, scalarReduction, lhsEleAddr);
 
-  builder.setInsertionPointAfter(nest.outerLoop);
+  builder.setInsertionPointAfter(nest.outerOp);
   builder.create(loc, lhsAddr);
 }
 
diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp 
b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
index 8d0ae2f195178c..cd07cb741eb4bb 100644
--- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp
+++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
@@ -20,6 +20,7 @@
 #include "mlir/IR/IRMapping.h"
 #include "mlir/Support/LLVM.h"
 #include "llvm/ADT/TypeSwitch.h"
+#include 
 #include 
 
 // Return explicit extents. If the base is a fir.box, this won't read it to
@@ -855,26 +856,51 @@ mlir::Value hlfir::inlineElementalOp(
 
 hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc,
fir::FirOpBuilder &builder,
-   mlir::ValueRange extents, bool isUnordered) 
{
+   mlir::ValueRange extents, bool isUnordered,
+   bool emitWsLoop) {
   hlfir::LoopNest loopNest;
   assert(!extents.empty() && 

[llvm-branch-commits] [flang] [flang][omp] Emit omp.workshare in frontend (PR #101444)

2024-08-19 Thread Ivan R. Ivanov via llvm-branch-commits

https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101444

>From 63d49e4dcd128b470ee77006c594673203dd2df2 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:11:47 +0900
Subject: [PATCH 1/2] [flang][omp] Emit omp.workshare in frontend

---
 flang/lib/Lower/OpenMP/OpenMP.cpp | 30 ++
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp 
b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 2b1839b5270d4f..f7bc565ea8cbc1 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1270,6 +1270,15 @@ static void genTaskwaitClauses(lower::AbstractConverter 
&converter,
   loc, llvm::omp::Directive::OMPD_taskwait);
 }
 
+static void genWorkshareClauses(lower::AbstractConverter &converter,
+semantics::SemanticsContext &semaCtx,
+lower::StatementContext &stmtCtx,
+const List &clauses, mlir::Location 
loc,
+mlir::omp::WorkshareOperands &clauseOps) {
+  ClauseProcessor cp(converter, semaCtx, clauses);
+  cp.processNowait(clauseOps);
+}
+
 static void genTeamsClauses(lower::AbstractConverter &converter,
 semantics::SemanticsContext &semaCtx,
 lower::StatementContext &stmtCtx,
@@ -1890,6 +1899,22 @@ genTaskyieldOp(lower::AbstractConverter &converter, 
lower::SymMap &symTable,
   return converter.getFirOpBuilder().create(loc);
 }
 
+static mlir::omp::WorkshareOp
+genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
+   semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
+   mlir::Location loc, const ConstructQueue &queue,
+   ConstructQueue::iterator item) {
+  lower::StatementContext stmtCtx;
+  mlir::omp::WorkshareOperands clauseOps;
+  genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc, 
clauseOps);
+
+  return genOpWithBody(
+  OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
+llvm::omp::Directive::OMPD_workshare)
+  .setClauses(&item->clauses),
+  queue, item, clauseOps);
+}
+
 static mlir::omp::TeamsOp
 genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
@@ -2249,10 +2274,7 @@ static void genOMPDispatch(lower::AbstractConverter 
&converter,
   llvm::omp::getOpenMPDirectiveName(dir) + ")");
   // case llvm::omp::Directive::OMPD_workdistribute:
   case llvm::omp::Directive::OMPD_workshare:
-// FIXME: Workshare is not a commonly used OpenMP construct, an
-// implementation for this feature will come later. For the codes
-// that use this construct, add a single construct for now.
-genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item);
+genWorkshareOp(converter, symTable, semaCtx, eval, loc, queue, item);
 break;
 
   // Composite constructs

>From 621b01775171a4718fa405f201b58c3dca005e5a Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Sun, 4 Aug 2024 16:02:37 +0900
Subject: [PATCH 2/2] Fix lower test for workshare

---
 flang/test/Lower/OpenMP/workshare.f90 | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/flang/test/Lower/OpenMP/workshare.f90 
b/flang/test/Lower/OpenMP/workshare.f90
index 1e11677a15e1f0..8e771952f5b6da 100644
--- a/flang/test/Lower/OpenMP/workshare.f90
+++ b/flang/test/Lower/OpenMP/workshare.f90
@@ -6,7 +6,7 @@ subroutine sb1(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
   !$omp parallel
-!CHECK: omp.single  {
+!CHECK: omp.workshare {
   !$omp workshare
 arr = 0
   !$omp end workshare
@@ -20,7 +20,7 @@ subroutine sb2(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
   !$omp parallel
-!CHECK: omp.single nowait {
+!CHECK: omp.workshare nowait {
   !$omp workshare
 arr = 0
   !$omp end workshare nowait
@@ -33,7 +33,7 @@ subroutine sb2(arr)
 subroutine sb3(arr)
   integer :: arr(:)
 !CHECK: omp.parallel  {
-!CHECK: omp.single  {
+!CHECK: omp.workshare  {
   !$omp parallel workshare
 arr = 0
   !$omp end parallel workshare

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [MLIR][omp] Add omp.workshare op (PR #101443)

2024-08-19 Thread Ivan R. Ivanov via llvm-branch-commits

https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101443

>From 604b0293e0574e9d697d4071c2b853a5a27af1e1 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Wed, 31 Jul 2024 14:09:09 +0900
Subject: [PATCH 1/7] [MLIR][omp] Add omp.workshare op

---
 .../Dialect/OpenMP/OpenMPClauseOperands.h |  3 +++
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 22 +++
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp  | 13 +++
 3 files changed, 38 insertions(+)

diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h 
b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h
index 38e4d8f245e4fa..d14e5e17afbb08 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h
@@ -17,6 +17,7 @@
 
 #include "mlir/IR/BuiltinAttributes.h"
 #include "llvm/ADT/SmallVector.h"
+#include 
 
 #include "mlir/Dialect/OpenMP/OpenMPOpsEnums.h.inc"
 
@@ -316,6 +317,8 @@ using TeamsOperands =
 detail::Clauses;
 
+using WorkshareOperands = detail::Clauses;
+
 using WsloopOperands =
 detail::Clauses {
+  let summary = "workshare directive";
+  let description = [{
+The workshare construct divides the execution of the enclosed structured
+block into separate units of work, and causes the threads of the team to
+share the work such that each unit is executed only once by one thread, in
+the context of its implicit task
+  }] # clausesDescription;
+
+  let builders = [
+OpBuilder<(ins CArg<"const WorkshareOperands &">:$clauses)>
+  ];
+
+  let hasVerifier = 1;
+}
+
 
//===--===//
 // Loop Nest
 
//===--===//
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp 
b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 11780f84697b15..9a189eb2059e01 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -1683,6 +1683,19 @@ LogicalResult SingleOp::verify() {
   getCopyprivateSyms());
 }
 
+//===--===//
+// WorkshareOp
+//===--===//
+
+void WorkshareOp::build(OpBuilder &builder, OperationState &state,
+const WorkshareOperands &clauses) {
+  WorkshareOp::build(builder, state, clauses.nowait);
+}
+
+LogicalResult WorkshareOp::verify() {
+  return (*this)->getRegion(0).getBlocks().size() == 1 ? success() : failure();
+}
+
 
//===--===//
 // WsloopOp
 
//===--===//

>From f2fd4f278c23ec99dae3ac44e1c05fcb629f707d Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Fri, 2 Aug 2024 16:10:25 +0900
Subject: [PATCH 2/7] Add custom omp loop wrapper

---
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td 
b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 5199ff50abb959..76f0c472cfdb14 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -308,6 +308,17 @@ def WorkshareOp : OpenMP_Op<"workshare", clauses = [
   let hasVerifier = 1;
 }
 
+def WorkshareLoopWrapperOp : OpenMP_Op<"workshare_loop_wrapper", traits = [
+DeclareOpInterfaceMethods,
+RecursiveMemoryEffects, SingleBlock
+  ], singleRegion = true> {
+  let summary = "contains loop nests to be parallelized by workshare";
+
+  let builders = [
+OpBuilder<(ins), [{ build($_builder, $_state, {}); }]>
+  ];
+}
+
 
//===--===//
 // Loop Nest
 
//===--===//

>From 22c66e6db3997e38254d9848661a38627cd7bb19 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Fri, 2 Aug 2024 16:08:58 +0900
Subject: [PATCH 3/7] Add recursive memory effects trait to workshare

---
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td 
b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 76f0c472cfdb14..7d1c80333855e7 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -290,7 +290,9 @@ def SingleOp : OpenMP_Op<"single", traits = [
 // 2.8.3 Workshare Construct
 
//===--===//
 
-def WorkshareOp : OpenMP_Op<"workshare", clauses = [
+def WorkshareOp : OpenMP_Op<"workshare", traits = [
+RecursiveMemoryEffects,
+  ], clauses = [
 OpenMP_NowaitClause,
   ], singleRegion = true> {
   let summar

[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-08-19 Thread Ivan R. Ivanov via llvm-branch-commits


@@ -344,6 +345,7 @@ inline void createHLFIRToFIRPassPipeline(
   pm.addPass(hlfir::createLowerHLFIRIntrinsics());
   pm.addPass(hlfir::createBufferizeHLFIR());
   pm.addPass(hlfir::createConvertHLFIRtoFIR());
+  pm.addPass(flangomp::createLowerWorkshare());

ivanradanov wrote:

I opted to keep the rest of the OpenMP passes as they are and have added a bool 
argument to control whether to run the lower-workshare pass.

https://github.com/llvm/llvm-project/pull/101446
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [BOLT] Fix relocations handling (PR #102741)

2024-08-19 Thread Tobias Hieta via llvm-branch-commits

tru wrote:

Can we get a review on this so that it can be included before final is done if 
it should be?

https://github.com/llvm/llvm-project/pull/102741
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [BOLT] Fix relocations handling (PR #102741)

2024-08-19 Thread Vladislav Khmelevsky via llvm-branch-commits

yota9 wrote:

I hope someone can approve this, as it is a major fix 

https://github.com/llvm/llvm-project/pull/102741
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [AArch64] Add a check for invalid default features (#104435) (PR #104752)

2024-08-19 Thread Tomas Matheson via llvm-branch-commits

https://github.com/tmatheson-arm created 
https://github.com/llvm/llvm-project/pull/104752

This adds a check that all ExtensionWithMArch which are marked as implied 
features for an architecture are also present in the list of default features. 
It doesn't make sense to have something mandatory but not on by default.

There were a number of existing cases that violated this rule, and some changes 
to which features are mandatory (indicated by the Implies field).

This resulted in a bug where if a feature was marked as `Implies` but was not 
added to `DefaultExt`, then for `-march=base_arch+nofeat` the Driver would 
consider `feat` to have never been added and therefore would do nothing to 
disable it (no `-target-feature -feat` would be added, but the backend would 
enable the feature by default because of `Implies`). See
clang/test/Driver/aarch64-negative-modifiers-for-default-features.c.

Note that the processor definitions do not respect the architecture 
DefaultExts. These apply only when specifying `-march=<some architecture>`. So when a feature is moved from `Implies` to `DefaultExts` on the 
Architecture definition, the feature needs to be added to all processor 
definitions (that are based on that architecture) in order to preserve the 
existing behaviour. I have checked the TRMs for many cases (see specific commit 
messages) but in other cases I have just kept the current behaviour and not 
tried to fix it.

>From b523150d05242d9e00dc2dcf1694a1cf7dde088f Mon Sep 17 00:00:00 2001
From: Tomas Matheson 
Date: Sat, 17 Aug 2024 13:36:40 +0100
Subject: [PATCH] [AArch64] Add a check for invalid default features (#104435)

This adds a check that all ExtensionWithMArch which are marked as
implied features for an architecture are also present in the list of
default features. It doesn't make sense to have something mandatory but
not on by default.

There were a number of existing cases that violated this rule, and some
changes to which features are mandatory (indicated by the Implies
field).

This resulted in a bug where if a feature was marked as `Implies` but
was not added to `DefaultExt`, then for `-march=base_arch+nofeat` the
Driver would consider `feat` to have never been added and therefore
would do nothing to disable it (no `-target-feature -feat` would be
added, but the backend would enable the feature by default because of
`Implies`). See
clang/test/Driver/aarch64-negative-modifiers-for-default-features.c.

Note that the processor definitions do not respect the architecture
DefaultExts. These apply only when specifying `-march=<some architecture>`. So when a feature is moved from `Implies` to `DefaultExts` on
the Architecture definition, the feature needs to be added to all
processor definitions (that are based on that architecture) in order to
preserve the existing behaviour. I have checked the TRMs for many cases
(see specific commit messages) but in other cases I have just kept the
current behaviour and not tried to fix it.
---
 clang/test/CodeGen/aarch64-targetattr.c   | 12 +--
 ...-negative-modifiers-for-default-features.c | 12 +++
 clang/test/Driver/arm-sb.c|  2 +-
 .../aarch64-apple-a12.c   |  1 -
 .../aarch64-apple-a13.c   |  1 -
 .../aarch64-apple-a14.c   |  1 -
 .../aarch64-apple-a15.c   |  1 -
 .../aarch64-apple-a16.c   |  1 -
 .../aarch64-apple-a17.c   |  1 -
 .../aarch64-apple-m4.c|  2 -
 .../aarch64-cortex-r82.c  |  1 -
 .../aarch64-cortex-r82ae.c|  1 -
 llvm/lib/Target/AArch64/AArch64Features.td| 19 ++--
 llvm/lib/Target/AArch64/AArch64Processors.td  | 46 +++--
 llvm/test/MC/AArch64/arm64-system-encoding.s  |  2 +-
 llvm/test/MC/AArch64/armv8.5a-ssbs-error.s|  2 +-
 llvm/test/MC/AArch64/armv8.5a-ssbs.s  |  2 +-
 .../MC/Disassembler/AArch64/armv8.5a-ssbs.txt |  2 +-
 .../AArch64/basic-a64-instructions.txt|  2 +-
 .../TargetParser/TargetParserTest.cpp | 97 +++
 llvm/utils/TableGen/ARMTargetDefEmitter.cpp   | 32 +-
 21 files changed, 156 insertions(+), 84 deletions(-)
 create mode 100644 
clang/test/Driver/aarch64-negative-modifiers-for-default-features.c

diff --git a/clang/test/CodeGen/aarch64-targetattr.c 
b/clang/test/CodeGen/aarch64-targetattr.c
index 4f891f938b6186..d6227be2ebef83 100644
--- a/clang/test/CodeGen/aarch64-targetattr.c
+++ b/clang/test/CodeGen/aarch64-targetattr.c
@@ -195,19 +195,19 @@ void minusarch() {}
 // CHECK: attributes #[[ATTR0]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+crc,+fp-armv8,+lse,+neon,+ras,+rdm,+v8.1a,+v8.2a,+v8a" }
 // CHECK: attributes #[[ATTR1]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+v8.1a,+v8.2a,+v8a"
 }
 // CHECK: attributes #[[AT

[llvm-branch-commits] [clang] [llvm] [AArch64] Add a check for invalid default features (#104435) (PR #104752)

2024-08-19 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-clang

Author: Tomas Matheson (tmatheson-arm)


Changes

This adds a check that all ExtensionWithMArch which are marked as implied 
features for an architecture are also present in the list of default features. 
It doesn't make sense to have something mandatory but not on by default.

There were a number of existing cases that violated this rule, and some changes 
to which features are mandatory (indicated by the Implies field).

This resulted in a bug where if a feature was marked as `Implies` but was not 
added to `DefaultExt`, then for `-march=base_arch+nofeat` the Driver would 
consider `feat` to have never been added and therefore would do nothing to 
disable it (no `-target-feature -feat` would be added, but the backend would 
enable the feature by default because of `Implies`). See
clang/test/Driver/aarch64-negative-modifiers-for-default-features.c.

Note that the processor definitions do not respect the architecture 
DefaultExts. These apply only when specifying `-march=<some architecture version>`. So when a feature is moved from `Implies` to `DefaultExts` on the 
Architecture definition, the feature needs to be added to all processor 
definitions (that are based on that architecture) in order to preserve the 
existing behaviour. I have checked the TRMs for many cases (see specific commit 
messages) but in other cases I have just kept the current behaviour and not 
tried to fix it.

---

Patch is 66.28 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/104752.diff


21 Files Affected:

- (modified) clang/test/CodeGen/aarch64-targetattr.c (+6-6) 
- (added) clang/test/Driver/aarch64-negative-modifiers-for-default-features.c 
(+12) 
- (modified) clang/test/Driver/arm-sb.c (+1-1) 
- (modified) clang/test/Driver/print-enabled-extensions/aarch64-apple-a12.c 
(-1) 
- (modified) clang/test/Driver/print-enabled-extensions/aarch64-apple-a13.c 
(-1) 
- (modified) clang/test/Driver/print-enabled-extensions/aarch64-apple-a14.c 
(-1) 
- (modified) clang/test/Driver/print-enabled-extensions/aarch64-apple-a15.c 
(-1) 
- (modified) clang/test/Driver/print-enabled-extensions/aarch64-apple-a16.c 
(-1) 
- (modified) clang/test/Driver/print-enabled-extensions/aarch64-apple-a17.c 
(-1) 
- (modified) clang/test/Driver/print-enabled-extensions/aarch64-apple-m4.c (-2) 
- (modified) clang/test/Driver/print-enabled-extensions/aarch64-cortex-r82.c 
(-1) 
- (modified) clang/test/Driver/print-enabled-extensions/aarch64-cortex-r82ae.c 
(-1) 
- (modified) llvm/lib/Target/AArch64/AArch64Features.td (+9-10) 
- (modified) llvm/lib/Target/AArch64/AArch64Processors.td (+37-9) 
- (modified) llvm/test/MC/AArch64/arm64-system-encoding.s (+1-1) 
- (modified) llvm/test/MC/AArch64/armv8.5a-ssbs-error.s (+1-1) 
- (modified) llvm/test/MC/AArch64/armv8.5a-ssbs.s (+1-1) 
- (modified) llvm/test/MC/Disassembler/AArch64/armv8.5a-ssbs.txt (+1-1) 
- (modified) llvm/test/MC/Disassembler/AArch64/basic-a64-instructions.txt 
(+1-1) 
- (modified) llvm/unittests/TargetParser/TargetParserTest.cpp (+57-40) 
- (modified) llvm/utils/TableGen/ARMTargetDefEmitter.cpp (+29-3) 


``diff
diff --git a/clang/test/CodeGen/aarch64-targetattr.c 
b/clang/test/CodeGen/aarch64-targetattr.c
index 4f891f938b6186..d6227be2ebef83 100644
--- a/clang/test/CodeGen/aarch64-targetattr.c
+++ b/clang/test/CodeGen/aarch64-targetattr.c
@@ -195,19 +195,19 @@ void minusarch() {}
 // CHECK: attributes #[[ATTR0]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+crc,+fp-armv8,+lse,+neon,+ras,+rdm,+v8.1a,+v8.2a,+v8a" }
 // CHECK: attributes #[[ATTR1]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+v8.1a,+v8.2a,+v8a"
 }
 // CHECK: attributes #[[ATTR2]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+sve2,+v8.1a,+v8.2a,+v8a"
 }
-// CHECK: attributes #[[ATTR3]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+bf16,+complxnum,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+ras,+rcpc,+rdm,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a"
 }
-// CHECK: attributes #[[ATTR4]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-cpu"="cortex-a710" 
"target-features"="+bf16,+complxnum,+crc,+dotprod,+ete,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+mte,+neon,+pauth,+perfmon,+ras,+rcpc,+rdm,+sb,+sve,+sve2,+sve2-bitperm,+trbe,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+v9a"
 }
+// CHECK: attributes #[[ATTR3]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,

[llvm-branch-commits] [clang] [llvm] [AArch64] Add a check for invalid default features (#104435) (PR #104752)

2024-08-19 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-mc

Author: Tomas Matheson (tmatheson-arm)


Changes

This adds a check that all ExtensionWithMArch which are marked as implied 
features for an architecture are also present in the list of default features. 
It doesn't make sense to have something mandatory but not on by default.

There were a number of existing cases that violated this rule, and some changes 
to which features are mandatory (indicated by the Implies field).

This resulted in a bug where if a feature was marked as `Implies` but was not 
added to `DefaultExt`, then for `-march=base_arch+nofeat` the Driver would 
consider `feat` to have never been added and therefore would do nothing to 
disable it (no `-target-feature -feat` would be added, but the backend would 
enable the feature by default because of `Implies`). See
clang/test/Driver/aarch64-negative-modifiers-for-default-features.c.

Note that the processor definitions do not respect the architecture 
DefaultExts. These apply only when specifying `-march=<some architecture version>`. So when a feature is moved from `Implies` to `DefaultExts` on the 
Architecture definition, the feature needs to be added to all processor 
definitions (that are based on that architecture) in order to preserve the 
existing behaviour. I have checked the TRMs for many cases (see specific commit 
messages) but in other cases I have just kept the current behaviour and not 
tried to fix it.

---

Patch is 66.28 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/104752.diff


21 Files Affected:

- (modified) clang/test/CodeGen/aarch64-targetattr.c (+6-6) 
- (added) clang/test/Driver/aarch64-negative-modifiers-for-default-features.c 
(+12) 
- (modified) clang/test/Driver/arm-sb.c (+1-1) 
- (modified) clang/test/Driver/print-enabled-extensions/aarch64-apple-a12.c 
(-1) 
- (modified) clang/test/Driver/print-enabled-extensions/aarch64-apple-a13.c 
(-1) 
- (modified) clang/test/Driver/print-enabled-extensions/aarch64-apple-a14.c 
(-1) 
- (modified) clang/test/Driver/print-enabled-extensions/aarch64-apple-a15.c 
(-1) 
- (modified) clang/test/Driver/print-enabled-extensions/aarch64-apple-a16.c 
(-1) 
- (modified) clang/test/Driver/print-enabled-extensions/aarch64-apple-a17.c 
(-1) 
- (modified) clang/test/Driver/print-enabled-extensions/aarch64-apple-m4.c (-2) 
- (modified) clang/test/Driver/print-enabled-extensions/aarch64-cortex-r82.c 
(-1) 
- (modified) clang/test/Driver/print-enabled-extensions/aarch64-cortex-r82ae.c 
(-1) 
- (modified) llvm/lib/Target/AArch64/AArch64Features.td (+9-10) 
- (modified) llvm/lib/Target/AArch64/AArch64Processors.td (+37-9) 
- (modified) llvm/test/MC/AArch64/arm64-system-encoding.s (+1-1) 
- (modified) llvm/test/MC/AArch64/armv8.5a-ssbs-error.s (+1-1) 
- (modified) llvm/test/MC/AArch64/armv8.5a-ssbs.s (+1-1) 
- (modified) llvm/test/MC/Disassembler/AArch64/armv8.5a-ssbs.txt (+1-1) 
- (modified) llvm/test/MC/Disassembler/AArch64/basic-a64-instructions.txt 
(+1-1) 
- (modified) llvm/unittests/TargetParser/TargetParserTest.cpp (+57-40) 
- (modified) llvm/utils/TableGen/ARMTargetDefEmitter.cpp (+29-3) 


``diff
diff --git a/clang/test/CodeGen/aarch64-targetattr.c 
b/clang/test/CodeGen/aarch64-targetattr.c
index 4f891f938b6186..d6227be2ebef83 100644
--- a/clang/test/CodeGen/aarch64-targetattr.c
+++ b/clang/test/CodeGen/aarch64-targetattr.c
@@ -195,19 +195,19 @@ void minusarch() {}
 // CHECK: attributes #[[ATTR0]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+crc,+fp-armv8,+lse,+neon,+ras,+rdm,+v8.1a,+v8.2a,+v8a" }
 // CHECK: attributes #[[ATTR1]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+v8.1a,+v8.2a,+v8a"
 }
 // CHECK: attributes #[[ATTR2]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+sve2,+v8.1a,+v8.2a,+v8a"
 }
-// CHECK: attributes #[[ATTR3]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+bf16,+complxnum,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+ras,+rcpc,+rdm,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a"
 }
-// CHECK: attributes #[[ATTR4]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-cpu"="cortex-a710" 
"target-features"="+bf16,+complxnum,+crc,+dotprod,+ete,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+mte,+neon,+pauth,+perfmon,+ras,+rcpc,+rdm,+sb,+sve,+sve2,+sve2-bitperm,+trbe,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+v9a"
 }
+// CHECK: attributes #[[ATTR3]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp

[llvm-branch-commits] [clang] [llvm] [AArch64] Add a check for invalid default features (#104435) (PR #104752)

2024-08-19 Thread Tomas Matheson via llvm-branch-commits

https://github.com/tmatheson-arm milestoned 
https://github.com/llvm/llvm-project/pull/104752
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [AArch64] Add a check for invalid default features (#104435) (PR #104752)

2024-08-19 Thread Tomas Matheson via llvm-branch-commits

https://github.com/tmatheson-arm edited 
https://github.com/llvm/llvm-project/pull/104752
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [AArch64] Add a check for invalid default features (PR #104752)

2024-08-19 Thread Tomas Matheson via llvm-branch-commits

https://github.com/tmatheson-arm edited 
https://github.com/llvm/llvm-project/pull/104752
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [AArch64] Add a check for invalid default features (PR #104752)

2024-08-19 Thread Tomas Matheson via llvm-branch-commits

https://github.com/tmatheson-arm edited 
https://github.com/llvm/llvm-project/pull/104752
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [AArch64] Add a check for invalid default features (PR #104752)

2024-08-19 Thread Jonathan Thackray via llvm-branch-commits

https://github.com/jthackray approved this pull request.

LGTM. This fix should definitely be in llvm19.

https://github.com/llvm/llvm-project/pull/104752
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [AArch64] Add a check for invalid default features (PR #104752)

2024-08-19 Thread David Spickett via llvm-branch-commits

DavidSpickett wrote:

This needs a summary on the impact of not including the change, for folks who 
are not familiar with Arm's extension details.

> This resulted in a bug where if a feature was marked as Implies but was not 
> added to DefaultExt, then for -march=base_arch+nofeat the Driver would 
> consider feat to have never been added and therefore would do nothing to 
> disable it (no -target-feature -feat would be added, but the backend would 
> enable the feature by default because of Implies). See
> clang/test/Driver/aarch64-negative-modifiers-for-default-features.c.

So I think the impact is:
This could result in a binary including instructions from extensions that the 
user has explicitly requested be disabled. This binary will fault at runtime on 
hardware that does not have these extensions.

https://github.com/llvm/llvm-project/pull/104752
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [AArch64] Add a check for invalid default features (PR #104752)

2024-08-19 Thread David Spickett via llvm-branch-commits

DavidSpickett wrote:

> This adds a check that all ExtensionWithMArch which are marked as implied 
> features for an architecture are also present in the list of default features.

And do I understand correctly that though this PR is titled "Add a check", it 
also fixes instances that the check discovered? So the backport is primarily to 
include those fixes rather than the check itself.

https://github.com/llvm/llvm-project/pull/104752
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [AArch64] Add a check for invalid default features (PR #104752)

2024-08-19 Thread Tomas Matheson via llvm-branch-commits

tmatheson-arm wrote:

Yes both of those are correct.

https://github.com/llvm/llvm-project/pull/104752
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [AArch64] Add a check for invalid default features (PR #104752)

2024-08-19 Thread Tomas Matheson via llvm-branch-commits

https://github.com/tmatheson-arm edited 
https://github.com/llvm/llvm-project/pull/104752
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [AArch64] Fix a bug where user could not disable certain architecture features (PR #104752)

2024-08-19 Thread Tomas Matheson via llvm-branch-commits

https://github.com/tmatheson-arm edited 
https://github.com/llvm/llvm-project/pull/104752
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang][driver][clang-cl] Fix unused argument warning for `/std:c++20` for precompiled module inputs to `clang-cl` (PR #102438)

2024-08-19 Thread Sharadh Rajaraman via llvm-branch-commits

https://github.com/sharadhr updated 
https://github.com/llvm/llvm-project/pull/102438

>From 909706ce5474c40eeb6355233c891cd0fd335347 Mon Sep 17 00:00:00 2001
From: Sharadh Rajaraman 
Date: Mon, 19 Aug 2024 12:17:58 +0100
Subject: [PATCH] [clang][driver] `TY_ModuleFile` should be a 'CXX' file type

---
 clang/lib/Driver/Types.cpp  | 4 +++-
 clang/test/Driver/cl-cxx20-modules.cppm | 8 
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Driver/Types.cpp b/clang/lib/Driver/Types.cpp
index a7b6b9000e1d2b..2b9b391c19c9fd 100644
--- a/clang/lib/Driver/Types.cpp
+++ b/clang/lib/Driver/Types.cpp
@@ -242,7 +242,9 @@ bool types::isCXX(ID Id) {
   case TY_CXXHUHeader:
   case TY_PP_CXXHeaderUnit:
   case TY_ObjCXXHeader: case TY_PP_ObjCXXHeader:
-  case TY_CXXModule: case TY_PP_CXXModule:
+  case TY_CXXModule:
+  case TY_PP_CXXModule:
+  case TY_ModuleFile:
   case TY_PP_CLCXX:
   case TY_CUDA: case TY_PP_CUDA: case TY_CUDA_DEVICE:
   case TY_HIP:
diff --git a/clang/test/Driver/cl-cxx20-modules.cppm 
b/clang/test/Driver/cl-cxx20-modules.cppm
index 06df929c42342f..43dbf517485a05 100644
--- a/clang/test/Driver/cl-cxx20-modules.cppm
+++ b/clang/test/Driver/cl-cxx20-modules.cppm
@@ -1,3 +1,6 @@
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+
 // RUN: %clang_cl /std:c++20 --precompile -### -- %s 2>&1 | FileCheck 
--check-prefix=PRECOMPILE %s
 // PRECOMPILE: -emit-module-interface
 
@@ -6,3 +9,8 @@
 
 // RUN: %clang_cl /std:c++20 --fprebuilt-module-path=. -### -- %s 2>&1 | 
FileCheck --check-prefix=FPREBUILT %s
 // FPREBUILT: -fprebuilt-module-path=.
+
+// RUN: %clang_cl %t/test.pcm /std:c++20 -### 2>&1 | FileCheck 
--check-prefix=CPP20WARNING %t/test.pcm
+
+//--- test.pcm
+// CPP20WARNING-NOT: clang-cl: warning: argument unused during compilation: 
'/std:c++20' [-Wunused-command-line-argument]

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang][driver][clang-cl] Fix unused argument warning for `/std:c++20` for precompiled module inputs to `clang-cl` (PR #102438)

2024-08-19 Thread Sharadh Rajaraman via llvm-branch-commits

sharadhr wrote:

@tru, thanks for the feedback; I hope this is better. 

https://github.com/llvm/llvm-project/pull/102438
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang][driver][clang-cl] Fix unused argument warning for `/std:c++20` for precompiled module inputs to `clang-cl` (PR #102438)

2024-08-19 Thread Tobias Hieta via llvm-branch-commits

tru wrote:

Looks much better. I'll merge it later. 

https://github.com/llvm/llvm-project/pull/102438
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [AArch64] Fix a bug where user could not disable certain architecture features (PR #104752)

2024-08-19 Thread Tomas Matheson via llvm-branch-commits

https://github.com/tmatheson-arm updated 
https://github.com/llvm/llvm-project/pull/104752

>From b523150d05242d9e00dc2dcf1694a1cf7dde088f Mon Sep 17 00:00:00 2001
From: Tomas Matheson 
Date: Sat, 17 Aug 2024 13:36:40 +0100
Subject: [PATCH 1/2] [AArch64] Add a check for invalid default features
 (#104435)

This adds a check that all ExtensionWithMArch which are marked as
implied features for an architecture are also present in the list of
default features. It doesn't make sense to have something mandatory but
not on by default.

There were a number of existing cases that violated this rule, and some
changes to which features are mandatory (indicated by the Implies
field).

This resulted in a bug where if a feature was marked as `Implies` but
was not added to `DefaultExt`, then for `-march=base_arch+nofeat` the
Driver would consider `feat` to have never been added and therefore
would do nothing to disable it (no `-target-feature -feat` would be
added, but the backend would enable the feature by default because of
`Implies`). See
clang/test/Driver/aarch64-negative-modifiers-for-default-features.c.

Note that the processor definitions do not respect the architecture
DefaultExts. These apply only when specifying `-march=<some architecture version>`. So when a feature is moved from `Implies` to `DefaultExts` on
the Architecture definition, the feature needs to be added to all
processor definitions (that are based on that architecture) in order to
preserve the existing behaviour. I have checked the TRMs for many cases
(see specific commit messages) but in other cases I have just kept the
current behaviour and not tried to fix it.
---
 clang/test/CodeGen/aarch64-targetattr.c   | 12 +--
 ...-negative-modifiers-for-default-features.c | 12 +++
 clang/test/Driver/arm-sb.c|  2 +-
 .../aarch64-apple-a12.c   |  1 -
 .../aarch64-apple-a13.c   |  1 -
 .../aarch64-apple-a14.c   |  1 -
 .../aarch64-apple-a15.c   |  1 -
 .../aarch64-apple-a16.c   |  1 -
 .../aarch64-apple-a17.c   |  1 -
 .../aarch64-apple-m4.c|  2 -
 .../aarch64-cortex-r82.c  |  1 -
 .../aarch64-cortex-r82ae.c|  1 -
 llvm/lib/Target/AArch64/AArch64Features.td| 19 ++--
 llvm/lib/Target/AArch64/AArch64Processors.td  | 46 +++--
 llvm/test/MC/AArch64/arm64-system-encoding.s  |  2 +-
 llvm/test/MC/AArch64/armv8.5a-ssbs-error.s|  2 +-
 llvm/test/MC/AArch64/armv8.5a-ssbs.s  |  2 +-
 .../MC/Disassembler/AArch64/armv8.5a-ssbs.txt |  2 +-
 .../AArch64/basic-a64-instructions.txt|  2 +-
 .../TargetParser/TargetParserTest.cpp | 97 +++
 llvm/utils/TableGen/ARMTargetDefEmitter.cpp   | 32 +-
 21 files changed, 156 insertions(+), 84 deletions(-)
 create mode 100644 
clang/test/Driver/aarch64-negative-modifiers-for-default-features.c

diff --git a/clang/test/CodeGen/aarch64-targetattr.c 
b/clang/test/CodeGen/aarch64-targetattr.c
index 4f891f938b6186..d6227be2ebef83 100644
--- a/clang/test/CodeGen/aarch64-targetattr.c
+++ b/clang/test/CodeGen/aarch64-targetattr.c
@@ -195,19 +195,19 @@ void minusarch() {}
 // CHECK: attributes #[[ATTR0]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+crc,+fp-armv8,+lse,+neon,+ras,+rdm,+v8.1a,+v8.2a,+v8a" }
 // CHECK: attributes #[[ATTR1]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+v8.1a,+v8.2a,+v8a"
 }
 // CHECK: attributes #[[ATTR2]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+sve2,+v8.1a,+v8.2a,+v8a"
 }
-// CHECK: attributes #[[ATTR3]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+bf16,+complxnum,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+ras,+rcpc,+rdm,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a"
 }
-// CHECK: attributes #[[ATTR4]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-cpu"="cortex-a710" 
"target-features"="+bf16,+complxnum,+crc,+dotprod,+ete,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+mte,+neon,+pauth,+perfmon,+ras,+rcpc,+rdm,+sb,+sve,+sve2,+sve2-bitperm,+trbe,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+v9a"
 }
+// CHECK: attributes #[[ATTR3]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+predres,+ras,+rcpc,+rdm,+sb,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a"
 }
+// CHECK: attributes #[[ATTR4]] = { noinline nounwind optnone 
"no-

[llvm-branch-commits] [llvm] release/19.x: [AArch64] Add streaming-mode stack hazard optimization remarks (#101695) (PR #102168)

2024-08-19 Thread David Green via llvm-branch-commits

davemgreen wrote:

Thanks @tru 

https://github.com/llvm/llvm-project/pull/102168
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [AArch64] Fix a bug where user could not disable certain architecture features (PR #104752)

2024-08-19 Thread Tomas Matheson via llvm-branch-commits

https://github.com/tmatheson-arm updated 
https://github.com/llvm/llvm-project/pull/104752

>From b523150d05242d9e00dc2dcf1694a1cf7dde088f Mon Sep 17 00:00:00 2001
From: Tomas Matheson 
Date: Sat, 17 Aug 2024 13:36:40 +0100
Subject: [PATCH 1/3] [AArch64] Add a check for invalid default features
 (#104435)

This adds a check that all ExtensionWithMArch which are marked as
implied features for an architecture are also present in the list of
default features. It doesn't make sense to have something mandatory but
not on by default.

There were a number of existing cases that violated this rule, and some
changes to which features are mandatory (indicated by the Implies
field).

This resulted in a bug where if a feature was marked as `Implies` but
was not added to `DefaultExt`, then for `-march=base_arch+nofeat` the
Driver would consider `feat` to have never been added and therefore
would do nothing to disable it (no `-target-feature -feat` would be
added, but the backend would enable the feature by default because of
`Implies`). See
clang/test/Driver/aarch64-negative-modifiers-for-default-features.c.

Note that the processor definitions do not respect the architecture
DefaultExts. These apply only when specifying `-march=<some architecture version>`. So when a feature is moved from `Implies` to `DefaultExts` on
the Architecture definition, the feature needs to be added to all
processor definitions (that are based on that architecture) in order to
preserve the existing behaviour. I have checked the TRMs for many cases
(see specific commit messages) but in other cases I have just kept the
current behaviour and not tried to fix it.
---
 clang/test/CodeGen/aarch64-targetattr.c   | 12 +--
 ...-negative-modifiers-for-default-features.c | 12 +++
 clang/test/Driver/arm-sb.c|  2 +-
 .../aarch64-apple-a12.c   |  1 -
 .../aarch64-apple-a13.c   |  1 -
 .../aarch64-apple-a14.c   |  1 -
 .../aarch64-apple-a15.c   |  1 -
 .../aarch64-apple-a16.c   |  1 -
 .../aarch64-apple-a17.c   |  1 -
 .../aarch64-apple-m4.c|  2 -
 .../aarch64-cortex-r82.c  |  1 -
 .../aarch64-cortex-r82ae.c|  1 -
 llvm/lib/Target/AArch64/AArch64Features.td| 19 ++--
 llvm/lib/Target/AArch64/AArch64Processors.td  | 46 +++--
 llvm/test/MC/AArch64/arm64-system-encoding.s  |  2 +-
 llvm/test/MC/AArch64/armv8.5a-ssbs-error.s|  2 +-
 llvm/test/MC/AArch64/armv8.5a-ssbs.s  |  2 +-
 .../MC/Disassembler/AArch64/armv8.5a-ssbs.txt |  2 +-
 .../AArch64/basic-a64-instructions.txt|  2 +-
 .../TargetParser/TargetParserTest.cpp | 97 +++
 llvm/utils/TableGen/ARMTargetDefEmitter.cpp   | 32 +-
 21 files changed, 156 insertions(+), 84 deletions(-)
 create mode 100644 
clang/test/Driver/aarch64-negative-modifiers-for-default-features.c

diff --git a/clang/test/CodeGen/aarch64-targetattr.c 
b/clang/test/CodeGen/aarch64-targetattr.c
index 4f891f938b6186..d6227be2ebef83 100644
--- a/clang/test/CodeGen/aarch64-targetattr.c
+++ b/clang/test/CodeGen/aarch64-targetattr.c
@@ -195,19 +195,19 @@ void minusarch() {}
 // CHECK: attributes #[[ATTR0]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+crc,+fp-armv8,+lse,+neon,+ras,+rdm,+v8.1a,+v8.2a,+v8a" }
 // CHECK: attributes #[[ATTR1]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+v8.1a,+v8.2a,+v8a"
 }
 // CHECK: attributes #[[ATTR2]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+sve2,+v8.1a,+v8.2a,+v8a"
 }
-// CHECK: attributes #[[ATTR3]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+bf16,+complxnum,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+ras,+rcpc,+rdm,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a"
 }
-// CHECK: attributes #[[ATTR4]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-cpu"="cortex-a710" 
"target-features"="+bf16,+complxnum,+crc,+dotprod,+ete,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+mte,+neon,+pauth,+perfmon,+ras,+rcpc,+rdm,+sb,+sve,+sve2,+sve2-bitperm,+trbe,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+v9a"
 }
+// CHECK: attributes #[[ATTR3]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+predres,+ras,+rcpc,+rdm,+sb,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a"
 }
+// CHECK: attributes #[[ATTR4]] = { noinline nounwind optnone 
"no-

[llvm-branch-commits] [openmp] [OpenMP][AArch64] Fix branch protection in microtasks (#102317) (PR #103491)

2024-08-19 Thread Tulio Magno Quites Machado Filho via llvm-branch-commits

tuliom wrote:

> @tuliom do you have usecase for this?

My usecase is indeed having Fedora and CentOS/RHEL to be fully protected 
against JOP attacks.
The lack of BTI support causes 
[annocheck](https://sourceware.org/annobin/annobin.html/Test-dynamic-tags.html) 
to report this issue, e.g. https://issues.redhat.com/browse/RHEL-50807

https://github.com/llvm/llvm-project/pull/103491
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [openmp] [OpenMP][AArch64] Fix branch protection in microtasks (#102317) (PR #103491)

2024-08-19 Thread Tulio Magno Quites Machado Filho via llvm-branch-commits

tuliom wrote:

> But it sounds like this will only affect applications that are linking to 
> openmp?

Correct.

> Do you see any other risks of accepting this now?

IMHO, no. But if you prefer to delay this to 19.1.1, that looks good to me.

https://github.com/llvm/llvm-project/pull/103491
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][R600] Move R600CodeGenPassBuilder into R600TargetMachine(NFC). (PR #103721)

2024-08-19 Thread Christudasan Devadasan via llvm-branch-commits

https://github.com/cdevadas updated 
https://github.com/llvm/llvm-project/pull/103721

>From f2095f23eaa5c3876bf7f8d5706881e404c5aa1b Mon Sep 17 00:00:00 2001
From: Christudasan Devadasan 
Date: Wed, 14 Aug 2024 14:18:59 +0530
Subject: [PATCH 1/3] [AMDGPU][R600] Move R600TargetMachine into
 R600CodeGenPassBuilder(NFC).

---
 llvm/lib/Target/AMDGPU/CMakeLists.txt |   1 -
 .../Target/AMDGPU/R600CodeGenPassBuilder.cpp  | 149 -
 .../Target/AMDGPU/R600CodeGenPassBuilder.h|  38 -
 llvm/lib/Target/AMDGPU/R600ISelLowering.cpp   |   3 +-
 llvm/lib/Target/AMDGPU/R600TargetMachine.cpp  | 154 --
 5 files changed, 186 insertions(+), 159 deletions(-)
 delete mode 100644 llvm/lib/Target/AMDGPU/R600TargetMachine.cpp

diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt 
b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index f493076f5bb8a3..16186f1f1bbed0 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -137,7 +137,6 @@ add_llvm_target(AMDGPUCodeGen
   R600Packetizer.cpp
   R600RegisterInfo.cpp
   R600Subtarget.cpp
-  R600TargetMachine.cpp
   R600TargetTransformInfo.cpp
   SIAnnotateControlFlow.cpp
   SIFixSGPRCopies.cpp
diff --git a/llvm/lib/Target/AMDGPU/R600CodeGenPassBuilder.cpp 
b/llvm/lib/Target/AMDGPU/R600CodeGenPassBuilder.cpp
index a57b3aa0adb158..1b182e17add9c0 100644
--- a/llvm/lib/Target/AMDGPU/R600CodeGenPassBuilder.cpp
+++ b/llvm/lib/Target/AMDGPU/R600CodeGenPassBuilder.cpp
@@ -5,12 +5,159 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 
//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file contains both AMDGPU-R600 target machine and the CodeGen pass
+/// builder. The target machine contains all of the hardware specific
+/// information needed to emit code for R600 GPUs and the CodeGen pass builder
+/// handles the same for new pass manager infrastructure.
+//
+//===----------------------------------------------------------------------===//
 
 #include "R600CodeGenPassBuilder.h"
-#include "R600TargetMachine.h"
+#include "R600.h"
+#include "R600MachineScheduler.h"
+#include "R600TargetTransformInfo.h"
+#include "llvm/Transforms/Scalar.h"
+#include <optional>
 
 using namespace llvm;
 
+static cl::opt<bool>
+EnableR600StructurizeCFG("r600-ir-structurize",
+ cl::desc("Use StructurizeCFG IR pass"),
+ cl::init(true));
+
+static cl::opt<bool> EnableR600IfConvert("r600-if-convert",
+ cl::desc("Use if conversion pass"),
+ cl::ReallyHidden, cl::init(true));
+
+static cl::opt<bool, true> EnableAMDGPUFunctionCallsOpt(
+"amdgpu-function-calls", cl::desc("Enable AMDGPU function call support"),
+cl::location(AMDGPUTargetMachine::EnableFunctionCalls), cl::init(true),
+cl::Hidden);
+
+static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
+  return new ScheduleDAGMILive(C, std::make_unique<R600SchedStrategy>());
+}
+
+static MachineSchedRegistry R600SchedRegistry("r600",
+  "Run R600's custom scheduler",
+  createR600MachineScheduler);
+
+//===----------------------------------------------------------------------===//
+// R600 Target Machine (R600 -> Cayman) - Legacy Pass Manager interface.
+//===----------------------------------------------------------------------===//
+
+R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ std::optional<Reloc::Model> RM,
+ std::optional<CodeModel::Model> CM,
+ CodeGenOptLevel OL, bool JIT)
+: AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
+  setRequiresStructuredCFG(true);
+
+  // Override the default since calls aren't supported for r600.
+  if (EnableFunctionCalls &&
+  EnableAMDGPUFunctionCallsOpt.getNumOccurrences() == 0)
+EnableFunctionCalls = false;
+}
+
+const TargetSubtargetInfo *
+R600TargetMachine::getSubtargetImpl(const Function &F) const {
+  StringRef GPU = getGPUName(F);
+  StringRef FS = getFeatureString(F);
+
+  SmallString<128> SubtargetKey(GPU);
+  SubtargetKey.append(FS);
+
+  auto &I = SubtargetMap[SubtargetKey];
+  if (!I) {
+// This needs to be done before we create a new subtarget since any
+// creation will depend on the TM and the code generation flags on the
+// function that reside in TargetOptions.
+resetTargetOptions(F);
+I = std::make_unique<R600Subtarget>(TargetTriple, GPU, FS, *this);
+  }
+
+  return I.get();
+}
+
+TargetTransformInfo
+R600TargetMachine::getTargetTransformInfo(const Function &F) const {
+  return TargetTransformInfo(R600TTIImpl(this, F));
+}
+
+namespace {
+class R600PassConfig final : public AMDGPUPassConfig {
+pu

  1   2   >